diff --git a/.gitmodules b/.gitmodules index a024019b14..9eb6c53c34 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,6 +10,9 @@ [submodule "third_party/protobuf"] path = third_party/protobuf url = https://github.com/protocolbuffers/protobuf.git +[submodule "akg"] + path = akg + url = https://gitee.com/mindspore/akg.git [submodule "graphengine"] path = graphengine url = https://gitee.com/ms-incubator/graphengine.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b69c510d5..37c3288f12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ endif () include(${CMAKE_SOURCE_DIR}/cmake/options.cmake) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/modules/") -if (ENABLE_GE) +if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows") add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0) endif () @@ -86,10 +86,18 @@ if (ENABLE_GE OR ENABLE_D OR ENABLE_TESTCASES) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc/toolchain) endif() +if (ENABLE_AKG AND ENABLE_D) + add_subdirectory("${CMAKE_SOURCE_DIR}/akg") +endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") add_subdirectory(mindspore/ccsrc) if (ENABLE_TESTCASES) add_subdirectory(tests) endif() -include(cmake/package.cmake) \ No newline at end of file +if (ENABLE_SERVING) + add_subdirectory(serving) +endif() + +include(cmake/package.cmake) diff --git a/RELEASE.md b/RELEASE.md index f919bd7a2f..9824f803f0 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -7,6 +7,7 @@ * DeepFM: a factorization-machine based neural network for CTR prediction on Criteo dataset. * DeepLabV3: significantly improves over our previous DeepLab versions without DenseCRF post-processing and attains comparable performance with other state-of-art models on the PASCAL VOC 2007 semantic image segmentation benchmark. * Faster-RCNN: towards real-time object detection with region proposal networks on COCO 2017 dataset. + * SSD: a single stage object detection method on COCO 2017 dataset. 
* GoogLeNet: a deep convolutional neural network architecture codenamed Inception V1 for classification and detection on CIFAR-10 dataset. * Wide&Deep: jointly trained wide linear models and deep neural networks for recommender systems on Criteo dataset. * Frontend and User Interface @@ -62,7 +63,7 @@ ## Contributors Thanks goes to these wonderful people: -Alexey Shevlyakov, Amir Lashkari, anthony, baihuawei, biffex, buxue, caifubi, candanzg, caojian05, Cathy Wong, changzherui, chenfei, chengxianbin, chenhaozhe, chenzomi, chujinjin, cristoval, dengwentao, eric, etone-chan, fary86, gaojing, gengdongjie, gongchen, guohongzilong, guozhijian, heleiwang, hesham, He Wei, Hoai Linh Tran h00472437, hongxing, huangdongrun, huanghui, Jamie Nisbet, Jesse Lee, jiangjinsheng, jiangzhiwen, jinyaohui, jjfeing, jonwe, jonyguo, Junhan Hu, Kang, kingfo, kswang, laiyongqiang, leopz, lichenever, lihongkang, limingqi107, liubuyu, liuliyan2, liuwenhao4, liuxiao, liuxiao, liyong, lizhenyu, lvliang, Margaret_wangrui, meixiaowei, ms_yan, Nat Sutyanyong, ougongchang, panfengfeng, panyifeng, Peilin Wang, peixu_ren, qianlong, rick_sanchez, seatea, sheng, shijianning, simson, sunsuodong, Tinazhang, VectorSL, wandongdong, wangcong, wanghua, wangnan39, Wei Luning, wenchunjiang, wilfChen, WilliamLian, wsc, wukesong, wuxuejian, Xiaoda Zhang, xiefangqi, xulei2020, Yang, yangjie159, yangruoqi713, yangyongjie, yangzhenzhang, Yanjun Peng, yanzhenxiang2020, yao_yf, Yi Huaijie, yoonlee666, yujianfeng, YuJianfeng, yvetteliu, z00478463, zhangdengcheng, Zhang Qinghua, zhangz0911gm, zhaojichen, zhaoting, zhaozhenlong, zhoufeng, zhouneng, zhousiyi, zhouyuanshen, Zirui Wu, Ziyan, zjun, ZPaC, lihongzhang +Alexey Shevlyakov, Amir Lashkari, anthony, baihuawei, biffex, buxue, caifubi, candanzg, caojian05, Cathy Wong, changzherui, chenfei, chengxianbin, chenhaozhe, chenzomi, chujinjin, cristoval, dengwentao, eric, etone-chan, fary86, gaojing, gengdongjie, gongchen, guohongzilong, guozhijian, heleiwang, hesham, He 
Wei, Hoai Linh Tran, hongxing, huangdongrun, huanghui, Jamie Nisbet, Jesse Lee, jiangjinsheng, jiangzhiwen, jinyaohui, jjfeing, jonwe, jonyguo, Junhan Hu, Kang, kingfo, kswang, laiyongqiang, leopz, lichenever, lihongkang, limingqi107, liubuyu, liuliyan2, liuwenhao4, liuxiao, liuxiao, liyong, lizhenyu, lvliang, Margaret_wangrui, meixiaowei, ms_yan, Nat Sutyanyong, ougongchang, panfengfeng, panyifeng, Peilin Wang, peixu_ren, qianlong, rick_sanchez, seatea, sheng, shijianning, simson, sunsuodong, Tinazhang, VectorSL, wandongdong, wangcong, wanghua, wangnan39, Wei Luning, wenchunjiang, wilfChen, WilliamLian, wsc, wukesong, wuxuejian, Xiaoda Zhang, xiefangqi, xulei2020, Yang, yangjie159, yangruoqi713, yangyongjie, yangzhenzhang, Yanjun Peng, yanzhenxiang2020, yao_yf, Yi Huaijie, yoonlee666, yujianfeng, YuJianfeng, yvetteliu, zhangdengcheng, Zhang Qinghua, zhangz0911gm, zhaojichen, zhaoting, zhaozhenlong, zhoufeng, zhouneng, zhousiyi, zhouyuanshen, Zirui Wu, Ziyan, zjun, ZPaC, lihongzhang Contributions of any kind are welcome! diff --git a/Third_Party_Open_Source_Software_Notice b/Third_Party_Open_Source_Software_Notice index 3a5c5403eb..3c29cb49e8 100644 --- a/Third_Party_Open_Source_Software_Notice +++ b/Third_Party_Open_Source_Software_Notice @@ -2245,14 +2245,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Please also refer to the file CONTRIBUTING.md, which clarifies licensing of external contributions to this project including patches, pull requests, etc. -Software: SQLite 3.31.1 +Software: SQLite 3.32.2 Copyright notice: -Copyright 2008 D. Richard Hipp and Hipp, Wyrick & Company, Inc. -Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2007 2008 Free Software Foundation, Inc. -(c) The page number is greater than the largest page that existed in Copyright (c) 1991-2011 Unicode, Inc. +Copyright 2008 D. Richard Hipp and Hipp, Wyrick & Company, Inc. Copyright (c) 2002 by David Gravereaux. 
Copyright (c) 2006 by Pat Thoyts +(c) The page number is greater than the largest page that existed in +Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2007 2008 Free Software Foundation, Inc. License: Public Domain Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. @@ -3053,6 +3053,646 @@ Copyright 2003 Google Inc. Copyright 2009 Google Inc. Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All +Software: tinyxml2 8.0.0 +Copyright 2011, John Resig. +Copyright 2011, The Dojo Foundation. + +Software: icu 67.1 +Copyright (C) 2000-2004, International Business Machines Corporation +Copyright (C) 2002-2014, International Business Machines(C) Copyright IBM Corp. 1998-2011 - All Rights Reserved +Copyright (C) 2003-2008, International Business Machines +Copyright (C) 2005-2006, International Business Machines +Copyright (C) 2016 and later: Unicode, Inc. and others. +Copyright (c) 2001-2010 International Business Machines +Copyright (C) 2009, International Business Machines +Copyright (c) 2010-2015 International Business Machines Corporation and others. All rights reserved. +Copyright (C) 2002-2015, International Business Machines verbatim (minus copyright and #include) and copied together into this file. +Copyright (c) 1997-2014, International Business Machines Corporation and others. All Rights Reserved. 
+Copyright (c) 1997-2008, International Business Machines Corporation and +Copyright (c) 1997-2003, International Business Machines Corporation and +Copyright (c) 1996-2012, International Business Machines Corporation and +Copyright (c) 1997-2016, International Business Machines +Copyright (c) 1997-2013 International Business Machines +Copyright (c) 1997-2016, International Business Machines Corporation and +Copyright (c) 1997-2001, International Business Machines Corporation and +Copyright (c) 1997-2012, International Business Machines Corporation and +Copyright (c) 1997-2005, International Business Machines Corporation and +Copyright (c) 1997-2010, International Business Machines Corporation and +Copyright (c) 2011-2016, International Business Machines Corporation +Copyright (c) 1997-2009, International Business Machines Corporation and +Copyright (c) 1997-2002,2008, International Business Machines Corporation and +Copyright (c) 1997-2009,2014, International Business Machines +Copyright (C) 2000-2009, International Business Machines +Copyright (c) 1997-2015, International Business Machines Corporation and +Copyright (c) 1997-2013, International Business Machines Corporation and +Copyright (c) 2001-2016, International Business Machines Corporation and +Copyright (c) 1997-2016, International Business Machines Corporation +Copyright (c) 1997-2003, 2007-2009 International Business Machines Corporation and +Copyright (c) 2011-2014, International Business Machines Corporation +Copyright (c) 2003-2009, International Business Machines +Copyright (c) 2016, International Business Machines Corporation +Copyright (c) 1997-2004, International Business Machines Corporation and +Copyright (C) 2002-2016, International Business Machines +Copyright (C) 1998-2014, International Business Machines Corporation +Copyright (c) 2003-2013, International Business Machines Corporation and +Copyright (c) 2005-2016, International Business Machines Corporation and +Copyright (c) 1999-2013, 
International Business Machines Corporation and +Copyright (c) 2003-2015, International Business Machines Corporation and +Copyright (C) 2003-2016, International Business Machines +Copyright (C) 2003-2014, International Business Machines +Copyright (C) 2003, International Business Machines +Copyright (c) 1998-2016, International Business Machines Corporation and +Copyright (c) 2004-2015, International Business Machines Corporation and +Copyright (c) 2009-2016, International Business Machines Corporation and +Copyright (C) 2003-2012, International Business Machines +Copyright (c) 2000-2016, International Business Machines Corporation and +Copyright (C) 2001-2014, International Business Machines +Copyright (C) 2001-2016, International Business Machines +Copyright (c) 1997-2014, International Business Machines © 2017 and later: Unicode, Inc. and others. +Copyright (C) 2007-2016, International Business Machines © 2018 and later: Unicode, Inc. and others. +Copyright (c) 2015, International Business Machines Corporation +Copyright (c) 2014-2016, International Business Machines Corporation +Copyright (c) 2002-2016, International Business Machines +Copyright (c) 2001-2011,2015 International Business Machines +Copyright (c) 2001-2016 International Business Machines +Copyright (c) 2005-2013, International Business Machines Corporation and +Copyright (c) 1998-2014, International Business Machines Corporation and +Copyright (C) 1997-2016 International Business Machines +Copyright (C) 2009-2014, International Business Machines Corporation and +Copyright (c) 2002-2014, International Business Machines Corporation +Copyright (c) 2002-2007, International Business Machines Corporation +Copyright (C) 1996-2012, International Business Machines Corporation +Copyright (C) 1996-2008, International Business Machines Corporation +Copyright (C) 2007-2013, International Business Machines Corporation and +Copyright (C) 2008-2015, International Business Machines +Copyright (C) 2003-2013, 
International Business Machines Corporation and +Copyright (C) 2003-2013, International Business Machines Corporation +Copyright (C) 1997-2016, International Business Machines Corporation and +Copyright (C) 2001-2011, International Business Machines +Copyright (C) 2001-2008, International Business Machines +Copyright (C) 2003 - 2009, International Business Machines Corporation and +Copyright (C) 2003 - 2008, International Business Machines Corporation and +Copyright (C) 2007-2014, International Business Machines Corporation +Copyright (C) 2007-2013, International Business Machines Corporation +Copyright (C) 1997-2013, International Business Machines Corporation and +Copyright (C) 1996-2014, International Business Machines Corporation and +Copyright (C) 2010-2014, International Business Machines +Copyright (C) 2010-2015, International Business Machines +Copyright (C) 2013-2014, International Business Machines +Copyright (C) 1996-2015, International Business Machines +Copyright (C) 1996-2014, International Business Machines +Copyright (C) 2012-2015, International Business Machines +Copyright (C) 2012-2014, International Business Machines +Copyright (C) 2013-2015, International Business Machines +Copyright (C) 2013-2016, International Business Machines +Copyright (C) 1999-2016, International Business Machines +Copyright (C) 1999-2015, International Business Machines +Copyright (C) 1999-2014, International Business Machines +Copyright (C) 2015-2016, International Business Machines Corporation and others. 
+Copyright (C) 2003 - 2013, International Business Machines Corporation and +Copyright (C) 1999-2011, International Business Machines +Copyright (C) 2005-2016, International Business Machines +Copyright (C) 2005-2012, International Business Machines +Copyright (C) 2005-2015, International Business Machines +Copyright (C) 2005-2013, International Business Machines +Copyright (C) 2005-2014, International Business Machines +Copyright (c) 2004, International Business Machines +Copyright (c) 2004-2014 International Business Machines +Copyright (c) 2004-2014, International Business Machines +Copyright (C) 2013, International Business Machines Corporation +Copyright (C) 1997-2015, International Business Machines Corporation and +Copyright (C) 2016, International Business Machines +Copyright (c) IBM Corporation, 2000-2012. All rights reserved. +Copyright (c) IBM Corporation, 2000-2011. All rights reserved. +Copyright (c) IBM Corporation, 2000-2014. All rights reserved. +Copyright (c) IBM Corporation, 2000-2010. All rights reserved. +Copyright (c) IBM Corporation, 2000-2016. All rights reserved. +Copyright 2010 the V8 project authors. All rights reserved. +Copyright 2006-2008 the V8 project authors. All rights reserved. +Copyright 2012 the V8 project authors. All rights reserved. 
+Copyright (C) 2008-2016, International Business Machines Corporation and +Copyright (C) 2007-2016, International Business Machines Corporation and +Copyright (C) 2007-2012, International Business Machines Corporation and +Copyright (c) 2001-2011, International Business Machines +Copyright (c) 2001-2007, International Business Machines +Copyright (C) 2010-2014, International Business Machines Corporation and +Copyright (C) 1997-2010, International Business Machines Corporation and +Copyright (C) 1997-2012, International Business Machines Corporation and +Copyright (C) 2009-2015, International Business Machines Corporation and +Copyright (C) 2009-2012, International Business Machines Corporation and +Copyright (c) 2002-2012, International Business Machines Corporation +Copyright (c) 2002-2011, International Business Machines Corporation +Copyright (C) 2008-2013, International Business Machines Corporation and +Copyright (c) 2003-2008, International Business Machines +Copyright (C) 2003-2016, International Business Machines Corporation +Copyright (C) 2003-2014, International Business Machines Corporation +Copyright (C) 2003-2008, International Business Machines Corporation +Copyright (C) 2005-2008, International Business Machines +Copyright (C) 2003-2015, International Business Machines Corporation +Copyright (C) 2003-2009,2012,2016 International Business Machines Corporation and +Copyright (c) 2004-2016, International Business Machines © 2020 and later: Unicode, Inc. and others. +Copyright (C) 2007-2008, International Business Machines Corporation and +Copyright (C) 2001-2007, International Business Machines +Copyright (C) 1997-2012, International Business Machines +Copyright (C) 1997-2015, International Business Machines +Copyright (C) 2001-2010, International Business Machines +Copyright (c) 2000-2005, International Business Machines +Copyright (c) 2000-2007, International Business Machines © 2019 and later: Unicode, Inc. and others. 
+Copyright (C) 2010-2015, International Business Machines Corporation and +Copyright (C) 2015, International Business Machines Corporation and +Copyright (c) 2003-2013, International Business Machines +Copyright (C) 2001-2012, International Business Machines +Copyright (C) 2001-2011, International Business Machines Corporation +Copyright (C) 2014-2016, International Business Machines +Copyright (C) 1997-2015, International Business Machines Corporation +Copyright (C) 1999-2007, International Business Machines +Copyright (C) 1999-2007, International Business Machines Corporation +Copyright (C) 1999-2011, International Business Machines Corporation +Copyright (C) {1999-2001}, International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 2002-2016 International Business Machines Corporation and others. +Copyright (C) 2002-2016, International Business Machines Corporation and others. +Copyright (C) 2002-2016 International Business Machines Corporation +Copyright (C) 2002-2015, International Business Machines Corporation and others. +Copyright (C) 2012 International Business Machines Corporation +Copyright (C) 2002-2015 International Business Machines Corporation +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright (C) 2003-2010, International Business Machines Corporation and others. +Copyright (c) 2008-2011, International Business Machines Corporation and +Copyright (c) 2008-2010, International Business Machines Corporation and +Copyright (C) 2014-2016, International Business Machines Corporation and +Copyright (C) 2013, International Business Machines Corporation and +Copyright (c) 2014, International Business Machines +Copyright (C) 2014, International Business Machines +Copyright (C) 2013, International Business Machines +Copyright (C) 2001-2008,2010 IBM and others. All rights reserved. +Copyright (C) 2010 , Yahoo! Inc. 
+Copyright (c) 1997-2011, International Business Machines Corporation and +Copyright (C) 2013-2014, International Business Machines Corporation and +Copyright (C) 2009-2013, International Business Machines Corporation and +Copyright (C) 1996-2012, International Business Machines Corporation and +Copyright (C) 2015, International Business Machines Corporation +Copyright (c) 2001-2012, International Business Machines Corporation +Copyright (C) 2001-2014 IBM and others. All rights reserved. +Copyright (C) 2008-2014, Google, International Business Machines Corporation and +Copyright (C) 2008, Google, International Business Machines Corporation and +Copyright (C) 2008-2015, Google, International Business Machines Corporation +Copyright (c) 2001-2014, International Business Machines +Copyright (c) 2002-2010, International Business Machines Corporation +Copyright (C) 2011-2015, International Business Machines Corporation and +Copyright (C) 2011-2016, International Business Machines Corporation and +Copyright (C) 2011-2012, International Business Machines Corporation and +Copyright (C) 1996-2016, International Business Machines +Copyright (C) 1998-2014, International Business Machines +Copyright (C) 2004-2016, International Business Machines +Copyright (C) 2010-2011, International Business Machines +Copyright (C) 2009-2015, International Business Machines +Copyright (C) 2015, International Business Machines +Copyright (C) 2012-2016, International Business Machines +Copyright (C) 1999-2012, International Business Machines +Copyright (C) 2001, International Business Machines +Copyright (C) 2013, International Business Machines Corporation and others. +Copyright (C) 2010-2012, International Business Machines +Copyright (C) 2004-2015, International Business Machines +Copyright (C) 2003-2006, International Business Machines +Copyright (C) 2013-2015, International Business Machines Corporation and others. +Copyright (C) 2001-2015 IBM and others. All rights reserved. 
+Copyright (C) 2008-2015, International Business Machines Corporation +Copyright (C) 2008-2016, International Business Machines +Copyright (C) 2008-2013, International Business Machines Corporation +Copyright (C) 2004-2012, International Business Machines Corporation and +Copyright (C) 1997-2009,2014 International Business Machines +Copyright (C) 2009-2011, International Business Machines Corporation and +Copyright (C) 2009-2016, International Business Machines Corporation and +Copyright (C) 2009-2013, International Business Machines +Copyright (C) 2008-2011, International Business Machines +Copyright (C) 2007-2014, International Business Machines Corporation and +Copyright (C) 2009-2010, International Business Machines Corporation and +Copyright (C) 2001-2016 International Business Machines Corporation +Copyright (c) 2002-2011, International Business Machines +Copyright (C) 2001-2012 IBM, Inc. All Rights Reserved. +Copyright (c) 2013-2016 International Business Machines Corporation and others. All rights reserved. +Copyright (c) 2013-2015 International Business Machines Corporation and others. All rights reserved. 
+Copyright (c) 2007-2012, International Business Machines Corporation and +Copyright (c) 2007-2012, International Business Machines +Copyright (C) 2010, International Business Machines +Copyright (C) 1997-2011, International Business Machines +Copyright (C) 1997-2005, International Business Machines +Copyright (C) 2009-2011, International Business Machines +Copyright (C) 2003-2015, International Business Machines +Copyright (C) 2009-2016, International Business Machines +Copyright (C) 2008-2012, International Business Machines +Copyright (C) 2008, International Business Machines +Copyright (C) 2011-2014, International Business Machines +Copyright (C) 2011-2013, International Business Machines +Copyright (C) 2005, International Business Machines +Copyright (C) 1999-2013, International Business Machines +Copyright (C) 1998-2016, International Business Machines +Copyright (c) 2007-2014, International Business Machines Corporation and +Copyright (C) 2003-2013, International Business Machines +Copyright (c) 2007-2016, International Business Machines Corporation and +Copyright (c) 2008-2015, International Business Machines +Copyright (C) 1999-2010, International Business Machines +Copyright (C) 2000-2015, International Business Machines +Copyright (C) 2000-2011, International Business Machines +Copyright (C) 2000-2012, International Business Machines +Copyright (C) 2000-2010, International Business Machines +Copyright (C) 2004-2010, International Business Machines +Copyright (C) 2004-2005, International Business Machines +Copyright (c) 2013-2014, International Business Machines +Copyright (c) 1991-2013 Unicode, Inc. © 2019 Unicode®, Inc. +Copyright (C) 2018 and later: Unicode, Inc. and others. +Copyright (c) 2008-2013 International Business Machines +Copyright (C) 2002-2010, International Business Machines +Copyright (c) 2012-2015 International Business Machines © 2020 Unicode®, Inc. +Copyright (c) 2005-2013 IBM Corporation and others. 
All rights reserved +Copyright (c) 2011-2012, International Business Machines Corporation and +Copyright (C) 1998-2000, International Business Machines © 2017 Unicode®, Inc. +Copyright (c) 2007-2015 International Business Machines +Copyright (C) 2004-2006, International Business Machines +Copyright (C) 2003-2005, International Business Machines +Copyright (c) 1999-2014 International Business Machines +Copyright (c) 2003, International Business Machines +Copyright (C) 2014 International Business Machines +Copyright (c) 2001-2003 International Business Machines +Copyright (c) 2004-2011 International Business Machines +Copyright (C) 2015-2016, International Business Machines +Copyright (c) 2001-2015 International Business Machines +Copyright (C) 2003-2012, International Business Machines Corporation and COPYRIGHT AND PERMISSION NOTICE +Copyright (c) 2003 National Electronics and Computer Technology Center and others +Copyright (C) 2005-2010, International Business Machines +Copyright (c) 2007-2009 IBM Corporation and others. All rights reserved +Copyright (C) 2004-2016 International Business Machines +Copyright (C) 1998-2013, International Business Machines +Copyright (C) 1998-2010, International Business Machines +Copyright (c) 1999-2004, International Business Machines +Copyright (C) 2002-2006 International Business Machines Corporation +Copyright (C) 1999-2006, International Business Machines +Copyright (C) 2002-2016 IBM, Inc. All Rights Reserved. +Copyright (c) 2002-2006, International Business Machines(C) Copyright IBM Corp. 
1998-2007 - All Rights Reserved +Copyright (C) 1999-2003, International Business Machines +Copyright (C) 1998-2006, International Business Machines Corporation and +Copyright (C) 1998-2003, International Business Machines Corporation and +Copyright (C) 2003 - 2008, International Business Machines +Copyright (C) 1999-2008, International Business Machines +Copyright (C) 1999-2001, International Business Machines +Copyright (C) 1999-2005, International Business Machines +Copyright (C) 2016 and later: Unicode, Inc. and others. +Copyright (c) 2001-2010 IBM Corporation and others. All Rights Reserved. +Copyright (C) 1998-2005, International Business Machines Corporation and +Copyright (C) 1998-2001, International Business Machines Corporation and +Copyright (c) 2002-2005, International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 2000-2014, International Business Machines +Copyright (C) 1996-2013, International Business Machines +Copyright (c) 2002-2006, International Business Machines Corporation and +Copyright (c) 2004-2010, International Business Machines Corporation and +Copyright (C) 2004-2011, International Business Machines +Copyright (c) 2002-2005, International Business Machines Corporation and +Copyright (c) 2002-2014, International Business Machines +Copyright (c) 1997-2012, International Business Machines +Copyright (c) 2002-2008, International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 2011-2013, Apple Inc.; Unicode, Inc.; and others. All Rights Reserved. +Copyright (C) 2011-2013, Apple Inc. and others. All Rights Reserved. +Copyright (c) 2005-2007,2010 Apple Inc., Unicode Inc.,and others. All Rights Reserved. +Copyright (c) 1999-2003, International Business Machines Corporation and +Copyright (c) 2003-2014, International Business Machines +Copyright (c) 2002-2010, International Business Machines Corporation and others. All Rights Reserved. 
+Copyright (c) 1999-2010, International Business Machines Corporation and +Copyright (c) 1999-2002, International Business Machines Corporation and +Copyright (C) 2002-2003, International Business Machines +Copyright (C) 2002, International Business Machines +Copyright (c) 2007, International Business Machines Corporation and +Copyright (C) 2007, International Business Machines +Copyright (C) 2001-2006, International Business Machines +Copyright (C) 2010-2014, International Business Machines Corporation and others. +Copyright (C) 2005-2016, International Business Machines Corporation and +Copyright (C) 2015-2016, International Business Machines Corporation and +Copyright (C) 2008-2012, International Business Machines Corporation +Copyright (c) 2006-2015 International Business Machines Corporation and others. All rights reserved. +Copyright (c) 2014-2015 International Business Machines Corporation and others. All rights reserved. +Copyright (C) 2002-2011, International Business Machines +Copyright (c) 2003-2010, International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 2012 IBM Corporation and Others. All Rights Reserved. +Copyright (C) 1998-2012, International Business Machines Corporation +Copyright (c) 2009, International Business Machines Corporation and +Copyright (C) The Internet Society (2002). All Rights Reserved. +Copyright (c) 2015, International Business Machines Corporation and +Copyright (c) 2002, International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 1998-2016, International Business Machines Corporation +Copyright (c) 2011-2016,International Business Machines +Copyright (C) 2012 International Business Machines Corporation and Others. All Rights Reserved. +Copyright (C) 2011, International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 2011, International Business Machines Corporation and others. All Rights Reserved. 
+Copyright (c) 2011-2012,International Business Machines +Copyright (c) 2007, International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 2007-2007, International Business Machines(C) Copyright IBM Corp. 1998-2014 - All Rights Reserved +Copyright (C) 1998-2002, International Business Machines +Copyright (c) 2001-2007, International Business Machines Corporation and others. All Rights Reserved.(C) Copyright IBM Corp. 1998-2013 - All Rights Reserved +Copyright (C) 1998-2015, International Business Machines +Copyright (C) 2001-2014 International Business Machines +Copyright (C) 2011-2016, International Business Machines +Copyright (C) 2011-2015, International Business Machines +Copyright (c) 1999-2014, International Business Machines Corporation and +Copyright (c) 1999-2009, International Business Machines Corporation and +Copyright (c) 2010,International Business Machines +Copyright (c) 2010-2016,International Business Machines +Copyright (c) 2002-2005, International Business Machines +Copyright (C) 2000-2003, International Business Machines +Copyright (c) 2008-2014, International Business Machines Corporation and +Copyright (C) 2001 - 2005, International Business Machines +Copyright (C) 2001-2005, International Business Machines +Copyright (C) 1995-2014, International Business Machines +Copyright (c) 2000-2004 IBM, Inc. and Others. +Copyright (c) 2002-2014, International Business Machines Corporation and +Copyright (c) 2007-2013, International Business Machines Corporation and +Copyright (c) 2002-2012, International Business Machines Corporation and +Copyright (C) 2002-2012, International Business Machines +Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others. +Copyright (c) 2002, International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 2009-2014, International Business Machines +Copyright (C) 2008, International Business Machines Corporation and others. 
+Copyright (C) 2000-2016, International Business Machines +Copyright (C) 2011-2014 International Business Machines +Copyright (C) 1997-2014, International Business Machines +Copyright (C) 1997-2013, International Business Machines +Copyright (c) 2004-2006, International Business Machines +Copyright (C) 1997-2016, International Business Machines +Copyright (C) 1997-2006, International Business Machines +Copyright (C) 1997-2011, International Business Machines Corporation and others. +Copyright (C) 1997-2013, International Business Machines Corporation and others. +Copyright (c) 2004-2015, International Business Machines +Copyright (C) 2009-2017, International Business Machines Corporation,Google, and others. All Rights Reserved. +Copyright (C) 1997-2016, International Business Machines Corporation and others. +Copyright (C) 2008-2015, International Business Machines Corporation and +Copyright (C) 1997-2015, International Business Machines Corporation and others. +Copyright (C) 2014-2016, International Business Machines Corporation and others. +Copyright (c) 2014-2016, International Business Machines +Copyright (C) 2001-2011 IBM and others. All rights reserved. +Copyright (C) 1996-2014, International Business Machines Corporation and others. +Copyright (C) 1996-2016, International Business Machines Corporation and +Copyright (C) 2009-2016, International Business Machines Corporation, +Copyright (C) 2009-2010, Google, International Business Machines Corporation and +Copyright (C) 2008-2014, Google, International Business Machines Corporation +Copyright (C) 1996-2015, International Business Machines Corporation and +Copyright (c) 1996-2015, International Business Machines Corporation and others. +Copyright (C) 2010-2012,2015 International Business Machines +Copyright (C) 2007-2015, International Business Machines +Copyright (C) 2013-2014, International Business Machines Corporation and others. 
+Copyright (C) 2010-2013, International Business Machines +Copyright (c) 2002-2005, International Business Machines Corporation +Copyright (C) 2001-2011,2014 IBM and others. All rights reserved. +Copyright (C) 2008-2016, International Business Machines Corporation +Copyright (C) 2004 - 2008, International Business Machines Corporation and +Copyright (C) 1997-2011,2014-2015 International Business Machines +Copyright (C) 2001-2003, International Business Machines +Copyright (C) 1999-2009, International Business Machines +Copyright (C) 2020 and later: Unicode, Inc. and others. +Copyright (c) 2002, International Business Machines Corporation and +Copyright (C) 2000-2008, International Business Machines +Copyright (C) 1998-2006, International Business Machines +Copyright (C) 1998-2001, International Business Machines Corporation +Copyright (C) 1998-2004, International Business Machines Corporation +Copyright (C) 2000, International Business Machines +Copyright (c) 1999-2016, International Business Machines Corporation and +Copyright (c) 2015, International Business Machines Corporation and others. All Rights Reserved. +Copyright (c) 1999-2012, International Business Machines Corporation and +Copyright (C) 1998-2011, International Business Machines +Copyright (C) 2008-2014, International Business Machines Corporation and +Copyright (C) 2003-2004, International Business Machines +Copyright (c) 2003-2005, International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 2002-2006 IBM, Inc. All Rights Reserved. 
+Copyright (C) 2004-2008, International Business Machines +Copyright (c) 2002-2016 International Business Machines Corporation and +Copyright (c) 2002-2015, International Business Machines Corporation and +Copyright (C) 2002-2016, International Business Machines Corporation +Copyright (c) 2002-2010,International Business Machines +Copyright (c) 2002-2014,International Business Machines +Copyright (c) 2002-2016,International Business Machines +Copyright (C) 2016 International Business Machines Corporation +Copyright © 2019 and later: Unicode, Inc. and others. +Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved. +Copyright (c) 2016 International Business Machines Corporation and others. All Rights Reserved. +Copyright (c) 2015-2016, International Business Machines Corporation and others. All Rights Reserved. +Copyright (c) 2005-2006, International Business Machines Corporation and +Copyright (c) 1997-2004, International Business Machines Corporation +Copyright (c) 2012-2016, International Business Machines Corporation +Copyright (c) 2012-2014, International Business Machines Corporation and +Copyright (c) 1997-2014, International Business Machines Corporation +Copyright (c) 1996-2016, International Business Machines Corporation and +Copyright (c) 2003-2013, International Business Machines Corporation +Copyright (c) 2003-2008, International Business Machines Corporation +Copyright (c) 1997-2015, International Business Machines Corporation +Copyright (c) 2002-2016, International Business Machines Corporation and +Copyright (c) 1997-2002, International Business Machines Corporation and +Copyright (C) 1996-2012, International Business Machines +Copyright (c) 1997-2013 International Business Machines Corporation and +Copyright (c) 2010-2012, International Business Machines Corporation and +Copyright (c) 1997-2011, International Business Machines Corporation +Copyright (c) 1997-2006, International Business Machines Corporation 
and +Copyright (c) 2008-2016 International Business Machines Corporation and +Copyright (c) 2008-2016, International Business Machines Corporation and +Copyright (c) 1997-2016 International Business Machines Corporation and +Copyright (c) 2007-2011, International Business Machines +Copyright (c) 2007-2010, International Business Machines +Copyright (C) 2001-2016, International Business Machines Corporation and +Copyright (C) 2001-2003, International Business Machines Corporation and +Copyright (C) 2003-2011, International Business Machines +Copyright (c) 1997-2007, International Business Machines Corporation and +Copyright (c) 1997-2015, International Business Machines +Copyright (C) 2004-2009, International Business Machines Corporation and +Copyright (C) 2004, International Business Machines Corporation and +Copyright (C) 1996-2009, International Business Machines Corporation and +Copyright (C) 1996-2006, International Business Machines Corporation and +Copyright (C) 2011-2013, International Business Machines Corporation +Copyright (C) 2000-2007, International Business Machines +Copyright (c) 2001, International Business Machines Corporation and +Copyright (C) 2012-2013, International Business Machines +Copyright (c) 2010-2016, International Business Machines Corporation and +Copyright (c) 2010-2016, International Business Machines Corporation +Copyright (c) 1997-2010, International Business Machines Corporation +Copyright (c) 1997-2003, International Business Machines +Copyright (C) 2014-2015, International Business Machines Corporation and +Copyright (c) 1997-2013, International Business Machines Corporation +Copyright (c) 1999-2016, International Business Machines +Copyright (c) 1999-2016 International Business Machines Corporation and +Copyright (c) 2016, International Business Machines Corporation and +Copyright (c) 2016, International Business Machines +Copyright (c) 2013-2016, International Business Machines Corporation +Copyright (c) 2013, International 
Business Machines Corporation +Copyright (C) 2013-2016, International Business Machines Corporation and +Copyright (c) 2001-2010, International Business Machines Corporation and +Copyright (C) 2014, International Business Machines Corporation and +Copyright (c) 1999-2015, International Business Machines Corporation and +Copyright (C) 2001-2016, International Business Machines orporation +Copyright (c) 2001-2008, International Business Machines Corporation and others +Copyright (C) 2003-2016, International Business Machines Corporation and +Copyright (c) 2004, International Business Machines Corporation +Copyright (C) 2001-2009, International Business Machines +Copyright (c) 2004,2011 International Business Machines +Copyright (c) 2004-2011, International Business Machines +Copyright (c) 2000-2016, International Business Machines Corporation +Copyright (c) 2001-2005, International Business Machines Corporation and +Copyright (C) 2001-2004, International Business Machines +Copyright (c) 2001-2009, International Business Machines +Copyright (c) 1997-2009, International Business Machines Corporation +Copyright (c) 1997-2013, International Business Machines +Copyright (c) 1997-2012, International Business Machines Corporation +Copyright (C) 2007-2015, International Business Machines Corporation and +Copyright (C) 2007-2011, International Business Machines Corporation and +Copyright (C) 2007, International Business Machines Corporation and +Copyright (c) 1998-2005, International Business Machines Corporation and +Copyright (c) 2002-2010, International Business Machines Corporation and +Copyright (C) 1999-2016 International Business Machines Corporation and +Copyright (c) 2004-2011, International Business Machines Corporation and +Copyright (c) 2002-2007, International Business Machines Corporation and +Copyright (C) 2003, International Business Machines Corporation and +Copyright (C) 2005-2011, International Business Machines +Copyright (C) 2011-2012, International 
Business Machines +Copyright (C) 2007-2012, International Business Machines +Copyright (C) 2006-2016, International Business Machines Corporation +Copyright (C) 2006-2012, International Business Machines Corporation and others. +Copyright 2007 Google Inc. All Rights Reserved. +Copyright (c) 2001-2015, International Business Machines +Copyright (C) 2006-2014, International Business Machines Corporation +Copyright (C) 2008, International Business Machines Corporation and +Copyright (C) 2009-2012, International Business Machines +Copyright (C) 2006 International Business Machines Corporation +Copyright (C) 2010-2016, International Business Machines Corporation and +Copyright (C) 2002-2014, International Business Machines Corporation and +Copyright (C) 2002-2005, International Business Machines Corporation and +Copyright (C) 2011, International Business Machines +Copyright (c) 2003-2010 International Business Machines +Copyright (C) 2003-2003, International Business Machines +Copyright (C) 1999-2016 International Business Machines Corporation +Copyright (C) 1999-2014 International Business Machines Corporation +Copyright (C) 1999-2014 International Business Machines +Copyright (C) 2002-2011, International Business Machines Corporation and others. +Copyright (C) 2002-2008, International Business Machines Corporation and others. +Copyright (C) 2002-2008 International Business Machines Corporation +Copyright (c) 2001-2005, International Business Machines +Copyright (C) 2002-2014 International Business Machines Corporation +Copyright (c) 2003-2011, International Business Machines +Copyright (C) 1998-2012, International Business Machines Corporation and +Copyright (C) 2001-2014, International Business Machines Corporation. +Copyright (C) 2001-2011, International Business Machines Corporation. 
+Copyright (C) 2001-2014, International Business Machines Corporation and +Copyright (C) 2001-2011, International Business Machines Corporation and +Copyright (C) 2001-2012, International Business Machines Corporation and +Copyright 2004 and onwards Google Inc. +Copyright (C) 2004-2014, International Business Machines +Copyright (C) 2006, International Business Machines +Copyright (C) 2004-2012, International Business Machines +Copyright (C) 2001-2013, International Business Machines +Copyright (C) 1998-2004, International Business Machines +Copyright (C) 2000-2013, International Business Machines +Copyright (C) 1999-2015 International Business Machines +Copyright (C) 2000-2006, International Business Machines +Copyright (C) 1999-2004, International Business Machines +Copyright (C) 2003-2007, International Business Machines +Copyright (C) 2002-2006, International Business Machines +Copyright (C) 2001-2015, International Business Machines +Copyright (c) 2001-2012, International Business Machines +Copyright (c) 2002-2004, International Business Machines +Copyright (C) 1999-2016, International Business Machines Corporation and +Copyright (c) 1996-2014, International Business Machines +Copyright (C) 1999-2016, International Business Machines Corporation +Copyright (C) 2009-2014 International Business Machines +Copyright (C) 2004-2007, International Business Machines +Copyright (c) 2001-2016, International Business Machines +Copyright (C) 2003-2009, International Business Machines +Copyright (C) 1999-2013, International Business Machines Corporation and +Copyright (C) 1999-2015, International Business Machines Corporation and +Copyright (c) 2002-2011, International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 2001-2016 IBM, Inc. All Rights Reserved. +Copyright (C) 1999-2016 International Business Machines +Copyright (C) 2009-2010 IBM Corporation and Others. All Rights Reserved. 
+Copyright (C) 1998-2012, International Business Machines +Copyright (C) 1991 and later: Unicode, Inc. and others. +Copyright (C) 1997-2000, International Business Machines +Copyright (c) 1999-2007, International Business Machines Corporation and +Copyright (c) 2000 IBM, Inc. and Others. +Copyright (C) 2008-2013, International Business Machines +Copyright (C) 1998-2003, 2006, International Business Machines Corporation +Copyright (c) 2002-2003,International Business Machines +Copyright (C) 2009 International Business Machines +Copyright (C) 2010-2016 International Business Machines +Copyright (C) 2008-2012 IBM, Inc. All Rights Reserved. +Copyright (C) 1998-2008, International Business Machines +Copyright (C) 2010-2016, International Business Machines +Copyright (C) 1999-2006,2013 IBM Corp. All rights reserved. +Copyright (C) 2008-2009, International Business Machines Corporation and +Copyright (C) 2012,2014 International Business Machines +Copyright (c) 1996-2015, International Business Machines Corporation and +Copyright (C) 1997-2005, International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 1999-2012, International Business Machines Corporation and +Copyright (C) 1996-2013, International Business Machines Corporation +Copyright (C) 1998-2005, International Business Machines +Copyright 2001 and onwards Google Inc. +Copyright (C) 2010-2012,2014, International Business Machines +Copyright (C) 1996-2015, International Business Machines Corporation and others. +Copyright (c) 2003-2004, International Business Machines +Copyright (C) 2000-2004, International Business Machines +Copyright (C) 2002-2013, International Business Machines +Copyright (C) 2002-2011 International Business Machines Corporation and others. All Rights Reserved. +Copyright (C) 1999-2010, International Business Machines Corporation and others. +Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. 
+Copyright (c) 1996-2016, International Business Machines Corporation +Copyright (C) 1997-2010, International Business Machines + +Software: libtiff 4.1.0 +Copyright notice: +Copyright © 2015 Open Microscopy Environment / University of Dundee +Copyright (c) 2004, Andrey Kiselev +Copyright (c) 1990-1997 Sam Leffler +Copyright (c) 1991-1997 Silicon Graphics, Inc. +Copyright (c) 1988-1997 Sam Leffler +Copyright (c) 1991-1997 Sam Leffler +Use and Copyright +Copyright (C) 1990, 1995 Frank D. Cringle. +Copyright (c) 1994-1997 Sam Leffler +Copyright (c) 1994-1997 Silicon Graphics, Inc. +Copyright (c) 1997 Greg Ward Larson +Copyright (c) 1997 Silicon Graphics, Inc. +Copyright (c) 2010, Andrey Kiselev +Copyright (c) Joris Van Damme +Copyright (c) AWare Systems +Copyright (c) 1996-1997 Sam Leffler +Copyright (c) 1996 Pixar +Copyright (c) 1995-1997 Sam Leffler +Copyright (c) 1995-1997 Silicon Graphics, Inc. +Copyright (c) 1988-1996 Sam Leffler +Copyright (c) 1991-1996 Silicon Graphics, Inc. +Copyright (c) 1992-1997 Sam Leffler +Copyright (c) 1992-1997 Silicon Graphics, Inc. +Copyright (c) 2018, Mapbox +Copyright (c) 2017, Planet Labs +Copyright (c) 1990 by Sun Microsystems, Inc. +Copyright 1990 by Digital Equipment Corporation, Maynard, Massachusetts. +Copyright 1991 by Digital Equipment Corporation, Maynard, Massachusetts. +Copyright (c) 2002, Andrey Kiselev +Copyright (c) 2003 Ross Finlayson +Additions (c) Richard Nolde 2006-2010 +Copyright (c) 2003, Andrey Kiselev +Copyright (c) 2000, Frank Warmerdam +Copyright (c) 1987, 1993, 1994 +Copyright (c) 1989, 1993 +Copyright (c) 2009 Frank Warmerdam +Copyright (c) 1987, 1993 +Copyright (c) 2005 The DragonFly Project. All rights reserved. +Copyright (c) 2003 Citrus Project, +All rights reserved. 
+Copyright (c) 1990, 1993 +Copyright (c) 1996 Mike Johnson +Copyright (c) 1996 BancTec AB +Copyright (c) 2004, Andrey Kiselev +Copyright (c) 2012, Frank Warmerdam +Copyright (c) 2019, Even Rouault +Copyright (c) 2007, Frank Warmerdam +Copyright (c) 2019, Thomas Bernard +Copyright (c) 2008, Andrey Kiselev +Copyright (c) 1999, Frank Warmerdam +Copyright (c) 1991-1996 Sam Leffler +Copyright (c) 1996 USAF Phillips Laboratory + Software: opencv 4.2.0 Copyright notice: Copyright (C) 2016, NVIDIA Corporation, all rights reserved. diff --git a/akg b/akg new file mode 160000 index 0000000000..c460176523 --- /dev/null +++ b/akg @@ -0,0 +1 @@ +Subproject commit c460176523d039c8995f1d71089753725ebc0792 diff --git a/build.sh b/build.sh index dd909e9f51..70718bf89b 100755 --- a/build.sh +++ b/build.sh @@ -49,10 +49,11 @@ usage() echo " -Q Enable dump memory, default off" echo " -D Enable dumping of function graph ir, default on" echo " -z Compile dataset & mindrecord, default on" - echo " -M Enable MPI and NCCL for GPU training, default on" + echo " -M Enable MPI and NCCL for GPU training, gpu default on" echo " -V Specify the minimum required cuda version, default CUDA 9.2" echo " -I Compile predict, default off" echo " -K Compile with AKG, default off" + echo " -s Enable serving module, default off" } # check value of input is 'on' or 'off' @@ -86,15 +87,15 @@ checkopts() ENABLE_DUMPE2E="off" ENABLE_DUMP_IR="on" COMPILE_MINDDATA="on" - ENABLE_MPI="on" + ENABLE_MPI="off" CUDA_VERSION="9.2" COMPILE_PREDICT="off" USE_GLOG="on" PREDICT_PLATFORM="" - ENABLE_AKG="off" - + ENABLE_AKG="on" + ENABLE_SERVING="off" # Process the options - while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K' opt + while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:s' opt do OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') case "${opt}" in @@ -168,6 +169,7 @@ checkopts() if [[ "X$OPTARG" == "Xgpu" ]]; then ENABLE_GPU="on" ENABLE_CPU="on" + ENABLE_MPI="on" elif [[ "X$OPTARG" == "Xd" || "X$OPTARG" == 
"Xascend" ]]; then ENABLE_D="on" ENABLE_CPU="on" @@ -234,6 +236,10 @@ checkopts() ENABLE_AKG="on" echo "enable compile with akg" ;; + s) + ENABLE_SERVING="on" + echo "enable serving" + ;; *) echo "Unknown option ${opt}!" usage @@ -242,9 +248,12 @@ checkopts() done } checkopts "$@" -echo "---------------- mindspore: build start ----------------" +echo "---------------- MindSpore: build start ----------------" mkdir -pv "${BUILD_PATH}/package/mindspore/lib" git submodule update --init graphengine +if [[ "X$ENABLE_AKG" = "Xon" ]] && [[ "X$ENABLE_D" = "Xon" ]]; then + git submodule update --init --recursive akg +fi build_exit() { @@ -307,9 +316,13 @@ build_mindspore() if [[ "X$USE_GLOG" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DUSE_GLOG=ON" fi - if [[ "X$ENABLE_AKG" = "Xon" ]]; then + if [[ "X$ENABLE_AKG" = "Xon" ]] && [[ "X$ENABLE_D" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_AKG=ON" fi + if [[ "X$ENABLE_SERVING" = "Xon" ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_SERVING=ON" + fi + echo "${CMAKE_ARGS}" if [[ "X$INC_BUILD" = "Xoff" ]]; then cmake ${CMAKE_ARGS} ../.. 
diff --git a/cmake/dependency_graphengine.cmake b/cmake/dependency_graphengine.cmake index 991eb2a24a..91a471d1f2 100644 --- a/cmake/dependency_graphengine.cmake +++ b/cmake/dependency_graphengine.cmake @@ -36,6 +36,7 @@ elseif (DEFINED ENV{D_LINK_PATH}) find_library(hccl libhccl.so ${GE_LIB_PATH}) find_library(cce libcce.so ${GE_LIB_PATH}) find_library(resource libresource.so ${GE_LIB_PATH}) + find_library(error_manager liberror_manager.so ${GE_LIB_PATH}) else() # Ascend mode if(DEFINED ENV{ASCEND_CUSTOM_PATH}) @@ -54,6 +55,7 @@ else() find_library(msprof libmsprof.so ${ASCEND_RUNTIME_PATH}) find_library(register libregister.so ${ASCEND_RUNTIME_PATH}) find_library(resource libresource.so ${ASCEND_RUNTIME_PATH}) + find_library(error_manager liberror_manager.so ${ASCEND_RUNTIME_PATH}) endif() # compile libraries from following directories diff --git a/cmake/external_libs/gtest.cmake b/cmake/external_libs/gtest.cmake index df2eaec2cc..eb64655a86 100644 --- a/cmake/external_libs/gtest.cmake +++ b/cmake/external_libs/gtest.cmake @@ -1,4 +1,4 @@ -set(gtest_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") +set(gtest_CXXFLAGS "-D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") set(gtest_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") mindspore_add_pkg(gtest VER 1.8.0 diff --git a/cmake/external_libs/icu4c.cmake b/cmake/external_libs/icu4c.cmake new file mode 100644 index 0000000000..7d13e4fd2a --- /dev/null +++ b/cmake/external_libs/icu4c.cmake @@ -0,0 +1,19 @@ +set(LIB_ICU_COMMON icuuc) +set(LIB_ICU_DATA icudata) +set(LIB_ICU_I18N icui18n) +if (CMAKE_SYSTEM_NAME MATCHES "Windows") + message("icu4c thirdparty do not support windows currently.") +else() + mindspore_add_pkg(icu4c + VER 67.1 + LIBS ${LIB_ICU_COMMON} ${LIB_ICU_DATA} ${LIB_ICU_I18N} + URL https://github.com/unicode-org/icu/archive/release-67-1.tar.gz + MD5 0c2662a2b0bc80b0eb56495205247c8f + CONFIGURE_COMMAND ./icu4c/source/runConfigureICU Linux --enable-rpath --disable-tests --disable-samples --disable-icuio --disable-extras 
ICU_DATA_FILTER_FILE=${CMAKE_SOURCE_DIR}/third_party/icu4c/filter.json + ) + include_directories(${icu4c_INC}) + add_library(mindspore::icuuc ALIAS icu4c::${LIB_ICU_COMMON}) + add_library(mindspore::icudata ALIAS icu4c::${LIB_ICU_DATA}) + add_library(mindspore::icui18n ALIAS icu4c::${LIB_ICU_I18N}) + add_definitions(-D ENABLE_ICU4C) +endif() \ No newline at end of file diff --git a/cmake/external_libs/opencv.cmake b/cmake/external_libs/opencv.cmake index b4f8d55a9e..4c7db821f4 100644 --- a/cmake/external_libs/opencv.cmake +++ b/cmake/external_libs/opencv.cmake @@ -8,7 +8,7 @@ elseif (${CMAKE_SYSTEM_NAME} MATCHES "Windows") set(opencv_CXXFLAGS "${opencv_CXXFLAGS} -Wno-attributes -Wno-unknown-pragmas") set(opencv_CXXFLAGS "${opencv_CXXFLAGS} -Wno-unused-value -Wno-implicit-fallthrough") else() - set(opencv_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -D_FORTIFY_SOURCE=2 -O2") + set(opencv_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") set(opencv_CFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -D_FORTIFY_SOURCE=2 -O2") set(opencv_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") endif() diff --git a/cmake/external_libs/protobuf.cmake b/cmake/external_libs/protobuf.cmake index 6fe34577af..53cbebfcb9 100644 --- a/cmake/external_libs/protobuf.cmake +++ b/cmake/external_libs/protobuf.cmake @@ -1,9 +1,12 @@ set(protobuf_USE_STATIC_LIBS ON) if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") set(protobuf_CXXFLAGS "-fstack-protector-all -Wno-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -O2") -else() +elseif (${CMAKE_SYSTEM_NAME} MATCHES "Windows") set(protobuf_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -O2") +else() + set(protobuf_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC 
-fvisibility=hidden -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") endif() + set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") set(_ms_tmp_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) set(CMAKE_CXX_FLAGS ${_ms_tmp_CMAKE_CXX_FLAGS}) diff --git a/cmake/external_libs/sqlite.cmake b/cmake/external_libs/sqlite.cmake index 1d280cef4b..6b7a5e24d4 100644 --- a/cmake/external_libs/sqlite.cmake +++ b/cmake/external_libs/sqlite.cmake @@ -1,10 +1,10 @@ if (WIN32) mindspore_add_pkg(sqlite - VER 3.31.1 + VER 3.32.2 LIBS sqlite3 - URL https://sqlite.org/2020/sqlite-amalgamation-3310100.zip - MD5 2b7bfcdd97dc281903a9aee966213fe4 - PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/sqlite/sqlite.windows.patch001 ${CMAKE_SOURCE_DIR}/third_party/patch/sqlite/sqlite.windows.patch002 ${CMAKE_SOURCE_DIR}/third_party/patch/sqlite/sqlite.windows.patch003 + URL https://sqlite.org/2020/sqlite-amalgamation-3320200.zip + MD5 1eccea18d248eb34c7378b2b3f63f1db + PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/sqlite/sqlite.windows.patch001 CMAKE_OPTION " " ) @@ -18,11 +18,11 @@ else () endif() set(sqlite_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") mindspore_add_pkg(sqlite - VER 3.31.1 + VER 3.32.2 LIBS sqlite3 - URL https://github.com/sqlite/sqlite/archive/version-3.31.1.tar.gz - MD5 5f4e7b4016c15f4fb5855615279819da - PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/sqlite/sqlite.patch001 ${CMAKE_SOURCE_DIR}/third_party/patch/sqlite/sqlite.patch002 ${CMAKE_SOURCE_DIR}/third_party/patch/sqlite/sqlite.patch003 + URL https://github.com/sqlite/sqlite/archive/version-3.32.2.tar.gz + MD5 ea6d3b3289b4ac216fb06081a01ef101 + PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/sqlite/sqlite.patch001 CONFIGURE_COMMAND ./configure --enable-shared=no --disable-tcl --disable-editline --enable-json1) endif () diff --git a/cmake/mind_expression.cmake b/cmake/mind_expression.cmake index f20683a2d8..86337c1dd2 100644 --- a/cmake/mind_expression.cmake +++ b/cmake/mind_expression.cmake @@ -26,6 +26,9 @@ 
include_directories(${Python3_INCLUDE_DIRS}) include_directories(${CMAKE_SOURCE_DIR}/third_party) if (ENABLE_CPU) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/mkl_dnn.cmake) + if (ENABLE_MPI) + include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ompi.cmake) + endif() endif() if (ENABLE_GPU) @@ -36,7 +39,6 @@ if (ENABLE_GPU) if (ENABLE_MPI) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/nccl.cmake) - include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ompi.cmake) endif() endif() @@ -52,6 +54,7 @@ elseif(ENABLE_D OR ENABLE_TESTCASES) endif() if (ENABLE_MINDDATA) + include(${CMAKE_SOURCE_DIR}/cmake/external_libs/icu4c.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/jpeg_turbo.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/libtiff.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/opencv.cmake) diff --git a/cmake/package.cmake b/cmake/package.cmake index 875ba5217d..1cff396ef1 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -91,7 +91,20 @@ if (ENABLE_MINDDATA) DESTINATION ${INSTALL_LIB_DIR} COMPONENT mindspore ) - + if (CMAKE_SYSTEM_NAME MATCHES "Windows") + message("icu4c does not support windows system temporarily") + else() + file(GLOB_RECURSE ICU4C_LIB_LIST + ${icu4c_LIBPATH}/libicuuc* + ${icu4c_LIBPATH}/libicudata* + ${icu4c_LIBPATH}/libicui18n* + ) + install( + FILES ${ICU4C_LIB_LIST} + DESTINATION ${INSTALL_LIB_DIR} + COMPONENT mindspore + ) + endif() endif () if (ENABLE_CPU) @@ -109,19 +122,20 @@ if (ENABLE_CPU) ) endif () +if (ENABLE_MPI) + install( + TARGETS _ms_mpi + DESTINATION ${INSTALL_BASE_DIR} + COMPONENT mindspore + ) +endif () + if (ENABLE_GPU) - if (ENABLE_MPI) - install( - TARGETS _ms_mpi - DESTINATION ${INSTALL_BASE_DIR} - COMPONENT mindspore - ) install( TARGETS gpu_collective DESTINATION ${INSTALL_LIB_DIR} COMPONENT mindspore ) - endif () install( TARGETS gpu_queue DESTINATION ${INSTALL_LIB_DIR} @@ -222,6 +236,16 @@ if (ENABLE_GPU) endif () endif () +if (ENABLE_D AND ENABLE_AKG) + set (AKG_PATH 
${CMAKE_SOURCE_DIR}/build/mindspore/akg) + install( + DIRECTORY + ${AKG_PATH}/akg + DESTINATION ${INSTALL_PY_DIR}/.. + COMPONENT mindspore + ) +endif () + if (EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset) install( DIRECTORY ${CMAKE_SOURCE_DIR}/mindspore/dataset diff --git a/cmake/package_script.cmake b/cmake/package_script.cmake index dcc8ee0ad0..94ffc71b49 100644 --- a/cmake/package_script.cmake +++ b/cmake/package_script.cmake @@ -51,7 +51,7 @@ endif () # get git commit id set(GIT_COMMIT_ID "") execute_process( - COMMAND ${GIT} log --format='[sha1]:%h,[branch]:%d' -1 + COMMAND ${GIT} log --format='[sha1]:%h,[branch]:%d' --abbrev=8 -1 OUTPUT_VARIABLE GIT_COMMIT_ID WORKING_DIRECTORY ${MS_ROOT_DIR} ERROR_QUIET) diff --git a/example/googlenet_cifar10/README.md b/example/googlenet_cifar10/README.md deleted file mode 100755 index 1acc7d1e1e..0000000000 --- a/example/googlenet_cifar10/README.md +++ /dev/null @@ -1,106 +0,0 @@ -# Googlenet Example - -## Description - -This example is for Googlenet model training and evaluation. - -## Requirements - -- Install [MindSpore](https://www.mindspore.cn/install/en). - -- Download the CIFAR-10 binary version dataset. - -> Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows: -> ``` -> . -> ├── cifar-10-batches-bin # train dataset -> └── cifar-10-verify-bin # infer dataset -> ``` - -## Running the Example - -### Training - -``` -python train.py --data_path=your_data_path --device_id=6 > out.train.log 2>&1 & -``` -The python command above will run in the background, you can view the results through the file `out.train.log`. - -After training, you'll get some checkpoint files under the script folder by default. - -You will get the loss value as following: -``` -# grep "loss is " out.train.log -epoch: 1 step: 390, loss is 1.4842823 -epcoh: 2 step: 390, loss is 1.0897788 -... 
-``` - -### Evaluation - -``` -python eval.py --data_path=your_data_path --device_id=6 --checkpoint_path=./train_googlenet_cifar10-125-390.ckpt > out.eval.log 2>&1 & -``` -The above python command will run in the background, you can view the results through the file `out.eval.log`. - -You will get the accuracy as following: -``` -# grep "result: " out.eval.log -result: {'acc': 0.934} -``` - -### Distribute Training -``` -sh run_distribute_train.sh rank_table.json your_data_path -``` -The above shell script will run distribute training in the background, you can view the results through the file `train_parallel[X]/log`. - -You will get the loss value as following: -``` -# grep "result: " train_parallel*/log -train_parallel0/log:epoch: 1 step: 48, loss is 1.4302931 -train_parallel0/log:epcoh: 2 step: 48, loss is 1.4023874 -... -train_parallel1/log:epoch: 1 step: 48, loss is 1.3458025 -train_parallel1/log:epcoh: 2 step: 48, loss is 1.3729336 -... -... -``` -> About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). - -## Usage: - -### Training -``` -usage: train.py [--device_target TARGET][--data_path DATA_PATH] - [--device_id DEVICE_ID] - -parameters/options: - --device_target the training backend type, default is Ascend. - --data_path the storage path of dataset - --device_id the device which used to train model. - -``` - -### Evaluation - -``` -usage: eval.py [--device_target TARGET][--data_path DATA_PATH] - [--device_id DEVICE_ID][--checkpoint_path CKPT_PATH] - -parameters/options: - --device_target the evaluation backend type, default is Ascend. - --data_path the storage path of datasetd - --device_id the device which used to evaluate model. - --checkpoint_path the checkpoint file path used to evaluate model. 
-``` - -### Distribute Training - -``` -Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH] - -parameters/options: - MINDSPORE_HCCL_CONFIG_PATH HCCL configuration file path. - DATA_PATH the storage path of dataset. -``` diff --git a/example/graph_to_mindrecord/README.md b/example/graph_to_mindrecord/README.md index cc6f6a1c70..df7ab33444 100644 --- a/example/graph_to_mindrecord/README.md +++ b/example/graph_to_mindrecord/README.md @@ -24,9 +24,6 @@ This example provides an efficient way to generate MindRecord. Users only need t 1. Download and prepare the Cora dataset as required. - > [Cora dataset download address](https://github.com/jzaldi/datasets/tree/master/cora) - - 2. Edit write_cora.sh and modify the parameters ``` --mindrecord_file: output MindRecord file. diff --git a/example/graph_to_mindrecord/citeseer/mr_api.py b/example/graph_to_mindrecord/citeseer/mr_api.py index 8b1f424b0a..aa9e2a2c4d 100644 --- a/example/graph_to_mindrecord/citeseer/mr_api.py +++ b/example/graph_to_mindrecord/citeseer/mr_api.py @@ -15,29 +15,27 @@ """ User-defined API for MindRecord GNN writer. 
""" -import csv import os +import pickle as pkl import numpy as np import scipy.sparse as sp +from mindspore import log as logger # parse args from command line parameter 'graph_api_args' # args delimiter is ':' args = os.environ['graph_api_args'].split(':') -CITESEER_CONTENT_FILE = args[0] -CITESEER_CITES_FILE = args[1] -CITESEER_MINDRECRD_LABEL_FILE = CITESEER_CONTENT_FILE + "_label_mindrecord" -CITESEER_MINDRECRD_ID_MAP_FILE = CITESEER_CONTENT_FILE + "_id_mindrecord" - -node_id_map = {} +CITESEER_PATH = args[0] +dataset_str = 'citeseer' # profile: (num_features, feature_data_types, feature_shapes) -node_profile = (2, ["float32", "int64"], [[-1], [-1]]) +node_profile = (2, ["float32", "int32"], [[-1], [-1]]) edge_profile = (0, [], []) +node_ids = [] + def _normalize_citeseer_features(features): - features = np.array(features) row_sum = np.array(features.sum(1)) r_inv = np.power(row_sum * 1.0, -1).flatten() r_inv[np.isinf(r_inv)] = 0. @@ -46,6 +44,14 @@ def _normalize_citeseer_features(features): return features +def _parse_index_file(filename): + """Parse index file.""" + index = [] + for line in open(filename): + index.append(int(line.strip())) + return index + + def yield_nodes(task_id=0): """ Generate node data @@ -53,30 +59,47 @@ def yield_nodes(task_id=0): Yields: data (dict): data row which is dict. 
""" - print("Node task is {}".format(task_id)) - label_types = {} - label_size = 0 - node_num = 0 - with open(CITESEER_CONTENT_FILE) as content_file: - content_reader = csv.reader(content_file, delimiter='\t') - line_count = 0 - for row in content_reader: - if not row[-1] in label_types: - label_types[row[-1]] = label_size - label_size += 1 - if not row[0] in node_id_map: - node_id_map[row[0]] = node_num - node_num += 1 - raw_features = [[int(x) for x in row[1:-1]]] - node = {'id': node_id_map[row[0]], 'type': 0, 'feature_1': _normalize_citeseer_features(raw_features), - 'feature_2': [label_types[row[-1]]]} - yield node - line_count += 1 - print('Processed {} lines for nodes.'.format(line_count)) - # print('label types {}.'.format(label_types)) - with open(CITESEER_MINDRECRD_LABEL_FILE, 'w') as f: - for k in label_types: - print(k + ',' + str(label_types[k]), file=f) + logger.info("Node task is {}".format(task_id)) + names = ['x', 'y', 'tx', 'ty', 'allx', 'ally'] + objects = [] + for name in names: + with open("{}/ind.{}.{}".format(CITESEER_PATH, dataset_str, name), 'rb') as f: + objects.append(pkl.load(f, encoding='latin1')) + x, y, tx, ty, allx, ally = tuple(objects) + test_idx_reorder = _parse_index_file( + "{}/ind.{}.test.index".format(CITESEER_PATH, dataset_str)) + test_idx_range = np.sort(test_idx_reorder) + + tx = _normalize_citeseer_features(tx) + allx = _normalize_citeseer_features(allx) + + # Fix citeseer dataset (there are some isolated nodes in the graph) + # Find isolated nodes, add them as zero-vecs into the right position + test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1) + tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1])) + tx_extended[test_idx_range-min(test_idx_range), :] = tx + tx = tx_extended + ty_extended = np.zeros((len(test_idx_range_full), y.shape[1])) + ty_extended[test_idx_range-min(test_idx_range), :] = ty + ty = ty_extended + + features = sp.vstack((allx, tx)).tolil() + 
features[test_idx_reorder, :] = features[test_idx_range, :] + features = features.A + + labels = np.vstack((ally, ty)) + labels[test_idx_reorder, :] = labels[test_idx_range, :] + + line_count = 0 + for i, label in enumerate(labels): + if not 1 in label.tolist(): + continue + node = {'id': i, 'type': 0, 'feature_1': features[i].tolist(), + 'feature_2': label.tolist().index(1)} + line_count += 1 + node_ids.append(i) + yield node + logger.info('Processed {} lines for nodes.'.format(line_count)) def yield_edges(task_id=0): @@ -86,24 +109,21 @@ def yield_edges(task_id=0): Yields: data (dict): data row which is dict. """ - print("Edge task is {}".format(task_id)) - # print(map_string_int) - with open(CITESEER_CITES_FILE) as cites_file: - cites_reader = csv.reader(cites_file, delimiter='\t') + logger.info("Edge task is {}".format(task_id)) + with open("{}/ind.{}.graph".format(CITESEER_PATH, dataset_str), 'rb') as f: + graph = pkl.load(f, encoding='latin1') line_count = 0 - for row in cites_reader: - if not row[0] in node_id_map: - print('Source node {} does not exist.'.format(row[0])) - continue - if not row[1] in node_id_map: - print('Destination node {} does not exist.'.format(row[1])) - continue - line_count += 1 - edge = {'id': line_count, - 'src_id': node_id_map[row[0]], 'dst_id': node_id_map[row[1]], 'type': 0} - yield edge - - with open(CITESEER_MINDRECRD_ID_MAP_FILE, 'w') as f: - for k in node_id_map: - print(k + ',' + str(node_id_map[k]), file=f) - print('Processed {} lines for edges.'.format(line_count)) + for i in graph: + for dst_id in graph[i]: + if not i in node_ids: + logger.info('Source node {} does not exist.'.format(i)) + continue + if not dst_id in node_ids: + logger.info('Destination node {} does not exist.'.format( + dst_id)) + continue + edge = {'id': line_count, + 'src_id': i, 'dst_id': dst_id, 'type': 0} + line_count += 1 + yield edge + logger.info('Processed {} lines for edges.'.format(line_count)) diff --git 
a/example/graph_to_mindrecord/cora/mr_api.py b/example/graph_to_mindrecord/cora/mr_api.py index 0963fd78f7..aeeb0e04de 100644 --- a/example/graph_to_mindrecord/cora/mr_api.py +++ b/example/graph_to_mindrecord/cora/mr_api.py @@ -15,29 +15,24 @@ """ User-defined API for MindRecord GNN writer. """ -import csv import os +import pickle as pkl import numpy as np import scipy.sparse as sp # parse args from command line parameter 'graph_api_args' # args delimiter is ':' args = os.environ['graph_api_args'].split(':') -CORA_CONTENT_FILE = args[0] -CORA_CITES_FILE = args[1] -CORA_MINDRECRD_LABEL_FILE = CORA_CONTENT_FILE + "_label_mindrecord" -CORA_CONTENT_ID_MAP_FILE = CORA_CONTENT_FILE + "_id_mindrecord" - -node_id_map = {} +CORA_PATH = args[0] +dataset_str = 'cora' # profile: (num_features, feature_data_types, feature_shapes) -node_profile = (2, ["float32", "int64"], [[-1], [-1]]) +node_profile = (2, ["float32", "int32"], [[-1], [-1]]) edge_profile = (0, [], []) def _normalize_cora_features(features): - features = np.array(features) row_sum = np.array(features.sum(1)) r_inv = np.power(row_sum * 1.0, -1).flatten() r_inv[np.isinf(r_inv)] = 0. @@ -46,6 +41,14 @@ def _normalize_cora_features(features): return features +def _parse_index_file(filename): + """Parse index file.""" + index = [] + for line in open(filename): + index.append(int(line.strip())) + return index + + def yield_nodes(task_id=0): """ Generate node data @@ -54,32 +57,32 @@ def yield_nodes(task_id=0): data (dict): data row which is dict. 
""" print("Node task is {}".format(task_id)) - label_types = {} - label_size = 0 - node_num = 0 - with open(CORA_CONTENT_FILE) as content_file: - content_reader = csv.reader(content_file, delimiter=',') - line_count = 0 - for row in content_reader: - if line_count == 0: - line_count += 1 - continue - if not row[0] in node_id_map: - node_id_map[row[0]] = node_num - node_num += 1 - if not row[-1] in label_types: - label_types[row[-1]] = label_size - label_size += 1 - raw_features = [[int(x) for x in row[1:-1]]] - node = {'id': node_id_map[row[0]], 'type': 0, 'feature_1': _normalize_cora_features(raw_features), - 'feature_2': [label_types[row[-1]]]} - yield node - line_count += 1 + + names = ['tx', 'ty', 'allx', 'ally'] + objects = [] + for name in names: + with open("{}/ind.{}.{}".format(CORA_PATH, dataset_str, name), 'rb') as f: + objects.append(pkl.load(f, encoding='latin1')) + tx, ty, allx, ally = tuple(objects) + test_idx_reorder = _parse_index_file( + "{}/ind.{}.test.index".format(CORA_PATH, dataset_str)) + test_idx_range = np.sort(test_idx_reorder) + + features = sp.vstack((allx, tx)).tolil() + features[test_idx_reorder, :] = features[test_idx_range, :] + features = _normalize_cora_features(features) + features = features.A + + labels = np.vstack((ally, ty)) + labels[test_idx_reorder, :] = labels[test_idx_range, :] + + line_count = 0 + for i, label in enumerate(labels): + node = {'id': i, 'type': 0, 'feature_1': features[i].tolist(), + 'feature_2': label.tolist().index(1)} + line_count += 1 + yield node print('Processed {} lines for nodes.'.format(line_count)) - print('label types {}.'.format(label_types)) - with open(CORA_MINDRECRD_LABEL_FILE, 'w') as f: - for k in label_types: - print(k + ',' + str(label_types[k]), file=f) def yield_edges(task_id=0): @@ -90,24 +93,13 @@ def yield_edges(task_id=0): data (dict): data row which is dict. 
""" print("Edge task is {}".format(task_id)) - with open(CORA_CITES_FILE) as cites_file: - cites_reader = csv.reader(cites_file, delimiter=',') + with open("{}/ind.{}.graph".format(CORA_PATH, dataset_str), 'rb') as f: + graph = pkl.load(f, encoding='latin1') line_count = 0 - for row in cites_reader: - if line_count == 0: + for i in graph: + for dst_id in graph[i]: + edge = {'id': line_count, + 'src_id': i, 'dst_id': dst_id, 'type': 0} line_count += 1 - continue - if not row[0] in node_id_map: - print('Source node {} does not exist.'.format(row[0])) - continue - if not row[1] in node_id_map: - print('Destination node {} does not exist.'.format(row[1])) - continue - edge = {'id': line_count, - 'src_id': node_id_map[row[0]], 'dst_id': node_id_map[row[1]], 'type': 0} - yield edge - line_count += 1 + yield edge print('Processed {} lines for edges.'.format(line_count)) - with open(CORA_CONTENT_ID_MAP_FILE, 'w') as f: - for k in node_id_map: - print(k + ',' + str(node_id_map[k]), file=f) diff --git a/example/graph_to_mindrecord/graph_map_schema.py b/example/graph_to_mindrecord/graph_map_schema.py index e131de9f65..1da1ced2f7 100644 --- a/example/graph_to_mindrecord/graph_map_schema.py +++ b/example/graph_to_mindrecord/graph_map_schema.py @@ -16,6 +16,7 @@ Graph data convert tool for MindRecord. 
""" import numpy as np +from mindspore import log as logger __all__ = ['GraphMapSchema'] @@ -41,6 +42,7 @@ class GraphMapSchema: "edge_feature_index": {"type": "int32", "shape": [-1]} } + @property def get_schema(self): """ Get schema @@ -52,6 +54,7 @@ class GraphMapSchema: Set node features profile """ if num_features != len(features_data_type) or num_features != len(features_shape): + logger.info("Node feature profile is not match.") raise ValueError("Node feature profile is not match.") self.num_node_features = num_features @@ -66,6 +69,7 @@ class GraphMapSchema: Set edge features profile """ if num_features != len(features_data_type) or num_features != len(features_shape): + logger.info("Edge feature profile is not match.") raise ValueError("Edge feature profile is not match.") self.num_edge_features = num_features @@ -83,6 +87,10 @@ class GraphMapSchema: Returns: graph data with union schema """ + if node is None: + logger.info("node cannot be None.") + raise ValueError("node cannot be None.") + node_graph = {"first_id": node["id"], "second_id": 0, "third_id": 0, "attribute": 'n', "type": node["type"], "node_feature_index": []} for i in range(self.num_node_features): @@ -117,6 +125,10 @@ class GraphMapSchema: Returns: graph data with union schema """ + if edge is None: + logger.info("edge cannot be None.") + raise ValueError("edge cannot be None.") + edge_graph = {"first_id": edge["id"], "second_id": edge["src_id"], "third_id": edge["dst_id"], "attribute": 'e', "type": edge["type"], "edge_feature_index": []} diff --git a/example/graph_to_mindrecord/sns/__init__.py b/example/graph_to_mindrecord/sns/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/example/graph_to_mindrecord/sns/mr_api.py b/example/graph_to_mindrecord/sns/mr_api.py new file mode 100644 index 0000000000..4e01441601 --- /dev/null +++ b/example/graph_to_mindrecord/sns/mr_api.py @@ -0,0 +1,81 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +User-defined API for MindRecord GNN writer. +""" +social_data = [[348, 350], [348, 327], [348, 329], [348, 331], [348, 335], + [348, 336], [348, 337], [348, 338], [348, 340], [348, 341], + [348, 342], [348, 343], [348, 344], [348, 345], [348, 346], + [348, 347], [347, 351], [347, 327], [347, 329], [347, 331], + [347, 335], [347, 341], [347, 345], [347, 346], [346, 335], + [346, 340], [346, 339], [346, 349], [346, 353], [346, 354], + [346, 341], [346, 345], [345, 335], [345, 336], [345, 341], + [344, 338], [344, 342], [343, 332], [343, 338], [343, 342], + [342, 332], [340, 349], [334, 349], [333, 349], [330, 349], + [328, 349], [359, 349], [358, 352], [358, 349], [358, 354], + [358, 356], [357, 350], [357, 354], [357, 356], [356, 350], + [355, 352], [353, 350], [352, 349], [351, 349], [350, 349]] + +# profile: (num_features, feature_data_types, feature_shapes) +node_profile = (0, [], []) +edge_profile = (0, [], []) + + +def yield_nodes(task_id=0): + """ + Generate node data + + Yields: + data (dict): data row which is dict. 
+ """ + print("Node task is {}".format(task_id)) + node_list = [] + for edge in social_data: + src, dst = edge + if src not in node_list: + node_list.append(src) + if dst not in node_list: + node_list.append(dst) + node_list.sort() + print(node_list) + for node_id in node_list: + node = {'id': node_id, 'type': 1} + yield node + + +def yield_edges(task_id=0): + """ + Generate edge data + + Yields: + data (dict): data row which is dict. + """ + print("Edge task is {}".format(task_id)) + line_count = 0 + for undirected_edge in social_data: + line_count += 1 + edge = { + 'id': line_count, + 'src_id': undirected_edge[0], + 'dst_id': undirected_edge[1], + 'type': 1} + yield edge + line_count += 1 + edge = { + 'id': line_count, + 'src_id': undirected_edge[1], + 'dst_id': undirected_edge[0], + 'type': 1} + yield edge diff --git a/example/graph_to_mindrecord/write_citeseer.sh b/example/graph_to_mindrecord/write_citeseer.sh index 33235372fa..523b2b8850 100644 --- a/example/graph_to_mindrecord/write_citeseer.sh +++ b/example/graph_to_mindrecord/write_citeseer.sh @@ -9,4 +9,4 @@ python writer.py --mindrecord_script citeseer \ --mindrecord_partitions 1 \ --mindrecord_header_size_by_bit 18 \ --mindrecord_page_size_by_bit 20 \ ---graph_api_args "$SRC_PATH/citeseer.content:$SRC_PATH/citeseer.cites" +--graph_api_args "$SRC_PATH" diff --git a/example/graph_to_mindrecord/write_cora.sh b/example/graph_to_mindrecord/write_cora.sh index 84ccf34f5e..fd1b6fc92a 100644 --- a/example/graph_to_mindrecord/write_cora.sh +++ b/example/graph_to_mindrecord/write_cora.sh @@ -9,4 +9,4 @@ python writer.py --mindrecord_script cora \ --mindrecord_partitions 1 \ --mindrecord_header_size_by_bit 18 \ --mindrecord_page_size_by_bit 20 \ ---graph_api_args "$SRC_PATH/cora_content.csv:$SRC_PATH/cora_cites.csv" +--graph_api_args "$SRC_PATH" diff --git a/example/graph_to_mindrecord/write_sns.sh b/example/graph_to_mindrecord/write_sns.sh new file mode 100644 index 0000000000..f564ddc8ff --- /dev/null +++ 
b/example/graph_to_mindrecord/write_sns.sh @@ -0,0 +1,10 @@ +#!/bin/bash +MINDRECORD_PATH=/tmp/sns + +rm -f $MINDRECORD_PATH/* + +python writer.py --mindrecord_script sns \ +--mindrecord_file "$MINDRECORD_PATH/sns" \ +--mindrecord_partitions 1 \ +--mindrecord_header_size_by_bit 14 \ +--mindrecord_page_size_by_bit 15 diff --git a/example/graph_to_mindrecord/writer.py b/example/graph_to_mindrecord/writer.py index 1024c82372..9dce63e265 100644 --- a/example/graph_to_mindrecord/writer.py +++ b/example/graph_to_mindrecord/writer.py @@ -164,7 +164,7 @@ if __name__ == "__main__": num_features, feature_data_types, feature_shapes = mr_api.edge_profile graph_map_schema.set_edge_feature_profile(num_features, feature_data_types, feature_shapes) - graph_schema = graph_map_schema.get_schema() + graph_schema = graph_map_schema.get_schema # init writer writer = init_writer(graph_schema) diff --git a/example/nlp_to_mindrecord/CLUERNER2020/README.md b/example/nlp_to_mindrecord/CLUERNER2020/README.md new file mode 100644 index 0000000000..c862156a47 --- /dev/null +++ b/example/nlp_to_mindrecord/CLUERNER2020/README.md @@ -0,0 +1,82 @@ +# Guideline to Convert Training Data CLUERNER2020 to MindRecord For Bert Fine Tuning + + + +- [What does the example do](#what-does-the-example-do) +- [How to use the example to process CLUERNER2020](#how-to-use-the-example-to-process-cluerner2020) + - [Download CLUERNER2020 and unzip](#download-cluerner2020-and-unzip) + - [Generate MindRecord](#generate-mindrecord) + - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord) + + + + +## What does the example do + +This example is based on [CLUERNER2020](https://www.cluebenchmarks.com/introduce.html) training data, generating MindRecord file, and finally used for Bert Fine Tuning progress. + +1. run.sh: generate MindRecord entry script +2. run_read.py: create MindDataset by MindRecord entry script. + - create_dataset.py: use MindDataset to read MindRecord to generate dataset. 
+ +## How to use the example to process CLUERNER2020 + +Download CLUERNER2020, convert it to MindRecord, use MindDataset to read MindRecord. + +### Download CLUERNER2020 and unzip + +1. Download the training data zip. + > [CLUERNER2020 dataset download address](https://www.cluebenchmarks.com/introduce.html) **-> 任务介绍 -> CLUENER 细粒度命名实体识别 -> cluener下载链接** + +2. Unzip the training data to dir example/nlp_to_mindrecord/CLUERNER2020/cluener_public. + ``` + unzip -d {your-mindspore}/example/nlp_to_mindrecord/CLUERNER2020/data/cluener_public cluener_public.zip + ``` + +### Generate MindRecord + +1. Run the run.sh script. + ```bash + bash run.sh + ``` + +2. Output like this: + ``` + ... + [INFO] ME(17603:139620983514944,MainProcess):2020-04-28-16:56:12.498.235 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['data/train.mindrecord'], and the list of index files are: ['data/train.mindrecord.db'] + ... + [INFO] ME(17603,python):2020-04-28-16:56:13.400.175 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + [INFO] ME(17603,python):2020-04-28-16:56:13.400.863 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + [INFO] ME(17603,python):2020-04-28-16:56:13.401.534 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + [INFO] ME(17603,python):2020-04-28-16:56:13.402.179 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + [INFO] ME(17603,python):2020-04-28-16:56:13.402.702 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + ... + [INFO] ME(17603:139620983514944,MainProcess):2020-04-28-16:56:13.431.208 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['data/dev.mindrecord'], and the list of index files are: ['data/dev.mindrecord.db'] + ``` + +3. 
Generate files like this: + ```bash + $ ls output/ + dev.mindrecord dev.mindrecord.db README.md train.mindrecord train.mindrecord.db + ``` + +### Create MindDataset By MindRecord + +1. Run the run_read.sh script. + ```bash + bash run_read.sh + ``` + +2. Output like this: + ``` + ... + example 1340: input_ids: [ 101 3173 1290 4852 7676 3949 122 3299 123 126 3189 4510 8020 6381 5442 7357 2590 3636 8021 7676 3949 4294 1166 6121 3124 1277 6121 3124 7270 2135 3295 5789 3326 123 126 3189 1355 6134 1093 1325 3173 2399 6590 6791 8024 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1340: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1340: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1340: label_ids: [ 0 18 19 20 2 4 0 0 0 0 0 0 0 34 36 26 27 28 0 34 35 35 35 35 35 35 35 35 35 36 26 27 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1341: input_ids: [ 101 1728 711 4293 3868 1168 2190 2150 3791 934 3633 3428 4638 6237 7025 8024 3297 1400 5310 3362 6206 5023 5401 1744 3297 7770 3791 7368 976 1139 1104 2137 511 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1341: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1341: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1341: label_ids: [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 19 19 19 19 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + ... 
+ ``` diff --git a/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py b/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py new file mode 100644 index 0000000000..616bc71028 --- /dev/null +++ b/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py @@ -0,0 +1,36 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""create MindDataset by MindRecord""" +import mindspore.dataset as ds + +def create_dataset(data_file): + """create MindDataset""" + num_readers = 4 + data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True) + index = 0 + for item in data_set.create_dict_iterator(): + # print("example {}: {}".format(index, item)) + print("example {}: input_ids: {}".format(index, item['input_ids'])) + print("example {}: input_mask: {}".format(index, item['input_mask'])) + print("example {}: segment_ids: {}".format(index, item['segment_ids'])) + print("example {}: label_ids: {}".format(index, item['label_ids'])) + index += 1 + if index % 1000 == 0: + print("read rows: {}".format(index)) + print("total rows: {}".format(index)) + +if __name__ == '__main__': + create_dataset('output/train.mindrecord') + create_dataset('output/dev.mindrecord') diff --git a/example/nlp_to_mindrecord/CLUERNER2020/data/.gitignore b/example/nlp_to_mindrecord/CLUERNER2020/data/.gitignore new file mode 100644 index 
0000000000..cbbd6256c0 --- /dev/null +++ b/example/nlp_to_mindrecord/CLUERNER2020/data/.gitignore @@ -0,0 +1 @@ +cluener_public diff --git a/example/nlp_to_mindrecord/CLUERNER2020/data/README.md b/example/nlp_to_mindrecord/CLUERNER2020/data/README.md new file mode 100644 index 0000000000..b54948808e --- /dev/null +++ b/example/nlp_to_mindrecord/CLUERNER2020/data/README.md @@ -0,0 +1 @@ +## The input dataset diff --git a/example/nlp_to_mindrecord/CLUERNER2020/output/README.md b/example/nlp_to_mindrecord/CLUERNER2020/output/README.md new file mode 100644 index 0000000000..7904933f43 --- /dev/null +++ b/example/nlp_to_mindrecord/CLUERNER2020/output/README.md @@ -0,0 +1 @@ +## output dir diff --git a/example/nlp_to_mindrecord/CLUERNER2020/run.sh b/example/nlp_to_mindrecord/CLUERNER2020/run.sh new file mode 100644 index 0000000000..15c6aa4362 --- /dev/null +++ b/example/nlp_to_mindrecord/CLUERNER2020/run.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +rm -f output/train.mindrecord* +rm -f output/dev.mindrecord* + +if [ ! -d "../../../third_party/to_mindrecord/CLUERNER2020" ]; then + echo "The patch base dir ../../../third_party/to_mindrecord/CLUERNER2020 is not exist." + exit 1 +fi + +if [ ! 
-f "../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch" ]; then + echo "The patch file ../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch is not exist." + exit 1 +fi + +# patch for data_processor_seq.py +patch -p0 -d ../../../third_party/to_mindrecord/CLUERNER2020/ -o data_processor_seq_patched.py < ../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch +if [ $? -ne 0 ]; then + echo "Patch ../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq.py failed" + exit 1 +fi + +# use patched script +python ../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq_patched.py \ +--vocab_file=../../../third_party/to_mindrecord/CLUERNER2020/vocab.txt \ +--label2id_file=../../../third_party/to_mindrecord/CLUERNER2020/label2id.json diff --git a/example/nlp_to_mindrecord/CLUERNER2020/run_read.sh b/example/nlp_to_mindrecord/CLUERNER2020/run_read.sh new file mode 100644 index 0000000000..1ffe4de1cf --- /dev/null +++ b/example/nlp_to_mindrecord/CLUERNER2020/run_read.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +python create_dataset.py diff --git a/example/nlp_to_mindrecord/enwiki/README.md b/example/nlp_to_mindrecord/enwiki/README.md new file mode 100644 index 0000000000..e92e8dbcc6 --- /dev/null +++ b/example/nlp_to_mindrecord/enwiki/README.md @@ -0,0 +1,173 @@ +# Guideline to Convert Training Data enwiki to MindRecord For Bert Pre Training + + + +- [What does the example do](#what-does-the-example-do) +- [How to use the example to process enwiki](#how-to-use-the-example-to-process-enwiki) + - [Download enwiki training data](#download-enwiki-training-data) + - [Process the enwiki](#process-the-enwiki) + - [Generate MindRecord](#generate-mindrecord) + - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord) + + + + +## What does the example do + +This example is based on [enwiki](https://dumps.wikimedia.org/enwiki) training data, generating MindRecord file, and finally used for Bert network training. + +1. run.sh: generate MindRecord entry script. +2. run_read.py: create MindDataset by MindRecord entry script. + - create_dataset.py: use MindDataset to read MindRecord to generate dataset. + +## How to use the example to process enwiki + +Download enwiki data, process it, convert it to MindRecord, use MindDataset to read MindRecord. + +### Download enwiki training data + +> [enwiki dataset download address](https://dumps.wikimedia.org/enwiki) **-> 20200501 -> enwiki-20200501-pages-articles-multistream.xml.bz2** + +### Process the enwiki + +1. Please follow the steps in [process enwiki](https://github.com/mlperf/training/tree/master/language_model/tensorflow/bert) +- All permissions of this step belong to the link address website. + +### Generate MindRecord + +1. Run the run.sh script. + ``` + bash run.sh input_dir output_dir vocab_file + ``` + - input_dir: the directory which contains files like 'part-00251-of-00500'. 
+ - output_dir: which will store the output mindrecord files. + - vocab_file: the vocab file which you can download from other opensource project. + +2. The output like this: + ``` + ... + Begin preprocess Wed Jun 10 09:21:23 CST 2020 + Begin preprocess input file: /mnt/data/results/part-00000-of-00500 + Begin output file: part-00000-of-00500.mindrecord + Total task: 510, processing: 1 + Begin preprocess input file: /mnt/data/results/part-00001-of-00500 + Begin output file: part-00001-of-00500.mindrecord + Total task: 510, processing: 2 + Begin preprocess input file: /mnt/data/results/part-00002-of-00500 + Begin output file: part-00002-of-00500.mindrecord + Total task: 510, processing: 3 + Begin preprocess input file: /mnt/data/results/part-00003-of-00500 + Begin output file: part-00003-of-00500.mindrecord + Total task: 510, processing: 4 + Begin preprocess input file: /mnt/data/results/part-00004-of-00500 + Begin output file: part-00004-of-00500.mindrecord + Total task: 510, processing: 4 + ... + ``` + +3. Generate files like this: + ```bash + $ ls {your_output_dir}/ + part-00000-of-00500.mindrecord part-00000-of-00500.mindrecord.db part-00001-of-00500.mindrecord part-00001-of-00500.mindrecord.db part-00002-of-00500.mindrecord part-00002-of-00500.mindrecord.db ... + ``` + +### Create MindDataset By MindRecord + +1. Run the run_read.sh script. + ```bash + bash run_read.sh input_dir + ``` + - input_dir: the directory which contains mindrecord files. + +2. The output like this: + ``` + ... 
+ example 633: input_ids: [ 101 2043 19781 4305 2140 4520 2041 1010 103 2034 2455 2002 + 7879 2003 1996 2455 1997 103 26378 4160 1012 102 7291 2001 + 1996 103 1011 2343 1997 6327 1010 3423 1998 103 4262 2005 + 1996 2118 1997 2329 3996 103 102 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0] + example 633: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 + 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 633: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 + 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 633: masked_lm_positions: [ 8 17 20 25 33 41 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0] + example 633: masked_lm_ids: [ 1996 16137 1012 3580 2451 1012 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0] + example 633: masked_lm_weights: [1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. + 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 
0. 0. 0. 0. 0. 0. + 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. + 0. 0. 0. 0.] + example 633: next_sentence_labels: [1] + ... + ``` diff --git a/example/nlp_to_mindrecord/enwiki/create_dataset.py b/example/nlp_to_mindrecord/enwiki/create_dataset.py new file mode 100644 index 0000000000..d90d12b7f2 --- /dev/null +++ b/example/nlp_to_mindrecord/enwiki/create_dataset.py @@ -0,0 +1,43 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""create MindDataset by MindRecord""" +import argparse +import mindspore.dataset as ds + +def create_dataset(data_file): + """create MindDataset""" + num_readers = 4 + data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True) + index = 0 + for item in data_set.create_dict_iterator(): + # print("example {}: {}".format(index, item)) + print("example {}: input_ids: {}".format(index, item['input_ids'])) + print("example {}: input_mask: {}".format(index, item['input_mask'])) + print("example {}: segment_ids: {}".format(index, item['segment_ids'])) + print("example {}: masked_lm_positions: {}".format(index, item['masked_lm_positions'])) + print("example {}: masked_lm_ids: {}".format(index, item['masked_lm_ids'])) + print("example {}: masked_lm_weights: {}".format(index, item['masked_lm_weights'])) + print("example {}: next_sentence_labels: {}".format(index, item['next_sentence_labels'])) + index += 1 + if index % 1000 == 0: + print("read rows: {}".format(index)) + print("total rows: {}".format(index)) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--input_file", nargs='+', type=str, help='Input mindreord file') + args = parser.parse_args() + + create_dataset(args.input_file) diff --git a/example/nlp_to_mindrecord/enwiki/run.sh b/example/nlp_to_mindrecord/enwiki/run.sh new file mode 100644 index 0000000000..cf66bed0fd --- /dev/null +++ b/example/nlp_to_mindrecord/enwiki/run.sh @@ -0,0 +1,133 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# -ne 3 ]; then + echo "Usage: $0 input_dir output_dir vocab_file" + exit 1 +fi + +if [ ! -d $1 ]; then + echo "The input dir: $1 is not exist." + exit 1 +fi + +if [ ! -d $2 ]; then + echo "The output dir: $2 is not exist." + exit 1 +fi +rm -fr $2/*.mindrecord* + +if [ ! -f $3 ]; then + echo "The vocab file: $3 is not exist." + exit 1 +fi + +data_dir=$1 +output_dir=$2 +vocab_file=$3 +file_list=() +output_filename=() +file_index=0 + +function getdir() { + elements=`ls $1` + for element in ${elements[*]}; + do + dir_or_file=$1"/"$element + if [ -d $dir_or_file ]; + then + getdir $dir_or_file + else + file_list[$file_index]=$dir_or_file + echo "${dir_or_file}" | tr '/' '\n' > dir_file_list.txt # dir dir file to mapfile + mapfile parent_dir < dir_file_list.txt + rm dir_file_list.txt >/dev/null 2>&1 + tmp_output_filename=${parent_dir[${#parent_dir[@]}-1]}".mindrecord" + output_filename[$file_index]=`echo ${tmp_output_filename} | sed 's/ //g'` + file_index=`expr $file_index + 1` + fi + done +} + +getdir "${data_dir}" +# echo "The input files: "${file_list[@]} +# echo "The output files: "${output_filename[@]} + +if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then + echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist." + exit 1 +fi + +if [ ! 
-f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then + echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist." + exit 1 +fi + +# patch for create_pretraining_data.py +patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch +if [ $? -ne 0 ]; then + echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed" + exit 1 +fi + +# get the cpu core count +num_cpu_core=`cat /proc/cpuinfo | grep "processor" | wc -l` +avaiable_core_size=`expr $num_cpu_core / 3 \* 2` + +echo "Begin preprocess `date`" + +# using patched script to generate mindrecord +file_list_len=`expr ${#file_list[*]} - 1` +for index in $(seq 0 $file_list_len); do + echo "Begin preprocess input file: ${file_list[$index]}" + echo "Begin output file: ${output_filename[$index]}" + python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \ + --input_file=${file_list[$index]} \ + --output_file=${output_dir}/${output_filename[$index]} \ + --partition_number=1 \ + --vocab_file=${vocab_file} \ + --do_lower_case=True \ + --max_seq_length=512 \ + --max_predictions_per_seq=76 \ + --masked_lm_prob=0.15 \ + --random_seed=12345 \ + --dupe_factor=10 >/tmp/${output_filename[$index]}.log 2>&1 & + process_count=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l` + echo "Total task: ${#file_list[*]}, processing: ${process_count}" + if [ $process_count -ge $avaiable_core_size ]; then + while [ 1 ]; do + process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l` + if [ $process_count -gt $process_num ]; then + process_count=$process_num + break; + fi + sleep 2 + done + fi +done + +process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l` +while [ 1 ]; do + if [ $process_num -eq 0 ]; then + break; + 
fi + echo "There are still ${process_num} preprocess running ..." + sleep 2 + process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l` +done + +echo "Preprocess all the data success." +echo "End preprocess `date`" diff --git a/example/nlp_to_mindrecord/enwiki/run_read.sh b/example/nlp_to_mindrecord/enwiki/run_read.sh new file mode 100644 index 0000000000..737e9375c4 --- /dev/null +++ b/example/nlp_to_mindrecord/enwiki/run_read.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# -ne 1 ]; then + echo "Usage: $0 input_dir" + exit 1 +fi + +if [ ! -d $1 ]; then + echo "The input dir: $1 is not exist." 
+ exit 1 +fi + +file_list=() +file_index=0 + +# get all the mindrecord file from output dir +function getdir() { + elements=`ls $1/part-*.mindrecord` + for element in ${elements[*]}; + do + file_list[$file_index]=$element + file_index=`expr $file_index + 1` + done +} + +getdir $1 +echo "Get all the mindrecord files: "${file_list[*]} + +# create dataset for train +python create_dataset.py --input_file ${file_list[*]} diff --git a/example/nlp_to_mindrecord/zhwiki/README.md b/example/nlp_to_mindrecord/zhwiki/README.md new file mode 100644 index 0000000000..1a9de05114 --- /dev/null +++ b/example/nlp_to_mindrecord/zhwiki/README.md @@ -0,0 +1,113 @@ +# Guideline to Convert Training Data zhwiki to MindRecord For Bert Pre Training + + + +- [What does the example do](#what-does-the-example-do) +- [Run simple test](#run-simple-test) +- [How to use the example to process zhwiki](#how-to-use-the-example-to-process-zhwiki) + - [Download zhwiki training data](#download-zhwiki-training-data) + - [Extract the zhwiki](#extract-the-zhwiki) + - [Generate MindRecord](#generate-mindrecord) + - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord) + + + + +## What does the example do + +This example is based on [zhwiki](https://dumps.wikimedia.org/zhwiki) training data, generating MindRecord file, and finally used for Bert network training. + +1. run.sh: generate MindRecord entry script. +2. run_read.py: create MindDataset by MindRecord entry script. + - create_dataset.py: use MindDataset to read MindRecord to generate dataset. + +## Run simple test + +Follow the step: + +```bash +bash run_simple.sh # generate output/simple.mindrecord* by ../../../third_party/to_mindrecord/zhwiki/sample_text.txt +bash run_read_simple.sh # use MindDataset to read output/simple.mindrecord* +``` + +## How to use the example to process zhwiki + +Download zhwiki data, extract it, convert it to MindRecord, use MindDataset to read MindRecord. 
+ +### Download zhwiki training data + +> [zhwiki dataset download address](https://dumps.wikimedia.org/zhwiki) **-> 20200401 -> zhwiki-20200401-pages-articles-multistream.xml.bz2** + +- put the zhwiki-20200401-pages-articles-multistream.xml.bz2 in {your-mindspore}/example/nlp_to_mindrecord/zhwiki/data directory. + +### Extract the zhwiki + +1. Download [wikiextractor](https://github.com/attardi/wikiextractor) script to {your-mindspore}/example/nlp_to_mindrecord/zhwiki/data directory. + + ``` + $ ls data/ + README.md wikiextractor zhwiki-20200401-pages-articles-multistream.xml.bz2 + ``` + +2. Extract the zhwiki. + ```python + python data/wikiextractor/WikiExtractor.py data/zhwiki-20200401-pages-articles-multistream.xml.bz2 --processes 4 --templates data/template --bytes 8M --min_text_length 0 --filter_disambig_pages --output data/extract + ``` + +3. Generate like this: + ``` + $ ls data/extract + AA AB + ``` + +### Generate MindRecord + +1. Run the run.sh script. + ``` + bash run.sh + ``` + > Caution: This process maybe slow, please wait patiently. If you do not have a machine with enough memory and cpu, it is recommended that you modify the script to generate mindrecord in step by step. + +2. The output like this: + ``` + patching file create_pretraining_data_patched.py (read from create_pretraining_data.py) + Begin preprocess input file: ./data/extract/AA/wiki_00 + Begin output file: AAwiki_00.mindrecord + Total task: 5, processing: 1 + Begin preprocess input file: ./data/extract/AA/wiki_01 + Begin output file: AAwiki_01.mindrecord + Total task: 5, processing: 2 + Begin preprocess input file: ./data/extract/AA/wiki_02 + Begin output file: AAwiki_02.mindrecord + Total task: 5, processing: 3 + Begin preprocess input file: ./data/extract/AB/wiki_02 + Begin output file: ABwiki_02.mindrecord + Total task: 5, processing: 4 + ... + ``` + +3. 
Generate files like this: + ```bash + $ ls output/ + AAwiki_00.mindrecord AAwiki_00.mindrecord.db AAwiki_01.mindrecord AAwiki_01.mindrecord.db AAwiki_02.mindrecord AAwiki_02.mindrecord.db ... ABwiki_00.mindrecord ABwiki_00.mindrecord.db ... + ``` + +### Create MindDataset By MindRecord + +1. Run the run_read.sh script. + ```bash + bash run_read.sh + ``` + +2. The output like this: + ``` + ... + example 74: input_ids: [ 101 8168 118 12847 8783 9977 15908 117 8256 9245 11643 8168 8847 8588 11575 8154 8228 143 8384 8376 9197 10241 103 10564 11421 8199 12268 112 161 8228 11541 9586 8436 8174 8363 9864 9702 103 103 119 103 9947 10564 103 8436 8806 11479 103 8912 119 103 103 103 12209 8303 103 8757 8824 117 8256 103 8619 8168 11541 102 11684 8196 103 8228 8847 11523 117 9059 9064 12410 8358 8181 10764 117 11167 11706 9920 148 8332 11390 8936 8205 10951 11997 103 8154 117 103 8670 10467 112 161 10951 13139 12413 117 10288 143 10425 8205 152 10795 8472 8196 103 161 12126 9172 13129 12106 8217 8174 12244 8205 143 103 8461 8277 10628 160 8221 119 102] + example 74: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] + example 74: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] + example 74: masked_lm_positions: [ 6 22 37 38 40 43 47 50 51 52 55 60 67 76 89 92 98 109 120 0] + example 74: masked_lm_ids: [ 8118 8165 8329 8890 8554 8458 119 8850 8565 10392 8174 11467 10291 8181 8549 12718 13139 112 158 0] + example 74: masked_lm_weights: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.] + example 74: next_sentence_labels: [0] + ... 
+ ``` diff --git a/example/nlp_to_mindrecord/zhwiki/create_dataset.py b/example/nlp_to_mindrecord/zhwiki/create_dataset.py new file mode 100644 index 0000000000..d90d12b7f2 --- /dev/null +++ b/example/nlp_to_mindrecord/zhwiki/create_dataset.py @@ -0,0 +1,43 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""create MindDataset by MindRecord""" +import argparse +import mindspore.dataset as ds + +def create_dataset(data_file): + """create MindDataset""" + num_readers = 4 + data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True) + index = 0 + for item in data_set.create_dict_iterator(): + # print("example {}: {}".format(index, item)) + print("example {}: input_ids: {}".format(index, item['input_ids'])) + print("example {}: input_mask: {}".format(index, item['input_mask'])) + print("example {}: segment_ids: {}".format(index, item['segment_ids'])) + print("example {}: masked_lm_positions: {}".format(index, item['masked_lm_positions'])) + print("example {}: masked_lm_ids: {}".format(index, item['masked_lm_ids'])) + print("example {}: masked_lm_weights: {}".format(index, item['masked_lm_weights'])) + print("example {}: next_sentence_labels: {}".format(index, item['next_sentence_labels'])) + index += 1 + if index % 1000 == 0: + print("read rows: {}".format(index)) + print("total rows: {}".format(index)) + 
+if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--input_file", nargs='+', type=str, help='Input mindreord file') + args = parser.parse_args() + + create_dataset(args.input_file) diff --git a/example/nlp_to_mindrecord/zhwiki/data/.gitignore b/example/nlp_to_mindrecord/zhwiki/data/.gitignore new file mode 100644 index 0000000000..f15cab0c89 --- /dev/null +++ b/example/nlp_to_mindrecord/zhwiki/data/.gitignore @@ -0,0 +1,3 @@ +wikiextractor/ +zhwiki-20200401-pages-articles-multistream.xml.bz2 +extract/ diff --git a/example/nlp_to_mindrecord/zhwiki/data/README.md b/example/nlp_to_mindrecord/zhwiki/data/README.md new file mode 100644 index 0000000000..b54948808e --- /dev/null +++ b/example/nlp_to_mindrecord/zhwiki/data/README.md @@ -0,0 +1 @@ +## The input dataset diff --git a/example/nlp_to_mindrecord/zhwiki/output/README.md b/example/nlp_to_mindrecord/zhwiki/output/README.md new file mode 100644 index 0000000000..b7cfba1b47 --- /dev/null +++ b/example/nlp_to_mindrecord/zhwiki/output/README.md @@ -0,0 +1 @@ +## Output the mindrecord diff --git a/example/nlp_to_mindrecord/zhwiki/run.sh b/example/nlp_to_mindrecord/zhwiki/run.sh new file mode 100644 index 0000000000..a057031e6b --- /dev/null +++ b/example/nlp_to_mindrecord/zhwiki/run.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +rm -f output/*.mindrecord* + +data_dir="./data/extract" +file_list=() +output_filename=() +file_index=0 + +function getdir() { + elements=`ls $1` + for element in ${elements[*]}; + do + dir_or_file=$1"/"$element + if [ -d $dir_or_file ]; + then + getdir $dir_or_file + else + file_list[$file_index]=$dir_or_file + echo "${dir_or_file}" | tr '/' '\n' > dir_file_list.txt # dir dir file to mapfile + mapfile parent_dir < dir_file_list.txt + rm dir_file_list.txt >/dev/null 2>&1 + tmp_output_filename=${parent_dir[${#parent_dir[@]}-2]}${parent_dir[${#parent_dir[@]}-1]}".mindrecord" + output_filename[$file_index]=`echo ${tmp_output_filename} | sed 's/ //g'` + file_index=`expr $file_index + 1` + fi + done +} + +getdir "${data_dir}" +# echo "The input files: "${file_list[@]} +# echo "The output files: "${output_filename[@]} + +if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then + echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist." + exit 1 +fi + +if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then + echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist." + exit 1 +fi + +# patch for create_pretraining_data.py +patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch +if [ $? 
-ne 0 ]; then + echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed" + exit 1 +fi + +# get the cpu core count +num_cpu_core=`cat /proc/cpuinfo | grep "processor" | wc -l` +avaiable_core_size=`expr $num_cpu_core / 3 \* 2` + +echo "Begin preprocess `date`" + +# using patched script to generate mindrecord +file_list_len=`expr ${#file_list[*]} - 1` +for index in $(seq 0 $file_list_len); do + echo "Begin preprocess input file: ${file_list[$index]}" + echo "Begin output file: ${output_filename[$index]}" + python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \ + --input_file=${file_list[$index]} \ + --output_file=output/${output_filename[$index]} \ + --partition_number=1 \ + --vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \ + --do_lower_case=True \ + --max_seq_length=128 \ + --max_predictions_per_seq=20 \ + --masked_lm_prob=0.15 \ + --random_seed=12345 \ + --dupe_factor=10 >/tmp/${output_filename[$index]}.log 2>&1 & # user defined + process_count=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l` + echo "Total task: ${#file_list[*]}, processing: ${process_count}" + if [ $process_count -ge $avaiable_core_size ]; then + while [ 1 ]; do + process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l` + if [ $process_count -gt $process_num ]; then + process_count=$process_num + break; + fi + sleep 2 + done + fi +done + +process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l` +while [ 1 ]; do + if [ $process_num -eq 0 ]; then + break; + fi + echo "There are still ${process_num} preprocess running ..." + sleep 2 + process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l` +done + +echo "Preprocess all the data success." 
+echo "End preprocess `date`" diff --git a/example/nlp_to_mindrecord/zhwiki/run_read.sh b/example/nlp_to_mindrecord/zhwiki/run_read.sh new file mode 100644 index 0000000000..3cc368457b --- /dev/null +++ b/example/nlp_to_mindrecord/zhwiki/run_read.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +file_list=() +file_index=0 + +# get all the mindrecord file from output dir +function getdir() { + elements=`ls $1/[A-Z]*.mindrecord` + for element in ${elements[*]}; + do + file_list[$file_index]=$element + file_index=`expr $file_index + 1` + done +} + +getdir "./output" +echo "Get all the mindrecord files: "${file_list[*]} + +# create dataset for train +python create_dataset.py --input_file ${file_list[*]} diff --git a/example/nlp_to_mindrecord/zhwiki/run_read_simple.sh b/example/nlp_to_mindrecord/zhwiki/run_read_simple.sh new file mode 100644 index 0000000000..1c26dec449 --- /dev/null +++ b/example/nlp_to_mindrecord/zhwiki/run_read_simple.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +# create dataset for train +python create_dataset.py --input_file=output/simple.mindrecord0 diff --git a/example/nlp_to_mindrecord/zhwiki/run_simple.sh b/example/nlp_to_mindrecord/zhwiki/run_simple.sh new file mode 100644 index 0000000000..20c1d98d66 --- /dev/null +++ b/example/nlp_to_mindrecord/zhwiki/run_simple.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +rm -f output/simple.mindrecord* + +if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then + echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist." + exit 1 +fi + +if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then + echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist." 
+ exit 1 +fi + +# patch for create_pretraining_data.py +patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch +if [ $? -ne 0 ]; then + echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed" + exit 1 +fi + +# using patched script to generate mindrecord +python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \ +--input_file=../../../third_party/to_mindrecord/zhwiki/sample_text.txt \ +--output_file=output/simple.mindrecord \ +--partition_number=4 \ +--vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \ +--do_lower_case=True \ +--max_seq_length=128 \ +--max_predictions_per_seq=20 \ +--masked_lm_prob=0.15 \ +--random_seed=12345 \ +--dupe_factor=10 # user defined diff --git a/example/resnet50_cifar10/train.py b/example/resnet50_cifar10/train.py index 275f7188a7..323695ae29 100755 --- a/example/resnet50_cifar10/train.py +++ b/example/resnet50_cifar10/train.py @@ -15,6 +15,7 @@ """train_imagenet.""" import os import argparse +import numpy as np from dataset import create_dataset from lr_generator import get_lr from config import config @@ -45,6 +46,7 @@ if __name__ == '__main__': target = args_opt.device_target ckpt_save_dir = config.save_checkpoint_path context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False) + np.random.seed(1) if not args_opt.do_eval and args_opt.run_distribute: if target == "Ascend": device_id = int(os.getenv('DEVICE_ID')) diff --git a/example/resnet50_imagenet2012/train.py b/example/resnet50_imagenet2012/train.py index a76de78f6d..6896320ece 100755 --- a/example/resnet50_imagenet2012/train.py +++ b/example/resnet50_imagenet2012/train.py @@ -15,6 +15,7 @@ """train_imagenet.""" import os import argparse +import numpy as np from dataset import create_dataset from lr_generator import get_lr from config import config @@ -48,6 +49,7 @@ 
if __name__ == '__main__': target = args_opt.device_target ckpt_save_dir = config.save_checkpoint_path context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False) + np.random.seed(1) if not args_opt.do_eval and args_opt.run_distribute: if target == "Ascend": device_id = int(os.getenv('DEVICE_ID')) @@ -77,12 +79,12 @@ if __name__ == '__main__': for _, cell in net.cells_and_names(): if isinstance(cell, nn.Conv2d): cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(), - cell.weight.default_input.shape(), - cell.weight.default_input.dtype()).to_tensor() + cell.weight.default_input.shape, + cell.weight.default_input.dtype).to_tensor() if isinstance(cell, nn.Dense): cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(), - cell.weight.default_input.shape(), - cell.weight.default_input.dtype()).to_tensor() + cell.weight.default_input.shape, + cell.weight.default_input.dtype).to_tensor() if not config.use_label_smooth: config.label_smooth_factor = 0.0 diff --git a/example/resnet50_imagenet2012_THOR/model/dataset_helper.py b/example/resnet50_imagenet2012_THOR/model/dataset_helper.py index 474bccf42f..77f67344c2 100644 --- a/example/resnet50_imagenet2012_THOR/model/dataset_helper.py +++ b/example/resnet50_imagenet2012_THOR/model/dataset_helper.py @@ -15,6 +15,7 @@ """Dataset help for minddata dataset""" from mindspore._checkparam import check_bool from mindspore.parallel._utils import _get_device_num, _get_parallel_mode +from mindspore.train.dataset_helper import _send_data from mindspore.train._utils import _exec_datagraph, _get_types_and_shapes, \ _to_full_shapes from mindspore.train.parallel_utils import ParallelMode @@ -67,7 +68,13 @@ class _DatasetIter: self.loop_size = dataset.get_dataset_size() else: self.loop_size = dataset.__loop_size__ - dataset.__ME_INITED__ = _exec_datagraph(dataset, self.loop_size).queue_name + dataset.__TRANSFER_DATASET__ = _exec_datagraph(dataset, self.loop_size) + 
dataset.__ME_INITED__ = dataset.__TRANSFER_DATASET__.queue_name + + if not hasattr(dataset, '__no_send__'): + _send_data(dataset) + else: + _send_data(dataset) self.ind = 0 self.dataset = dataset diff --git a/example/resnet50_imagenet2012_THOR/model/model_thor.py b/example/resnet50_imagenet2012_THOR/model/model_thor.py index f3418437a3..25e3dd7f82 100644 --- a/example/resnet50_imagenet2012_THOR/model/model_thor.py +++ b/example/resnet50_imagenet2012_THOR/model/model_thor.py @@ -29,7 +29,7 @@ from mindspore.nn.wrap.cell_wrapper import _VirtualDatasetCell from mindspore.parallel._utils import _get_parallel_mode, _get_device_num, _get_global_rank, \ _get_parameter_broadcast, _device_number_check, _parameter_broadcast_check from mindspore.train import amp -from mindspore.train.callback import _InternalCallbackParam, RunContext, _build_callbacks +from mindspore.train.callback import _InternalCallbackParam, RunContext, _CallbackManager from mindspore.train.parallel_utils import ParallelMode from model.dataset_helper import DatasetHelper @@ -374,7 +374,6 @@ class Model: self._train_network.set_broadcast_flag() # build callback list - list_callback = _build_callbacks(callbacks) cb_params = _InternalCallbackParam() cb_params.train_network = self._train_network cb_params.epoch_num = epoch @@ -385,17 +384,17 @@ class Model: cb_params.parallel_mode = self._parallel_mode cb_params.device_number = self._device_number cb_params.train_dataset = train_dataset - cb_params.list_callback = list_callback + cb_params.list_callback = callbacks - if dataset_sink_mode: - if context.get_context("mode") == context.PYNATIVE_MODE: + with _CallbackManager(callbacks) as list_callback: + if not dataset_sink_mode: + self._train_process(epoch, train_dataset, list_callback, cb_params) + elif context.get_context("mode") == context.PYNATIVE_MODE: logger.warning("The pynative mode cannot support dataset sink mode currently." 
"So the training process will be performed with dataset not sink.") self._train_process(epoch, train_dataset, list_callback, cb_params) else: self._train_dataset_sink_process(epoch, train_dataset, list_callback, cb_params) - else: - self._train_process(epoch, train_dataset, list_callback, cb_params) def _train_dataset_sink_process(self, epoch, train_dataset, list_callback=None, cb_params=None): """ @@ -408,7 +407,7 @@ class Model: returned and passed to the network. Otherwise, a tuple (data, label) should be returned, and the data and label are passed to the network and loss function respectively. - list_callback (_ListCallback): Executor of callback list. Default: None. + list_callback (Callback): Executor of callback list. Default: None. cb_params (_InternalCallbackParam): Callback parameters. Default: None. """ iter_first_order = self._frequency - 1 @@ -473,7 +472,7 @@ class Model: returned and passed to the network. Otherwise, a tuple (data, label) should be returned, and the data and label are passed to the network and loss function respectively. - list_callback (_ListCallback): Executor of callback list. Default: None. + list_callback (Callback): Executor of callback list. Default: None. cb_params (_InternalCallbackParam): Callback parameters. Default: None. """ dataset_helper, _ = self._exec_preprocess(self._train_network, @@ -580,7 +579,7 @@ class Model: Args: valid_dataset (Dataset): Dataset to evaluate the model. - list_callback (ListCallback): Executor of callback list. Default: None. + list_callback (Callback): Executor of callback list. Default: None. cb_params (_InternalCallbackParam): Callback parameters. Default: None. Returns: @@ -619,7 +618,7 @@ class Model: Args: valid_dataset (Dataset): Dataset to evaluate the model. - list_callback (ListCallback): Executor of callback list. Default: None. + list_callback (Callback): Executor of callback list. Default: None. cb_params (_InternalCallbackParam): Callback parameters. Default: None. 
Returns: @@ -678,7 +677,6 @@ class Model: if not self._metric_fns: raise ValueError("metric fn can not be None or empty.") - list_callback = _build_callbacks(callbacks) cb_params = _InternalCallbackParam() cb_params.eval_network = self._eval_network cb_params.valid_dataset = valid_dataset @@ -691,9 +689,10 @@ class Model: self._clear_metrics() - if dataset_sink_mode: - return self._eval_dataset_sink_process(valid_dataset, list_callback, cb_params) - return self._eval_process(valid_dataset, list_callback, cb_params) + with _CallbackManager(callbacks) as list_callback: + if dataset_sink_mode: + return self._eval_dataset_sink_process(valid_dataset, list_callback, cb_params) + return self._eval_process(valid_dataset, list_callback, cb_params) def predict(self, *predict_data): """ diff --git a/example/resnet50_imagenet2012_THOR/model/thor.py b/example/resnet50_imagenet2012_THOR/model/thor.py index 0da1714fe6..6786cb7485 100644 --- a/example/resnet50_imagenet2012_THOR/model/thor.py +++ b/example/resnet50_imagenet2012_THOR/model/thor.py @@ -151,6 +151,8 @@ class THOR(Optimizer): temp_g = self.mul(temp_g, matrix_G_inv_max) temp_max = self.mul(matrix_A_max_allreduce[i], matrix_G_max_allreduce[i]) temp_max = self.mul(temp_max, self.feature_map[i]) + temp_a = self.cast(temp_a, mstype.float16) + temp_g = self.cast(temp_g, mstype.float16) if i == 53: g = self.cube_matmul_left_fc(temp_g, g) g = self.cube_matmul_right_fc(g, temp_a, temp_max) diff --git a/example/resnet50_imagenet2012_THOR/model/thor_layer.py b/example/resnet50_imagenet2012_THOR/model/thor_layer.py index fea74605b6..d84cbf7a93 100644 --- a/example/resnet50_imagenet2012_THOR/model/thor_layer.py +++ b/example/resnet50_imagenet2012_THOR/model/thor_layer.py @@ -13,6 +13,8 @@ # limitations under the License. 
# ============================================================================ """thor_layer""" +import numpy as np + import mindspore as ms import mindspore.common.dtype as mstype from mindspore._checkparam import check_bool, twice, check_int_positive @@ -23,7 +25,6 @@ from mindspore.common.tensor import Tensor from mindspore.nn.cell import Cell from mindspore.nn.layer.activation import get_activation from mindspore.ops import operations as P -import numpy as np C0 = 16 def caculate_device_shape(matrix_dim, channel, is_A): @@ -171,7 +172,6 @@ class Conv2d_Thor(_Conv): self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False) self.fake_G = Tensor( np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape)) - self.fake_G_inv_max = Tensor(np.zeros([1,]).astype(np.float32)) self.shape = P.Shape() self.reshape = P.Reshape() @@ -286,7 +286,6 @@ class Conv2d_Thor(_Conv): matrix_A_inv = self.device_shape_pad(matrix_A_inv) matrix_A_inv = self.reshape(matrix_A_inv, self.matrix_A_device_temp_shape) matrix_A_inv = self.transpose(matrix_A_inv, (2, 0, 1, 3)) - self.G_inv_max = self.fake_G_inv_max self.matrix_A_inv = matrix_A_inv self.matrix_G_inv = self.fake_G out = self.conv2d(x, self.weight) @@ -339,15 +338,15 @@ class Dense_Thor(Cell): self.has_bias = check_bool(has_bias) self.thor = True if isinstance(weight_init, Tensor): - if weight_init.dim() != 2 or weight_init.shape()[0] != out_channels or \ - weight_init.shape()[1] != in_channels: + if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \ + weight_init.shape[1] != in_channels: raise ValueError("weight_init shape error") self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight") if self.has_bias: if isinstance(bias_init, Tensor): - if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels: + if bias_init.dim() != 1 or bias_init.shape[0] != out_channels: raise ValueError("bias_init 
shape error") self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias") diff --git a/example/resnet50_imagenet2012_THOR/train.py b/example/resnet50_imagenet2012_THOR/train.py index 881f3cf598..309018da57 100644 --- a/example/resnet50_imagenet2012_THOR/train.py +++ b/example/resnet50_imagenet2012_THOR/train.py @@ -17,6 +17,8 @@ import argparse import os import random +import numpy as np + from mindspore import Tensor from mindspore import context from mindspore.communication.management import init @@ -28,7 +30,6 @@ from model.model_thor import Model from model.resnet import resnet50 from model.thor import THOR -import numpy as np from config import config from crossentropy import CrossEntropy from dataset_imagenet import create_dataset diff --git a/example/ssd_coco2017/README.md b/example/ssd_coco2017/README.md deleted file mode 100644 index bd43344b8b..0000000000 --- a/example/ssd_coco2017/README.md +++ /dev/null @@ -1,88 +0,0 @@ -# SSD Example - -## Description - -SSD network based on MobileNetV2, with support for training and evaluation. - -## Requirements - -- Install [MindSpore](https://www.mindspore.cn/install/en). - -- Dataset - - We use coco2017 as training dataset in this example by default, and you can also use your own datasets. - - 1. If coco dataset is used. **Select dataset to coco when run script.** - Install Cython and pycocotool. - - ``` - pip install Cython - - pip install pycocotools - ``` - And change the COCO_ROOT and other settings you need in `config.py`. The directory structure is as follows: - - - ``` - └─coco2017 - ├── annotations # annotation jsons - ├── train2017 # train dataset - └── val2017 # infer dataset - ``` - - 2. If your own dataset is used. 
**Select dataset to other when run script.** - Organize the dataset infomation into a TXT file, each row in the file is as follows: - - ``` - train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2 - ``` - - Each row is an image annotation which split by space, the first column is a relative path of image, the others are box and class infomations of the format [xmin,ymin,xmax,ymax,class]. We read image from an image path joined by the `IMAGE_DIR`(dataset directory) and the relative path in `ANNO_PATH`(the TXT file path), `IMAGE_DIR` and `ANNO_PATH` are setting in `config.py`. - - -## Running the example - -### Training - -To train the model, run `train.py`. If the `MINDRECORD_DIR` is empty, it will generate [mindrecord](https://www.mindspore.cn/tutorial/en/master/use/data_preparation/converting_datasets.html) files by `COCO_ROOT`(coco dataset) or `IMAGE_DIR` and `ANNO_PATH`(own dataset). **Note if MINDRECORD_DIR isn't empty, it will use MINDRECORD_DIR instead of raw images.** - - -- Stand alone mode - - ``` - python train.py --dataset coco - - ``` - - You can run ```python train.py -h``` to get more information. - - -- Distribute mode - - ``` - sh run_distribute_train.sh 8 150 coco /data/hccl.json - ``` - - The input parameters are device numbers, epoch size, dataset mode and [hccl json configuration file](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). **It is better to use absolute path.** - -You will get the loss value of each step as following: - -``` -epoch: 1 step: 455, loss is 5.8653416 -epoch: 2 step: 455, loss is 5.4292373 -epoch: 3 step: 455, loss is 5.458992 -... -epoch: 148 step: 455, loss is 1.8340507 -epoch: 149 step: 455, loss is 2.0876894 -epoch: 150 step: 455, loss is 2.239692 -``` - -### Evaluation - -for evaluation , run `eval.py` with `ckpt_path`. `ckpt_path` is the path of [checkpoint](https://www.mindspore.cn/tutorial/en/master/use/saving_and_loading_model_parameters.html) file. 
- -``` -python eval.py --ckpt_path ssd.ckpt --dataset coco -``` - -You can run ```python eval.py -h``` to get more information. diff --git a/example/ssd_coco2017/config.py b/example/ssd_coco2017/config.py deleted file mode 100644 index 452aaf9700..0000000000 --- a/example/ssd_coco2017/config.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Config parameters for SSD models.""" - - -class ConfigSSD: - """ - Config parameters for SSD. - - Examples: - ConfigSSD(). - """ - IMG_SHAPE = [300, 300] - NUM_SSD_BOXES = 1917 - NEG_PRE_POSITIVE = 3 - MATCH_THRESHOLD = 0.5 - - NUM_DEFAULT = [3, 6, 6, 6, 6, 6] - EXTRAS_IN_CHANNELS = [256, 576, 1280, 512, 256, 256] - EXTRAS_OUT_CHANNELS = [576, 1280, 512, 256, 256, 128] - EXTRAS_STRIDES = [1, 1, 2, 2, 2, 2] - EXTRAS_RATIO = [0.2, 0.2, 0.2, 0.25, 0.5, 0.25] - FEATURE_SIZE = [19, 10, 5, 3, 2, 1] - SCALES = [21, 45, 99, 153, 207, 261, 315] - ASPECT_RATIOS = [(1,), (2, 3), (2, 3), (2, 3), (2, 3), (2, 3)] - STEPS = (16, 32, 64, 100, 150, 300) - PRIOR_SCALING = (0.1, 0.2) - - - # `MINDRECORD_DIR` and `COCO_ROOT` are better to use absolute path. 
- MINDRECORD_DIR = "MindRecord_COCO" - COCO_ROOT = "coco2017" - TRAIN_DATA_TYPE = "train2017" - VAL_DATA_TYPE = "val2017" - INSTANCES_SET = "annotations/instances_{}.json" - COCO_CLASSES = ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire', 'hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', - 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', - 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', - 'kite', 'baseball bat', 'baseball glove', 'skateboard', - 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', - 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', - 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', - 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', - 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', - 'keyboard', 'cell phone', 'microwave oven', 'toaster', 'sink', - 'refrigerator', 'book', 'clock', 'vase', 'scissors', - 'teddy bear', 'hair drier', 'toothbrush') - NUM_CLASSES = len(COCO_CLASSES) diff --git a/example/ssd_coco2017/dataset.py b/example/ssd_coco2017/dataset.py deleted file mode 100644 index b88b22c862..0000000000 --- a/example/ssd_coco2017/dataset.py +++ /dev/null @@ -1,375 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""SSD dataset""" -from __future__ import division - -import os -import math -import itertools as it -import numpy as np -import cv2 - -import mindspore.dataset as de -import mindspore.dataset.transforms.vision.c_transforms as C -from mindspore.mindrecord import FileWriter -from config import ConfigSSD - -config = ConfigSSD() - -class GeneratDefaultBoxes(): - """ - Generate Default boxes for SSD, follows the order of (W, H, archor_sizes). - `self.default_boxes` has a shape of [archor_sizes, H, W, 4], the last dimension is [x, y, w, h]. - `self.default_boxes_ltrb` has a shape as `self.default_boxes`, the last dimension is [x1, y1, x2, y2]. - """ - def __init__(self): - fk = config.IMG_SHAPE[0] / np.array(config.STEPS) - self.default_boxes = [] - for idex, feature_size in enumerate(config.FEATURE_SIZE): - sk1 = config.SCALES[idex] / config.IMG_SHAPE[0] - sk2 = config.SCALES[idex + 1] / config.IMG_SHAPE[0] - sk3 = math.sqrt(sk1 * sk2) - - if config.NUM_DEFAULT[idex] == 3: - all_sizes = [(0.5, 1.0), (1.0, 1.0), (1.0, 0.5)] - else: - all_sizes = [(sk1, sk1), (sk3, sk3)] - for aspect_ratio in config.ASPECT_RATIOS[idex]: - w, h = sk1 * math.sqrt(aspect_ratio), sk1 / math.sqrt(aspect_ratio) - all_sizes.append((w, h)) - all_sizes.append((h, w)) - - assert len(all_sizes) == config.NUM_DEFAULT[idex] - - for i, j in it.product(range(feature_size), repeat=2): - for w, h in all_sizes: - cx, cy = (j + 0.5) / fk[idex], (i + 0.5) / fk[idex] - box = [np.clip(k, 0, 1) for k in (cx, cy, w, h)] - self.default_boxes.append(box) - - def to_ltrb(cx, cy, w, h): - return cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2 - - # For IoU calculation - self.default_boxes_ltrb = np.array(tuple(to_ltrb(*i) for i in self.default_boxes), dtype='float32') - self.default_boxes = np.array(self.default_boxes, dtype='float32') - - -default_boxes_ltrb = GeneratDefaultBoxes().default_boxes_ltrb -default_boxes = 
GeneratDefaultBoxes().default_boxes -x1, y1, x2, y2 = np.split(default_boxes_ltrb[:, :4], 4, axis=-1) -vol_anchors = (x2 - x1) * (y2 - y1) -matching_threshold = config.MATCH_THRESHOLD - - -def ssd_bboxes_encode(boxes): - """ - Labels anchors with ground truth inputs. - - Args: - boxex: ground truth with shape [N, 5], for each row, it stores [x, y, w, h, cls]. - - Returns: - gt_loc: location ground truth with shape [num_anchors, 4]. - gt_label: class ground truth with shape [num_anchors, 1]. - num_matched_boxes: number of positives in an image. - """ - - def jaccard_with_anchors(bbox): - """Compute jaccard score a box and the anchors.""" - # Intersection bbox and volume. - xmin = np.maximum(x1, bbox[0]) - ymin = np.maximum(y1, bbox[1]) - xmax = np.minimum(x2, bbox[2]) - ymax = np.minimum(y2, bbox[3]) - w = np.maximum(xmax - xmin, 0.) - h = np.maximum(ymax - ymin, 0.) - - # Volumes. - inter_vol = h * w - union_vol = vol_anchors + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) - inter_vol - jaccard = inter_vol / union_vol - return np.squeeze(jaccard) - - pre_scores = np.zeros((config.NUM_SSD_BOXES), dtype=np.float32) - t_boxes = np.zeros((config.NUM_SSD_BOXES, 4), dtype=np.float32) - t_label = np.zeros((config.NUM_SSD_BOXES), dtype=np.int64) - for bbox in boxes: - label = int(bbox[4]) - scores = jaccard_with_anchors(bbox) - mask = (scores > matching_threshold) - if not np.any(mask): - mask[np.argmax(scores)] = True - - mask = mask & (scores > pre_scores) - pre_scores = np.maximum(pre_scores, scores) - t_label = mask * label + (1 - mask) * t_label - for i in range(4): - t_boxes[:, i] = mask * bbox[i] + (1 - mask) * t_boxes[:, i] - - index = np.nonzero(t_label) - - # Transform to ltrb. - bboxes = np.zeros((config.NUM_SSD_BOXES, 4), dtype=np.float32) - bboxes[:, [0, 1]] = (t_boxes[:, [0, 1]] + t_boxes[:, [2, 3]]) / 2 - bboxes[:, [2, 3]] = t_boxes[:, [2, 3]] - t_boxes[:, [0, 1]] - - # Encode features. 
- bboxes_t = bboxes[index] - default_boxes_t = default_boxes[index] - bboxes_t[:, :2] = (bboxes_t[:, :2] - default_boxes_t[:, :2]) / (default_boxes_t[:, 2:] * config.PRIOR_SCALING[0]) - bboxes_t[:, 2:4] = np.log(bboxes_t[:, 2:4] / default_boxes_t[:, 2:4]) / config.PRIOR_SCALING[1] - bboxes[index] = bboxes_t - - num_match_num = np.array([len(np.nonzero(t_label)[0])], dtype=np.int32) - return bboxes, t_label.astype(np.int32), num_match_num - -def ssd_bboxes_decode(boxes, index): - """Decode predict boxes to [x, y, w, h]""" - boxes_t = boxes[index] - default_boxes_t = default_boxes[index] - boxes_t[:, :2] = boxes_t[:, :2] * config.PRIOR_SCALING[0] * default_boxes_t[:, 2:] + default_boxes_t[:, :2] - boxes_t[:, 2:4] = np.exp(boxes_t[:, 2:4] * config.PRIOR_SCALING[1]) * default_boxes_t[:, 2:4] - - bboxes = np.zeros((len(boxes_t), 4), dtype=np.float32) - - bboxes[:, [0, 1]] = boxes_t[:, [0, 1]] - boxes_t[:, [2, 3]] / 2 - bboxes[:, [2, 3]] = boxes_t[:, [0, 1]] + boxes_t[:, [2, 3]] / 2 - - return bboxes - -def preprocess_fn(image, box, is_training): - """Preprocess function for dataset.""" - - def _rand(a=0., b=1.): - """Generate random.""" - return np.random.rand() * (b - a) + a - - def _infer_data(image, input_shape, box): - img_h, img_w, _ = image.shape - input_h, input_w = input_shape - - scale = min(float(input_w) / float(img_w), float(input_h) / float(img_h)) - nw = int(img_w * scale) - nh = int(img_h * scale) - - image = cv2.resize(image, (nw, nh)) - - new_image = np.zeros((input_h, input_w, 3), np.float32) - dh = (input_h - nh) // 2 - dw = (input_w - nw) // 2 - new_image[dh: (nh + dh), dw: (nw + dw), :] = image - image = new_image - - #When the channels of image is 1 - if len(image.shape) == 2: - image = np.expand_dims(image, axis=-1) - image = np.concatenate([image, image, image], axis=-1) - - box = box.astype(np.float32) - - box[:, [0, 2]] = (box[:, [0, 2]] * scale + dw) / input_w - box[:, [1, 3]] = (box[:, [1, 3]] * scale + dh) / input_h - return image, 
np.array((img_h, img_w), np.float32), box - - def _data_aug(image, box, is_training, image_size=(300, 300)): - """Data augmentation function.""" - ih, iw, _ = image.shape - w, h = image_size - - if not is_training: - return _infer_data(image, image_size, box) - # Random settings - scale_w = _rand(0.75, 1.25) - scale_h = _rand(0.75, 1.25) - - flip = _rand() < .5 - nw = iw * scale_w - nh = ih * scale_h - scale = min(w / nw, h / nh) - nw = int(scale * nw) - nh = int(scale * nh) - - # Resize image - image = cv2.resize(image, (nw, nh)) - - # place image - new_image = np.zeros((h, w, 3), dtype=np.float32) - dw = (w - nw) // 2 - dh = (h - nh) // 2 - new_image[dh:dh + nh, dw:dw + nw, :] = image - image = new_image - - # Flip image or not - if flip: - image = cv2.flip(image, 1, dst=None) - - # Convert image to gray or not - gray = _rand() < .25 - if gray: - image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - - # When the channels of image is 1 - if len(image.shape) == 2: - image = np.expand_dims(image, axis=-1) - image = np.concatenate([image, image, image], axis=-1) - - box = box.astype(np.float32) - - # Transform box with shape[x1, y1, x2, y2]. - box[:, [0, 2]] = (box[:, [0, 2]] * scale * scale_w + dw) / w - box[:, [1, 3]] = (box[:, [1, 3]] * scale * scale_h + dh) / h - - if flip: - box[:, [0, 2]] = 1 - box[:, [2, 0]] - - box, label, num_match_num = ssd_bboxes_encode(box) - return image, box, label, num_match_num - return _data_aug(image, box, is_training, image_size=config.IMG_SHAPE) - - -def create_coco_label(is_training): - """Get image path and annotation from COCO.""" - from pycocotools.coco import COCO - - coco_root = config.COCO_ROOT - data_type = config.VAL_DATA_TYPE - if is_training: - data_type = config.TRAIN_DATA_TYPE - - #Classes need to train or test. 
- train_cls = config.COCO_CLASSES - train_cls_dict = {} - for i, cls in enumerate(train_cls): - train_cls_dict[cls] = i - - anno_json = os.path.join(coco_root, config.INSTANCES_SET.format(data_type)) - - coco = COCO(anno_json) - classs_dict = {} - cat_ids = coco.loadCats(coco.getCatIds()) - for cat in cat_ids: - classs_dict[cat["id"]] = cat["name"] - - image_ids = coco.getImgIds() - image_files = [] - image_anno_dict = {} - - for img_id in image_ids: - image_info = coco.loadImgs(img_id) - file_name = image_info[0]["file_name"] - anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None) - anno = coco.loadAnns(anno_ids) - image_path = os.path.join(coco_root, data_type, file_name) - annos = [] - for label in anno: - bbox = label["bbox"] - class_name = classs_dict[label["category_id"]] - if class_name in train_cls: - x_min, x_max = bbox[0], bbox[0] + bbox[2] - y_min, y_max = bbox[1], bbox[1] + bbox[3] - annos.append(list(map(round, [x_min, y_min, x_max, y_max])) + [train_cls_dict[class_name]]) - if len(annos) >= 1: - image_files.append(image_path) - image_anno_dict[image_path] = np.array(annos) - return image_files, image_anno_dict - - -def anno_parser(annos_str): - """Parse annotation from string to list.""" - annos = [] - for anno_str in annos_str: - anno = list(map(int, anno_str.strip().split(','))) - annos.append(anno) - return annos - - -def filter_valid_data(image_dir, anno_path): - """Filter valid image file, which both in image_dir and anno_path.""" - image_files = [] - image_anno_dict = {} - if not os.path.isdir(image_dir): - raise RuntimeError("Path given is not valid.") - if not os.path.isfile(anno_path): - raise RuntimeError("Annotation file is not valid.") - - with open(anno_path, "rb") as f: - lines = f.readlines() - for line in lines: - line_str = line.decode("utf-8").strip() - line_split = str(line_str).split(' ') - file_name = line_split[0] - image_path = os.path.join(image_dir, file_name) - if os.path.isfile(image_path): - image_anno_dict[image_path] = 
anno_parser(line_split[1:]) - image_files.append(image_path) - return image_files, image_anno_dict - - -def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="ssd.mindrecord", file_num=8): - """Create MindRecord file.""" - mindrecord_dir = config.MINDRECORD_DIR - mindrecord_path = os.path.join(mindrecord_dir, prefix) - writer = FileWriter(mindrecord_path, file_num) - if dataset == "coco": - image_files, image_anno_dict = create_coco_label(is_training) - else: - image_files, image_anno_dict = filter_valid_data(config.IMAGE_DIR, config.ANNO_PATH) - - ssd_json = { - "image": {"type": "bytes"}, - "annotation": {"type": "int32", "shape": [-1, 5]}, - } - writer.add_schema(ssd_json, "ssd_json") - - for image_name in image_files: - with open(image_name, 'rb') as f: - img = f.read() - annos = np.array(image_anno_dict[image_name], dtype=np.int32) - row = {"image": img, "annotation": annos} - writer.write_raw_data([row]) - writer.commit() - - -def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num=1, rank=0, - is_training=True, num_parallel_workers=4): - """Creatr SSD dataset with MindDataset.""" - ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank, - num_parallel_workers=num_parallel_workers, shuffle=is_training) - decode = C.Decode() - ds = ds.map(input_columns=["image"], operations=decode) - compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training)) - - if is_training: - hwc_to_chw = C.HWC2CHW() - ds = ds.map(input_columns=["image", "annotation"], - output_columns=["image", "box", "label", "num_match_num"], - columns_order=["image", "box", "label", "num_match_num"], - operations=compose_map_func, python_multiprocessing=True, num_parallel_workers=num_parallel_workers) - ds = ds.map(input_columns=["image"], operations=hwc_to_chw, python_multiprocessing=True, - num_parallel_workers=num_parallel_workers) - ds = ds.batch(batch_size, 
drop_remainder=True) - ds = ds.repeat(repeat_num) - else: - hwc_to_chw = C.HWC2CHW() - ds = ds.map(input_columns=["image", "annotation"], - output_columns=["image", "image_shape", "annotation"], - columns_order=["image", "image_shape", "annotation"], - operations=compose_map_func) - ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers) - ds = ds.batch(batch_size, drop_remainder=True) - ds = ds.repeat(repeat_num) - return ds diff --git a/example/ssd_coco2017/util.py b/example/ssd_coco2017/util.py deleted file mode 100644 index 6e10285375..0000000000 --- a/example/ssd_coco2017/util.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""metrics utils""" - -import numpy as np -from config import ConfigSSD -from dataset import ssd_bboxes_decode - - -def calc_iou(bbox_pred, bbox_ground): - """Calculate iou of predicted bbox and ground truth.""" - bbox_pred = np.expand_dims(bbox_pred, axis=0) - - pred_w = bbox_pred[:, 2] - bbox_pred[:, 0] - pred_h = bbox_pred[:, 3] - bbox_pred[:, 1] - pred_area = pred_w * pred_h - - gt_w = bbox_ground[:, 2] - bbox_ground[:, 0] - gt_h = bbox_ground[:, 3] - bbox_ground[:, 1] - gt_area = gt_w * gt_h - - iw = np.minimum(bbox_pred[:, 2], bbox_ground[:, 2]) - np.maximum(bbox_pred[:, 0], bbox_ground[:, 0]) - ih = np.minimum(bbox_pred[:, 3], bbox_ground[:, 3]) - np.maximum(bbox_pred[:, 1], bbox_ground[:, 1]) - - iw = np.maximum(iw, 0) - ih = np.maximum(ih, 0) - intersection_area = iw * ih - - union_area = pred_area + gt_area - intersection_area - union_area = np.maximum(union_area, np.finfo(float).eps) - - iou = intersection_area * 1. 
/ union_area - return iou - - -def apply_nms(all_boxes, all_scores, thres, max_boxes): - """Apply NMS to bboxes.""" - x1 = all_boxes[:, 0] - y1 = all_boxes[:, 1] - x2 = all_boxes[:, 2] - y2 = all_boxes[:, 3] - areas = (x2 - x1 + 1) * (y2 - y1 + 1) - - order = all_scores.argsort()[::-1] - keep = [] - - while order.size > 0: - i = order[0] - keep.append(i) - - if len(keep) >= max_boxes: - break - - xx1 = np.maximum(x1[i], x1[order[1:]]) - yy1 = np.maximum(y1[i], y1[order[1:]]) - xx2 = np.minimum(x2[i], x2[order[1:]]) - yy2 = np.minimum(y2[i], y2[order[1:]]) - - w = np.maximum(0.0, xx2 - xx1 + 1) - h = np.maximum(0.0, yy2 - yy1 + 1) - inter = w * h - - ovr = inter / (areas[i] + areas[order[1:]] - inter) - - inds = np.where(ovr <= thres)[0] - - order = order[inds + 1] - return keep - - -def calc_ap(recall, precision): - """Calculate AP.""" - correct_recall = np.concatenate(([0.], recall, [1.])) - correct_precision = np.concatenate(([0.], precision, [0.])) - - for i in range(correct_recall.size - 1, 0, -1): - correct_precision[i - 1] = np.maximum(correct_precision[i - 1], correct_precision[i]) - - i = np.where(correct_recall[1:] != correct_recall[:-1])[0] - - ap = np.sum((correct_recall[i + 1] - correct_recall[i]) * correct_precision[i + 1]) - - return ap - -def metrics(pred_data): - """Calculate mAP of predicted bboxes.""" - config = ConfigSSD() - num_classes = config.NUM_CLASSES - - all_detections = [None for i in range(num_classes)] - all_pred_scores = [None for i in range(num_classes)] - all_annotations = [None for i in range(num_classes)] - average_precisions = {} - num = [0 for i in range(num_classes)] - accurate_num = [0 for i in range(num_classes)] - - for sample in pred_data: - pred_boxes = sample['boxes'] - boxes_scores = sample['box_scores'] - annotation = sample['annotation'] - - annotation = np.squeeze(annotation, axis=0) - - pred_labels = np.argmax(boxes_scores, axis=-1) - index = np.nonzero(pred_labels) - pred_boxes = ssd_bboxes_decode(pred_boxes, index) 
- - pred_boxes = pred_boxes.clip(0, 1) - boxes_scores = np.max(boxes_scores, axis=-1) - boxes_scores = boxes_scores[index] - pred_labels = pred_labels[index] - - top_k = 50 - - for c in range(1, num_classes): - if len(pred_labels) >= 1: - class_box_scores = boxes_scores[pred_labels == c] - class_boxes = pred_boxes[pred_labels == c] - - nms_index = apply_nms(class_boxes, class_box_scores, config.MATCH_THRESHOLD, top_k) - - class_boxes = class_boxes[nms_index] - class_box_scores = class_box_scores[nms_index] - - cmask = class_box_scores > 0.5 - class_boxes = class_boxes[cmask] - class_box_scores = class_box_scores[cmask] - - all_detections[c] = class_boxes - all_pred_scores[c] = class_box_scores - - for c in range(1, num_classes): - if len(annotation) >= 1: - all_annotations[c] = annotation[annotation[:, 4] == c, :4] - - for c in range(1, num_classes): - false_positives = np.zeros((0,)) - true_positives = np.zeros((0,)) - scores = np.zeros((0,)) - num_annotations = 0.0 - - annotations = all_annotations[c] - num_annotations += annotations.shape[0] - detections = all_detections[c] - pred_scores = all_pred_scores[c] - - for index, detection in enumerate(detections): - scores = np.append(scores, pred_scores[index]) - if len(annotations) >= 1: - IoUs = calc_iou(detection, annotations) - assigned_anno = np.argmax(IoUs) - max_overlap = IoUs[assigned_anno] - - if max_overlap >= 0.5: - false_positives = np.append(false_positives, 0) - true_positives = np.append(true_positives, 1) - else: - false_positives = np.append(false_positives, 1) - true_positives = np.append(true_positives, 0) - else: - false_positives = np.append(false_positives, 1) - true_positives = np.append(true_positives, 0) - - if num_annotations == 0: - if c not in average_precisions.keys(): - average_precisions[c] = 0 - continue - accurate_num[c] = 1 - indices = np.argsort(-scores) - false_positives = false_positives[indices] - true_positives = true_positives[indices] - - false_positives = 
np.cumsum(false_positives) - true_positives = np.cumsum(true_positives) - - recall = true_positives * 1. / num_annotations - precision = true_positives * 1. / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps) - - average_precision = calc_ap(recall, precision) - - if c not in average_precisions.keys(): - average_precisions[c] = average_precision - else: - average_precisions[c] += average_precision - - num[c] += 1 - - count = 0 - for key in average_precisions: - if num[key] != 0: - count += (average_precisions[key] / num[key]) - - mAP = count * 1. / accurate_num.count(1) - return mAP diff --git a/graphengine b/graphengine index c27e428e96..8891f0546c 160000 --- a/graphengine +++ b/graphengine @@ -1 +1 @@ -Subproject commit c27e428e9698dd4f9b198008596676bc2d1b49aa +Subproject commit 8891f0546c4a250095ff68e1262f58772b938fd9 diff --git a/include/inference.h b/include/inference.h new file mode 100644 index 0000000000..7e5ee27d49 --- /dev/null +++ b/include/inference.h @@ -0,0 +1,44 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_INCLUDE_MS_SESSION_H +#define MINDSPORE_INCLUDE_MS_SESSION_H + +#include +#include +#include +#include "include/ms_tensor.h" + +namespace mindspore { +class FuncGraph; +namespace inference { +class MS_API MSSession { + public: + MSSession() = default; + + static std::shared_ptr CreateSession(const std::string &device, uint32_t device_id); + + virtual uint32_t CompileGraph(std::shared_ptr funcGraphPtr) = 0; + + virtual MultiTensor RunGraph(uint32_t graph_id, const std::vector> &inputs) = 0; +}; + +std::shared_ptr MS_API LoadModel(const char *model_buf, size_t size, const std::string &device); + +void MS_API ExitInference(); +} // namespace inference +} // namespace mindspore +#endif // MINDSPORE_INCLUDE_MS_SESSION_H diff --git a/include/ms_tensor.h b/include/ms_tensor.h new file mode 100644 index 0000000000..1f9661df5e --- /dev/null +++ b/include/ms_tensor.h @@ -0,0 +1,69 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_INCLUDE_MS_TENSOR_H_ +#define MINDSPORE_INCLUDE_MS_TENSOR_H_ + +#include +#include +#include +#include "ir/dtype/type_id.h" + +namespace mindspore { +#define MS_API __attribute__((visibility("default"))) +namespace inference { +class MS_API MSTensor { + public: + MSTensor() = default; + // brief Create a MSTensor pointer. + // + // param data_type DataTypeId of tensor to be created. + // param shape Shape of tensor to be created. 
+ // return MSTensor pointer. + static MSTensor *CreateTensor(TypeId data_type, const std::vector &shape); + + ~MSTensor() = default; + + virtual TypeId data_type() const = 0; + + virtual TypeId set_data_type(const TypeId data_type) = 0; + + virtual std::vector shape() const = 0; + + virtual size_t set_shape(const std::vector &shape) = 0; + + virtual int DimensionSize(size_t index) const = 0; + // brief Get number of element in MSTensor. + // + // return Number of element in MSTensor. + virtual int ElementsNum() const = 0; + + virtual std::size_t hash() const = 0; + // brief Get byte size of data in MSTensor. + // + // return Byte size of data in MSTensor. + virtual size_t Size() const = 0; + // brief Get pointer of data in MSTensor. + // + // The data pointer can be used to both write or read data in MSTensor. + // + // return A pointer points to data in MSTensor. + virtual void *MutableData() const = 0; +}; +using MultiTensor = std::vector>; +} // namespace inference +} // namespace mindspore +#endif // MINDSPORE_INCLUDE_MS_TENSOR_H_ diff --git a/mindspore/_akg/gpu/__init__.py b/mindspore/_akg/gpu/__init__.py index f9db48c634..4c11499594 100644 --- a/mindspore/_akg/gpu/__init__.py +++ b/mindspore/_akg/gpu/__init__.py @@ -35,3 +35,5 @@ from .logical_not import LogicalNot, gpu_schedule_LogicalNot from .logical_and import LogicalAnd, gpu_schedule_LogicalAnd from .sub import Sub, gpu_schedule_Sub from .less_equal import LessEqual, gpu_schedule_LessEqual +from .notequal import NotEqual, gpu_schedule_NotEqual +from .greater_equal import GreaterEqual, gpu_schedule_GreaterEqual diff --git a/mindspore/_akg/gpu/greater_equal.py b/mindspore/_akg/gpu/greater_equal.py new file mode 100644 index 0000000000..0212cac03c --- /dev/null +++ b/mindspore/_akg/gpu/greater_equal.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""greater_equal""" +import _akg.tvm +from _akg.ops.math import greater_equal +from _akg.topi.generic import schedule_elemwise + +def GreaterEqual(x, y): + """GreaterEqual.""" + return greater_equal.greater_equal(x, y) + + +def gpu_schedule_GreaterEqual(outs): + """ + GPU schedule for GreaterEqual. + + Args: + outs (tvm.tensor.Tensor): Outputs of compute. + + Returns: + sch (schedule.Schedule): The created schedule. + """ + device = 'cuda' + ctx = _akg.tvm.context(device, 0) + if not ctx.exist: + raise SystemError("Skip because %s is not enabled" % device) + with _akg.tvm.target.create(device): + sch = schedule_elemwise(outs) + return sch diff --git a/mindspore/_akg/gpu/notequal.py b/mindspore/_akg/gpu/notequal.py new file mode 100644 index 0000000000..3e3a6561a1 --- /dev/null +++ b/mindspore/_akg/gpu/notequal.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""notequal""" +import _akg.tvm +from _akg.ops.math import notequal +from _akg.topi.generic import schedule_elemwise + +def NotEqual(x, y): + """notequal.""" + return notequal.notequal(x, y) + + +def gpu_schedule_NotEqual(outs): + """ + gpu schedule for NotEqual. + + Args: + outs (tvm.tensor.Tensor): outputs of compute. + + Returns: + sch (schedule.Schedule): The created schedule. + """ + device = 'cuda' + ctx = _akg.tvm.context(device, 0) + if not ctx.exist: + raise SystemError("Skip because %s is not enabled" % device) + with _akg.tvm.target.create(device): + sch = schedule_elemwise(outs) + return sch diff --git a/mindspore/_akg/ops/math/greater_equal.py b/mindspore/_akg/ops/math/greater_equal.py new file mode 100644 index 0000000000..00ad016643 --- /dev/null +++ b/mindspore/_akg/ops/math/greater_equal.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""operator dsl function: greaterequal""" +import _akg.tvm +import _akg.topi +from _akg.utils.dsl_create import produce_shapes +from _akg.utils import validation_check as vc_util + + +@vc_util.check_input_type(_akg.tvm.tensor.Tensor, _akg.tvm.tensor.Tensor) +def greater_equal(input1, input2): + """ + Check whether input1 greaterquals to input2. + + Args: + input1 (tvm.tensor.Tensor): Tensor. + input2 (tvm.tensor.Tensor): Tensor. + + Returns: + tvm.tensor.Tensor. If input1 greaterquals to input2 return True, else return False. 
+ """ + shape1 = [x.value for x in input1.shape] + shape2 = [x.value for x in input2.shape] + vc_util.check_shape(shape1) + vc_util.check_shape(shape2) + + shape1, shape2, shape = produce_shapes(shape1, shape2) + + vc_util.elemwise_dtype_check(input1.dtype, input2.dtype) + dtype = input1.dtype + + # get greaterquals compute + t_value = _akg.tvm.compute(shape, lambda *indice: _akg.tvm.const(1, dtype), "T") + f_value = _akg.tvm.compute(shape, lambda *indice: _akg.tvm.const(0, dtype), "F") + + input1_bro = _akg.topi.broadcast_to(input1, shape) + input2_bro = _akg.topi.broadcast_to(input2, shape) + c_out = _akg.tvm.compute(shape, lambda *indice: _akg.tvm.expr.Select(input1_bro[indice] >= input2_bro[indice], + t_value[indice], f_value[indice]), name="C") + res = _akg.tvm.compute(shape, lambda *indice: c_out(*indice).astype("bool"), name="res") + + return res diff --git a/mindspore/_akg/ops/math/notequal.py b/mindspore/_akg/ops/math/notequal.py new file mode 100644 index 0000000000..16d5e4a0f4 --- /dev/null +++ b/mindspore/_akg/ops/math/notequal.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""operator dsl function: notequal""" +import _akg.tvm +import _akg.topi +from _akg.utils.dsl_create import produce_shapes +from _akg.utils import validation_check as vc_util + + +@vc_util.check_input_type(_akg.tvm.tensor.Tensor, _akg.tvm.tensor.Tensor) +def notequal(input1, input2): + """ + check whether input1 notequals to input2. + + Args: + input1 (tvm.tensor.Tensor): Tensor. + input2 (tvm.tensor.Tensor): Tensor. + + Returns: + tvm.tensor.Tensor. If input1 notequal to input2 return True, else return False. + """ + shape1 = [x.value for x in input1.shape] + shape2 = [x.value for x in input2.shape] + vc_util.check_shape(shape1) + vc_util.check_shape(shape2) + + shape1, shape2, shape = produce_shapes(shape1, shape2) + + vc_util.elemwise_dtype_check(input1.dtype, input2.dtype) + dtype = input1.dtype + + # get notequal compute + t_value = _akg.tvm.compute(shape, lambda *indice: _akg.tvm.const(1, dtype), "T") + f_value = _akg.tvm.compute(shape, lambda *indice: _akg.tvm.const(0, dtype), "F") + + input1_bro = _akg.topi.broadcast_to(input1, shape) + input2_bro = _akg.topi.broadcast_to(input2, shape) + c_out = _akg.tvm.compute(shape, lambda *indice: _akg.tvm.expr.Select(input1_bro[indice] != input2_bro[indice], + t_value[indice], f_value[indice]), name="C") + res = _akg.tvm.compute(shape, lambda *indice: c_out(*indice).astype("bool"), name="res") + + return res diff --git a/mindspore/_extends/builtin_operations.py b/mindspore/_extends/builtin_operations.py index a423fe6395..6bd382c1b6 100644 --- a/mindspore/_extends/builtin_operations.py +++ b/mindspore/_extends/builtin_operations.py @@ -13,7 +13,6 @@ # limitations under the License. 
# ============================================================================ """builtin_operations""" -import functools import numpy as np from mindspore.common.tensor import Tensor from mindspore.common.dtype import dtype_to_nptype, get_py_obj_dtype @@ -114,6 +113,24 @@ def bool_or(x, y): """Implement `bool_or`.""" return x or y +def vm_compare(*args): + """Implement `vm_compare` for tensor.""" + obj_str = args[-1] + if obj_str == "shape": + fn = getattr(args[0].asnumpy(), obj_str) + return fn + if len(args) == 2: + fn = getattr(args[0].asnumpy(), obj_str) + return Tensor(fn()) + if isinstance(args[0], Tensor): + fn = getattr(args[0].asnumpy(), obj_str) + y = args[1].asnumpy() if isinstance(args[1], Tensor) else args[1] + else: + obj_str = "__r" + obj_str[2:] + fn = getattr(args[1].asnumpy(), obj_str) + y = args[0] + return Tensor(np.array(fn(y))) + def make_list(*xs): """Implement `make_list`.""" @@ -124,17 +141,8 @@ def list_len(x): """Implement `list_len`.""" return len(x) - -# only used in PyNative mode -def partial(*args): - """Implement `partial`.""" - func = args[0].__call__ - partial_func = functools.partial(func, *args[1:]) - return partial_func - - -# only used in PyNative mode -def depend(value, expr): +def Depend(value, expr): + """Implement `Depend`.""" return value # only used in PyNative mode diff --git a/mindspore/_extends/parallel_compile/akg_compiler/__init__.py b/mindspore/_extends/parallel_compile/akg_compiler/__init__.py new file mode 100644 index 0000000000..e30774307c --- /dev/null +++ b/mindspore/_extends/parallel_compile/akg_compiler/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ diff --git a/mindspore/_extends/parallel_compile/akg_compiler/compiler.py b/mindspore/_extends/parallel_compile/akg_compiler/compiler.py new file mode 100644 index 0000000000..de78aad7e4 --- /dev/null +++ b/mindspore/_extends/parallel_compile/akg_compiler/compiler.py @@ -0,0 +1,35 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Providing akg compile with json""" +import sys +def run_compiler(op_json): + """ + Run AKG compiler to compile op with subprocess, if this process of + compilation failed, an exception will be raised + + Args: + op_json (str): json string of the op + + Returns: + None + """ + p = __import__("akg", globals(), locals(), ['ms'], 0) + func = getattr(p.ms, "compilewithjson") + res = func(op_json) + if not res: + raise ValueError("Compile error") + +if __name__ == "__main__": + run_compiler(sys.argv[1]) diff --git a/mindspore/_extends/parallel_compile/akg_compiler/multi_process_compiler.py b/mindspore/_extends/parallel_compile/akg_compiler/multi_process_compiler.py new file mode 100644 index 0000000000..ffe9c85dc3 --- /dev/null +++ b/mindspore/_extends/parallel_compile/akg_compiler/multi_process_compiler.py @@ -0,0 +1,71 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Providing multi process compile with json""" +import os +import subprocess +import sys +from multiprocessing import Pool, cpu_count + + +def _compile_akg_task(*json_strs): + """ + compile func called in single process + + Parameters: + json_strs: list. List contains multiple kernel infos, suitable for json compile api. 
+ """ + akg_compiler = os.path.join(os.path.split( + os.path.realpath(__file__))[0], "compiler.py") + for json_str in json_strs: + res = subprocess.run( + [sys.executable, akg_compiler, json_str], text=True) + if res.returncode != 0: + raise ValueError("Failed, args: {}!".format(json_str)) + + +def compile_akg_kernel_parallel(json_infos, process, waitime): + """ + compile kernel use multi processes + + Parameters: + json_infos: list. list contain kernel info(task id and json str) + process: int. processes num + waittime: int. max time the function blocked + + Returns: + True for all compile success, False for some failed. + """ + if not isinstance(json_infos, list): + raise ValueError("json_infos must be a list") + if not isinstance(process, int): + raise ValueError("process must be a num") + if not isinstance(waitime, int): + raise ValueError("waittime must be a num") + + if process == 0 and json_infos: + process = 1 + + cpu_proc_num = cpu_count() + max_proc_num = 16 + process = min([cpu_proc_num, max_proc_num, process]) + + args = [[] for _ in range(process)] + for p, info in enumerate(json_infos): + args[p % process].append(info) + + with Pool(processes=process) as pool: + res = pool.starmap_async(_compile_akg_task, args) + res.get(timeout=waitime) + return True diff --git a/mindspore/_extends/parallel_compile/multi_compiler.py b/mindspore/_extends/parallel_compile/multi_compiler.py deleted file mode 100644 index 86e1b684d2..0000000000 --- a/mindspore/_extends/parallel_compile/multi_compiler.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Providing multi process compile with json""" -import json -import math -import os -import subprocess -import sys -from multiprocessing import Pool - - -def _compiletask(platform, *jsons): - """ - compile func called in single process - - Parameters: - platform: str. AKG platform or TBE platform - *jsons: str. json str contain kernel info, suitable for json compile - api - - """ - if platform == "AKG": - p = __import__("_akg", globals(), locals(), ['ms'], 0) - func = getattr(p.ms, "compilewithjson") - for json_item in jsons: - res = func(json_item) - if not res: - raise ValueError("Compile error") - if platform == "TBE": - tbe_compiler = os.path.join(os.path.split(os.path.realpath(__file__))[0], "tbe_compiler", "compiler.py") - for json_item in jsons: - res = subprocess.run([sys.executable, tbe_compiler], input=json_item, text=True) - if res.returncode != 0: - raise ValueError("Tbe compile error") - - -def compilekernelparallel(jsons, process, waitime): - """ - compile kernel use multi processes - - Parameters: - jsons: list. json str list contain kernel info - process: int. processes num - waittime: int. 
max time the function blocked - """ - if not isinstance(jsons, list): - raise ValueError("jsons must be a list") - if not isinstance(process, int): - raise ValueError("process must be a num") - if not isinstance(waitime, int): - raise ValueError("waittime must be a num") - - jsons_akg = [] - jsons_tbe = [] - for json_ in jsons: - j = json.loads(json_) - if j["platform"] == "TBE": - jsons_tbe.append(json_) - continue - if j["platform"] == "AKG": - jsons_akg.append(json_) - continue - raise RuntimeError( - "not support this platform {0}".format(j["platform"])) - if jsons_akg: - process_akg = math.floor(len(jsons)/len(jsons_akg)*process) - else: - process_akg = 0 - - if process_akg == 0 and jsons_akg: - process_akg = 1 - process_tbe = process-process_akg - if process_tbe == 0 and jsons_tbe: - process_tbe = 1 - raise RuntimeWarning("we add a process for compile more operator") - - args = [[] for _ in range(process_akg+process_tbe)] - args_lens = len(args) - for p in range(args_lens): - if p < process_tbe: - args[p].append("TBE") - else: - args[p].append("AKG") - jsons_tbe_lens = len(jsons_tbe) - for p in range(jsons_tbe_lens): - args[p % process_tbe].append(jsons_tbe[p]) - jsons_akg_lens = len(jsons_akg) - for p in range(jsons_akg_lens): - args[process-p % process_akg-1].append(jsons_akg[p]) - for p in range(args_lens): - args[p] = tuple(args[p]) - with Pool(processes=process) as pool: - res = pool.starmap_async(_compiletask, args) - res.get(timeout=waitime) - return True diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/common.py b/mindspore/_extends/parallel_compile/tbe_compiler/common.py index 1aeba9889d..3d55cf60a2 100644 --- a/mindspore/_extends/parallel_compile/tbe_compiler/common.py +++ b/mindspore/_extends/parallel_compile/tbe_compiler/common.py @@ -15,13 +15,6 @@ """tbe common""" import json import os -from attrdict import AttrDict - -class ParamType(AttrDict): - Required = "required" - Dynamic = "dynamic" - Optional = "optional" - class 
TBEException(Exception): """tbe exception class""" @@ -112,7 +105,7 @@ def get_input_output(io_info, args): if len(item) > 1: arg.append(info) else: - if info['param_type'] == ParamType.Dynamic: + if info['param_type'] == 'dynamic': arg.append(info) args.append(arg) else: diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/compiler.py b/mindspore/_extends/parallel_compile/tbe_compiler/compiler.py index c385f7dee0..a241bf9e10 100755 --- a/mindspore/_extends/parallel_compile/tbe_compiler/compiler.py +++ b/mindspore/_extends/parallel_compile/tbe_compiler/compiler.py @@ -28,7 +28,8 @@ build_in_impl_path = get_build_in_impl_path() # op function list op_build = "compile" op_pre_build = "pre_build" - +fusion_pattern_start_flag = "fusion_pattern_start" +fusion_pattern_end_flag = "fusion_pattern_end" def _initialize(impl_path): """Initialize""" @@ -42,7 +43,6 @@ def _initialize(impl_path): sys.path.insert(0, op_module_name) - def build_op(build_type, json_str): """ call op functions with function name and input args json_str @@ -108,7 +108,7 @@ def build_op(build_type, json_str): # pre build if build_type == op_pre_build: - op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name) + op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name) # disable only pattern configuration op_build_cfg_en() return get_op_pattern() @@ -159,11 +159,14 @@ def compile_with_json(json_str): json_info = json.loads(json_str) if "fusion_op" in json_info: ret = compile_fusion_op(json_str) + elif "compile_type" in json_info: + ret = build_op(op_pre_build, json_str) else: ret = build_op(op_build, json_str) return ret - if __name__ == "__main__": in_args = sys.stdin.readline() - compile_with_json(in_args) + result = compile_with_json(in_args) + sys.stdout.write(fusion_pattern_start_flag + str(result) + fusion_pattern_end_flag) + sys.stdout.flush() diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_process.py 
b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_process.py index 9a3846c4f9..80b50c45a9 100644 --- a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_process.py +++ b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_process.py @@ -75,7 +75,6 @@ def check_supported(op_json: str): return ret - def run_compiler(op_json): """ run compiler to compile op with subprocess @@ -88,15 +87,16 @@ def run_compiler(op_json): """ try: tbe_compiler = os.path.join(os.path.split(os.path.realpath(__file__))[0], "compiler.py") - subprocess.run([sys.executable, tbe_compiler], input=op_json, timeout=300, - text=True, capture_output=True, check=True) - return "Success", "Success" + completed_object = subprocess.run([sys.executable, tbe_compiler], input=op_json, timeout=300, + text=True, capture_output=True, check=True) + if completed_object: + out = completed_object.stdout + return "Success", out except subprocess.TimeoutExpired: tb = traceback.format_exc() - return "TBEException", "CompileTimeOut: " + tb + "\ninput_args: " + op_json + return "TBEException", "PreCompileTimeOut: " + tb + "\ninput_args: " + op_json except subprocess.CalledProcessError as e: - return "TBEException", "CompileProcessFailed:\n" + e.stdout + "\n" + e.stderr + "\ninput_args: " + op_json - + return "TBEException", "PreCompileProcessFailed:\n" + e.stdout + "\n" + e.stderr + "\ninput_args: " + op_json class CompilerPool: """compiler pool""" @@ -154,11 +154,11 @@ class CompilerPool: task_id, task_future = self.__running_tasks.pop(0) ret_type, result = task_future.get(330) if ret_type == "Success": - ret = task_id, "Success" + ret = task_id, "Success", result elif ret_type in ("Exception", "TBEException"): - ret = task_id, ret_type + ":" + result + ret = task_id, ret_type + ":" + result, "_" else: - ret = task_id, "Exception: Not support return type:" + str(ret_type) + ret = task_id, "Exception: Not support return type:" + str(ret_type), "_" return ret def reset_task_info(self): diff --git 
a/mindspore/_extends/parse/__init__.py b/mindspore/_extends/parse/__init__.py index 62ba2e5406..323932560a 100644 --- a/mindspore/_extends/parse/__init__.py +++ b/mindspore/_extends/parse/__init__.py @@ -19,14 +19,15 @@ Interfaces for parser module in c++. from .parser import (Parser, create_obj_instance, generate_scope, get_bprop_method_of_class, get_class_instance_type, get_class_member_namespace_symbol, create_slice_obj, - get_dataclass_attributes, get_dataclass_methods, + get_dataclass_attributes, get_dataclass_methods, get_obj_id, get_module_namespace, get_obj_type, get_object_key, - get_parse_method_of_class, get_scope_name, - is_class_member, parse_cb, resolve_symbol, create_ellipsis_obj) + get_default_input, get_parse_method_of_class, get_scope_name, + is_class_member, parse_cb, resolve_symbol) from .serialize import * __all__ = ['parse_cb', 'get_parse_method_of_class', 'get_bprop_method_of_class', 'resolve_symbol', - 'get_object_key', 'get_class_instance_type', 'is_class_member', 'get_obj_type', - 'create_obj_instance', 'get_module_namespace', 'get_class_member_namespace_symbol', - 'Parser', 'get_dataclass_attributes', 'get_dataclass_methods', 'dump_obj', 'load_obj', - 'get_dataclass_methods', 'get_scope_name', 'create_slice_obj', 'create_ellipsis_obj'] + 'get_object_key', 'get_default_input', 'get_class_instance_type', 'is_class_member', + 'get_obj_type', 'get_obj_id', 'create_obj_instance', 'get_module_namespace', + 'get_class_member_namespace_symbol', 'get_obj_id', 'Parser', 'get_dataclass_attributes', + 'get_dataclass_methods', 'dump_obj', 'load_obj', 'get_dataclass_methods', 'get_scope_name', + 'create_slice_obj'] diff --git a/mindspore/_extends/parse/parser.py b/mindspore/_extends/parse/parser.py index 462565fd7f..2a1c9e0943 100644 --- a/mindspore/_extends/parse/parser.py +++ b/mindspore/_extends/parse/parser.py @@ -29,7 +29,6 @@ from mindspore.common.dtype import pytype_to_dtype from mindspore.common.api import _MindSporeFunction from .namespace 
import CellNamespace, ClosureNamespace, ClassMemberNamespace from .resources import parse_object_map, convert_object_map, trope_ns, SYMBOL_UNDEFINE, NO_IMPLEMENT -from ..utils import Slice, Ellipsis_ # define return value RET_SUCCESS = 0 @@ -70,14 +69,9 @@ parse_expr_statement_white_list = ( "append", ) -def create_ellipsis_obj(): - """Create Slice object""" - return Ellipsis_() - - def create_slice_obj(start, end, step): - """Create Slice object""" - return Slice(start, end, step) + """Create slice object""" + return slice(start, end, step) def parse_cb(func, parse_method=None): @@ -209,6 +203,14 @@ def get_object_key(obj): obj_id = instance_id + obj_id return obj_id, obj_key +def get_default_input(obj): + if hasattr(obj, '__parameter__'): + return obj.default_input + if isinstance(obj, tuple): + convert = lambda x: x.default_input if hasattr(x, '__parameter__') else x + args = tuple(convert(x) for x in obj) + return args + return obj def is_class_member(node): """Check the attr is class member variable.""" @@ -221,6 +223,9 @@ def is_class_member(node): return True return False +def get_obj_id(obj): + """Get the obj id.""" + return str(id(obj)) def get_obj_type(obj): """Get the obj type.""" diff --git a/mindspore/_extends/parse/resources.py b/mindspore/_extends/parse/resources.py index 60847c4338..2ae8b7172f 100644 --- a/mindspore/_extends/parse/resources.py +++ b/mindspore/_extends/parse/resources.py @@ -126,7 +126,7 @@ convert_object_map = { T.make_list: F.make_list, T.make_slice: F.make_slice, T.range: F.make_range, - + T.while_cond: M.while_cond, # lib function math.floor: NO_IMPLEMENT, math.trunc: NO_IMPLEMENT, diff --git a/mindspore/_extends/parse/standard_method.py b/mindspore/_extends/parse/standard_method.py index 2c94240ba2..0f3f843b63 100644 --- a/mindspore/_extends/parse/standard_method.py +++ b/mindspore/_extends/parse/standard_method.py @@ -16,8 +16,10 @@ # ============================================================================ 
"""standard_method""" from dataclasses import dataclass +from mindspore.common import dtype as mstype from ...ops import functional as F from ...ops import operations as P +from ...ops.primitive import constexpr from ...ops.composite import tail, core, MultitypeFuncGraph, env_get, hyper_add, \ zeros_like, ones_like from ...ops.composite.base import _append @@ -102,11 +104,44 @@ def bool_(x): return x.__bool__() -def tensor_bool(x): - """return immedate x, x is a tensor of bool value""" +def while_cond(x): + """For while condtion, if the condition is a tensor, the loop will not be unrolled""" + if F.issubclass_(F.typeof(x), F.typeof(mstype.tensor)): + is_cond = check_is_tensor_bool_cond(F.shape(x)) + if is_cond: + return F.cast(x, mstype.bool_) return x +@constexpr +def check_is_tensor_bool_cond(shp): + """check if tensor is a bool condition""" + if shp in ((), (1,)): + return True + raise ValueError("tensor as bool condition, its shape should be () or (1,), but got ", shp) + +@constexpr +def const_tensor_to_bool(x): + """convert bool tensor to bool condition""" + if x is None: + raise ValueError("Only constant tensor bool can be converted to bool") + x = x.asnumpy() + if x.shape not in ((), (1,)): + raise ValueError("Tensor to bool should input shape () or (1), but got ", x.shape) + if x.shape == (): + value = bool(x) + else: + value = bool(x[0]) + return value + +def tensor_bool(x): + """tensor as conditon, if is constant, return immediate bool value""" + is_cond = check_is_tensor_bool_cond(F.shape(x)) + if is_cond and F.isconstant(x): + return const_tensor_to_bool(x) + return F.cast(x, mstype.bool_) + + def and_(x, y): """Implementation of `and` (`&`).""" return x.__and__(y) diff --git a/mindspore/_extends/parse/trope.py b/mindspore/_extends/parse/trope.py index 7b40adcd16..f169c58fb9 100644 --- a/mindspore/_extends/parse/trope.py +++ b/mindspore/_extends/parse/trope.py @@ -91,3 +91,7 @@ def to_array(x): # pragma: no cover def not_contains(x): # pragma: no cover 
"""Not in function.""" raise RuntimeError('This operation is not meant to be called directly.') + +def while_cond(x): # pragma: no cover + """Not in function.""" + raise RuntimeError('This operation is not meant to be called directly.') diff --git a/mindspore/_extends/utils.py b/mindspore/_extends/utils.py index fecbf546f5..8469ddda8b 100644 --- a/mindspore/_extends/utils.py +++ b/mindspore/_extends/utils.py @@ -19,7 +19,6 @@ import logging import os import inspect from functools import wraps -from dataclasses import dataclass def cal_sha256(file_path): @@ -100,20 +99,3 @@ def cell_attr_register(fn=None, attrs=None): if fn is not None: return wrap_cell(fn) return wrap_cell - - -@dataclass -class Slice: - """ - Slice class - """ - start: int - end: int - step: int - - -@dataclass -class Ellipsis_: - """ - Ellipsis class - """ diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 4184d29281..c435672bde 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -8,6 +8,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Windows") add_compile_definitions(BUILDING_DLL) endif() +if (ENABLE_MPI) + add_compile_definitions(ENABLE_MPI) +endif () + if(ENABLE_GPU) find_package(CUDA REQUIRED) find_package(Threads) @@ -35,7 +39,7 @@ if(ENABLE_GPU) "device/gpu/*.cu" "kernel/gpu/*.cu" "kernel/akg/gpu/*.cc" - "kernel/akg/akgkernelbuild.cc" + "kernel/akg/akg_kernel_build.cc" "kernel/akg/akg_kernel_attrs_process.cc" ) @@ -75,7 +79,9 @@ if (ENABLE_DUMP_PROTO) file(GLOB_RECURSE PROTO_PY RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "utils/anf_ir.proto" "utils/summary.proto" + "utils/lineage.proto" "utils/checkpoint.proto" + "utils/print.proto" ) ms_protobuf_generate_py(PY_SRCS PY_HDRS PY_PYS ${PROTO_PY}) @@ -120,7 +126,11 @@ endforeach () set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME) add_library(mindspore STATIC ${SUB_OBJECTS_SRC}) target_link_libraries(mindspore proto_input) 
-target_link_libraries(mindspore securec mindspore::flatbuffers) +if (ENABLE_CPU AND ENABLE_MPI) + target_link_libraries(mindspore securec mindspore::flatbuffers mindspore::ompi) +else () + target_link_libraries(mindspore securec mindspore::flatbuffers) +endif () if (NOT WIN32) target_link_libraries(mindspore dl) endif() @@ -227,3 +237,29 @@ if (ENABLE_MINDDATA) add_subdirectory(mindrecord) add_subdirectory(dataset) endif () + +# build inference +set(LOAD_ONNX_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/utils/load_onnx/anf_converter.cc + ${CMAKE_CURRENT_SOURCE_DIR}/utils/load_onnx/anf_model_parser.cc + ) +add_library(inference SHARED + ${CMAKE_CURRENT_SOURCE_DIR}/session/session.cc + ${LOAD_ONNX_SRC} + ) +target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY} + -Wl,--whole-archive mindspore -Wl,--no-whole-archive mindspore_gvar mindspore::protobuf) + +if (ENABLE_CPU) + target_link_libraries(inference PRIVATE mindspore::dnnl mindspore::mkldnn) +endif () + +if (USE_GLOG) + target_link_libraries(inference PRIVATE mindspore::glog) +else() + if (CMAKE_SYSTEM_NAME MATCHES "Linux") + target_link_options(inference PRIVATE -Wl,-init,mindspore_log_init) + elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") + set_target_properties(inference PROPERTIES MACOSX_RPATH ON) + endif () +endif() diff --git a/mindspore/ccsrc/common/trans.cc b/mindspore/ccsrc/common/trans.cc index 55e4761036..9cf6eb3a5a 100644 --- a/mindspore/ccsrc/common/trans.cc +++ b/mindspore/ccsrc/common/trans.cc @@ -14,11 +14,9 @@ * limitations under the License. 
*/ #include "common/trans.h" -#include #include #include #include -#include "./securec.h" #include "common/utils.h" #include "session/anf_runtime_algorithm.h" #include "kernel/kernel.h" @@ -29,34 +27,7 @@ namespace mindspore { namespace trans { -namespace { -std::vector PaddingShapeTo4dByDefault(const std::vector &shape) { - std::vector shape_4d(4, 1); - switch (shape.size()) { - case 0: - return shape_4d; - case 1: - shape_4d[1] = shape[0]; - break; - case 2: - shape_4d[1] = shape[0]; - shape_4d[2] = shape[1]; - break; - case 3: - shape_4d[1] = shape[0]; - shape_4d[2] = shape[1]; - shape_4d[3] = shape[2]; - break; - case 4: - std::copy(shape.begin(), shape.end(), shape_4d.begin()); - break; - default: - MS_LOG(EXCEPTION) << "Unexpect shape size = " << shape.size(); - } - return shape_4d; -} -} // namespace -const size_t kNchwDims = 4; +enum kAxis : int { kN = 0, kC, kH, kW, kNchwDims, kNdhwc }; const std::map type_map = {{kNumberTypeBool, 1}, {kNumberTypeInt, 4}, {kNumberTypeInt8, 1}, {kNumberTypeInt16, 2}, {kNumberTypeInt32, 4}, {kNumberTypeInt64, 8}, {kNumberTypeUInt, 4}, {kNumberTypeUInt8, 1}, {kNumberTypeUInt16, 2}, @@ -84,7 +55,10 @@ inline void SetData(size_t size, bool pad_zero, size_t src_idx, size_t dst_idx, template T DivCeil(T n1, T n2) { - return (n2 != 0) ? 
(n1 - 1) / n2 + 1 : 0; + if (n2 != 0) { + return (n1 - 1) / n2 + 1; + } + return 0; } enum DataTypeTransMode { @@ -226,8 +200,7 @@ size_t CubeSizeByType(const TypeId data_type) { } size_t ShapeSize(const std::vector &shape) { - size_t product = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); - return product; + return std::accumulate(shape.begin(), shape.end(), IntToSize(1), std::multiplies()); } size_t TypeIdSize(const TypeId data_type) { @@ -239,57 +212,9 @@ size_t TypeIdSize(const TypeId data_type) { return unsupported_type_error; } -bool IsNeedPadding(const std::string &format, const size_t shape_size) { - if (shape_size == 0) { - return false; - } - if (format == kOpFormat_DEFAULT || format == kOpFormat_FRAC_NZ) { - return false; - } else if (shape_size < 4) { - return true; - } - return false; -} - -std::vector GetRuntimePaddingShape(const AnfNodePtr &node, size_t index) { - std::vector shape; - std::vector host_shape; - if (node->isa()) { - auto value_node = node->cast(); - auto node_value = value_node->value(); - auto tensor = node_value->cast(); - if (tensor == nullptr) { - MS_LOG(EXCEPTION) << " the node[ " << node->DebugString() << "]'s cannot convert "; - } - auto shape_temp = tensor->shape(); - (void)std::transform(shape_temp.begin(), shape_temp.end(), std::back_inserter(host_shape), IntToSize); - if (host_shape.empty()) { - host_shape.push_back(1); - } - } else { - host_shape = AnfAlgo::GetOutputInferShape(node, index); - } - if (trans::IsNeedPadding(AnfAlgo::GetOutputFormat(node, 0), host_shape.size())) { - host_shape = trans::PaddingShapeTo4d(host_shape, AnfAlgo::GetOutputReshapeType(node, 0)); - } - std::transform(host_shape.begin(), host_shape.end(), std::back_inserter(shape), SizeToInt); - return shape; -} - -std::vector PaddingShapeTo4d(const std::vector &shape, const std::vector &padding_axis) { - if (padding_axis.empty() || shape.size() != padding_axis.size()) { - return PaddingShapeTo4dByDefault(shape); - } - std::vector 
shape_4d(4, 1); - for (size_t index = 0; index < padding_axis.size(); index++) { - shape_4d[padding_axis[index]] = shape[index]; - } - return shape_4d; -} - namespace { bool CheckDims(const std::vector &shape) { - if (shape.size() != 4) { + if (shape.size() != kNchwDims) { MS_LOG(ERROR) << "Host shape dims shoud be 4"; return false; } @@ -308,10 +233,10 @@ std::vector NhwcDeviceShape(const std::vector &shape) { MS_LOG(EXCEPTION) << "Ccheck dims failed."; } std::vector device_shape; - device_shape.push_back(shape[0]); - device_shape.push_back(shape[2]); - device_shape.push_back(shape[3]); - device_shape.push_back(shape[1]); + device_shape.push_back(shape[kN]); + device_shape.push_back(shape[kH]); + device_shape.push_back(shape[kW]); + device_shape.push_back(shape[kC]); return device_shape; } @@ -320,10 +245,10 @@ std::vector HwchDeviceShape(const std::vector &shape) { MS_LOG(EXCEPTION) << "Check dims failed."; } std::vector device_shape; - device_shape.push_back(shape[2]); - device_shape.push_back(shape[3]); - device_shape.push_back(shape[1]); - device_shape.push_back(shape[0]); + device_shape.push_back(shape[kH]); + device_shape.push_back(shape[kW]); + device_shape.push_back(shape[kC]); + device_shape.push_back(shape[kN]); return device_shape; } @@ -332,9 +257,9 @@ std::vector FracZDeviceShape(const std::vector &shape) { MS_LOG(EXCEPTION) << "Check dims failed."; } std::vector device_shape; - size_t cout16 = ((shape[0] + kCubeSize - 1) / kCubeSize) * kCubeSize; - size_t cin16 = ((shape[1] + kCubeSize - 1) / kCubeSize) * kCubeSize; - device_shape.push_back(shape[2] * shape[3] * cin16 / kCubeSize); + const size_t cout16 = ((shape[kN] + kCubeSize - 1) / kCubeSize) * kCubeSize; + const size_t cin16 = ((shape[kC] + kCubeSize - 1) / kCubeSize) * kCubeSize; + device_shape.push_back(shape[kH] * shape[kW] * cin16 / kCubeSize); device_shape.push_back(cout16 / kCubeSize); device_shape.push_back(kCubeSize); device_shape.push_back(kCubeSize); @@ -346,12 +271,12 @@ std::vector 
Nc1hwc0DeviceShape(const std::vector &shape) { MS_LOG(EXCEPTION) << "Check dims failed."; } std::vector device_shape; - size_t C1 = (shape[1] + kCubeSize - 1) / kCubeSize; - size_t C0 = kCubeSize; - device_shape.push_back(shape[0]); + const size_t C1 = (shape[kC] + kCubeSize - 1) / kCubeSize; + const size_t C0 = kCubeSize; + device_shape.push_back(shape[kN]); device_shape.push_back(C1); - device_shape.push_back(shape[2]); - device_shape.push_back(shape[3]); + device_shape.push_back(shape[kH]); + device_shape.push_back(shape[kW]); device_shape.push_back(C0); return device_shape; } @@ -361,10 +286,10 @@ std::vector C1hwncoc0DeviceShape(const std::vector &shape) { MS_LOG(EXCEPTION) << "Check dims failed."; } std::vector device_shape; - device_shape.push_back((shape[1] - 1) / kCubeSize + 1); - device_shape.push_back(shape[2]); - device_shape.push_back(shape[3]); - device_shape.push_back(shape[0]); + device_shape.push_back((shape[kC] - 1) / kCubeSize + 1); + device_shape.push_back(shape[kH]); + device_shape.push_back(shape[kW]); + device_shape.push_back(shape[kN]); device_shape.push_back(kCubeSize); device_shape.push_back(kCubeSize); return device_shape; @@ -375,9 +300,9 @@ std::vector FracZc04DeviceShape(const std::vector &shape) { MS_LOG(EXCEPTION) << "Check dims failed."; } std::vector device_shape; - size_t c0 = 4; - auto first_dim = DivCeil(c0 * shape.at(2) * shape.at(3), kCubeSize); - auto no = DivCeil(shape.at(0), kCubeSize); + const size_t c0 = 4; + auto first_dim = DivCeil(c0 * shape[kH] * shape[kW], kCubeSize); + auto no = DivCeil(shape.at(kN), kCubeSize); device_shape.push_back(first_dim); device_shape.push_back(no); device_shape.push_back(kCubeSize); @@ -390,24 +315,101 @@ std::vector Nc1hwc04DeviceShape(const std::vector &shape) { MS_LOG(EXCEPTION) << "Check dims failed."; } std::vector device_shape; - size_t C1 = 1; - size_t C0 = 4; - device_shape.push_back(shape[0]); + const size_t C1 = 1; + const size_t C0 = 4; + device_shape.push_back(shape[kN]); 
device_shape.push_back(C1); - device_shape.push_back(shape[2]); - device_shape.push_back(shape[3]); + device_shape.push_back(shape[kH]); + device_shape.push_back(shape[kW]); device_shape.push_back(C0); return device_shape; } std::vector NdhwcDeviceShape(const std::vector &shape) { - if (shape.size() < 5) { + if (shape.size() < kNdhwc) { MS_LOG(EXCEPTION) << "Shape dims must be 5 when format is ndhwc."; } return shape; } + +std::vector PaddingShapeTo4dByDefault(const std::vector &shape) { + std::vector shape_4d(kNchwDims, 1); + switch (shape.size()) { + case 0: + return shape_4d; + case 1: + shape_4d[kC] = shape[kN]; + break; + case 2: + shape_4d[kC] = shape[kN]; + shape_4d[kH] = shape[kC]; + break; + case 3: + shape_4d[kC] = shape[kN]; + shape_4d[kH] = shape[kC]; + shape_4d[kW] = shape[kH]; + break; + case 4: + std::copy(shape.begin(), shape.end(), shape_4d.begin()); + break; + default: + MS_LOG(EXCEPTION) << "Unexpect shape size = " << shape.size(); + } + return shape_4d; +} } // namespace +bool IsNeedPadding(const std::string &format, const size_t shape_size) { + if (shape_size == 0) { + return false; + } + if (format == kOpFormat_DEFAULT || format == kOpFormat_FRAC_NZ) { + return false; + } else if (shape_size < kNchwDims) { + return true; + } + return false; +} + +std::vector GetRuntimePaddingShape(const AnfNodePtr &node, size_t index) { + MS_EXCEPTION_IF_NULL(node); + std::vector shape; + std::vector host_shape; + if (node->isa()) { + auto value_node = node->cast(); + MS_EXCEPTION_IF_NULL(value_node); + auto node_value = value_node->value(); + MS_EXCEPTION_IF_NULL(node_value); + auto tensor = node_value->cast(); + if (tensor == nullptr) { + MS_LOG(EXCEPTION) << " The node[ " << node->DebugString() << "]'s cannot convert "; + } + auto shape_temp = tensor->shape(); + (void)std::transform(shape_temp.begin(), shape_temp.end(), std::back_inserter(host_shape), IntToSize); + if (host_shape.empty()) { + host_shape.push_back(1); + } + } else { + host_shape = 
AnfAlgo::GetOutputInferShape(node, index); + } + if (trans::IsNeedPadding(AnfAlgo::GetOutputFormat(node, 0), host_shape.size())) { + host_shape = trans::PaddingShapeTo4d(host_shape, AnfAlgo::GetOutputReshapeType(node, 0)); + } + std::transform(host_shape.begin(), host_shape.end(), std::back_inserter(shape), SizeToInt); + return shape; +} + +std::vector PaddingShapeTo4d(const std::vector &shape, const std::vector &padding_axis) { + if (padding_axis.empty() || shape.size() != padding_axis.size()) { + return PaddingShapeTo4dByDefault(shape); + } + std::vector shape_4d(kNchwDims, 1); + for (size_t index = 0; index < padding_axis.size(); index++) { + shape_4d[padding_axis[index]] = shape[index]; + } + return shape_4d; +} + std::vector TransShapeToDevice(const std::vector &shape, const std::string &format) { using DeviceShapeTransfer = std::function(const std::vector &)>; const std::map device_shape_map{{kOpFormat_NCHW, NchwDeviceShape}, @@ -426,6 +428,10 @@ std::vector TransShapeToDevice(const std::vector &shape, const s auto temp_shape = shape; std::vector device_shape; if (format == kOpFormat_FRAC_NZ) { + if (shape.size() == 1 && (shape[0] == 1 || shape[0] % kCubeSize == 0)) { + // For [1] and [1024] shape we can trait it as NZ shape + return shape; + } if (shape.size() < 2) { MS_LOG(EXCEPTION) << "Format" << format << " is not support shape " << shape.size(); } else { @@ -439,7 +445,7 @@ std::vector TransShapeToDevice(const std::vector &shape, const s device_shape.push_back(kCubeSize); return device_shape; } - if (shape.size() != 4) { + if (shape.size() != kNchwDims) { MS_LOG(WARNING) << "Get Device Shape using a shape size is less than 4 ,should be Padding shape by Default firstly"; temp_shape = PaddingShapeTo4dByDefault(shape); } @@ -455,6 +461,8 @@ bool CheckArgs(const FormatArgs &args, size_t *size, size_t *total_size) { MS_LOG(ERROR) << "Invalid host shape, host shape dims:" << args.host_shape.size() << ", expect dims:" << kNchwDims; return false; } + 
MS_EXCEPTION_IF_NULL(size); + MS_EXCEPTION_IF_NULL(total_size); *size = TypeIdSize(args.src_data_type); if (*size < 1) { MS_LOG(ERROR) << "Illegal dtype."; @@ -540,10 +548,10 @@ bool NchwTo4D(const FormatArgs &args, void *result) { MS_LOG(ERROR) << "Check args failed."; return false; } - size_t n = args.host_shape[0]; - size_t c = args.host_shape[1]; - size_t h = args.host_shape[2]; - size_t w = args.host_shape[3]; + auto n = args.host_shape[kN]; + auto c = args.host_shape[kC]; + auto h = args.host_shape[kH]; + auto w = args.host_shape[kW]; for (size_t ni = 0; ni < n; ni++) { for (size_t ci = 0; ci < c; ci++) { for (size_t hi = 0; hi < h; hi++) { @@ -572,10 +580,10 @@ bool ToNchw(const FormatArgs &args, void *result) { MS_LOG(ERROR) << "Check args failed."; return false; } - size_t n = args.host_shape[0]; - size_t c = args.host_shape[1]; - size_t h = args.host_shape[2]; - size_t w = args.host_shape[3]; + auto n = args.host_shape[kN]; + auto c = args.host_shape[kC]; + auto h = args.host_shape[kH]; + auto w = args.host_shape[kW]; for (size_t ni = 0; ni < n; ni++) { for (size_t ci = 0; ci < c; ci++) { for (size_t hi = 0; hi < h; hi++) { @@ -602,32 +610,32 @@ bool NchwToFracZ(const FormatArgs &args, void *result) { MS_LOG(ERROR) << "Invalid host shape, host shape dims:" << args.host_shape.size() << ", expect dims:" << kNchwDims; return false; } - size_t size = TypeIdSize(args.src_data_type); + auto size = TypeIdSize(args.src_data_type); if (size < 1) { MS_LOG(ERROR) << "Illegal dtype."; return false; } - auto n = args.host_shape[0]; - auto c = args.host_shape[1]; - auto h = args.host_shape[2]; - auto w = args.host_shape[3]; + auto n = args.host_shape[kN]; + auto c = args.host_shape[kC]; + auto h = args.host_shape[kH]; + auto w = args.host_shape[kW]; - size_t c0 = CubeSizeByType(args.src_data_type); + auto c0 = CubeSizeByType(args.src_data_type); if (c0 < 1) { MS_LOG(ERROR) << "Illegal dtype."; return false; } - size_t c1 = DivCeil(c, c0); - size_t hw = h * w; - size_t 
chw = c * hw; - size_t hwc0 = hw * c0; - size_t nchw = n * chw; - - size_t hf_cnt = DivCeil(n, kCubeSize); - size_t vf_cnt = c1 * hw; - size_t fractal_ele_cnt = c0 * kCubeSize; - size_t total_ele_cnt = hf_cnt * vf_cnt * fractal_ele_cnt; - size_t dst_size = total_ele_cnt * size; + auto c1 = DivCeil(c, c0); + auto hw = h * w; + auto chw = c * hw; + auto hwc0 = hw * c0; + auto nchw = n * chw; + + auto hf_cnt = DivCeil(n, kCubeSize); + auto vf_cnt = c1 * hw; + auto fractal_ele_cnt = c0 * kCubeSize; + auto total_ele_cnt = hf_cnt * vf_cnt * fractal_ele_cnt; + auto dst_size = total_ele_cnt * size; if (dst_size != args.device_size) { MS_LOG(ERROR) << "Illegal total data size." << "dst size is :" << dst_size << "device size is :" << args.device_size; @@ -647,7 +655,7 @@ bool NchwToFracZ(const FormatArgs &args, void *result) { auto src_ni = hfi * kCubeSize + col; auto src_idx = src_row_offset + chw * col; auto dst_idx = gfi * fractal_ele_cnt + col * c0 + row; - auto pad_zero = (src_ni >= n || src_idx >= nchw || src_ci >= c) ? 
true : false; + auto pad_zero = src_ni >= n || src_idx >= nchw || src_ci >= c; SetData(size, pad_zero, src_idx, dst_idx, args, result); } } @@ -663,12 +671,12 @@ bool FracZToNchw(const FormatArgs &args, void *result) { MS_LOG(ERROR) << "Invalid host shape, host shape dims:" << args.host_shape.size() << ", expect dims:" << kNchwDims; return false; } - size_t size = TypeIdSize(args.src_data_type); + auto size = TypeIdSize(args.src_data_type); if (size < 1) { MS_LOG(ERROR) << "Illegal dtype."; return false; } - size_t total_size = ShapeSize(args.device_shape) * size; + auto total_size = ShapeSize(args.device_shape) * size; if (total_size != args.device_size) { MS_LOG(ERROR) << "Illegal total data size, total_size:" << total_size << ", device_size:" << args.device_size; return false; @@ -677,18 +685,16 @@ bool FracZToNchw(const FormatArgs &args, void *result) { auto n0 = args.device_shape.at(1); auto ni = args.device_shape.at(2); auto c0 = args.device_shape.at(3); - - auto n = args.host_shape[0]; - auto c = args.host_shape[1]; - auto h = args.host_shape[2]; - auto w = args.host_shape[3]; - - size_t nc = ni * n0; - size_t ncc0 = nc * c0; - size_t wncc0 = w * ncc0; - size_t hwncc0 = h * wncc0; - size_t hw = h * w; - size_t chw = c * hw; + auto n = args.host_shape[kN]; + auto c = args.host_shape[kC]; + auto h = args.host_shape[kH]; + auto w = args.host_shape[kW]; + auto nc = ni * n0; + auto ncc0 = nc * c0; + auto wncc0 = w * ncc0; + auto hwncc0 = h * wncc0; + auto hw = h * w; + auto chw = c * hw; for (size_t n_idx = 0; n_idx < n; n_idx++) { size_t n_head_addr = n_idx * chw; @@ -720,20 +726,18 @@ bool NchwToFracZc04(const FormatArgs &args, void *result) { MS_LOG(ERROR) << "Check args failed."; return false; } - size_t cube = kCubeSize; - size_t n = args.host_shape[0]; - size_t c = args.host_shape[1]; - size_t h = args.host_shape[2]; - size_t w = args.host_shape[3]; - - size_t c0 = 4; - size_t c1 = DivCeil(c, c0); - size_t hwc0 = h * w * c0; - size_t hwc = h * w * c; - 
size_t nhwc = n * h * w * c; - - size_t n_cnt = DivCeil(n, cube); - size_t v_cnt = DivCeil(h * w * c0 * c1, cube); + auto cube = kCubeSize; + auto n = args.host_shape[kN]; + auto c = args.host_shape[kC]; + auto h = args.host_shape[kH]; + auto w = args.host_shape[kW]; + const size_t c0 = 4; + auto c1 = DivCeil(c, c0); + auto hwc0 = h * w * c0; + auto hwc = h * w * c; + auto nhwc = n * h * w * c; + auto n_cnt = DivCeil(n, cube); + auto v_cnt = DivCeil(h * w * c0 * c1, cube); size_t dst_idx = 0; for (size_t vi = 0; vi < v_cnt; vi++) { @@ -929,7 +933,7 @@ bool NchwToNc1hwc0(const FormatArgs &args, void *result) { MS_LOG(ERROR) << "Invalid host shape, host shape dims:" << args.host_shape.size() << ", expect dims:" << kNchwDims; return false; } - size_t size = TypeIdSize(args.src_data_type); + auto size = TypeIdSize(args.src_data_type); if (size < 1) { MS_LOG(ERROR) << "Illegal dtype."; return false; @@ -940,20 +944,23 @@ bool NchwToNc1hwc0(const FormatArgs &args, void *result) { return false; } - auto n = args.host_shape[0]; - auto c = args.host_shape[1]; - auto h = args.host_shape[2]; - auto w = args.host_shape[3]; - size_t c0 = CubeSizeByType(args.src_data_type); + auto n = args.host_shape[kN]; + auto c = args.host_shape[kC]; + auto h = args.host_shape[kH]; + auto w = args.host_shape[kW]; + auto c0 = CubeSizeByType(args.src_data_type); if (c0 < 1) { MS_LOG(ERROR) << "Illegal dtype."; return false; } - size_t c1 = DivCeil(c, c0); - size_t hw = h * w; - size_t chw = c * hw; - size_t c1hwc0 = c1 * hw * c0; - size_t wc0 = w * c0; + if (args.device_format == kOpFormat_NC1HWC0_C04) { + c0 = 4; + } + auto c1 = DivCeil(c, c0); + auto hw = h * w; + auto chw = c * hw; + auto c1hwc0 = c1 * hw * c0; + auto wc0 = w * c0; for (size_t n_idx = 0; n_idx < n; n_idx++) { size_t n_head_addr = n_idx * c1hwc0; @@ -967,7 +974,7 @@ bool NchwToNc1hwc0(const FormatArgs &args, void *result) { size_t dst_idx = c0_idx + w_head_addr; size_t c_idx = c0_idx + c1_idx * c0; size_t src_idx = n_idx * 
chw + c_idx * hw + h_idx * w + w_idx; - auto pad_zero = (c_idx < c) ? false : true; + auto pad_zero = c_idx >= c; SetData(size, pad_zero, src_idx, dst_idx, args, result); } } @@ -984,29 +991,29 @@ bool Nc1hwc0ToNchw(const FormatArgs &args, void *result) { MS_LOG(ERROR) << "Invalid host shape, host shape dims:" << args.host_shape.size() << ", expect dims:" << kNchwDims; return false; } - size_t size = TypeIdSize(args.src_data_type); + auto size = TypeIdSize(args.src_data_type); if (size < 1) { MS_LOG(ERROR) << "Illegal dtype."; return false; } - size_t total_size = ShapeSize(args.device_shape) * size; + auto total_size = ShapeSize(args.device_shape) * size; if (total_size != args.device_size) { MS_LOG(ERROR) << "Illegal total data size, total_size:" << total_size << ", device_size:" << args.device_size; return false; } - auto n = args.host_shape[0]; - auto c = args.host_shape[1]; - auto h = args.host_shape[2]; - auto w = args.host_shape[3]; + auto n = args.host_shape[kN]; + auto c = args.host_shape[kC]; + auto h = args.host_shape[kH]; + auto w = args.host_shape[kW]; auto c1 = args.device_shape[1]; auto c0 = args.device_shape[4]; - size_t hw = h * w; - size_t chw = c * hw; - size_t wc0 = w * c0; - size_t hwc0 = h * wc0; - size_t c1hwc0 = c1 * hwc0; + auto hw = h * w; + auto chw = c * hw; + auto wc0 = w * c0; + auto hwc0 = h * wc0; + auto c1hwc0 = c1 * hwc0; for (size_t n_idx = 0; n_idx < n; n_idx++) { size_t n_head_addr = n_idx * chw; @@ -1037,13 +1044,15 @@ bool NchwToC1hwncoc0(const FormatArgs &args, void *result) { MS_LOG(ERROR) << "Check args failed."; return false; } - auto n = args.host_shape[0]; - auto c = args.host_shape[1]; - auto h = args.host_shape[2]; - auto w = args.host_shape[3]; + auto n = args.host_shape[kN]; + auto c = args.host_shape[kC]; + auto h = args.host_shape[kH]; + auto w = args.host_shape[kW]; + const int co_idx = 4; + const int c0_idx = 5; auto c1 = args.device_shape[0]; - auto co = args.device_shape[4]; - auto c0 = args.device_shape[5]; + 
auto co = args.device_shape[co_idx]; + auto c0 = args.device_shape[c0_idx]; for (size_t c1_i = 0; c1_i < c1; c1_i++) { for (size_t h_i = 0; h_i < h; h_i++) { @@ -1055,7 +1064,7 @@ bool NchwToC1hwncoc0(const FormatArgs &args, void *result) { co_i * c0 + c0_i; size_t c_i = c0_i + c1_i * c0; size_t src_idx = n_i * c * h * w + c_i * h * w + h_i * w + w_i; - auto pad_zero = (c_i < c && c0_i == co_i) ? false : true; + auto pad_zero = !(c_i < c && c0_i == co_i); SetData(size, pad_zero, src_idx, dst_idx, args, result); } } @@ -1076,12 +1085,14 @@ bool C1hwncoc0ToNchw(const FormatArgs &args, void *result) { MS_LOG(ERROR) << "Check args failed."; return false; } - auto n = args.host_shape[0]; - auto c = args.host_shape[1]; - auto h = args.host_shape[2]; - auto w = args.host_shape[3]; - auto co = args.device_shape[4]; - auto c0 = args.device_shape[5]; + auto n = args.host_shape[kN]; + auto c = args.host_shape[kC]; + auto h = args.host_shape[kH]; + auto w = args.host_shape[kW]; + const int co_idx = 4; + const int c0_idx = 5; + auto co = args.device_shape[co_idx]; + auto c0 = args.device_shape[c0_idx]; for (size_t n_i = 0; n_i < n; n_i++) { for (size_t c_i = 0; c_i < c; c_i++) { for (size_t h_i = 0; h_i < h; h_i++) { diff --git a/mindspore/ccsrc/dataset/CMakeLists.txt b/mindspore/ccsrc/dataset/CMakeLists.txt index 068aec8873..9238be93f2 100644 --- a/mindspore/ccsrc/dataset/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/CMakeLists.txt @@ -62,6 +62,7 @@ add_dependencies(engine-datasetops-source core) add_dependencies(engine-datasetops-source-sampler core) add_dependencies(engine-datasetops core) add_dependencies(engine-opt core) +add_dependencies(engine-perf core) add_dependencies(engine-gnn core) add_dependencies(engine core) add_dependencies(text core) @@ -81,6 +82,7 @@ set(submodules $ $ $ + $ $ $ $ @@ -106,10 +108,11 @@ target_link_libraries(_c_dataengine PRIVATE mindspore mindspore_gvar) if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") target_link_libraries(_c_dataengine PRIVATE 
mindspore::pybind11_module ${PYTHON_LIBRARIES} mindspore::protobuf ${SECUREC_LIBRARY}) else() + set(ICU_LIB mindspore::icuuc mindspore::icudata mindspore::icui18n) target_link_libraries(_c_dataengine PRIVATE mindspore::pybind11_module -ldl mindspore::protobuf ${SECUREC_LIBRARY}) endif() target_link_libraries(_c_dataengine PUBLIC mindspore::jpeg_turbo mindspore::opencv_core mindspore::opencv_imgcodecs - mindspore::opencv_imgproc mindspore::tinyxml2) + mindspore::opencv_imgproc mindspore::tinyxml2 ${ICU_LIB}) if (ENABLE_GPUQUE) target_link_libraries(_c_dataengine PRIVATE gpu_queue ${CUDNN_PATH}/lib64/libcudnn.so diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.cc b/mindspore/ccsrc/dataset/api/de_pipeline.cc index 0194785090..ce70476423 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.cc +++ b/mindspore/ccsrc/dataset/api/de_pipeline.cc @@ -19,56 +19,64 @@ #include #include "common/utils.h" -#include "dataset/kernels/py_func_op.h" -#include "dataset/engine/datasetops/source/image_folder_op.h" -#include "dataset/engine/datasetops/source/mnist_op.h" -#include "dataset/engine/datasetops/source/voc_op.h" #include "dataset/core/tensor.h" #include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/source/manifest_op.h" -#include "dataset/engine/datasetops/source/cifar_op.h" +#include "dataset/engine/datasetops/bucket_batch_by_length_op.h" +#include "dataset/engine/datasetops/filter_op.h" #include "dataset/engine/datasetops/source/celeba_op.h" +#include "dataset/engine/datasetops/source/cifar_op.h" +#include "dataset/engine/datasetops/source/clue_op.h" +#include "dataset/engine/datasetops/source/coco_op.h" +#include "dataset/engine/datasetops/source/image_folder_op.h" +#include "dataset/engine/datasetops/source/manifest_op.h" +#include "dataset/engine/datasetops/source/mnist_op.h" #include "dataset/engine/datasetops/source/random_data_op.h" #include "dataset/engine/datasetops/source/text_file_op.h" -#include 
"dataset/engine/datasetops/filter_op.h" +#include "dataset/engine/datasetops/source/voc_op.h" +#include "dataset/kernels/py_func_op.h" +#include "dataset/util/random.h" +#include "dataset/util/status.h" #include "mindrecord/include/shard_category.h" +#include "mindrecord/include/shard_distributed_sample.h" #include "mindrecord/include/shard_sample.h" #include "mindrecord/include/shard_shuffle.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" -#include "utils/log_adapter.h" #include "pybind11/stl.h" +#include "utils/log_adapter.h" namespace mindspore { namespace dataset { using pFunction = Status (DEPipeline::*)(const py::dict &, std::shared_ptr *); -static std::unordered_map g_parse_op_func_ = {{kStorage, &DEPipeline::ParseStorageOp}, - {kShuffle, &DEPipeline::ParseShuffleOp}, - {kMindrecord, &DEPipeline::ParseMindRecordOp}, - {kMap, &DEPipeline::ParseMapOp}, - {kFilter, &DEPipeline::ParseFilterOp}, - {kBatch, &DEPipeline::ParseBatchOp}, - {kBarrier, &DEPipeline::ParseBarrierOp}, - {kRepeat, &DEPipeline::ParseRepeatOp}, - {kSkip, &DEPipeline::ParseSkipOp}, - {kZip, &DEPipeline::ParseZipOp}, - {kConcat, &DEPipeline::ParseConcatOp}, - {kRename, &DEPipeline::ParseRenameOp}, - {kDeviceQueue, &DEPipeline::ParseDeviceQueueOp}, - {kGenerator, &DEPipeline::ParseGeneratorOp}, - {kTfReader, &DEPipeline::ParseTFReaderOp}, - {kProject, &DEPipeline::ParseProjectOp}, - {kTake, &DEPipeline::ParseTakeOp}, - {kImageFolder, &DEPipeline::ParseImageFolderOp}, - {kMnist, &DEPipeline::ParseMnistOp}, - {kManifest, &DEPipeline::ParseManifestOp}, - {kVoc, &DEPipeline::ParseVOCOp}, - {kCifar10, &DEPipeline::ParseCifar10Op}, - {kCifar100, &DEPipeline::ParseCifar100Op}, - {kCelebA, &DEPipeline::ParseCelebAOp}, - {kRandomData, &DEPipeline::ParseRandomDataOp}, - {kTextFile, &DEPipeline::ParseTextFileOp}}; +static std::unordered_map g_parse_op_func_ = { + {kShuffle, &DEPipeline::ParseShuffleOp}, + {kMindrecord, &DEPipeline::ParseMindRecordOp}, + {kMap, 
&DEPipeline::ParseMapOp}, + {kFilter, &DEPipeline::ParseFilterOp}, + {kBatch, &DEPipeline::ParseBatchOp}, + {kBucketBatch, &DEPipeline::ParseBucketBatchByLengthOp}, + {kBarrier, &DEPipeline::ParseBarrierOp}, + {kRepeat, &DEPipeline::ParseRepeatOp}, + {kSkip, &DEPipeline::ParseSkipOp}, + {kZip, &DEPipeline::ParseZipOp}, + {kConcat, &DEPipeline::ParseConcatOp}, + {kRename, &DEPipeline::ParseRenameOp}, + {kDeviceQueue, &DEPipeline::ParseDeviceQueueOp}, + {kGenerator, &DEPipeline::ParseGeneratorOp}, + {kTfReader, &DEPipeline::ParseTFReaderOp}, + {kProject, &DEPipeline::ParseProjectOp}, + {kTake, &DEPipeline::ParseTakeOp}, + {kImageFolder, &DEPipeline::ParseImageFolderOp}, + {kMnist, &DEPipeline::ParseMnistOp}, + {kManifest, &DEPipeline::ParseManifestOp}, + {kVoc, &DEPipeline::ParseVOCOp}, + {kCoco, &DEPipeline::ParseCocoOp}, + {kCifar10, &DEPipeline::ParseCifar10Op}, + {kCifar100, &DEPipeline::ParseCifar100Op}, + {kCelebA, &DEPipeline::ParseCelebAOp}, + {kRandomData, &DEPipeline::ParseRandomDataOp}, + {kTextFile, &DEPipeline::ParseTextFileOp}, + {kBuildVocab, &DEPipeline::ParseBuildVocabOp}, + {kClue, &DEPipeline::ParseClueOp}}; DEPipeline::DEPipeline() : iterator_(nullptr) { try { @@ -292,70 +300,6 @@ Status DEPipeline::SetBatchParameters(const py::dict &args) { return Status::OK(); } -Status DEPipeline::ValidateArgStorageOp(const py::dict &args) { - // Required arguments - if (((args.contains("dataset_files") && args["dataset_files"].is_none()) || args["schema"].is_none()) && - ((args.contains("dataset_dir") && args["dataset_dir"].is_none()) || - (args["schema"].is_none() && args["schema_json_string"].is_none()))) { - std::string err_msg = "Error: at least one of dataset_files or schema_file is missing"; - RETURN_STATUS_UNEXPECTED(err_msg); - } - - return Status::OK(); -} - -Status DEPipeline::ParseStorageOp(const py::dict &args, std::shared_ptr *ptr) { - RETURN_IF_NOT_OK(ValidateArgStorageOp(args)); - std::shared_ptr builder; - if (args.contains("dataset_files") && 
!args["dataset_files"].is_none()) { - builder = std::make_shared(); - (void)builder->SetDatasetFileList(ToStringVector(args["dataset_files"])); - (void)builder->SetSchemaFile(ToString(args["schema"])); - } else if (args.contains("dataset_dir") && !args["dataset_dir"].is_none()) { - builder = std::make_shared(); - (void)builder->SetDatasetFilesDir(ToString(args["dataset_dir"])); - if (!args["schema"].is_none()) { - (void)builder->SetSchemaFile(ToString(args["schema"])); - } else if (!args["schema_json_string"].is_none()) { - std::unique_ptr schema = std::make_unique(); - std::string s = ToString(args["schema_json_string"]); - RETURN_IF_NOT_OK(schema->LoadSchemaString(s, std::vector())); - (void)builder->SetNumRows(schema->num_rows()); - (void)builder->SetSchema(std::move(schema)); - } - } - - // Optional arguments - for (auto arg : args) { - std::string key = py::str(arg.first); - py::handle value = arg.second; - if (!value.is_none()) { - if (key == "num_parallel_workers") { - (void)builder->SetNumWorkers(ToInt(value)); - } else if (key == "prefetch_size") { - (void)builder->SetOpConnectorSize(ToInt(value)); - } else if (key == "columns_list") { - (void)builder->SetColumnsToLoad(ToStringVector(value)); - } else if (key == "distribution") { - (void)builder->SetDataDistributionFile(ToString(value)); - } else if (key == "labels_filename") { - (void)builder->setLabelsFileName(ToString(value)); - } else if (key == "dataset_usage") { - (void)builder->SetDatasetUsage(ToString(value)); - } - } - } - (void)builder->SetBatchSize(temp_batch_size_); - (void)builder->SetDropRemainder(temp_drop_remainder_); - - std::shared_ptr op; - RETURN_IF_NOT_OK(builder->Build(&op)); - num_rows_ = op->num_rows(); - num_classes_ = op->num_classes(); - *ptr = op; - return Status::OK(); -} - Status DEPipeline::ParseShuffleOp(const py::dict &args, std::shared_ptr *ptr) { std::shared_ptr builder = std::make_shared(); if (!args["buffer_size"].is_none()) { @@ -382,35 +326,27 @@ Status 
DEPipeline::ParseShuffleOp(const py::dict &args, std::shared_ptr *in_partitions) { - if (args["partitions"].is_none()) { - std::string err_msg = "Error: partitions is not set (None)"; - RETURN_STATUS_UNEXPECTED(err_msg); - } - - py::list list = py::reinterpret_borrow(args["partitions"]); - for (auto l : list) { - if (!l.is_none()) { - in_partitions->push_back(ToInt(l)); +Status DEPipeline::BuildMindrecordSamplerChain(const py::handle &handle, + std::vector> *operators, + int num_padded) { + auto sampler = py::reinterpret_borrow(handle); + auto create = sampler.attr("create_for_minddataset"); + auto op = create().cast>(); + std::stack> stack_ops; + while (op != nullptr) { + auto sampler_op = std::dynamic_pointer_cast(op); + if (sampler_op && num_padded > 0) { + sampler_op->SetNumPaddedSamples(num_padded); + stack_ops.push(sampler_op); + } else { + stack_ops.push(op); } + op = op->GetChildOp(); } - - if (in_partitions->size() != 2) { - std::string err_msg = "Error: partitions is invalid or not set."; - RETURN_STATUS_UNEXPECTED(err_msg); - } - - constexpr int kMaxPartitions = 64; - if (in_partitions->at(0) <= 0 || in_partitions->at(0) > kMaxPartitions) { - std::string err_msg = "Error: partitions is invalid or not set."; - RETURN_STATUS_UNEXPECTED(err_msg); - } - - if (in_partitions->at(1) < 0 || in_partitions->at(1) >= in_partitions->at(0)) { - std::string err_msg = "Error: partitions is invalid or not set."; - RETURN_STATUS_UNEXPECTED(err_msg); + while (!stack_ops.empty()) { + operators->push_back(stack_ops.top()); + stack_ops.pop(); } - return Status::OK(); } @@ -438,6 +374,10 @@ Status DEPipeline::ParseMindRecordOp(const py::dict &args, std::shared_ptrSetColumnsToLoad(in_col_names); } + if (!args["padded_sample"].is_none()) { + (void)builder->SetPaddedSample(args["padded_sample"]); + (void)builder->SetNumToPadSamples(ToInt(args["num_padded"])); + } std::vector> operators; for (auto arg : args) { std::string key = py::str(arg.first); @@ -447,27 +387,16 @@ Status 
DEPipeline::ParseMindRecordOp(const py::dict &args, std::shared_ptrSetNumMindRecordWorkers(ToInt(value)); } else if (key == "block_reader" && ToBool(value) == true) { (void)builder->SetBlockReader(); - } else if (key == "global_shuffle" && ToBool(value) == true) { - uint32_t seed = args["partitions"].is_none() ? GetSeed() : 0; - operators.push_back(std::make_shared(seed)); } else if (key == "sampler") { - auto create = py::reinterpret_borrow(value).attr("_create_for_minddataset"); - std::shared_ptr sample_op = - create().cast>(); - operators.push_back(sample_op); + int num_padded = 0; + if (!args["num_padded"].is_none()) { + num_padded = ToInt(args["num_padded"]); + } + RETURN_IF_NOT_OK(BuildMindrecordSamplerChain(value, &operators, num_padded)); } } } - std::vector in_partitions; - if (!args["partitions"].is_none()) { - auto ret = CheckMindRecordPartitionInfo(args, &in_partitions); - if (Status::OK() != ret) { - return ret; - } - operators.push_back(std::make_shared(1, in_partitions[0], in_partitions[1])); - } - if (!operators.empty()) { (void)builder->SetOperators(operators); } @@ -493,6 +422,8 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr * (void)builder->SetInColNames(in_col_names); } else if (key == "output_columns") { (void)builder->SetOutColNames(ToStringVector(value)); + } else if (key == "columns_order") { + (void)builder->SetColOrder(ToStringVector(value)); } else if (key == "num_parallel_workers") { (void)builder->SetNumWorkers(ToInt(value)); } else if (key == "prefetch_size") { @@ -642,18 +573,8 @@ Status DEPipeline::ParseBatchOp(const py::dict &args, std::shared_ptr (void)builder->SetColumnsToMap(ToStringVector(value)); } if (key == "pad_info") { - std::map> pad_info; - for (auto p : py::reinterpret_borrow(value)) { - if (!p.second.is_none()) { - py::tuple tp = py::reinterpret_borrow(p.second); - CHECK_FAIL_RETURN_UNEXPECTED(tp.size() == 2, "tuple in pad_info must be (list,int) or (list,float)"); - TensorShape shape = 
tp[0].is_none() ? TensorShape::CreateUnknownRankShape() : TensorShape(tp[0]); - float pad_val = tp[1].is_none() ? 0 : ToFloat(tp[1]); - (void)pad_info.insert({ToString(p.first), {shape, pad_val}}); - } else { // tuple is None - (void)pad_info.insert({ToString(p.first), {TensorShape({}), 0}}); - } - } + PadInfo pad_info; + RETURN_IF_NOT_OK(ParsePadInfo(value, &pad_info)); (void)builder->SetPaddingMap(pad_info, true); } } @@ -665,6 +586,56 @@ Status DEPipeline::ParseBatchOp(const py::dict &args, std::shared_ptr return Status::OK(); } +Status DEPipeline::ParseBucketBatchByLengthOp(const py::dict &args, std::shared_ptr *ptr) { + std::vector mandatory_arguments = {"length_dependent_columns", "bucket_boundaries", + "bucket_batch_sizes"}; + for (auto name : mandatory_arguments) { + if (args[name.c_str()].is_none()) { + std::string err_msg = "Error: " + name + " is not set."; + RETURN_STATUS_UNEXPECTED(err_msg); + } + } + + std::shared_ptr builder = std::make_shared( + ToStringVector(args[mandatory_arguments[0].c_str()]), ToIntVector(args[mandatory_arguments[1].c_str()]), + ToIntVector(args[mandatory_arguments[2].c_str()])); + + for (auto arg : args) { + std::string key = py::str(arg.first); + py::handle value = arg.second; + if (!value.is_none()) { + if (key == "length_dependent_columns") { + (void)builder->SetLengthDependentColumns(ToStringVector(value)); + } + if (key == "bucket_boundaries") { + (void)builder->SetBucketBoundaries(ToIntVector(value)); + } + if (key == "bucket_batch_sizes") { + (void)builder->SetBucketBatchSizes(ToIntVector(value)); + } + if (key == "element_length_function") { + (void)builder->SetElementLengthFunction(value.cast()); + } + if (key == "pad_info") { + PadInfo pad_info; + RETURN_IF_NOT_OK(ParsePadInfo(value, &pad_info)); + (void)builder->SetPadInfo(pad_info); + } + if (key == "pad_to_bucket_boundary") { + (void)builder->SetPadToBucketBoundary(ToBool(value)); + } + if (key == "drop_remainder") { + 
(void)builder->SetDropRemainder(ToBool(value)); + } + } + } + + std::shared_ptr op; + RETURN_IF_NOT_OK(builder->Build(&op)); + *ptr = op; + return Status::OK(); +} + Status DEPipeline::ParseBarrierOp(const py::dict &args, std::shared_ptr *ptr) { std::shared_ptr builder = std::make_shared(); // Right now barrier should only take num_rows_per_buffer = 1 @@ -801,6 +772,8 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptrSetColumnsToLoad(columns_to_load); } else if (key == "shuffle_files") { (void)builder->SetShuffleFiles(ToBool(value)); + } else if (key == "shuffle_global") { + (void)builder->SetShuffleGlobal(ToBool(value)); } else if (key == "schema_file_path" || key == "schema_json_string") { schema_exists = true; } else if (key == "num_samples") { @@ -856,9 +829,7 @@ Status DEPipeline::ParseImageFolderOp(const py::dict &args, std::shared_ptrSetNumSamples(ToInt(value)); - } else if (key == "num_parallel_workers") { + if (key == "num_parallel_workers") { (void)builder->SetNumWorkers(ToInt(value)); } else if (key == "sampler") { auto create = py::reinterpret_borrow(value).attr("create"); @@ -893,9 +864,7 @@ Status DEPipeline::ParseManifestOp(const py::dict &args, std::shared_ptrSetNumSamples(ToInt(value)); - } else if (key == "num_parallel_workers") { + if (key == "num_parallel_workers") { (void)builder->SetNumWorkers(ToInt(value)); } else if (key == "sampler") { auto create = py::reinterpret_borrow(value).attr("create"); @@ -922,6 +891,16 @@ Status DEPipeline::ParseVOCOp(const py::dict &args, std::shared_ptr * RETURN_STATUS_UNEXPECTED(err_msg); } + if (args["task"].is_none()) { + std::string err_msg = "Error: No task specified"; + RETURN_STATUS_UNEXPECTED(err_msg); + } + + if (args["mode"].is_none()) { + std::string err_msg = "Error: No mode specified"; + RETURN_STATUS_UNEXPECTED(err_msg); + } + std::shared_ptr builder = std::make_shared(); (void)builder->SetDir(ToString(args["dataset_dir"])); (void)builder->SetTask(ToString(args["task"])); 
@@ -930,9 +909,7 @@ Status DEPipeline::ParseVOCOp(const py::dict &args, std::shared_ptr * std::string key = py::str(arg.first); py::handle value = arg.second; if (!value.is_none()) { - if (key == "num_samples") { - (void)builder->SetNumSamples(ToInt(value)); - } else if (key == "num_parallel_workers") { + if (key == "num_parallel_workers") { (void)builder->SetNumWorkers(ToInt(value)); } else if (key == "sampler") { auto create = py::reinterpret_borrow(value).attr("create"); @@ -951,6 +928,47 @@ Status DEPipeline::ParseVOCOp(const py::dict &args, std::shared_ptr * return Status::OK(); } +Status DEPipeline::ParseCocoOp(const py::dict &args, std::shared_ptr *ptr) { + if (args["dataset_dir"].is_none()) { + std::string err_msg = "Error: No dataset path specified"; + RETURN_STATUS_UNEXPECTED(err_msg); + } + + if (args["annotation_file"].is_none()) { + std::string err_msg = "Error: No annotation_file specified"; + RETURN_STATUS_UNEXPECTED(err_msg); + } + + if (args["task"].is_none()) { + std::string err_msg = "Error: No task specified"; + RETURN_STATUS_UNEXPECTED(err_msg); + } + + std::shared_ptr builder = std::make_shared(); + (void)builder->SetDir(ToString(args["dataset_dir"])); + (void)builder->SetFile(ToString(args["annotation_file"])); + (void)builder->SetTask(ToString(args["task"])); + for (auto arg : args) { + std::string key = py::str(arg.first); + py::handle value = arg.second; + if (!value.is_none()) { + if (key == "num_parallel_workers") { + (void)builder->SetNumWorkers(ToInt(value)); + } else if (key == "sampler") { + auto create = py::reinterpret_borrow(value).attr("create"); + std::shared_ptr sampler = create().cast>(); + (void)builder->SetSampler(std::move(sampler)); + } else if (key == "decode") { + (void)builder->SetDecode(ToBool(value)); + } + } + } + std::shared_ptr op; + RETURN_IF_NOT_OK(builder->Build(&op)); + *ptr = op; + return Status::OK(); +} + Status DEPipeline::ParseCifar10Op(const py::dict &args, std::shared_ptr *ptr) { // Required arguments if 
(args["dataset_dir"].is_none()) { @@ -966,9 +984,7 @@ Status DEPipeline::ParseCifar10Op(const py::dict &args, std::shared_ptrSetNumSamples(ToInt(value)); - } else if (key == "num_parallel_workers") { + if (key == "num_parallel_workers") { (void)builder->SetNumWorkers(ToInt(value)); } else if (key == "sampler") { auto create = py::reinterpret_borrow(value).attr("create"); @@ -1001,9 +1017,7 @@ Status DEPipeline::ParseCifar100Op(const py::dict &args, std::shared_ptrSetNumSamples(ToInt(value)); - } else if (key == "num_parallel_workers") { + if (key == "num_parallel_workers") { (void)builder->SetNumWorkers(ToInt(value)); } else if (key == "sampler") { auto create = py::reinterpret_borrow(value).attr("create"); @@ -1039,10 +1053,12 @@ Status DEPipeline::ParseRandomDataOp(const py::dict &args, std::shared_ptr std::string key = py::str(arg.first); py::handle value = arg.second; if (!value.is_none()) { - if (key == "num_samples") { - (void)builder->SetNumSamples(ToInt(value)); - } else if (key == "num_parallel_workers") { + if (key == "num_parallel_workers") { (void)builder->SetNumWorkers(ToInt(value)); } else if (key == "sampler") { auto create = py::reinterpret_borrow(value).attr("create"); @@ -1121,8 +1135,6 @@ Status DEPipeline::ParseCelebAOp(const py::dict &args, std::shared_ptrSetDecode(ToBool(value)); } else if (key == "extensions") { (void)builder->SetExtensions(ToStringSet(value)); - } else if (key == "num_samples") { - (void)builder->SetNumSamples(ToInt(value)); } else if (key == "dataset_type") { (void)builder->SetDatasetType(ToString(value)); } @@ -1152,8 +1164,10 @@ Status DEPipeline::ParseTextFileOp(const py::dict &args, std::shared_ptrSetNumWorkers(ToInt(value)); } else if (key == "shuffle_files") { (void)builder->SetShuffleFiles(ToBool(value)); + } else if (key == "shuffle_global") { + (void)builder->SetShuffleGlobal(ToBool(value)); } else if (key == "num_samples") { - (void)builder->SetNumSamples(ToInt(value)); + (void)builder->SetTotalRows(ToInt(value)); 
} else if (key == "num_shards") { (void)builder->SetNumDevices(ToInt(value)); } else if (key == "shard_id") { @@ -1166,5 +1180,106 @@ Status DEPipeline::ParseTextFileOp(const py::dict &args, std::shared_ptr(value)) { + if (!p.second.is_none()) { + auto tp = py::reinterpret_borrow(p.second); + CHECK_FAIL_RETURN_UNEXPECTED(tp.size() == 2, "tuple in pad_info must be (list,int) or (list,float)"); + TensorShape shape = tp[0].is_none() ? TensorShape::CreateUnknownRankShape() : TensorShape(tp[0]); + std::shared_ptr pad_val = nullptr; + if (py::isinstance(tp[1])) { + std::string pad_val_string = tp[1].is_none() ? "" : ToString(tp[1]); + CHECK_FAIL_RETURN_UNEXPECTED( + Tensor::CreateTensor(&pad_val, std::vector{pad_val_string}, TensorShape::CreateScalar()), + "Cannot create pad_value Tensor"); + } else { + float pad_val_float = tp[1].is_none() ? 0 : ToFloat(tp[1]); + CHECK_FAIL_RETURN_UNEXPECTED(Tensor::CreateTensor(&pad_val, TensorImpl::kFlexible, TensorShape::CreateScalar(), + DataType(DataType::DE_FLOAT32)), + "Cannot create pad_value Tensor"); + pad_val->SetItemAt({}, pad_val_float); + } + (void)pad_info->insert({ToString(p.first), {shape, pad_val}}); + } else { // tuple is None + (void)pad_info->insert({ToString(p.first), {TensorShape({}), nullptr}}); + } + } + return Status::OK(); +} + +Status DEPipeline::ParseBuildVocabOp(const py::dict &args, std::shared_ptr *ptr) { + std::shared_ptr builder = std::make_shared(); + for (auto arg : args) { + std::string key = py::str(arg.first); + py::handle value = arg.second; + if (!value.is_none()) { + if (key == "freq_range") { + py::tuple tp = py::reinterpret_borrow(value); + if (!tp[0].is_none()) (void)builder->SetMinFreq(py::reinterpret_borrow(tp[0])); + if (!tp[1].is_none()) (void)builder->SetMaxFreq(py::reinterpret_borrow(tp[1])); + } else if (key == "top_k") { + builder->SetTopK(py::reinterpret_borrow(value)); + } else if (key == "columns") { + (void)builder->SetColumnNames(ToStringVector(value)); + } else if (key == 
"vocab") { + (void)builder->SetVocab(value.cast>()); + } else if (key == "num_parallel_workers") { + (void)builder->SetNumWorkers(ToInt(value)); + } else if (key == "special_first") { + (void)builder->SetSpecialFirst(ToBool(value)); + } else if (key == "special_tokens") { + (void)builder->SetSpecialTokens(ToStringVector(value)); + } + } + } + std::shared_ptr op; + RETURN_IF_NOT_OK(builder->Build(&op)); + *ptr = op; + return Status::OK(); +} + +Status DEPipeline::ParseClueOp(const py::dict &args, std::shared_ptr *ptr) { + std::shared_ptr builder = std::make_shared(); + if (!args["dataset_files"].is_none()) { + (void)builder->SetClueFilesList(ToStringVector(args["dataset_files"])); + } else { + RETURN_STATUS_UNEXPECTED("Error: dataset_files is missing"); + } + // Optional arguments + for (auto arg : args) { + std::string key = py::str(arg.first); + py::handle value = arg.second; + if (!value.is_none()) { + if (key == "num_parallel_workers") { + (void)builder->SetNumWorkers(ToInt(value)); + } else if (key == "shuffle_files") { + (void)builder->SetShuffleFiles(ToBool(value)); + } else if (key == "shuffle_global") { + (void)builder->SetShuffleGlobal(ToBool(value)); + } else if (key == "num_samples") { + (void)builder->SetNumSamples(ToInt(value)); + } else if (key == "num_shards") { + (void)builder->SetNumDevices(ToInt(value)); + } else if (key == "shard_id") { + (void)builder->SetDeviceId(ToInt(value)); + } else if (key == "cols_to_keyword") { + std::map map_dict; + for (auto p : py::reinterpret_borrow(value)) { + if (!p.second.is_none()) { + map_dict.insert({ToString(p.first), ToString(p.second)}); + } else { + map_dict.insert({ToString(p.first), ToString(p.first)}); + } + } + (void)builder->SetColsKeyMap(map_dict); + } + } + } + std::shared_ptr op; + RETURN_IF_NOT_OK(builder->Build(&op)); + *ptr = op; + return Status::OK(); +} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.h 
b/mindspore/ccsrc/dataset/api/de_pipeline.h index 4ecfb080c1..d6127d5d44 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.h +++ b/mindspore/ccsrc/dataset/api/de_pipeline.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -36,10 +37,10 @@ using DsOpPtr = std::shared_ptr; // enum for the dataset operator names enum OpName { - kStorage = 0, kShuffle, kMindrecord, kBatch, + kBucketBatch, kBarrier, kCache, kRepeat, @@ -58,11 +59,14 @@ enum OpName { kMnist, kManifest, kVoc, + kCoco, kCifar10, kCifar100, kCelebA, kRandomData, - kTextFile + kTextFile, + kBuildVocab, + kClue }; // The C++ binder class that we expose to the python script. @@ -100,14 +104,14 @@ class DEPipeline { int GetRepeatCount() const; - Status ParseStorageOp(const py::dict &args, std::shared_ptr *ptr); - Status ParseShuffleOp(const py::dict &args, std::shared_ptr *ptr); - Status CheckMindRecordPartitionInfo(const py::dict &args, std::vector *ptr); - Status ParseMindRecordOp(const py::dict &args, std::shared_ptr *ptr); + Status BuildMindrecordSamplerChain(const py::handle &handle, + std::vector> *operators, + int num_padded); + Status ParseMapOp(const py::dict &args, std::shared_ptr *ptr); Status ParseFilterOp(const py::dict &args, std::shared_ptr *ptr); @@ -118,6 +122,8 @@ class DEPipeline { Status ParseBatchOp(const py::dict &args, std::shared_ptr *ptr); + Status ParseBucketBatchByLengthOp(const py::dict &args, std::shared_ptr *ptr); + Status ParseBarrierOp(const py::dict &args, std::shared_ptr *ptr); Status ParseGeneratorOp(const py::dict &args, std::shared_ptr *ptr); @@ -142,6 +148,8 @@ class DEPipeline { Status ParseVOCOp(const py::dict &args, std::shared_ptr *ptr); + Status ParseCocoOp(const py::dict &args, std::shared_ptr *ptr); + Status ParseCifar10Op(const py::dict &args, std::shared_ptr *ptr); Status ParseCifar100Op(const py::dict &args, std::shared_ptr *ptr); @@ -160,14 +168,17 @@ class DEPipeline { Status ParseTextFileOp(const py::dict &args, std::shared_ptr 
*ptr); + Status ParseBuildVocabOp(const py::dict &args, std::shared_ptr *ptr); + + Status ParseClueOp(const py::dict &args, std::shared_ptr *ptr); + private: // Execution tree that links the dataset operators. std::shared_ptr tree_; std::unique_ptr iterator_; - // Validate required args passed to storage op. - Status ValidateArgStorageOp(const py::dict &args); + static Status ParsePadInfo(py::handle value, PadInfo *pad_info); int batch_size_; int repeat_num_; diff --git a/mindspore/ccsrc/dataset/api/python_bindings.cc b/mindspore/ccsrc/dataset/api/python_bindings.cc index 55918d8b43..51f2be49d5 100644 --- a/mindspore/ccsrc/dataset/api/python_bindings.cc +++ b/mindspore/ccsrc/dataset/api/python_bindings.cc @@ -16,8 +16,37 @@ #include #include "dataset/api/de_pipeline.h" -#include "dataset/kernels/no_op.h" +#include "dataset/engine/datasetops/source/cifar_op.h" +#include "dataset/engine/datasetops/source/clue_op.h" +#include "dataset/engine/datasetops/source/coco_op.h" +#include "dataset/engine/datasetops/source/image_folder_op.h" +#include "dataset/engine/datasetops/source/io_block.h" +#include "dataset/engine/datasetops/source/manifest_op.h" +#include "dataset/engine/datasetops/source/mindrecord_op.h" +#include "dataset/engine/datasetops/source/mnist_op.h" +#include "dataset/engine/datasetops/source/random_data_op.h" +#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "dataset/engine/datasetops/source/sampler/python_sampler.h" +#include "dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "dataset/engine/datasetops/source/text_file_op.h" +#include "dataset/engine/datasetops/source/tf_reader_op.h" +#include 
"dataset/engine/datasetops/source/voc_op.h" +#include "dataset/engine/gnn/graph.h" +#include "dataset/engine/jagged_connector.h" +#include "dataset/kernels/data/concatenate_op.h" +#include "dataset/kernels/data/duplicate_op.h" +#include "dataset/kernels/data/fill_op.h" +#include "dataset/kernels/data/mask_op.h" #include "dataset/kernels/data/one_hot_op.h" +#include "dataset/kernels/data/pad_end_op.h" +#include "dataset/kernels/data/slice_op.h" +#include "dataset/kernels/data/to_float16_op.h" +#include "dataset/kernels/data/type_cast_op.h" +#include "dataset/kernels/image/bounding_box_augment_op.h" #include "dataset/kernels/image/center_crop_op.h" #include "dataset/kernels/image/cut_out_op.h" #include "dataset/kernels/image/decode_op.h" @@ -26,51 +55,51 @@ #include "dataset/kernels/image/normalize_op.h" #include "dataset/kernels/image/pad_op.h" #include "dataset/kernels/image/random_color_adjust_op.h" -#include "dataset/kernels/image/random_crop_decode_resize_op.h" #include "dataset/kernels/image/random_crop_and_resize_op.h" +#include "dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" +#include "dataset/kernels/image/random_crop_decode_resize_op.h" #include "dataset/kernels/image/random_crop_op.h" +#include "dataset/kernels/image/random_crop_with_bbox_op.h" +#include "dataset/kernels/image/random_horizontal_flip_bbox_op.h" #include "dataset/kernels/image/random_horizontal_flip_op.h" #include "dataset/kernels/image/random_resize_op.h" #include "dataset/kernels/image/random_rotation_op.h" #include "dataset/kernels/image/random_vertical_flip_op.h" +#include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h" #include "dataset/kernels/image/rescale_op.h" #include "dataset/kernels/image/resize_bilinear_op.h" #include "dataset/kernels/image/resize_op.h" #include "dataset/kernels/image/uniform_aug_op.h" -#include "dataset/kernels/data/type_cast_op.h" -#include "dataset/engine/datasetops/source/cifar_op.h" -#include 
"dataset/engine/datasetops/source/image_folder_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/datasetops/source/mnist_op.h" -#include "dataset/engine/datasetops/source/manifest_op.h" -#include "dataset/engine/datasetops/source/mindrecord_op.h" -#include "dataset/engine/datasetops/source/random_data_op.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/python_sampler.h" -#include "dataset/engine/datasetops/source/tf_reader_op.h" -#include "dataset/engine/jagged_connector.h" -#include "dataset/engine/datasetops/source/text_file_op.h" -#include "dataset/engine/datasetops/source/voc_op.h" -#include "dataset/engine/gnn/graph.h" -#include "dataset/kernels/data/to_float16_op.h" +#include "dataset/kernels/no_op.h" #include "dataset/text/kernels/jieba_tokenizer_op.h" +#include "dataset/text/kernels/lookup_op.h" +#include "dataset/text/kernels/ngram_op.h" +#include "dataset/text/kernels/to_number_op.h" #include "dataset/text/kernels/unicode_char_tokenizer_op.h" +#include "dataset/text/kernels/wordpiece_tokenizer_op.h" #include "dataset/text/vocab.h" -#include "dataset/text/kernels/lookup_op.h" #include "dataset/util/random.h" +#include "mindrecord/include/shard_distributed_sample.h" #include "mindrecord/include/shard_operator.h" #include "mindrecord/include/shard_pk_sample.h" #include "mindrecord/include/shard_sample.h" +#include "mindrecord/include/shard_sequential_sample.h" +#include 
"mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" #include "pybind11/stl_bind.h" +#ifdef ENABLE_ICU4C +#include "dataset/text/kernels/basic_tokenizer_op.h" +#include "dataset/text/kernels/bert_tokenizer_op.h" +#include "dataset/text/kernels/case_fold_op.h" +#include "dataset/text/kernels/normalize_utf8_op.h" +#include "dataset/text/kernels/regex_replace_op.h" +#include "dataset/text/kernels/regex_tokenizer_op.h" +#include "dataset/text/kernels/unicode_script_tokenizer_op.h" +#include "dataset/text/kernels/whitespace_tokenizer_op.h" +#endif + namespace py = pybind11; namespace mindspore { @@ -143,51 +172,49 @@ void bindDatasetOps(py::module *m) { }); (void)py::class_>(*m, "CifarOp") - .def_static("get_num_rows", [](const std::string &dir, int64_t numSamples, bool isCifar10) { + .def_static("get_num_rows", [](const std::string &dir, bool isCifar10) { int64_t count = 0; - THROW_IF_ERROR(CifarOp::CountTotalRows(dir, numSamples, isCifar10, &count)); + THROW_IF_ERROR(CifarOp::CountTotalRows(dir, isCifar10, &count)); return count; }); (void)py::class_>(*m, "ImageFolderOp") - .def_static("get_num_rows_and_classes", [](const std::string &path, int64_t numSamples) { + .def_static("get_num_rows_and_classes", [](const std::string &path) { int64_t count = 0, num_classes = 0; - THROW_IF_ERROR( - ImageFolderOp::CountRowsAndClasses(path, numSamples, std::set{}, &count, &num_classes)); + THROW_IF_ERROR(ImageFolderOp::CountRowsAndClasses(path, std::set{}, &count, &num_classes)); return py::make_tuple(count, num_classes); }); (void)py::class_>(*m, "MindRecordOp") - .def_static("get_num_rows", - [](const std::vector &paths, bool load_dataset, const py::object &sampler) { - int64_t count = 0; - std::shared_ptr op; - if (py::hasattr(sampler, "_create_for_minddataset")) { - auto create = sampler.attr("_create_for_minddataset"); - op = create().cast>(); - } - THROW_IF_ERROR(MindRecordOp::CountTotalRows(paths, 
load_dataset, op, &count)); - return count; - }); + .def_static("get_num_rows", [](const std::vector &paths, bool load_dataset, const py::object &sampler, + const int64_t num_padded) { + int64_t count = 0; + std::shared_ptr op; + if (py::hasattr(sampler, "create_for_minddataset")) { + auto create = sampler.attr("create_for_minddataset"); + op = create().cast>(); + } + THROW_IF_ERROR(MindRecordOp::CountTotalRows(paths, load_dataset, op, &count, num_padded)); + return count; + }); (void)py::class_>(*m, "ManifestOp") .def_static("get_num_rows_and_classes", - [](const std::string &file, int64_t numSamples, const py::dict &dict, const std::string &usage) { + [](const std::string &file, const py::dict &dict, const std::string &usage) { int64_t count = 0, num_classes = 0; - THROW_IF_ERROR(ManifestOp::CountTotalRows(file, numSamples, dict, usage, &count, &num_classes)); + THROW_IF_ERROR(ManifestOp::CountTotalRows(file, dict, usage, &count, &num_classes)); return py::make_tuple(count, num_classes); }) - .def_static("get_class_indexing", - [](const std::string &file, int64_t numSamples, const py::dict &dict, const std::string &usage) { - std::map output_class_indexing; - THROW_IF_ERROR(ManifestOp::GetClassIndexing(file, numSamples, dict, usage, &output_class_indexing)); - return output_class_indexing; - }); + .def_static("get_class_indexing", [](const std::string &file, const py::dict &dict, const std::string &usage) { + std::map output_class_indexing; + THROW_IF_ERROR(ManifestOp::GetClassIndexing(file, dict, usage, &output_class_indexing)); + return output_class_indexing; + }); (void)py::class_>(*m, "MnistOp") - .def_static("get_num_rows", [](const std::string &dir, int64_t numSamples) { + .def_static("get_num_rows", [](const std::string &dir) { int64_t count = 0; - THROW_IF_ERROR(MnistOp::CountTotalRows(dir, numSamples, &count)); + THROW_IF_ERROR(MnistOp::CountTotalRows(dir, &count)); return count; }); @@ -201,20 +228,44 @@ void bindDatasetOps(py::module *m) { 
THROW_IF_ERROR(TextFileOp::CountAllFileRows(filenames, &count)); return count; }); + + (void)py::class_>(*m, "ClueOp") + .def_static("get_num_rows", [](const py::list &files) { + int64_t count = 0; + std::vector filenames; + for (auto file : files) { + file.is_none() ? (void)filenames.emplace_back("") : filenames.push_back(py::str(file)); + } + THROW_IF_ERROR(ClueOp::CountAllFileRows(filenames, &count)); + return count; + }); + (void)py::class_>(*m, "VOCOp") .def_static("get_num_rows", [](const std::string &dir, const std::string &task_type, const std::string &task_mode, const py::dict &dict, int64_t numSamples) { int64_t count = 0; - THROW_IF_ERROR(VOCOp::CountTotalRows(dir, task_type, task_mode, dict, numSamples, &count)); + THROW_IF_ERROR(VOCOp::CountTotalRows(dir, task_type, task_mode, dict, &count)); return count; }) .def_static("get_class_indexing", [](const std::string &dir, const std::string &task_type, - const std::string &task_mode, const py::dict &dict, int64_t numSamples) { + const std::string &task_mode, const py::dict &dict) { std::map output_class_indexing; - THROW_IF_ERROR(VOCOp::GetClassIndexing(dir, task_type, task_mode, dict, numSamples, &output_class_indexing)); + THROW_IF_ERROR(VOCOp::GetClassIndexing(dir, task_type, task_mode, dict, &output_class_indexing)); return output_class_indexing; }); + (void)py::class_>(*m, "CocoOp") + .def_static("get_class_indexing", + [](const std::string &dir, const std::string &file, const std::string &task) { + std::vector>> output_class_indexing; + THROW_IF_ERROR(CocoOp::GetClassIndexing(dir, file, task, &output_class_indexing)); + return output_class_indexing; + }) + .def_static("get_num_rows", [](const std::string &dir, const std::string &file, const std::string &task) { + int64_t count = 0; + THROW_IF_ERROR(CocoOp::CountTotalRows(dir, file, task, &count)); + return count; + }); } void bindTensor(py::module *m) { (void)py::class_(*m, "GlobalContext") @@ -227,12 +278,14 @@ void bindTensor(py::module *m) { 
.def("set_worker_connector_size", &ConfigManager::set_worker_connector_size) .def("set_op_connector_size", &ConfigManager::set_op_connector_size) .def("set_seed", &ConfigManager::set_seed) + .def("set_monitor_sampling_interval", &ConfigManager::set_monitor_sampling_interval) .def("get_rows_per_buffer", &ConfigManager::rows_per_buffer) .def("get_num_parallel_workers", &ConfigManager::num_parallel_workers) .def("get_worker_connector_size", &ConfigManager::worker_connector_size) .def("get_op_connector_size", &ConfigManager::op_connector_size) .def("get_seed", &ConfigManager::seed) - .def("load", [](ConfigManager &c, std::string s) { (void)c.LoadFile(s); }); + .def("get_monitor_sampling_interval", &ConfigManager::monitor_sampling_interval) + .def("load", [](ConfigManager &c, std::string s) { THROW_IF_ERROR(c.LoadFile(s)); }); (void)py::class_>(*m, "Tensor", py::buffer_protocol()) .def(py::init([](py::array arr) { @@ -300,6 +353,11 @@ void bindTensorOps1(py::module *m) { .def(py::init>, int32_t>(), py::arg("operations"), py::arg("NumOps") = UniformAugOp::kDefNumOps); + (void)py::class_>( + *m, "BoundingBoxAugmentOp", "Tensor operation to apply a transformation on a random choice of bounding boxes.") + .def(py::init, float>(), py::arg("transform"), + py::arg("ratio") = BoundingBoxAugmentOp::kDefRatio); + (void)py::class_>( *m, "ResizeBilinearOp", "Tensor operation to resize an image using " @@ -314,6 +372,11 @@ void bindTensorOps1(py::module *m) { (void)py::class_>( *m, "RandomHorizontalFlipOp", "Tensor operation to randomly flip an image horizontally.") .def(py::init(), py::arg("probability") = RandomHorizontalFlipOp::kDefProbability); + + (void)py::class_>( + *m, "RandomHorizontalFlipWithBBoxOp", + "Tensor operation to randomly flip an image horizontally, while flipping bounding boxes.") + .def(py::init(), py::arg("probability") = RandomHorizontalFlipWithBBoxOp::kDefProbability); } void bindTensorOps2(py::module *m) { @@ -321,6 +384,12 @@ void bindTensorOps2(py::module 
*m) { *m, "RandomVerticalFlipOp", "Tensor operation to randomly flip an image vertically.") .def(py::init(), py::arg("probability") = RandomVerticalFlipOp::kDefProbability); + (void)py::class_>( + *m, "RandomVerticalFlipWithBBoxOp", + "Tensor operation to randomly flip an image vertically" + " and adjust bounding boxes.") + .def(py::init(), py::arg("probability") = RandomVerticalFlipWithBBoxOp::kDefProbability); + (void)py::class_>(*m, "RandomCropOp", "Gives random crop of specified size " "Takes crop size") @@ -332,10 +401,84 @@ void bindTensorOps2(py::module *m) { py::arg("fillG") = RandomCropOp::kDefFillG, py::arg("fillB") = RandomCropOp::kDefFillB); (void)py::class_>(*m, "ChannelSwapOp").def(py::init<>()); + (void)py::class_>(*m, "RandomCropWithBBoxOp", + "Gives random crop of given " + "size + adjusts bboxes " + "Takes crop size") + .def(py::init(), + py::arg("cropHeight"), py::arg("cropWidth"), py::arg("padTop") = RandomCropWithBBoxOp::kDefPadTop, + py::arg("padBottom") = RandomCropWithBBoxOp::kDefPadBottom, + py::arg("padLeft") = RandomCropWithBBoxOp::kDefPadLeft, + py::arg("padRight") = RandomCropWithBBoxOp::kDefPadRight, + py::arg("borderType") = RandomCropWithBBoxOp::kDefBorderType, + py::arg("padIfNeeded") = RandomCropWithBBoxOp::kDefPadIfNeeded, + py::arg("fillR") = RandomCropWithBBoxOp::kDefFillR, py::arg("fillG") = RandomCropWithBBoxOp::kDefFillG, + py::arg("fillB") = RandomCropWithBBoxOp::kDefFillB); + (void)py::class_>( *m, "OneHotOp", "Tensor operation to apply one hot encoding. 
Takes number of classes.") .def(py::init()); + (void)py::class_>( + *m, "FillOp", "Tensor operation to return tensor filled with same value as input fill value.") + .def(py::init>()); + + (void)py::class_>(*m, "SliceOp", "Tensor slice operation.") + .def(py::init()) + .def(py::init([](const py::list &py_list) { + std::vector c_list; + for (auto l : py_list) { + if (!l.is_none()) { + c_list.push_back(py::reinterpret_borrow(l)); + } + } + return std::make_shared(c_list); + })) + .def(py::init([](const py::tuple &py_slice) { + if (py_slice.size() != 3) { + THROW_IF_ERROR(Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Wrong slice object")); + } + Slice c_slice; + if (!py_slice[0].is_none() && !py_slice[1].is_none() && !py_slice[2].is_none()) { + c_slice = Slice(py::reinterpret_borrow(py_slice[0]), py::reinterpret_borrow(py_slice[1]), + py::reinterpret_borrow(py_slice[2])); + } else if (py_slice[0].is_none() && py_slice[2].is_none()) { + c_slice = Slice(py::reinterpret_borrow(py_slice[1])); + } else if (!py_slice[0].is_none() && !py_slice[1].is_none()) { + c_slice = Slice(py::reinterpret_borrow(py_slice[0]), py::reinterpret_borrow(py_slice[1])); + } + + if (!c_slice.valid()) { + THROW_IF_ERROR(Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Wrong slice object")); + } + return std::make_shared(c_slice); + })); + + (void)py::enum_(*m, "RelationalOp", py::arithmetic()) + .value("EQ", RelationalOp::kEqual) + .value("NE", RelationalOp::kNotEqual) + .value("LT", RelationalOp::kLess) + .value("LE", RelationalOp::kLessEqual) + .value("GT", RelationalOp::kGreater) + .value("GE", RelationalOp::kGreaterEqual) + .export_values(); + + (void)py::class_>(*m, "MaskOp", + "Tensor mask operation using relational comparator") + .def(py::init, DataType>()); + + (void)py::class_>(*m, "DuplicateOp", "Duplicate tensor.") + .def(py::init<>()); + + (void)py::class_>( + *m, "TruncateSequencePairOp", "Tensor operation to truncate two tensors to a max_length") + 
.def(py::init()); + + (void)py::class_>(*m, "ConcatenateOp", + "Tensor operation concatenate tensors.") + .def(py::init, std::shared_ptr>(), py::arg("axis"), + py::arg("prepend").none(true), py::arg("append").none(true)); + (void)py::class_>( *m, "RandomRotationOp", "Tensor operation to apply RandomRotation." @@ -347,6 +490,10 @@ void bindTensorOps2(py::module *m) { py::arg("interpolation") = RandomRotationOp::kDefInterpolation, py::arg("expand") = RandomRotationOp::kDefExpand, py::arg("fillR") = RandomRotationOp::kDefFillR, py::arg("fillG") = RandomRotationOp::kDefFillG, py::arg("fillB") = RandomRotationOp::kDefFillB); + + (void)py::class_>( + *m, "PadEndOp", "Tensor operation to pad end of tensor with a pad value.") + .def(py::init>()); } void bindTensorOps3(py::module *m) { @@ -364,6 +511,20 @@ void bindTensorOps3(py::module *m) { py::arg("interpolation") = RandomCropAndResizeOp::kDefInterpolation, py::arg("maxIter") = RandomCropAndResizeOp::kDefMaxIter); + (void)py::class_>( + *m, "RandomCropAndResizeWithBBoxOp", + "Tensor operation to randomly crop an image (with BBoxes) and resize to a given size." + "Takes output height and width and" + "optional parameters for lower and upper bound for aspect ratio (h/w) and scale," + "interpolation mode, and max attempts to crop") + .def(py::init(), py::arg("targetHeight"), + py::arg("targetWidth"), py::arg("scaleLb") = RandomCropAndResizeWithBBoxOp::kDefScaleLb, + py::arg("scaleUb") = RandomCropAndResizeWithBBoxOp::kDefScaleUb, + py::arg("aspectLb") = RandomCropAndResizeWithBBoxOp::kDefAspectLb, + py::arg("aspectUb") = RandomCropAndResizeWithBBoxOp::kDefAspectUb, + py::arg("interpolation") = RandomCropAndResizeWithBBoxOp::kDefInterpolation, + py::arg("maxIter") = RandomCropAndResizeWithBBoxOp::kDefMaxIter); + (void)py::class_>( *m, "RandomColorAdjustOp", "Tensor operation to adjust an image's color randomly." 
@@ -418,9 +579,13 @@ void bindTensorOps4(py::module *m) { .def(py::init(), py::arg("padTop"), py::arg("padBottom"), py::arg("padLeft"), py::arg("padRight"), py::arg("borderTypes") = PadOp::kDefBorderType, py::arg("fillR") = PadOp::kDefFillR, py::arg("fillG") = PadOp::kDefFillG, py::arg("fillB") = PadOp::kDefFillB); + (void)py::class_>(*m, "ToNumberOp", + "TensorOp to convert strings to numbers.") + .def(py::init(), py::arg("data_type")) + .def(py::init(), py::arg("data_type")); } -void bindTensorOps5(py::module *m) { +void bindTokenizerOps(py::module *m) { (void)py::class_>(*m, "JiebaTokenizerOp", "") .def(py::init(), py::arg("hmm_path"), py::arg("mp_path"), py::arg("mode") = JiebaMode::kMix) @@ -433,6 +598,60 @@ void bindTensorOps5(py::module *m) { "Tensor operation to LookUp each word") .def(py::init, WordIdType>(), py::arg("vocab"), py::arg("unknown")) .def(py::init>(), py::arg("vocab")); + (void)py::class_>(*m, "NgramOp", "TensorOp performs ngram mapping") + .def(py::init &, int32_t, int32_t, const std::string &, const std::string &, + const std::string &>(), + py::arg("ngrams"), py::arg("l_pad_len"), py::arg("r_pad_len"), py::arg("l_pad_token"), py::arg("r_pad_token"), + py::arg("separator")); + (void)py::class_>( + *m, "WordpieceTokenizerOp", "Tokenize scalar token or 1-D tokens to subword tokens.") + .def(py::init &, const std::string &, const int &, const std::string &>(), + py::arg("vocab"), py::arg("suffix_indicator") = std::string(WordpieceTokenizerOp::kDefSuffixIndicator), + py::arg("max_bytes_per_token") = WordpieceTokenizerOp::kDefMaxBytesPerToken, + py::arg("unknown_token") = std::string(WordpieceTokenizerOp::kDefUnknownToken)); +} + +void bindDependIcuTokenizerOps(py::module *m) { +#ifdef ENABLE_ICU4C + (void)py::class_>( + *m, "WhitespaceTokenizerOp", "Tokenize a scalar tensor of UTF-8 string on ICU defined whitespaces.") + .def(py::init<>()); + (void)py::class_>( + *m, "UnicodeScriptTokenizerOp", "Tokenize a scalar tensor of UTF-8 string on 
Unicode script boundaries.") + .def(py::init<>()) + .def(py::init(), py::arg("keep_whitespace") = UnicodeScriptTokenizerOp::kDefKeepWhitespace); + (void)py::class_>( + *m, "CaseFoldOp", "Apply case fold operation on utf-8 string tensor") + .def(py::init<>()); + (void)py::class_>( + *m, "NormalizeUTF8Op", "Apply normalize operation on utf-8 string tensor.") + .def(py::init<>()) + .def(py::init(), py::arg("normalize_form") = NormalizeUTF8Op::kDefNormalizeForm); + (void)py::class_>( + *m, "RegexReplaceOp", "Replace utf-8 string tensor with 'replace' according to regular expression 'pattern'.") + .def(py::init(), py::arg("pattern"), py::arg("replace"), + py::arg("replace_all")); + (void)py::class_>( + *m, "RegexTokenizerOp", "Tokenize a scalar tensor of UTF-8 string by regex expression pattern.") + .def(py::init(), py::arg("delim_pattern"), py::arg("keep_delim_pattern")); + (void)py::class_>( + *m, "BasicTokenizerOp", "Tokenize a scalar tensor of UTF-8 string by specific rules.") + .def(py::init(), py::arg("lower_case") = BasicTokenizerOp::kDefLowerCase, + py::arg("keep_whitespace") = BasicTokenizerOp::kDefKeepWhitespace, + py::arg("normalization_form") = BasicTokenizerOp::kDefNormalizationForm, + py::arg("preserve_unused_token") = BasicTokenizerOp::kDefPreserveUnusedToken); + (void)py::class_>(*m, "BertTokenizerOp", + "Tokenizer used for Bert text process.") + .def(py::init &, const std::string &, const int &, const std::string &, bool, bool, + NormalizeForm, bool>(), + py::arg("vocab"), py::arg("suffix_indicator") = std::string(WordpieceTokenizerOp::kDefSuffixIndicator), + py::arg("max_bytes_per_token") = WordpieceTokenizerOp::kDefMaxBytesPerToken, + py::arg("unknown_token") = std::string(WordpieceTokenizerOp::kDefUnknownToken), + py::arg("lower_case") = BasicTokenizerOp::kDefLowerCase, + py::arg("keep_whitespace") = BasicTokenizerOp::kDefKeepWhitespace, + py::arg("normalization_form") = BasicTokenizerOp::kDefNormalizationForm, + py::arg("preserve_unused_token") = 
BasicTokenizerOp::kDefPreserveUnusedToken); +#endif } void bindSamplerOps(py::module *m) { @@ -449,32 +668,29 @@ void bindSamplerOps(py::module *m) { .def("add_child", [](std::shared_ptr self, std::shared_ptr child) { THROW_IF_ERROR(self->AddChild(child)); }); - (void)py::class_>(*m, "ShardOperator"); + (void)py::class_>(*m, "ShardOperator") + .def("add_child", [](std::shared_ptr self, + std::shared_ptr child) { self->SetChildOp(child); }); (void)py::class_>(*m, "DistributedSampler") - .def(py::init(), py::arg("numDev"), py::arg("devId"), py::arg("shuffle"), - py::arg("seed")); + .def(py::init()); (void)py::class_>(*m, "PKSampler") - .def(py::init(), py::arg("kVal"), py::arg("shuffle")); + .def(py::init()); (void)py::class_>(*m, "RandomSampler") - .def(py::init(), py::arg("replacement"), py::arg("reshuffle_each_epoch"), - py::arg("num_samples")) - .def(py::init(), py::arg("replacement"), py::arg("reshuffle_each_epoch")); + .def(py::init()); (void)py::class_>(*m, "SequentialSampler") - .def(py::init<>()); - - (void)py::class_>(*m, "SubsetSampler") - .def(py::init(), py::arg("start_index"), py::arg("subset_size")); + .def(py::init()); (void)py::class_>(*m, "SubsetRandomSampler") - .def(py::init>(), py::arg("indices")); + .def(py::init>()); (void)py::class_>( *m, "MindrecordSubsetRandomSampler") .def(py::init, uint32_t>(), py::arg("indices"), py::arg("seed") = GetSeed()); + (void)py::class_>( *m, "MindrecordPkSampler") .def(py::init([](int64_t kVal, std::string kColumn, bool shuffle) { @@ -486,12 +702,27 @@ void bindSamplerOps(py::module *m) { } })); + (void)py::class_>(*m, "MindrecordDistributedSampler") + .def(py::init()); + + (void)py::class_>( + *m, "MindrecordRandomSampler") + .def(py::init([](int64_t num_samples, bool replacement, bool reshuffle_each_epoch) { + return std::make_shared(GetSeed(), num_samples, replacement, reshuffle_each_epoch); + })); + + (void)py::class_>(*m, "MindrecordSequentialSampler") + .def(py::init([](int num_samples, int start_index) { + 
return std::make_shared(num_samples, start_index); + })); + (void)py::class_>(*m, "WeightedRandomSampler") - .def(py::init, int64_t, bool>(), py::arg("weights"), py::arg("numSamples"), - py::arg("replacement")); + .def(py::init, bool>()); (void)py::class_>(*m, "PythonSampler") - .def(py::init(), py::arg("pySampler")); + .def(py::init()); } void bindInfoObjects(py::module *m) { @@ -503,16 +734,18 @@ void bindInfoObjects(py::module *m) { void bindVocabObjects(py::module *m) { (void)py::class_>(*m, "Vocab") + .def(py::init<>()) .def_static("from_list", - [](const py::list &words) { + [](const py::list &words, const py::list &special_tokens, bool special_first) { std::shared_ptr v; - THROW_IF_ERROR(Vocab::BuildFromPyList(words, &v)); + THROW_IF_ERROR(Vocab::BuildFromPyList(words, special_tokens, special_first, &v)); return v; }) .def_static("from_file", - [](const std::string &path, const std::string &dlm, int32_t vocab_size) { + [](const std::string &path, const std::string &dlm, int32_t vocab_size, const py::list &special_tokens, + bool special_first) { std::shared_ptr v; - THROW_IF_ERROR(Vocab::BuildFromFile(path, dlm, vocab_size, &v)); + THROW_IF_ERROR(Vocab::BuildFromFile(path, dlm, vocab_size, special_tokens, special_first, &v)); return v; }) .def_static("from_dict", [](const py::dict &words) { @@ -529,10 +762,22 @@ void bindGraphData(py::module *m) { THROW_IF_ERROR(g_out->Init()); return g_out; })) - .def("get_nodes", - [](gnn::Graph &g, gnn::NodeType node_type, gnn::NodeIdType node_num) { + .def("get_all_nodes", + [](gnn::Graph &g, gnn::NodeType node_type) { + std::shared_ptr out; + THROW_IF_ERROR(g.GetAllNodes(node_type, &out)); + return out; + }) + .def("get_all_edges", + [](gnn::Graph &g, gnn::EdgeType edge_type) { std::shared_ptr out; - THROW_IF_ERROR(g.GetNodes(node_type, node_num, &out)); + THROW_IF_ERROR(g.GetAllEdges(edge_type, &out)); + return out; + }) + .def("get_nodes_from_edges", + [](gnn::Graph &g, std::vector edge_list) { + std::shared_ptr out; + 
THROW_IF_ERROR(g.GetNodesFromEdges(edge_list, &out)); return out; }) .def("get_all_neighbors", @@ -541,12 +786,38 @@ void bindGraphData(py::module *m) { THROW_IF_ERROR(g.GetAllNeighbors(node_list, neighbor_type, &out)); return out; }) + .def("get_sampled_neighbors", + [](gnn::Graph &g, std::vector node_list, std::vector neighbor_nums, + std::vector neighbor_types) { + std::shared_ptr out; + THROW_IF_ERROR(g.GetSampledNeighbors(node_list, neighbor_nums, neighbor_types, &out)); + return out; + }) + .def("get_neg_sampled_neighbors", + [](gnn::Graph &g, std::vector node_list, gnn::NodeIdType neighbor_num, + gnn::NodeType neg_neighbor_type) { + std::shared_ptr out; + THROW_IF_ERROR(g.GetNegSampledNeighbors(node_list, neighbor_num, neg_neighbor_type, &out)); + return out; + }) .def("get_node_feature", [](gnn::Graph &g, std::shared_ptr node_list, std::vector feature_types) { TensorRow out; THROW_IF_ERROR(g.GetNodeFeature(node_list, feature_types, &out)); + return out.getRow(); + }) + .def("graph_info", + [](gnn::Graph &g) { + py::dict out; + THROW_IF_ERROR(g.GraphInfo(&out)); return out; - }); + }) + .def("random_walk", [](gnn::Graph &g, std::vector node_list, std::vector meta_path, + float step_home_param, float step_away_param, gnn::NodeIdType default_node) { + std::shared_ptr out; + THROW_IF_ERROR(g.RandomWalk(node_list, meta_path, step_home_param, step_away_param, default_node, &out)); + return out; + }); } // This is where we externalize the C logic as python modules @@ -555,9 +826,9 @@ PYBIND11_MODULE(_c_dataengine, m) { (void)py::class_>(m, "DatasetOp"); (void)py::enum_(m, "OpName", py::arithmetic()) - .value("STORAGE", OpName::kStorage) .value("SHUFFLE", OpName::kShuffle) .value("BATCH", OpName::kBatch) + .value("BUCKETBATCH", OpName::kBucketBatch) .value("BARRIER", OpName::kBarrier) .value("MINDRECORD", OpName::kMindrecord) .value("CACHE", OpName::kCache) @@ -578,11 +849,14 @@ PYBIND11_MODULE(_c_dataengine, m) { .value("MNIST", OpName::kMnist) .value("MANIFEST", 
OpName::kManifest) .value("VOC", OpName::kVoc) + .value("COCO", OpName::kCoco) .value("CIFAR10", OpName::kCifar10) .value("CIFAR100", OpName::kCifar100) .value("RANDOMDATA", OpName::kRandomData) + .value("BUILDVOCAB", OpName::kBuildVocab) .value("CELEBA", OpName::kCelebA) - .value("TEXTFILE", OpName::kTextFile); + .value("TEXTFILE", OpName::kTextFile) + .value("CLUE", OpName::kClue); (void)py::enum_(m, "JiebaMode", py::arithmetic()) .value("DE_JIEBA_MIX", JiebaMode::kMix) @@ -590,6 +864,16 @@ PYBIND11_MODULE(_c_dataengine, m) { .value("DE_JIEBA_HMM", JiebaMode::kHmm) .export_values(); +#ifdef ENABLE_ICU4C + (void)py::enum_(m, "NormalizeForm", py::arithmetic()) + .value("DE_NORMALIZE_NONE", NormalizeForm::kNone) + .value("DE_NORMALIZE_NFC", NormalizeForm::kNfc) + .value("DE_NORMALIZE_NFKC", NormalizeForm::kNfkc) + .value("DE_NORMALIZE_NFD", NormalizeForm::kNfd) + .value("DE_NORMALIZE_NFKD", NormalizeForm::kNfkd) + .export_values(); +#endif + (void)py::enum_(m, "InterpolationMode", py::arithmetic()) .value("DE_INTER_LINEAR", InterpolationMode::kLinear) .value("DE_INTER_CUBIC", InterpolationMode::kCubic) @@ -609,12 +893,13 @@ PYBIND11_MODULE(_c_dataengine, m) { bindTensorOps2(&m); bindTensorOps3(&m); bindTensorOps4(&m); - bindTensorOps5(&m); + bindTokenizerOps(&m); bindSamplerOps(&m); bindDatasetOps(&m); bindInfoObjects(&m); bindVocabObjects(&m); bindGraphData(&m); + bindDependIcuTokenizerOps(&m); } } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/CMakeLists.txt b/mindspore/ccsrc/dataset/core/CMakeLists.txt index 0b9f08d070..27b9f0e13b 100644 --- a/mindspore/ccsrc/dataset/core/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/core/CMakeLists.txt @@ -11,6 +11,7 @@ add_library(core OBJECT data_type.cc global_context.cc tensor.cc + tensor_row.cc tensor_shape.cc ) add_dependencies(core mindspore::protobuf) diff --git a/mindspore/ccsrc/dataset/core/client.h b/mindspore/ccsrc/dataset/core/client.h index aa5e85f7de..a10cb4596e 100644 --- 
a/mindspore/ccsrc/dataset/core/client.h +++ b/mindspore/ccsrc/dataset/core/client.h @@ -27,6 +27,7 @@ #include "dataset/engine/dataset_iterator.h" #include "dataset/engine/datasetops/barrier_op.h" #include "dataset/engine/datasetops/batch_op.h" +#include "dataset/engine/datasetops/build_vocab_op.h" #include "dataset/engine/datasetops/dataset_op.h" #include "dataset/engine/datasetops/device_queue_op.h" #include "dataset/engine/datasetops/map_op.h" @@ -38,7 +39,6 @@ #include "dataset/engine/datasetops/shuffle_op.h" #include "dataset/engine/datasetops/source/generator_op.h" #include "dataset/engine/datasetops/source/mindrecord_op.h" -#include "dataset/engine/datasetops/source/storage_op.h" #include "dataset/engine/datasetops/source/tf_reader_op.h" #include "dataset/engine/datasetops/take_op.h" #include "dataset/engine/datasetops/zip_op.h" diff --git a/mindspore/ccsrc/dataset/core/config_manager.cc b/mindspore/ccsrc/dataset/core/config_manager.cc index 3f659555f4..a489b4a4ce 100644 --- a/mindspore/ccsrc/dataset/core/config_manager.cc +++ b/mindspore/ccsrc/dataset/core/config_manager.cc @@ -48,7 +48,7 @@ Status ConfigManager::FromJson(const nlohmann::json &j) { Status ConfigManager::LoadFile(const std::string &settingsFile) { Status rc; if (!Path(settingsFile).Exists()) { - RETURN_STATUS_UNEXPECTED("File is not found"); + RETURN_STATUS_UNEXPECTED("File is not found."); } // Some settings are mandatory, others are not (with default). If a setting // is optional it will set a default value if the config is missing from the file. 
@@ -59,14 +59,11 @@ Status ConfigManager::LoadFile(const std::string &settingsFile) { rc = FromJson(js); } catch (const nlohmann::json::type_error &e) { std::ostringstream ss; - ss << "Client settings failed to load:\n" << e.what(); + ss << "Client file failed to load:\n" << e.what(); std::string err_msg = ss.str(); RETURN_STATUS_UNEXPECTED(err_msg); } catch (const std::exception &err) { - std::ostringstream ss; - ss << "Client settings failed to load:\n" << err.what(); - std::string err_msg = ss.str(); - RETURN_STATUS_UNEXPECTED(err_msg); + RETURN_STATUS_UNEXPECTED("Client file failed to load."); } return rc; } @@ -88,5 +85,7 @@ void ConfigManager::set_op_connector_size(int32_t connector_size) { op_connector uint32_t ConfigManager::seed() const { return seed_; } void ConfigManager::set_seed(uint32_t seed) { seed_ = seed; } + +void ConfigManager::set_monitor_sampling_interval(uint32_t interval) { monitor_sampling_interval_ = interval; } } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/config_manager.h b/mindspore/ccsrc/dataset/core/config_manager.h index 654d5f930c..807591daa1 100644 --- a/mindspore/ccsrc/dataset/core/config_manager.h +++ b/mindspore/ccsrc/dataset/core/config_manager.h @@ -111,12 +111,21 @@ class ConfigManager { // @param seed - The default seed to use void set_seed(uint32_t seed); + // setter function + // @param interval - The setting to apply to the config + void set_monitor_sampling_interval(uint32_t interval); + + // getter function + // @return The iterval of monitor sampling + int32_t monitor_sampling_interval() const { return monitor_sampling_interval_; } + private: int32_t rows_per_buffer_{kCfgRowsPerBuffer}; int32_t num_parallel_workers_{kCfgParallelWorkers}; int32_t worker_connector_size_{kCfgWorkerConnectorSize}; int32_t op_connector_size_{kCfgOpConnectorSize}; uint32_t seed_{kCfgDefaultSeed}; + uint32_t monitor_sampling_interval_{kCfgMonitorSamplingInterval}; // Private helper function that taks 
a nlohmann json format and populates the settings // @param j - The json nlohmann json info diff --git a/mindspore/ccsrc/dataset/core/constants.h b/mindspore/ccsrc/dataset/core/constants.h index 9c0e24acc6..34d2f2583c 100644 --- a/mindspore/ccsrc/dataset/core/constants.h +++ b/mindspore/ccsrc/dataset/core/constants.h @@ -47,9 +47,13 @@ constexpr uint32_t kCfgParallelWorkers = 4; constexpr uint32_t kCfgWorkerConnectorSize = 16; constexpr uint32_t kCfgOpConnectorSize = 16; constexpr uint32_t kCfgDefaultSeed = std::mt19937::default_seed; +constexpr uint32_t kCfgMonitorSamplingInterval = 10; // Invalid OpenCV type should not be from 0 to 7 (opencv4/opencv2/core/hal/interface.h) constexpr uint8_t kCVInvalidType = 255; + +using connection_id_type = int64_t; +using row_id_type = int64_t; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/data_type.cc b/mindspore/ccsrc/dataset/core/data_type.cc index 744c8c1ca0..71a510d88f 100644 --- a/mindspore/ccsrc/dataset/core/data_type.cc +++ b/mindspore/ccsrc/dataset/core/data_type.cc @@ -138,7 +138,7 @@ DataType DataType::FromNpArray(const py::array &arr) { return DataType(DataType::DE_FLOAT32); } else if (py::isinstance>(arr)) { return DataType(DataType::DE_FLOAT64); - } else if (arr.dtype().kind() == 'S') { + } else if (arr.dtype().kind() == 'S' || arr.dtype().kind() == 'U') { return DataType(DataType::DE_STRING); } else { MS_LOG(ERROR) << "Cannot convert from numpy type. 
Unknown data type is returned!"; diff --git a/mindspore/ccsrc/dataset/core/data_type.h b/mindspore/ccsrc/dataset/core/data_type.h index f1f0bb2ebb..a487f3300e 100644 --- a/mindspore/ccsrc/dataset/core/data_type.h +++ b/mindspore/ccsrc/dataset/core/data_type.h @@ -128,7 +128,9 @@ class DataType { // @tparam T // @return true or false template - bool IsCompatible() const; + bool IsCompatible() const { + return type_ == FromCType(); + } // returns true if the template type is the same as the Tensor type_ // @tparam T @@ -146,6 +148,9 @@ class DataType { return out; } + template + static DataType FromCType(); + // Convert from DataType to Pybind type // @return py::dtype AsNumpyType() const; @@ -191,68 +196,68 @@ class DataType { }; template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_BOOL; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_BOOL); } template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_FLOAT64; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_FLOAT64); } template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_FLOAT32; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_FLOAT32); } template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_FLOAT16; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_FLOAT16); } template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_INT64; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_INT64); } template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_UINT64; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_UINT64); } template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_INT32; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_INT32); } 
template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_UINT32; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_UINT32); } template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_INT16; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_INT16); } template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_UINT16; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_UINT16); } template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_INT8; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_INT8); } template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_UINT8; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_UINT8); } template <> -inline bool DataType::IsCompatible() const { - return type_ == DataType::DE_STRING; +inline DataType DataType::FromCType() { + return DataType(DataType::DE_STRING); } template <> diff --git a/mindspore/ccsrc/dataset/core/tensor.cc b/mindspore/ccsrc/dataset/core/tensor.cc index c986e07089..abab8cf3f4 100644 --- a/mindspore/ccsrc/dataset/core/tensor.cc +++ b/mindspore/ccsrc/dataset/core/tensor.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -229,7 +230,12 @@ Status Tensor::CreateTensorFromNumpyString(std::shared_ptr *ptr, py::arr } arr.resize({arr.size()}); // flatten the py::array so we can iterate once std::vector strings; - std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast(s)); }); + + if (arr.dtype().kind() == 'U') { + std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast(s)); }); + } else { + std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast(s)); }); + } arr.resize(shape); // resize arr back to the 
original shape @@ -306,6 +312,50 @@ Status Tensor::CreateTensor(std::shared_ptr *ptr, const dataengine::Byte return Status::OK(); } +Status Tensor::CreateTensor(std::shared_ptr *ptr, const std::string &file_path) { + std::ifstream fs; + fs.open(file_path, std::ios::binary | std::ios::in); + CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + file_path); + int64_t num_bytes = fs.seekg(0, std::ios::end).tellg(); + CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Fail to find size of file"); + RETURN_IF_NOT_OK( + Tensor::CreateTensor(ptr, TensorImpl::kFlexible, TensorShape{num_bytes}, DataType(DataType::DE_UINT8))); + int64_t written_bytes = fs.read(reinterpret_cast((*ptr)->GetMutableBuffer()), num_bytes).gcount(); + CHECK_FAIL_RETURN_UNEXPECTED(written_bytes == num_bytes && fs.good(), "Error in writing to tensor"); + fs.close(); + return Status::OK(); +} + +Status Tensor::CreateTensor(std::shared_ptr *ptr, const dataengine::BytesList &bytes_list, + const TensorShape &shape, const DataType &type, dsize_t pad_size) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(ptr, TensorImpl::kFlexible, shape, type)); + + unsigned char *current_tensor_addr = (*ptr)->GetMutableBuffer(); + int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size; + + for (int i = 0; i < bytes_list.value_size(); i++) { + // read string data into tensor + const std::string ¤t_element = bytes_list.value(i); + int return_code = + memcpy_s(current_tensor_addr, tensor_bytes_remaining, common::SafeCStr(current_element), current_element.size()); + + CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memcpy_s failed when reading bytesList element into Tensor"); + + current_tensor_addr += current_element.size(); + tensor_bytes_remaining -= current_element.size(); + + // pad + int64_t chars_to_pad = pad_size - current_element.size(); + return_code = memset_s(current_tensor_addr, tensor_bytes_remaining, static_cast(' '), chars_to_pad); + CHECK_FAIL_RETURN_UNEXPECTED(return_code == 
0, "memcpy_s failed when padding Tensor"); + + current_tensor_addr += chars_to_pad; + tensor_bytes_remaining -= chars_to_pad; + } + + return Status::OK(); +} + // Memcpy the given strided array's used part to consecutive memory // Consider a 3-d array // A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]] @@ -539,11 +589,13 @@ Status Tensor::StartAddrOfIndex(std::vector ind, uchar **start_addr_of_ if (type() == DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string tensors yet."); } + dsize_t flat_ind; std::vector t_shape = shape().AsVector(); std::vector r(t_shape.begin() + ind.size(), t_shape.end()); *remaining = TensorShape(r); ind.resize(this->Rank(), 0); // same as -> while (ind.size() < this->Rank()) ind.push_back(0); + RETURN_IF_NOT_OK(shape_.ToFlatIndex(ind, &flat_ind)); // check if GetBuffer() returns null, we should flag this as an error, this sanity check will only // be true is the tensor failed to allocate memory. @@ -584,6 +636,39 @@ Status Tensor::InsertTensor(const std::vector &ind, const std::shared_p } } +Status Tensor::Concatenate(const std::vector &index, const std::shared_ptr &tensor) { + std::string err_msg; + err_msg += (index.size() != 1) ? "[Tensor] only supports 1d concatenation \n" : ""; + err_msg += (type() == DataType::DE_STRING) ? "[Tensor] Cannot batch tensors of type string\n" : ""; + err_msg += (!shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : ""; + + err_msg += + (index.at(0) + tensor->shape().NumOfElements() > this->shape().NumOfElements()) ? "[Tensor] incorrect index\n" : ""; + err_msg += tensor->type().SizeInBytes() != this->type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : ""; + uchar *start_addr_of_ind = nullptr; + + TensorShape remaining_shape = tensor->shape(); + StartAddrOfIndex(index, &start_addr_of_ind, &remaining_shape); + err_msg += (start_addr_of_ind == nullptr) ? 
"Failed to create memory for Tensor.\n" : ""; + + if (!err_msg.empty()) { + MS_LOG(DEBUG) << "Insert tensor message: " << err_msg; + + RETURN_STATUS_UNEXPECTED(err_msg); + } else { + int ret_code = + memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->GetMutableBuffer(), tensor->SizeInBytes()); + + if (ret_code == 0) { + return Status::OK(); + } else { + err_msg += "[Tensor] error in memcpy_s when inserting tensor\n"; + MS_LOG(DEBUG) << "Tensor message: " << err_msg; + RETURN_STATUS_UNEXPECTED(err_msg); + } + } +} + Status Tensor::ExpandDim(const dsize_t &axis) { if (axis > Rank()) { std::string err = "Axis is out of bound"; @@ -649,7 +734,7 @@ Status Tensor::GetItemAt(T *o, const std::vector &index) const { Status Tensor::GetItemAt(std::string_view *o, const std::vector &index) const { RETURN_UNEXPECTED_IF_NULL(data_); RETURN_UNEXPECTED_IF_NULL(o); - CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Type is not DE_STRING"); + CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Tensor type is not a string"); uchar *start = nullptr; offset_t length = 0; @@ -699,6 +784,8 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) { for (; itr != end(); itr++) { max = std::max((*itr).length(), max); } + // if all strings are empty, numpy stores a byte for each string |S1 + max = (max == 0 ? 
1 : max); uint64_t total_size = shape_.NumOfElements() * max; char *tmp_data = reinterpret_cast(data_allocator_->allocate(total_size)); if (tmp_data == nullptr) RETURN_STATUS_UNEXPECTED("Cannot create temp array."); @@ -708,8 +795,10 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) { itr = begin(); uint64_t i = 0; for (; itr != end(); itr++, i++) { - ret_code = memcpy_s(tmp_data + i * max, total_size, (*itr).data(), (*itr).length()); - CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to copy string data."); + if (!(*itr).empty()) { + ret_code = memcpy_s(tmp_data + i * max, total_size, (*itr).data(), (*itr).length()); + CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to copy string data."); + } } auto strides = shape_.Strides(); std::transform(strides.begin(), strides.end(), strides.begin(), [&max](const auto &s) { return s * max; }); @@ -847,6 +936,78 @@ Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length *length = offset_ptr[index + 1] - start - 1; // -1 to skip the \0 from the string length return Status::OK(); } +Status Tensor::CopyLastDimAt(const std::shared_ptr &src, const std::vector &index) { + CHECK_FAIL_RETURN_UNEXPECTED(src->type() == type_, "Source Tensor has a different type"); + CHECK_FAIL_RETURN_UNEXPECTED(index.back() == 0, "Last dim in index should be 0"); + + uint8_t type_size = type_.SizeInBytes(); + size_t len = std::min(src->shape()[-1], shape_[-1]) * type_size; + dsize_t src_flat_ind = 0, dst_flat_ind = 0; + RETURN_IF_NOT_OK(src->shape().ToFlatIndex(index, &src_flat_ind)); + RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &dst_flat_ind)); + + const unsigned char *src_addr = src->GetBuffer() + src_flat_ind * type_size; + unsigned char *dst_addr = GetMutableBuffer() + dst_flat_ind * type_size; + CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == 0, "memcpy error"); + return Status::OK(); +} +Status Tensor::Slice(std::shared_ptr *out, const std::vector &indices) { + 
CHECK_FAIL_RETURN_UNEXPECTED(shape_.Rank() == 1, "Currently Slice work with rank 1 tensors only."); + CHECK_FAIL_RETURN_UNEXPECTED(!indices.empty(), "Indices are empty, generated tensor would be empty."); + if (type_.IsNumeric()) { + return SliceNumeric(out, indices); + } else { + return SliceString(out, indices); + } +} +Status Tensor::SliceNumeric(std::shared_ptr *out, const std::vector &indices) { + RETURN_IF_NOT_OK( + CreateTensor(out, TensorImpl::kFlexible, TensorShape({static_cast(indices.size())}), type_)); + (*out)->GetMutableBuffer(); + dsize_t out_index = 0; + dsize_t dim_length = shape_[0]; + dsize_t type_size = type_.SizeInBytes(); + dsize_t src_start = HandleNeg(indices[0], dim_length); + uchar *dst_addr = (*out)->data_; + dsize_t count = 1; + + for (dsize_t i = 0; i < indices.size(); i++) { + dsize_t cur_index = HandleNeg(indices[i], dim_length); + CHECK_FAIL_RETURN_UNEXPECTED( + cur_index >= 0 && cur_index < dim_length, + "Index " + std::to_string(indices[i]) + " is out of bounds [0," + std::to_string(dim_length) + ")"); + if (i < indices.size() - 1) { + dsize_t next_index = HandleNeg(indices[i + 1], dim_length); + if (next_index == cur_index + 1) { + count++; + continue; + } + } + int return_code = memcpy_s(dst_addr + out_index * type_size, (*out)->SizeInBytes(), data_ + src_start * type_size, + count * type_size); + CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memcpy_s failed in SliceNumeric"); + out_index += count; + if (i < indices.size() - 1) { + src_start = HandleNeg(indices[i + 1], dim_length); // next index + } + count = 1; + } + return Status::OK(); +} +Status Tensor::SliceString(std::shared_ptr *out, const std::vector &indices) { + dsize_t dim_length = shape_[0]; + std::vector strings; + for (dsize_t index : indices) { + dsize_t cur_index = HandleNeg(index, dim_length); + CHECK_FAIL_RETURN_UNEXPECTED( + cur_index >= 0 && cur_index < dim_length, + "Index " + std::to_string(index) + " is out of bounds [0," + std::to_string(dim_length) + 
")"); + std::string_view sv; + GetItemAt(&sv, {cur_index}); + strings.emplace_back(sv); + } + return CreateTensor(out, strings); +} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/tensor.h b/mindspore/ccsrc/dataset/core/tensor.h index 5efd989fc9..a3dbb391e5 100644 --- a/mindspore/ccsrc/dataset/core/tensor.h +++ b/mindspore/ccsrc/dataset/core/tensor.h @@ -44,9 +44,6 @@ class Tensor; using CharAllocPtr = std::unique_ptr>; using TensorAllocPtr = std::shared_ptr>; // An allocator shared_ptr for Tensors -using TensorRow = std::vector>; // A row is a set of Tensor pointers -using TensorTable = std::vector; // The table of tensors is a vector of rows -using TensorQTable = std::deque; // A different flavour of tensor table, this one has queue functionality class Tensor { public: @@ -118,6 +115,16 @@ class Tensor { static Status CreateTensor(std::shared_ptr *, TensorImpl tensor_impl, const TensorShape &shape, DataType type, const unsigned char *data = nullptr); + /// Create a copy of the input tensor + /// \param out [out] output tensor to be generated + /// \param in [in] orginal tensor to be copied + /// \return Status + static Status CreateTensor(std::shared_ptr *out, const std::shared_ptr &in) { + const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); + *out = std::allocate_shared(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes()); + return Status::OK(); + } + // A static factory method to create a Tensor from a given py::array. 
// @param ptr output argument to hold the created Tensor // @param arr py::array @@ -135,9 +142,41 @@ class Tensor { static Status CreateTensor(std::shared_ptr *ptr, const std::vector &strings, const TensorShape &shape = TensorShape::CreateUnknownRankShape()); + // create tensor from protobuf bytelist with strings static Status CreateTensor(std::shared_ptr *ptr, const dataengine::BytesList &bytes_list, const TensorShape &shape); + // A static factory method to create a Tensor from a given list of numbers. + // @param ptr output argument to hold the created Tensor + // @param items elements of the tensor + // @param shape shape of the tensor + // @return Status Code + template + static Status CreateTensor(std::shared_ptr *ptr, const std::vector &items, + const TensorShape &shape_req = TensorShape::CreateUnknownRankShape()) { + DataType type = DataType::FromCType(); + auto items_ptr = reinterpret_cast(&items[0]); + TensorShape shape = shape_req; + if (!shape.known()) { + shape = TensorShape({static_cast(items.size())}); + } + return CreateTensor(ptr, TensorImpl::kFlexible, shape, type, items_ptr); + } + + // A static factory method to create a Tensor from a given number. 
+ // @param ptr output argument to hold the created Tensor + // @param item value + // @return Status Code + template + static Status CreateTensor(std::shared_ptr *ptr, const T &item) { + return CreateTensor(ptr, {item}, TensorShape::CreateScalar()); + } + // Create tensor from protobuf bytelist with uint8 or int8 types + static Status CreateTensor(std::shared_ptr *ptr, const dataengine::BytesList &bytes_list, + const TensorShape &shape, const DataType &type, dsize_t pad_size); + + static Status CreateTensor(std::shared_ptr *ptr, const std::string &path); + // Copy raw data of a array based on shape and strides to the destination pointer // @param dst Pointer to the destination array where the content is to be copied // @param src Pointer to the source of strided array to be copied @@ -260,11 +299,6 @@ class Tensor { // @return const unsigned char* const unsigned char *GetBuffer() const; - // Get the starting memory address for the data of the tensor. This potentially - // drives an allocation if the data area. - // @return unsigned char* - unsigned char *GetMutableBuffer(); - // Getter of the type // @return DataType type() const { return type_; } @@ -323,6 +357,22 @@ class Tensor { return ss.str(); } + // Handle negative indices. + static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; } + + // Slice tensor bases on the given indicies. Copy the sliced data into out tensor. Only rank1 tensors are supported. + // Based on the type of tensor, SliceNumeric or SliceString will be called + // @param out Tensor + // @param indices vector of indices + // @return Status error code + Status Slice(std::shared_ptr *out, const std::vector &indices); + + // Slice numeric tensors. 
+ Status SliceNumeric(std::shared_ptr *out, const std::vector &indices); + + // Slice string tensors + Status SliceString(std::shared_ptr *out, const std::vector &indices); + // Constructs numpy array from input tensor // @param data this data is the location of python data // @return Status code @@ -332,6 +382,9 @@ class Tensor { static Status GetBufferInfo(Tensor &t, py::buffer_info *out); + // Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor + Status Concatenate(const std::vector &index, const std::shared_ptr &input); + // TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor // The order elements is as the memory layout (i.e., row-major) [[1,2,3],[4,5,6] --> 1,2,3,4,5,6 // @tparam T type of values in the Tensor Iterator @@ -518,6 +571,7 @@ class Tensor { // @return TensorIterator template TensorIterator begin() { + AllocateBuffer(SizeInBytes()); return TensorIterator(data_); } @@ -529,7 +583,18 @@ class Tensor { return TensorIterator(data_end_); } + // Copies the last dimension at `index` from Tensor `src` to this Tensor. + // @param src Tensor + // @param index vector to the start of the dimension. The last dim should be 0 + // @return Status + Status CopyLastDimAt(const std::shared_ptr &src, const std::vector &index); + protected: + // Get the starting memory address for the data of the tensor. This potentially + // drives an allocation if the data is null. 
+ // @return unsigned char* + unsigned char *GetMutableBuffer(); + // A function that prints Tensor recursively, first called by print // @param out // @param cur_dim diff --git a/mindspore/ccsrc/dataset/core/tensor_row.cc b/mindspore/ccsrc/dataset/core/tensor_row.cc new file mode 100644 index 0000000000..882f6728bf --- /dev/null +++ b/mindspore/ccsrc/dataset/core/tensor_row.cc @@ -0,0 +1,75 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "dataset/core/tensor_row.h" + +namespace py = pybind11; +namespace mindspore { +namespace dataset { + +TensorRow::TensorRow() noexcept : id_(kDefaultRowId) {} + +TensorRow::TensorRow(size_type n, TensorRow::value_type t) noexcept : id_(kDefaultRowId), row_(n, t) {} + +TensorRow::TensorRow(const TensorRow::vector_type &v) : id_(kDefaultRowId), row_(v) {} + +TensorRow::TensorRow(row_id_type id, const std::initializer_list &lst) : id_(id), row_(lst) {} + +TensorRow::TensorRow(const TensorRow &tr) : id_(tr.id_), row_(tr.row_) {} + +TensorRow &TensorRow::operator=(const TensorRow &tr) { + if (this == &tr) { + return *this; + } + row_ = tr.row_; + id_ = tr.id_; + return *this; +} + +TensorRow &TensorRow::operator=(const std::initializer_list &lst) { + row_ = lst; + return *this; +} + +TensorRow::TensorRow(TensorRow::vector_type &&v) noexcept : id_(kDefaultRowId), row_(std::move(v)) {} + +TensorRow::TensorRow(row_id_type id, std::initializer_list &&lst) noexcept + : id_(id), row_(std::move(lst)) {} + +TensorRow::TensorRow(TensorRow &&tr) noexcept { + id_ = tr.id_; + row_ = std::move(tr.row_); +} + +TensorRow &TensorRow::operator=(TensorRow &&tr) noexcept { + if (this == &tr) { + return *this; + } + row_ = std::move(tr.row_); + id_ = tr.id_; + tr.id_ = kDefaultRowId; + return *this; +} + +TensorRow &TensorRow::operator=(std::initializer_list &&lst) noexcept { + row_ = std::move(lst); + return *this; +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/tensor_row.h b/mindspore/ccsrc/dataset/core/tensor_row.h new file mode 100644 index 0000000000..49bc61657c --- /dev/null +++ b/mindspore/ccsrc/dataset/core/tensor_row.h @@ -0,0 +1,131 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_CORE_TENSOR_ROW_H_ +#define DATASET_CORE_TENSOR_ROW_H_ + +#include +#include +#include + +#include "dataset/core/tensor.h" + +namespace mindspore { +namespace dataset { + +class TensorRow; // A set of Tensor pointers with an id +using TensorTable = std::vector; // The table of tensors is a vector of rows +using TensorQTable = std::deque; // A different flavour of tensor table, this one has queue functionality + +class TensorRow { + public: + static constexpr row_id_type kDefaultRowId = -1; // Default row id + + // Type definitions + using size_type = dsize_t; + using value_type = std::shared_ptr; + using reference = std::shared_ptr &; + using const_reference = const std::shared_ptr &; + using vector_type = std::vector>; + using iterator = std::vector>::iterator; + using const_iterator = std::vector>::const_iterator; + + TensorRow() noexcept; + + TensorRow(size_type n, value_type t) noexcept; + + // Copy Constructors + explicit TensorRow(const vector_type &v); + + TensorRow(row_id_type id, const std::initializer_list &lst); + + TensorRow(const TensorRow &tr); + + TensorRow &operator=(const TensorRow &tr); + + TensorRow &operator=(const std::initializer_list &lst); + + // Move Constructors + explicit TensorRow(vector_type &&v) noexcept; + + TensorRow(row_id_type id, std::initializer_list &&lst) noexcept; + + TensorRow(TensorRow &&tr) noexcept; + + TensorRow &operator=(TensorRow &&tr) noexcept; + + TensorRow &operator=(std::initializer_list &&lst) noexcept; + + // Destructor + ~TensorRow() = default; + + // Functions to 
fetch/set id/vector + row_id_type getId() const { return id_; } + + void setId(row_id_type id) { id_ = id; } + + const vector_type &getRow() const { return row_; } + + // Wrapper functions to support vector operations + void emplace_back(value_type t) { row_.emplace_back(t); } + + void push_back(value_type t) { row_.push_back(t); } + + void clear() noexcept { row_.clear(); } + + size_type size() const noexcept { return row_.size(); } + + void reserve(size_type size) { row_.reserve(size); } + + void resize(size_type size) { row_.resize(size); } + + bool empty() { return row_.empty(); } + + void insert(iterator position, iterator first, iterator last) { row_.insert(position, first, last); } + + // Wrapper functions to support vector element access + reference at(size_type index) { return row_.at(index); } + + const_reference at(size_type index) const { return row_.at(index); } + + reference front() { return row_.front(); } + + const_reference front() const { return row_.front(); } + + reference back() { return row_.back(); } + + const_reference back() const { return row_.back(); } + + reference operator[](size_type index) { return row_[index]; } + + const_reference operator[](size_type index) const { return row_[index]; } + + // Wrapper functions to support vector iteration + iterator begin() { return row_.begin(); } + + const_iterator begin() const { return row_.begin(); } + + iterator end() { return row_.end(); } + + const_iterator end() const { return row_.end(); } + + protected: + row_id_type id_; + std::vector> row_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_CORE_TENSOR_ROW_H_ diff --git a/mindspore/ccsrc/dataset/core/tensor_shape.h b/mindspore/ccsrc/dataset/core/tensor_shape.h index 6cfb007b56..c83e43cd7d 100644 --- a/mindspore/ccsrc/dataset/core/tensor_shape.h +++ b/mindspore/ccsrc/dataset/core/tensor_shape.h @@ -94,7 +94,7 @@ class TensorShape { // @return TensorShape PrependDim(dsize_t dim) const; - // Insert a new dim at the 
end of the shape. For example, <2,4> --> PrependDim(4) --> <2,4,4> + // Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4> // @param dim // @return TensorShape AppendDim(dsize_t dim) const; @@ -118,7 +118,10 @@ class TensorShape { bool operator!=(const TensorShape &rhs) const { return !(rhs == *this); } - dsize_t operator[](const dsize_t index) const { return raw_shape_[index]; } + dsize_t operator[](const dsize_t index) const { + if (index < 0) return raw_shape_[raw_shape_.size() + index]; + return raw_shape_[index]; + } // Return the Shape as a vector // @return diff --git a/mindspore/ccsrc/dataset/engine/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/CMakeLists.txt index e7b5e682f3..66f95d0926 100644 --- a/mindspore/ccsrc/dataset/engine/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/engine/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(datasetops) add_subdirectory(opt) add_subdirectory(gnn) +add_subdirectory(perf) if (ENABLE_TDTQUE) add_subdirectory(tdt) endif () @@ -16,7 +17,7 @@ add_library(engine OBJECT target_include_directories(engine PRIVATE ${pybind11_INCLUDE_DIRS}) if (ENABLE_TDTQUE) - add_dependencies(engine engine-datasetops engine-datasetops-source engine-tdt engine-opt engine-gnn) + add_dependencies(engine engine-datasetops engine-datasetops-source engine-tdt engine-opt engine-gnn engine-perf) else() - add_dependencies(engine engine-datasetops engine-datasetops-source engine-opt engine-gnn) + add_dependencies(engine engine-datasetops engine-datasetops-source engine-opt engine-gnn engine-perf) endif () diff --git a/mindspore/ccsrc/dataset/engine/connector.h b/mindspore/ccsrc/dataset/engine/connector.h index 085b790ec5..cdce592c1b 100644 --- a/mindspore/ccsrc/dataset/engine/connector.h +++ b/mindspore/ccsrc/dataset/engine/connector.h @@ -152,6 +152,23 @@ class Connector { return out; } + // Get current size of connector. 
+ int32_t size() const { + int32_t size = 0; + for (int32_t i = 0; i < queues_.size(); ++i) { + size += queues_[i]->size(); + } + return size; + } + + int32_t capacity() const { + int32_t capacity = 0; + for (int32_t i = 0; i < queues_.size(); ++i) { + capacity += queues_[i]->capacity(); + } + return capacity; + } + // Register the internal resources with Task group for interruption service. // @param vg // @return diff --git a/mindspore/ccsrc/dataset/engine/data_buffer.cc b/mindspore/ccsrc/dataset/engine/data_buffer.cc index 4aed994d3c..32a70c259f 100644 --- a/mindspore/ccsrc/dataset/engine/data_buffer.cc +++ b/mindspore/ccsrc/dataset/engine/data_buffer.cc @@ -17,8 +17,6 @@ #include "dataset/util/allocator.h" #include "dataset/core/global_context.h" #include "dataset/core/tensor.h" -#include "dataset/engine/datasetops/source/storage_client.h" -#include "dataset/engine/datasetops/source/tf_buffer.h" namespace mindspore { namespace dataset { @@ -26,37 +24,6 @@ namespace dataset { // Description: This is the main constructor that is used for making a buffer DataBuffer::DataBuffer(int32_t id, BufferFlags flags) : buffer_id_(id), tensor_table_(nullptr), buffer_flags_(flags) {} -// Name: CreateDataBuffer() -// Description: A static factory method to create the appropriate type of derived class -// buffer. Returns the base class reference for DataBuffer. -Status DataBuffer::CreateDataBuffer( - int32_t id, // In: The id for the new buffer - std::shared_ptr storage_client, // In: The storage client that is related to this buffer type - std::unique_ptr *ptr) { - std::unique_ptr new_data_buffer; - try { - DatasetType ds_type = storage_client->schema()->dataset_type(); - switch (ds_type) { - case DatasetType::kTf: { - // This type of buffer is for TF record data. 
- // Allocate derived class version for a TF buffers - new_data_buffer = std::make_unique(id, kDeBFlagNone, storage_client); - break; - } - default: { - std::string errMsg("Invalid buffer type"); - RETURN_STATUS_UNEXPECTED(errMsg); - } - } - } catch (std::bad_alloc &e) { - return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__, e.what()); - } catch (std::exception &e) { - RETURN_STATUS_UNEXPECTED(e.what()); - } - *ptr = std::move(new_data_buffer); - return Status::OK(); -} - // Name: print() // Description: A function that prints info about the DataBuffer (base class version) void DataBuffer::Print(std::ostream &out, // In: The output stream to print to @@ -98,7 +65,7 @@ Status DataBuffer::GetTensor(std::shared_ptr *ptr, int32_t row_id, int32 // Remove me!! Callers should fetch rows via pop Status DataBuffer::GetRow(int32_t row_id, TensorRow *ptr) const { - if (row_id < tensor_table_->size()) { + if (tensor_table_ && !tensor_table_->empty() && row_id < tensor_table_->size()) { *ptr = tensor_table_->at(row_id); } else { std::string err_msg = "rowId for mTensorTable out of range: " + std::to_string(row_id); diff --git a/mindspore/ccsrc/dataset/engine/data_buffer.h b/mindspore/ccsrc/dataset/engine/data_buffer.h index 0053d8894d..2ab0783519 100644 --- a/mindspore/ccsrc/dataset/engine/data_buffer.h +++ b/mindspore/ccsrc/dataset/engine/data_buffer.h @@ -25,12 +25,10 @@ #include "dataset/util/status.h" #include "dataset/core/constants.h" #include "dataset/core/tensor.h" +#include "dataset/core/tensor_row.h" namespace mindspore { namespace dataset { -// Forward declares -class StorageClient; - // The DataBuffer class is a base class that will represent the data for n values based // on a unique row id for each row of data. 
// There can be different types of DataBuffers to abstract over how the data is stored @@ -52,14 +50,6 @@ class DataBuffer { // Destructor virtual ~DataBuffer(); - // Name: CreateDataBuffer() - // Description: A factory method to create the appropriate type of derived class - // buffer. Returns the base class reference for DataBuffer. - static Status CreateDataBuffer( - int32_t id, // In: The id for the new buffer - std::shared_ptr, // In: The StorageClient is used to choose the buffer type to create - std::unique_ptr *); - // Name: print() // Description: A function that prints info about the DataBuffer (base class version) virtual void Print(std::ostream &out, // In: The output stream to print to diff --git a/mindspore/ccsrc/dataset/engine/dataset_iterator.cc b/mindspore/ccsrc/dataset/engine/dataset_iterator.cc index 011e60cc24..7eb38785aa 100644 --- a/mindspore/ccsrc/dataset/engine/dataset_iterator.cc +++ b/mindspore/ccsrc/dataset/engine/dataset_iterator.cc @@ -83,7 +83,19 @@ Status IteratorBase::FetchNextTensorRow(TensorRow *out_row) { } // Constructor of the DatasetIterator -DatasetIterator::DatasetIterator(std::shared_ptr exe_tree) : IteratorBase(), root_(exe_tree->root()) {} +DatasetIterator::DatasetIterator(std::shared_ptr exe_tree) + : IteratorBase(), + root_(exe_tree->root()), + tracing_(nullptr), + cur_batch_num_(0), + cur_connector_size_(0), + cur_connector_capacity_(0) { + std::shared_ptr node; + Status s = exe_tree->GetProfilingManager()->GetTracingNode(kDatasetIteratorTracingName, &node); + if (s.IsOk()) { + tracing_ = std::dynamic_pointer_cast(node); + } +} DatasetIterator::~DatasetIterator() = default; @@ -101,6 +113,10 @@ Status DatasetIterator::FetchNextTensorRow(TensorRow *out_row) { // Check if we need to get a new DataBuffer to iterate. 
if (curr_buffer_ == nullptr || curr_buffer_->NumRows() == 0) { + if (tracing_ != nullptr) { + cur_connector_size_ = root_->ConnectorSize(); + cur_connector_capacity_ = root_->ConnectorCapacity(); + } RETURN_IF_NOT_OK(root_->GetNextBuffer(&curr_buffer_)); // Since GetNextBuffer was used rather than GetNextInput(), it means we need to manually @@ -121,6 +137,8 @@ Status DatasetIterator::FetchNextTensorRow(TensorRow *out_row) { } eof_handled_ = true; curr_buffer_.reset(); // explicitly free the eof buffer + // Set tree to Finished state + root_->Tree()->SetFinished(); return Status::OK(); } @@ -131,13 +149,18 @@ Status DatasetIterator::FetchNextTensorRow(TensorRow *out_row) { // flow of an eof up the pipeline by itself. eof_handled_ = true; curr_buffer_.reset(); // explicitly free the eof buffer + // Set tree to Finished state + root_->Tree()->SetFinished(); return Status::OK(); } } // If we got this far, now it's time to pop that next row for return to caller RETURN_IF_NOT_OK(curr_buffer_->PopRow(out_row)); - + if (tracing_ != nullptr) { + cur_batch_num_++; + tracing_->Record(CONNECTOR_DEPTH, cur_connector_capacity_, cur_batch_num_, cur_connector_size_); + } return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/dataset_iterator.h b/mindspore/ccsrc/dataset/engine/dataset_iterator.h index ddd4883a86..ada2b0ffb6 100644 --- a/mindspore/ccsrc/dataset/engine/dataset_iterator.h +++ b/mindspore/ccsrc/dataset/engine/dataset_iterator.h @@ -24,6 +24,7 @@ #include "dataset/core/tensor.h" #include "dataset/engine/datasetops/dataset_op.h" #include "dataset/engine/execution_tree.h" +#include "dataset/engine/perf/dataset_iterator_tracing.h" namespace mindspore { namespace dataset { @@ -52,7 +53,7 @@ class IteratorBase { // messages are encountered (such as eoe or eof), then an empty TensorRow is returned back. // @return Status - The error code return // @note The position of a Tensor/column might be different from the initial column order - // in the storageOp. 
User must be aware that MapOp, ZipOps, and others might change + // in corresponding Dataset Op. User must be aware that MapOp, ZipOps, and others might change // the column ordering. virtual Status FetchNextTensorRow(TensorRow *out_row); @@ -109,6 +110,10 @@ class DatasetIterator : public IteratorBase { private: std::shared_ptr root_; // saves the root of the executionTree TensorRow device_queue_row_; + std::shared_ptr tracing_; // trace profiling data + int32_t cur_batch_num_; // current batch number,used for profiling + int32_t cur_connector_size_; // current connector size of root op,used for profiling + int32_t cur_connector_capacity_; // current connector capacity of root op, used for profiling }; // The ChildIterator derived class is for fetching rows from intermediate nodes of execution tree. diff --git a/mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt index 70065df5f4..ed57421030 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt @@ -8,6 +8,7 @@ add_library(engine-datasetops OBJECT pipeline_op.cc barrier_op.cc batch_op.cc + bucket_batch_by_length_op.cc device_queue_op.cc map_op.cc project_op.cc @@ -19,5 +20,6 @@ add_library(engine-datasetops OBJECT zip_op.cc concat_op.cc filter_op.cc + build_vocab_op.cc ) diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc index 374128eb21..60643c90ba 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc @@ -23,6 +23,7 @@ #include "dataset/engine/data_buffer.h" #include "dataset/engine/db_connector.h" #include "dataset/engine/opt/pass.h" +#include "dataset/kernels/data/data_utils.h" using float16 = Eigen::half; @@ -53,7 +54,7 @@ Status BatchOp::Builder::SanityCheck() { BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t 
op_queue_size, int32_t num_workers, const std::vector &cols_to_map, py::function batch_size_func, py::function batch_map_func, - std::map> pad_map) + PadInfo pad_map) : ParallelOp(num_workers, op_queue_size), start_batch_size_(batch_size), drop_(drop), @@ -75,10 +76,6 @@ Status BatchOp::operator()() { std::unique_ptr table = std::make_unique(); child_iterator_ = std::make_unique(this, 0, 0); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); - for (const auto &t : new_row) { - CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), - "[Batch ERROR] Batch does not support Tensor of type string yet."); - } RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild()); // must come after the first fetch above int32_t cur_batch_size = 0; RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0))); @@ -134,49 +131,57 @@ void BatchOp::Print(std::ostream &out, bool show_all) const { } } -Status BatchOp::BatchRows(const std::unique_ptr *source_table, - const std::unique_ptr *dest_table, size_t batch_size) { - if ((*source_table)->size() < batch_size || (*source_table)->size() == 0) { - RETURN_STATUS_UNEXPECTED("[Internal Batch ERROR] Insufficient rows in source_table\n"); +Status BatchOp::BatchRows(const std::unique_ptr *src, const std::unique_ptr *dest, + dsize_t batch_size) { + if ((*src)->size() != batch_size) { + RETURN_STATUS_UNEXPECTED("[Internal Batch ERROR] Source table size does not match the batch_size"); } - TensorRow row = std::move((*source_table)->front()); - (*source_table)->pop_front(); + if (batch_size == 1) { - for (std::shared_ptr tensor : row) { + TensorRow row = std::move((*src)->front()); + (*src)->pop_front(); + (*dest)->push_back(row); + for (const auto &tensor : (*dest)->front()) { RETURN_IF_NOT_OK(tensor->ExpandDim(0)); } - (*dest_table)->push_back(row); - } else { // batch_size > 1 - std::vector row_shapes; - TensorRow batched_row; - for (size_t i = 0; i < row.size(); i++) { // Handle the first row popped - 
row_shapes.push_back(row[i]->shape()); - std::shared_ptr ts; - RETURN_IF_NOT_OK(Tensor::CreateTensor( - &ts, TensorImpl::kFlexible, row[i]->shape().PrependDim(static_cast(batch_size)), row[i]->type())); - batched_row.emplace_back(ts); - RETURN_IF_NOT_OK(batched_row[i]->InsertTensor(std::vector(1, 0), row[i])); // {j} = 0 - } - for (size_t j = 1; j < batch_size; j++) { // Handle the rest of the rows - row = std::move((*source_table)->front()); - (*source_table)->pop_front(); - for (size_t i = 0; i < row.size(); i++) { - if (row[i]->shape() == row_shapes[i]) { // check the newly popped rows have the same dim as the first - RETURN_IF_NOT_OK(batched_row[i]->InsertTensor(std::vector(1, j), row[i])); + return Status::OK(); + } + + TensorRow batched_row; + auto num_columns = (*src)->front().size(); + for (size_t i = 0; i < num_columns; i++) { + std::shared_ptr first_tensor = (*src)->at(0).at(i); // first row, column i + TensorShape first_shape = first_tensor->shape(); + DataType first_type = first_tensor->type(); + TensorShape new_shape = first_shape.PrependDim(static_cast(batch_size)); + + std::shared_ptr new_tensor; + if (first_type.IsNumeric()) { // numeric tensor + RETURN_IF_NOT_OK(Tensor::CreateTensor(&new_tensor, TensorImpl::kFlexible, new_shape, first_type)); + dsize_t j = 0; + for (auto row : **src) { + std::shared_ptr old_tensor = row.at(i); // row j, column i + if (old_tensor->shape() == first_shape) { // check the newly popped rows have the same dim as the first + RETURN_IF_NOT_OK(new_tensor->InsertTensor({j++}, old_tensor)); } else { - std::string column_name; - for (auto itr : column_name_id_map_) { - if (static_cast(itr.second) == i) { - column_name = itr.first; - break; - } - } - RETURN_STATUS_UNEXPECTED("[Batch ERROR] Inconsistent TensorShapes of Column " + column_name); + RETURN_STATUS_UNEXPECTED("[Batch ERROR] Inconsistent TensorShapes of Column " + std::to_string(i)); } } + } else { // handle string column differently + std::vector strings; + for 
(dsize_t j = 0; j < batch_size; j++) { + std::shared_ptr old_tensor = (*src)->at(j).at(i); + for (auto itr = old_tensor->begin(); itr != old_tensor->end(); itr++) { + strings.emplace_back(*itr); + } + } + RETURN_IF_NOT_OK(Tensor::CreateTensor(&new_tensor, strings, new_shape)); } - (*dest_table)->emplace_back(batched_row); + batched_row.emplace_back(new_tensor); } + + (*dest)->emplace_back(batched_row); + return Status::OK(); } @@ -202,8 +207,8 @@ Status BatchOp::WorkerEntry(int32_t workerId) { Status BatchOp::MakeBatchedBuffer(std::pair, CBatchInfo> table_pair, std::unique_ptr *db) { RETURN_UNEXPECTED_IF_NULL(table_pair.first); - if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc - if (pad_) RETURN_IF_NOT_OK(PadColumns(&table_pair)); // do padding if needed + if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc + if (pad_) RETURN_IF_NOT_OK(PadColumns(&table_pair.first, pad_info_, column_name_id_map_)); // do padding if needed (*db) = std::make_unique(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); std::unique_ptr dest_table = std::make_unique(); RETURN_IF_NOT_OK(BatchRows(&table_pair.first, &dest_table, table_pair.first->size())); @@ -333,74 +338,27 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou return Status(StatusCode::kOK); } -Status BatchOp::PadTensor(std::shared_ptr src, std::shared_ptr *dst, - const std::vector &pad_shape, float pad_val) { - CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr && dst != nullptr, "tensor can't be nullptr"); - if (src->Rank() == 0 || src->shape().AsVector() == pad_shape) { - (*dst) = src; // if no padding, copy the pointer - } else { - CHECK_FAIL_RETURN_UNEXPECTED(src->Rank() == pad_shape.size(), "Pad to diff rank not allowed"); - RETURN_IF_NOT_OK(Tensor::CreateTensor(dst, TensorImpl::kFlexible, TensorShape(pad_shape), src->type())); - auto tensor_type = src->type().value(); - if (pad_val 
== 0) { // if pad with zero, don't care what type it is - RETURN_IF_NOT_OK((*dst)->Zero()); - } else if (tensor_type == DataType::DE_INT8) { - RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); - } else if (tensor_type == DataType::DE_BOOL) { - RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); - } else if (tensor_type == DataType::DE_UINT8) { - RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); - } else if (tensor_type == DataType::DE_INT16) { - RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); - } else if (tensor_type == DataType::DE_FLOAT16) { - RETURN_IF_NOT_OK((*dst)->Fill(static_cast(pad_val))); - } else if (tensor_type == DataType::DE_UINT16) { - RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); - } else if (tensor_type == DataType::DE_INT32) { - RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); - } else if (tensor_type == DataType::DE_UINT32) { - RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); - } else if (tensor_type == DataType::DE_INT64) { - RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); - } else if (tensor_type == DataType::DE_UINT64) { - RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); - } else if (tensor_type == DataType::DE_FLOAT32) { - RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); - } else if (tensor_type == DataType::DE_FLOAT64) { - RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); - } else { - RETURN_STATUS_UNEXPECTED("Incorrect/Unknown tensor type"); - } - std::vector cur_ind(src->Rank(), 0), src_s(src->Rank(), 1), dst_s(src->Rank(), 1); - for (dsize_t i = src->Rank() - 2; i >= 0; i--) { - src_s[i] = src->shape()[i + 1] * src_s[i + 1]; - dst_s[i] = pad_shape[i + 1] * dst_s[i + 1]; - } - RETURN_IF_NOT_OK(PadHelper(src, *dst, cur_ind, src_s, dst_s, 0)); - } - return Status::OK(); -} // namespace dataset - -Status BatchOp::PadColumns(std::pair, CBatchInfo> *table_pair) { - RETURN_UNEXPECTED_IF_NULL(table_pair); // placeholder for now, might need this in the future - CHECK_FAIL_RETURN_UNEXPECTED(table_pair->first->front().size() == column_name_id_map_.size(), - "col_name_map mismatch"); - std::vector pad_vals(column_name_id_map_.size(), 0); 
// value to pad each column's tensor with, default 0 +Status BatchOp::PadColumns(std::unique_ptr *table, const PadInfo &pad_info, + const std::unordered_map &column_name_id_map) { + RETURN_UNEXPECTED_IF_NULL(table); // placeholder for now, might need this in the future + CHECK_FAIL_RETURN_UNEXPECTED((*table)->front().size() == column_name_id_map.size(), "col_name_map mismatch"); + std::vector> pad_vals(column_name_id_map.size(), + 0); // value to pad each column's tensor with, default 0 std::set pad_cols; // padded_shape provided by user, maximum shapes of current batch of tensors - std::vector> pad_shapes(column_name_id_map_.size()), max_shapes(column_name_id_map_.size()); - RETURN_IF_NOT_OK(UnpackPadInfo(&pad_cols, &pad_vals, &pad_shapes)); + std::vector> pad_shapes(column_name_id_map.size()), max_shapes(column_name_id_map.size()); + RETURN_IF_NOT_OK(UnpackPadInfo(pad_info, column_name_id_map, &pad_cols, &pad_vals, &pad_shapes)); // init each shape in max_shape to {-1,-1...} init each unspecified shape in pad_shape to -1 as well for (size_t col_id : pad_cols) { - max_shapes[col_id] = std::vector(table_pair->first->front()[col_id]->Rank(), -1); + max_shapes[col_id] = std::vector((*table)->front()[col_id]->Rank(), -1); if (pad_shapes[col_id].empty()) pad_shapes[col_id] = max_shapes[col_id]; // fill pad shape with -1 CHECK_FAIL_RETURN_UNEXPECTED(pad_shapes[col_id].size() == max_shapes[col_id].size(), "wrong rank in pad_shape"); } // calculate maximum shape for each column that needs to be padded - for (const TensorRow &row : *(table_pair->first)) { // iterator each row in a batch - for (size_t col_id : pad_cols) { // iterator each tensor in a row + for (const TensorRow &row : **table) { // iterator each row in a batch + for (size_t col_id : pad_cols) { // iterator each tensor in a row CHECK_FAIL_RETURN_UNEXPECTED(row[col_id]->Rank() == max_shapes[col_id].size(), "Tensor to be padded together need to have the same rank"); for (size_t dim = 0; dim < 
row[col_id]->Rank(); dim++) { // pick the largest number in each dimension @@ -417,27 +375,29 @@ Status BatchOp::PadColumns(std::pair, CBatchInfo> } // call pad on each tensor that needs to be padded - for (TensorRow &row : *(table_pair->first)) { + for (TensorRow &row : **table) { for (size_t col_id : pad_cols) { std::shared_ptr pad_tensor; - RETURN_IF_NOT_OK(PadTensor(row[col_id], &pad_tensor, pad_shapes[col_id], pad_vals[col_id])); + RETURN_IF_NOT_OK(PadEnd(row[col_id], &pad_tensor, pad_shapes[col_id], pad_vals[col_id])); row[col_id] = pad_tensor; } } return Status::OK(); } -Status BatchOp::UnpackPadInfo(std::set *pad_cols, std::vector *pad_vals, +Status BatchOp::UnpackPadInfo(const PadInfo &pad_info, + const std::unordered_map &column_name_id_map, + std::set *pad_cols, std::vector> *pad_vals, std::vector> *pad_shapes) { - if (pad_info_.empty()) { // if pad_info empty, pad every columns automatically - for (dsize_t col_id = 0; col_id < column_name_id_map_.size(); col_id++) { + if (pad_info.empty()) { // if pad_info empty, pad every columns automatically + for (dsize_t col_id = 0; col_id < column_name_id_map.size(); col_id++) { pad_cols->insert(col_id); } } else { - for (auto p : pad_info_) { - CHECK_FAIL_RETURN_UNEXPECTED(column_name_id_map_.find(p.first) != column_name_id_map_.end(), - "no column exists with name:" + p.first); - dsize_t col_id = static_cast(column_name_id_map_[p.first]); + for (const auto &p : pad_info) { + auto location = column_name_id_map.find(p.first); + CHECK_FAIL_RETURN_UNEXPECTED(location != column_name_id_map.end(), "no column exists with name:" + p.first); + auto col_id = static_cast(location->second); CHECK_FAIL_RETURN_UNEXPECTED(col_id < pad_vals->size() && col_id < pad_shapes->size(), "col_id out of bound"); pad_cols->insert(col_id); (*pad_vals)[col_id] = p.second.second; // set pad values @@ -447,29 +407,6 @@ Status BatchOp::UnpackPadInfo(std::set *pad_cols, std::vector *p return Status::OK(); } -Status 
BatchOp::PadHelper(std::shared_ptr src, std::shared_ptr dst, std::vector cur_ind, - const std::vector &src_s, const std::vector &dst_s, size_t cur_dim) { - if (cur_dim == src->Rank() - 1) { // if this is the last dimension, copy the data - uint8_t type_size = src->type().SizeInBytes(); - size_t len = std::min(src->shape()[cur_dim], dst->shape()[cur_dim]) * type_size; - dsize_t src_flat_ind = 0, dst_flat_ind = 0; - for (size_t i = 0; i < src->Rank(); i++) { - src_flat_ind += src_s[i] * cur_ind[i]; - dst_flat_ind += dst_s[i] * cur_ind[i]; - } - unsigned char *src_addr = src->GetMutableBuffer() + src_flat_ind * type_size; - unsigned char *dst_addr = dst->GetMutableBuffer() + dst_flat_ind * type_size; - CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == 0, "memcpy error"); - } else { // not the last dimension, keep doing recursion - dsize_t min_ind = std::min(dst->shape()[cur_dim], src->shape()[cur_dim]); - for (dsize_t i = 0; i < min_ind; i++) { - cur_ind[cur_dim] = i; - RETURN_IF_NOT_OK(PadHelper(src, dst, cur_ind, src_s, dst_s, cur_dim + 1)); - } - } - return Status::OK(); -} - // Visitor accept method for NodePass Status BatchOp::Accept(NodePass *p, bool *modified) { // Downcast shared pointer then call visitor diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h index 1a862acd0b..28df5e7e81 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h @@ -36,8 +36,9 @@ namespace mindspore { namespace dataset { class DataBuffer; -using TensorBatch = std::vector>; +using TensorBatch = TensorRow; using TensorBatchTable = std::vector; +using PadInfo = std::map>>; class BatchOp : public ParallelOp { public: @@ -66,7 +67,7 @@ class BatchOp : public ParallelOp { return *this; } - Builder &SetPaddingMap(const std::map> &pad_map, bool pad = true) { + Builder &SetPaddingMap(const PadInfo &pad_map, bool pad = true) { 
builder_pad_ = pad; builder_pad_map_ = pad_map; return *this; @@ -119,7 +120,7 @@ class BatchOp : public ParallelOp { int32_t builder_num_workers_; int32_t builder_op_connector_size_; std::vector builder_cols_to_map_; - std::map> builder_pad_map_; + PadInfo builder_pad_map_; py::function builder_batch_size_func_; py::function builder_batch_map_func_; }; @@ -150,8 +151,7 @@ class BatchOp : public ParallelOp { // @param int32_t rows_per_buf // @param int32_t num_workers BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, - const std::vector &, py::function batch_size_func, py::function batch_map_func, - std::map> pad_map); + const std::vector &, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map); // BatchOp destructor ~BatchOp() {} @@ -183,34 +183,33 @@ class BatchOp : public ParallelOp { // @return Status - The error code return Status operator()() override; - // Pad input tensor according pad_shape, need to have same rank. - // @param std::shared_ptr src - tensor to pad from - // @param std::shared_ptr *dst - return tensor padded - // @param std::vector pad_shape - shape to pad to - // @param float pad_val - value to pad with - // @return - The error code return - Status PadTensor(std::shared_ptr src, std::shared_ptr *dst, const std::vector &pad_shape, - float pad_val); - // Base-class override for NodePass visitor acceptor. // @param p - Pointer to the NodePass to be accepted. // @param modified - Whether this node visit modified the pipeline. // @return - Status of the node visit. Status Accept(NodePass *p, bool *modified) override; - private: - // recursive helper function. This function could be very expensive if called on a multi-dimensional tensor - // it is only meant to be called by PadTensor. 
- // @tparam T - type of tensor and fill value - // @param std::shared_ptr src - Tensor to pad from - // @param std::shared_ptr* dst - Tensor to pad to, return value - // @param std::vector cur_ind - recursion helper - // @param T pad_val - value to pad tensor with - // @param size_t cur_dim - recursion helper + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "BatchOp"; } + + // batch the rows in src table then put it to dest table + // @param const std::unique_ptr *src - table that has the rows for batching + // @param const std::unique_ptr *dest - dest_table to hold batched rows + // @param int32_t size - batch_size + // @param const std::unordered_map& column_name_id_map - column names to index mapping // @return Status - The error code return - Status PadHelper(std::shared_ptr src, std::shared_ptr dst, std::vector cur_ind, - const std::vector &src_s, const std::vector &dst_s, size_t cur_dim = 0); + static Status BatchRows(const std::unique_ptr *src, const std::unique_ptr *dest, + dsize_t batch_size); + // @param table + // @param const PadInfo &pad_info pad info + // @param const std::unordered_map& column_name_id_map - column names to index mapping + // @return Status - The error code return + static Status PadColumns(std::unique_ptr *table, const PadInfo &pad_info, + const std::unordered_map &column_name_id_map); + + private: // Worker thread for doing the memcpy of batch // @param int32_t param workerId // @return Status - The error code return @@ -220,28 +219,21 @@ class BatchOp : public ParallelOp { // @return Status - The error code return Status MakeBatchedBuffer(std::pair, CBatchInfo> table_pair, std::unique_ptr *db); - - // batch the rows in src table then put it to dest table - // @param const std::unique_ptr *src - table that has the rows for batching - // @param const std::unique_ptr *dest - dest_table to hold batched rows - // @param int32_t size - batch_size - // @return Status - The error code 
return - Status BatchRows(const std::unique_ptr *src, const std::unique_ptr *dest, size_t size); - // Function that calls pyfunc to perform map on batch // @param (std::pair, batch_stats> *table_pair - contains un-batched tensor // @return Status - The error code return Status MapColumns(std::pair, CBatchInfo> *table_pair); + // @param const PadInfo &pad_info pad info to unpack + // @param const std::unordered_map& column_name_id_map - column names to index mapping // @param std::set *cols, col ids to perform pad on // @param std::vector *vals, default padding value for each column // @param std::vector> *shapes, padding shape specified by user // @return Status - The error code return - Status UnpackPadInfo(std::set *cols, std::vector *vals, std::vector> *shapes); - - // @param table_pair - // @return Status - The error code return - Status PadColumns(std::pair, CBatchInfo> *table_pair); + static Status UnpackPadInfo(const PadInfo &pad_info, + const std::unordered_map &column_name_id_map, + std::set *pad_cols, std::vector> *pad_vals, + std::vector> *pad_shapes); // the number of thread pulling from the mOutConnector of the Op below // @return int32_t, 1 @@ -264,11 +256,11 @@ class BatchOp : public ParallelOp { Status InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info); int32_t start_batch_size_; - bool drop_; // bool for whether to drop remainder or not - bool pad_; // bool for whether to perform padding on tensor - std::vector pyfunc_column_names_; // Name of the columns to perform map op on - std::map> pad_info_; // column names to perform padding on - std::unique_ptr child_iterator_; // child iterator for fetching TensorRows 1 by 1 + bool drop_; // bool for whether to drop remainder or not + bool pad_; // bool for whether to perform padding on tensor + std::vector pyfunc_column_names_; // Name of the columns to perform map op on + PadInfo pad_info_; // column names to perform padding on + std::unique_ptr child_iterator_; // child 
iterator for fetching TensorRows 1 by 1 QueueList, CBatchInfo>> worker_queues_; // internal queue for syncing worker py::function batch_size_func_; // Function pointer of batch size function py::function batch_map_func_; // Function pointer of per batch map function diff --git a/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc new file mode 100644 index 0000000000..def2ea0fee --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc @@ -0,0 +1,241 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "dataset/engine/datasetops/bucket_batch_by_length_op.h" + +#include +#include +#include +#include +#include + +#include "pybind11/numpy.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" +#include "dataset/core/pybind_support.h" +#include "dataset/core/config_manager.h" +#include "dataset/core/tensor.h" +#include "dataset/core/tensor_shape.h" +#include "dataset/engine/dataset_iterator.h" +#include "dataset/engine/datasetops/parallel_op.h" +#include "dataset/engine/opt/pass.h" +#include "dataset/util/status.h" + +namespace py = pybind11; +namespace mindspore { +namespace dataset { +BucketBatchByLengthOp::Builder::Builder(std::vector length_dependent_columns, + std::vector bucket_boundaries, std::vector bucket_batch_sizes) + : builder_length_dependent_columns_(length_dependent_columns), + builder_bucket_boundaries_(bucket_boundaries), + builder_bucket_batch_sizes_(bucket_batch_sizes), + builder_pad_info_({}), + builder_pad_to_bucket_boundary_(false), + builder_drop_remainder_(false) { + std::shared_ptr config_manager = GlobalContext::config_manager(); + builder_op_connector_size_ = config_manager->op_connector_size(); +} + +Status BucketBatchByLengthOp::Builder::SanityCheck() { + std::string error_message; + + if (builder_length_dependent_columns_.empty()) { + error_message += "At least 1 column must be specified for element length calculation.\n"; + } + + if (builder_bucket_boundaries_.empty()) { + error_message += "At least 1 bucket boundary must be specified.\n"; + } + + if (builder_bucket_batch_sizes_.size() != builder_bucket_boundaries_.size() + 1) { + error_message += "There must be exactly one bucket batch size specified for each bucket boundary.\n"; + } + + CHECK_FAIL_RETURN_UNEXPECTED(error_message.empty(), error_message); + + return Status::OK(); +} + +Status BucketBatchByLengthOp::Builder::Build(std::shared_ptr *new_bucket_batch_by_length_op) { + RETURN_IF_NOT_OK(SanityCheck()); + + // insert 0 for the first bucket + 
builder_bucket_boundaries_.insert(builder_bucket_boundaries_.begin(), 0); + + *new_bucket_batch_by_length_op = std::make_shared( + builder_length_dependent_columns_, builder_bucket_boundaries_, builder_bucket_batch_sizes_, + builder_element_length_function_, builder_pad_info_, builder_pad_to_bucket_boundary_, builder_drop_remainder_, + builder_op_connector_size_); + + return Status::OK(); +} + +BucketBatchByLengthOp::BucketBatchByLengthOp(std::vector length_dependent_columns, + std::vector bucket_boundaries, + std::vector bucket_batch_sizes, + py::function element_length_function, PadInfo pad_info, + bool pad_to_bucket_boundary, bool drop_remainder, + int32_t op_connector_size) + : PipelineOp(op_connector_size), + length_dependent_columns_(length_dependent_columns), + bucket_boundaries_(bucket_boundaries), + bucket_batch_sizes_(bucket_batch_sizes), + element_length_function_(element_length_function), + pad_info_(pad_info), + pad_to_bucket_boundary_(pad_to_bucket_boundary), + drop_remainder_(drop_remainder), + batch_count_(0) { + for (int i = 0; i < bucket_batch_sizes_.size(); i++) { + buckets_.push_back(std::make_unique()); + } +} + +Status BucketBatchByLengthOp::EoeReceived(int32_t) { + state_ = OpState::kDeOpIdle; + return Status::OK(); +} + +void BucketBatchByLengthOp::Print(std::ostream &out, bool show_all) const { out << "BucketBatchByLengthOp\n"; } + +Status BucketBatchByLengthOp::operator()() { + TaskManager::FindMe()->Post(); + + TensorRow current_row; + child_iterator_ = std::make_unique(this, 0, 0); + RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(¤t_row)); + RETURN_IF_NOT_OK(AssignColMapFromChild()); + while (!child_iterator_->eof_handled()) { + while (!current_row.empty()) { + int32_t element_length; + RETURN_IF_NOT_OK(ObtainElementLength(&element_length, current_row)); + + int bucket_index = bucket_boundaries_.size() - 1; + while (element_length < bucket_boundaries_[bucket_index]) { + bucket_index--; + } + + 
buckets_[bucket_index]->push_back(current_row); + + if (buckets_[bucket_index]->size() == bucket_batch_sizes_[bucket_index]) { + RETURN_IF_NOT_OK(PadAndBatchBucket(bucket_index, bucket_batch_sizes_[bucket_index])); + } + + RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(¤t_row)); + } + + // got EOE, do what we need to do with remainders in each bucket + if (!drop_remainder_) { + for (int i = 0; i < bucket_boundaries_.size(); i++) { + if (!buckets_[i]->empty()) { + RETURN_IF_NOT_OK(PadAndBatchBucket(i, buckets_[i]->size())); + } + } + } + + // need to send EOE manually since we set state to idle in EoeRecieved() + std::unique_ptr eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); + + RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(¤t_row)); + } + + return Status::OK(); +} + +Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, TensorRow element) { + // call pyfunc here if given pyfunc, otherwise return 0th dimension of shape of + // the single column specified in length_dependent_columns_ + if (element_length_function_) { + py::gil_scoped_acquire gil_acquire; + if (Py_IsInitialized() == 0) { + return Status(StatusCode::kPythonInterpreterFailure, "Python Interpreter is finalized"); + } + try { + size_t number_of_arguments = length_dependent_columns_.size(); + py::tuple input_arguments(number_of_arguments); + for (size_t i = 0; i < number_of_arguments; i++) { + py::array argument_value; + int32_t column_index = column_name_id_map_[length_dependent_columns_[i]]; + RETURN_IF_NOT_OK(element[column_index]->GetDataAsNumpy(&argument_value)); + input_arguments[i] = argument_value; + } + + py::object length = element_length_function_(*input_arguments); + *out_element_length = length.cast(); + if (*out_element_length < 0) { + return Status(StatusCode::kPyFuncException, "Element length function should return a non negative integer."); + } + } catch (const 
py::error_already_set &e) { + return Status(StatusCode::kPyFuncException, e.what()); + } catch (const py::cast_error &e) { + return Status(StatusCode::kPyFuncException, "Count not cast output of element length function to int32_t."); + } + } else { + *out_element_length = element[0]->shape()[0]; + } + + return Status::OK(); +} + +Status BucketBatchByLengthOp::PadAndBatchBucket(int32_t bucket_index, int32_t batch_size) { + std::unique_ptr *bucket = &buckets_[bucket_index]; + + PadInfo pad_info_copy = pad_info_; + if (pad_to_bucket_boundary_) { + for (auto &pair : pad_info_copy) { + std::vector pad_shape = pair.second.first.AsVector(); + + for (size_t i = 0; i < pad_shape.size(); i++) { + if (pad_shape[i] == TensorShape::kDimUnknown) { + if (bucket_index + 1 >= bucket_boundaries_.size()) { + std::string error_message = "Requested to pad to bucket boundary, element falls in last bucket"; + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, error_message); + } + + pad_shape[i] = bucket_boundaries_[bucket_index + 1] - 1; + } + } + + pair.second.first = TensorShape(pad_shape); + } + } + + // PadColumns will change the data in bucket + RETURN_IF_NOT_OK(BatchOp::PadColumns(bucket, pad_info_copy, column_name_id_map_)); + + std::unique_ptr batched_bucket = std::make_unique(); + RETURN_IF_NOT_OK(BatchOp::BatchRows(bucket, &batched_bucket, batch_size)); + (*bucket)->clear(); + + std::unique_ptr batched_buffer = std::make_unique(batch_count_, DataBuffer::kDeBFlagNone); + batched_buffer->set_tensor_table(std::move(batched_bucket)); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(batched_buffer))); + + batch_count_++; + + return Status::OK(); +} + +Status BucketBatchByLengthOp::Reset() { + batch_count_ = 0; + + for (int i = 0; i < buckets_.size(); i++) { + buckets_[i] = std::make_unique(); + } + + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.h 
b/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.h new file mode 100644 index 0000000000..bf0bcb0e78 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.h @@ -0,0 +1,155 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_ENGINE_DATASETOPS_BUCKET_BATCH_BY_LENGTH_OP_H_ +#define DATASET_ENGINE_DATASETOPS_BUCKET_BATCH_BY_LENGTH_OP_H_ + +#include +#include +#include +#include +#include + +#include "dataset/core/config_manager.h" +#include "dataset/core/tensor.h" +#include "dataset/engine/dataset_iterator.h" +#include "dataset/engine/datasetops/batch_op.h" +#include "dataset/engine/datasetops/pipeline_op.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { +class DataBuffer; + +class BucketBatchByLengthOp : public PipelineOp { + public: + class Builder { + public: + Builder(std::vector length_dependent_columns, std::vector bucket_boundaries, + std::vector bucket_batch_sizes); + + ~Builder() = default; + + Builder &SetLengthDependentColumns(std::vector length_dependent_columns) { + builder_length_dependent_columns_ = length_dependent_columns; + return *this; + } + + Builder &SetBucketBoundaries(std::vector bucket_boundaries) { + builder_bucket_boundaries_ = bucket_boundaries; + return *this; + } + + Builder &SetBucketBatchSizes(std::vector bucket_batch_sizes) { + builder_bucket_batch_sizes_ = 
bucket_batch_sizes; + return *this; + } + + Builder &SetElementLengthFunction(py::function element_length_function) { + builder_element_length_function_ = element_length_function; + return *this; + } + + Builder &SetPadInfo(PadInfo pad_info) { + builder_pad_info_ = pad_info; + return *this; + } + + Builder &SetPadToBucketBoundary(bool pad_to_bucket_boundary) { + builder_pad_to_bucket_boundary_ = pad_to_bucket_boundary; + return *this; + } + + Builder &SetDropRemainder(bool drop_remainder) { + builder_drop_remainder_ = drop_remainder; + return *this; + } + + Builder &SetOpConnectorSize(int32_t op_connector_size) { + builder_op_connector_size_ = op_connector_size; + return *this; + } + + Status Build(std::shared_ptr *new_bucket_batch_by_length_op); + + private: + Status SanityCheck(); + + std::vector builder_length_dependent_columns_; + std::vector builder_bucket_boundaries_; + std::vector builder_bucket_batch_sizes_; + py::function builder_element_length_function_; + PadInfo builder_pad_info_; + bool builder_pad_to_bucket_boundary_; + bool builder_drop_remainder_; + int32_t builder_op_connector_size_; + }; + + BucketBatchByLengthOp(std::vector length_dependent_columns, std::vector bucket_boundaries, + std::vector bucket_batch_sizes, py::function element_length_function, PadInfo pad_info, + bool pad_to_bucket_boundary, bool drop_remainder, int32_t op_connector_size); + + // Destructor + ~BucketBatchByLengthOp() = default; + + // Might need to batch remaining buckets after receiving eoe, so override this method. 
+ // @param int32_t workerId + // @return Status - The error code returned + Status EoeReceived(int32_t) override; + + // A print method typically used for debugging + // @param out - The output stream to write output to + // @param show_all - A bool to control if you want to show all info or just a summary + void Print(std::ostream &out, bool show_all) const override; + + // << Stream output operator overload + // @notes This allows you to write the debug print info using stream operators + // @param out - reference to the output stream being overloaded + // @param sO - reference to the BucketBatchByLengthOp to display + // @return - the output stream must be returned + friend std::ostream &operator<<(std::ostream &out, const BucketBatchByLengthOp &bo) { + bo.Print(out, false); + return out; + } + + // Main loop of batch + // @return Status - The error code returned + Status operator()() override; + + // Function that is called by ResetOp at the end of every epoch + // @return Status - The error code returned + Status Reset() override; + + private: + Status ObtainElementLength(int32_t *out_element_length, TensorRow element); + + Status PadAndBatchBucket(int32_t bucket_index, int32_t batch_size); + + std::vector length_dependent_columns_; + std::vector bucket_boundaries_; + std::vector bucket_batch_sizes_; + py::function element_length_function_; + PadInfo pad_info_; + bool pad_to_bucket_boundary_; + bool drop_remainder_; + + int32_t batch_count_; + std::unique_ptr child_iterator_; + std::vector> buckets_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_DATASETOPS_BUCKET_BATCH_BY_LENGTH_OP_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.cc new file mode 100644 index 0000000000..f99804ec9b --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.cc @@ -0,0 +1,207 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dataset/engine/datasetops/build_vocab_op.h" + +#include +#include +#include +#include +#include +#include "dataset/core/config_manager.h" + +namespace mindspore { +namespace dataset { + +BuildVocabOp::BuildVocabOp(std::shared_ptr vocab, std::vector col_names, + std::pair freq_r, int64_t top_k, const std::vector &tokens, + bool prepend, int32_t num_workers, int32_t op_conn_size) + : ParallelOp(num_workers, op_conn_size), + interval_(op_conn_size * num_workers), + vocab_(vocab), + col_names_(col_names), + freq_range_(freq_r), + top_k_(top_k), + special_tokens_(tokens), + special_first_(prepend) { + // init two queues for thread sync + distributor_queue_ = std::make_unique>(num_workers * op_conn_size); + collector_queue_ = + std::make_unique>>>(num_workers * op_conn_size); +} + +Status BuildVocabOp::WorkerEntry(int32_t worker_id) { + TaskManager::FindMe()->Post(); + TensorRow new_row; + RETURN_IF_NOT_OK(distributor_queue_->PopFront(&new_row)); + std::unique_ptr> wrkr_map = + std::make_unique>(); + int32_t row_cnt = 0; + while (!new_row.empty()) { + for (int32_t col : col_ids_) { + CHECK_FAIL_RETURN_UNEXPECTED(!new_row[col]->type().IsNumeric(), "from_dataset only works on string columns"); + for (auto itr = new_row[col]->begin(); itr != new_row[col]->end(); itr++) { + (*wrkr_map)[std::string(*itr)] += 1; + } + } + row_cnt++; // row is processed by this point + if ((row_cnt % interval_ == 0) && ((row_cnt 
/ interval_) % num_workers_ == worker_id) && (!wrkr_map->empty())) { + RETURN_IF_NOT_OK(collector_queue_->Add(std::move(wrkr_map))); + wrkr_map = std::make_unique>(); + } + RETURN_IF_NOT_OK(distributor_queue_->PopFront(&new_row)); + } + // clean up + if (!wrkr_map->empty()) { + RETURN_IF_NOT_OK(collector_queue_->Add(std::move(wrkr_map))); + } + // empty map as quit signal + RETURN_IF_NOT_OK(collector_queue_->Add(std::make_unique>())); + return Status::OK(); +} + +Status BuildVocabOp::operator()() { + // launch the collector thread + RETURN_UNEXPECTED_IF_NULL(tree_); + RETURN_IF_NOT_OK(distributor_queue_->Register(tree_->AllTasks())); + RETURN_IF_NOT_OK(collector_queue_->Register(tree_->AllTasks())); + // launch worker threads and collector thread + RETURN_IF_NOT_OK( + tree_->LaunchWorkers(num_workers_, std::bind(&BuildVocabOp::WorkerEntry, this, std::placeholders::_1))); + RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask("collector", std::bind(&BuildVocabOp::CollectorThread, this))); + TaskManager::FindMe()->Post(); + child_iterator_ = std::make_unique(this, 0, 0); + TensorRow new_row; + RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); + RETURN_IF_NOT_OK(AssignColMapFromChild()); + if (!col_names_.empty()) { + col_ids_.reserve(col_names_.size()); + for (std::string col : col_names_) { + auto itr = column_name_id_map_.find(col); + CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), col + " column doesn't exist"); + col_ids_.push_back(itr->second); + } + } else { + col_ids_.reserve(column_name_id_map_.size()); + for (const auto &p : column_name_id_map_) { + col_ids_.push_back(p.second); + } + } + bool eoe_warning = false; // give out warning if receive more than 1 eoe + while (child_iterator_->eof_handled() == false) { + while (new_row.empty() == false) { + RETURN_IF_NOT_OK(distributor_queue_->EmplaceBack(new_row)); + RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); + } + CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "no op 
should be after from_dataset (repeat detected)"); + eoe_warning = true; + } + + // tell all workers to quit + for (int32_t wrkr_id = 0; wrkr_id < num_workers_; wrkr_id++) { + RETURN_IF_NOT_OK(distributor_queue_->EmplaceBack(TensorRow())); + } + return Status::OK(); +} + +Status BuildVocabOp::CollectorThread() { + TaskManager::FindMe()->Post(); + int32_t num_quited_worker = 0; + std::unique_ptr> wrkr_map; + while (num_quited_worker != num_workers_) { + RETURN_IF_NOT_OK(collector_queue_->PopFront(&wrkr_map)); + RETURN_UNEXPECTED_IF_NULL(wrkr_map); + if (!wrkr_map->empty()) { + for (const auto &wd : *wrkr_map) word_cnt_[wd.first] += wd.second; + } else { + ++num_quited_worker; + } + } // all frequencies are obtained + CHECK_FAIL_RETURN_UNEXPECTED(!word_cnt_.empty(), "word_cnt is empty"); + std::vector words; + // make sure enough is reserved, this will become a partially sorted list eventually + words.reserve(wrkr_map->size()); + + for (auto it = word_cnt_.begin(); it != word_cnt_.end();) { + if (it->second >= freq_range_.first && it->second <= freq_range_.second) { + words.push_back(it->first); + it++; + } else { + it = word_cnt_.erase(it); + } + } + std::string err_msg; + + for (const std::string &sp_tk : special_tokens_) { + // if a special word exists in dataset, warn user about this + err_msg += (word_cnt_.find(sp_tk) != word_cnt_.end() ? 
sp_tk + "\t" : ""); + } + + CHECK_FAIL_RETURN_UNEXPECTED(err_msg.empty(), "These specials words are already in the dataset: " + err_msg + "."); + + int64_t num_words = std::min(static_cast(words.size()), top_k_); + if (num_words == 0) { + MS_LOG(WARNING) << "No word falls in the frequency range: (" << freq_range_.first << "," << freq_range_.second + << ") vocab would be empty (except for special tokens)."; + } + + // this would take the top-k most frequent words + std::partial_sort(words.begin(), words.begin() + num_words, words.end(), + [this](const std::string &w1, const std::string &w2) { + int64_t f1 = word_cnt_[w1], f2 = word_cnt_[w2]; + return f1 == f2 ? w1 < w2 : f1 > f2; + }); + + if (special_first_) { + for (const std::string &sp_tk : special_tokens_) vocab_->append_word(sp_tk); + } + + for (int64_t i = 0; i < num_words; i++) { + vocab_->append_word(words[i]); + } + + if (!special_first_) { + for (const std::string &sp_tk : special_tokens_) vocab_->append_word(sp_tk); + } + + RETURN_IF_NOT_OK(out_connector_->Add(0, std::make_unique(0, DataBuffer::kDeBFlagEOE))); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::make_unique(0, DataBuffer::kDeBFlagEOF))); + // then use std::nth_element to partial sort + return Status::OK(); +} + +Status BuildVocabOp::Builder::Build(std::shared_ptr *op) { + CHECK_FAIL_RETURN_UNEXPECTED(builder_num_workers_ > 0, "builder num_workers need to be greater than 0"); + CHECK_FAIL_RETURN_UNEXPECTED(builder_top_k_ > 0, "top_k needs to be positive number"); + CHECK_FAIL_RETURN_UNEXPECTED(builder_max_freq_ >= builder_min_freq_ && builder_min_freq_ >= 0, + "frequency range [a,b] should be 0 <= a <= b (a,b are inclusive)"); + (*op) = std::make_shared( + builder_vocab_, builder_col_names_, std::make_pair(builder_min_freq_, builder_max_freq_), builder_top_k_, + builder_speical_tokens_, builder_special_first_, builder_num_workers_, builder_connector_size_); + return Status::OK(); +} + +BuildVocabOp::Builder::Builder() + : 
builder_top_k_(std::numeric_limits::max()), + builder_min_freq_(0), + builder_max_freq_(std::numeric_limits::max()), + builder_special_first_(true) { + std::shared_ptr cfg = GlobalContext::config_manager(); + builder_num_workers_ = cfg->num_parallel_workers(); + builder_connector_size_ = cfg->op_connector_size(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.h b/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.h new file mode 100644 index 0000000000..bf358c48c6 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.h @@ -0,0 +1,174 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_ENGINE_DATASETOPS_BUILD_VOCAB_OP_H_ +#define DATASET_ENGINE_DATASETOPS_BUILD_VOCAB_OP_H_ + +#include +#include +#include +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/engine/dataset_iterator.h" +#include "dataset/engine/datasetops/parallel_op.h" +#include "dataset/text/vocab.h" +#include "dataset/util/queue.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { +class BuildVocabOp : public ParallelOp { + public: + class Builder { + public: + Builder(); + + // Destructor. + ~Builder() = default; + + // Setter method + // @param int32_t size + // @return Builder setter method returns reference to the builder. 
+ Builder &SetOpConnectorSize(int32_t size) { + builder_connector_size_ = size; + return *this; + } + + // Setter method + // @param int32_t num_workers + // @return Builder setter method returns reference to the builder. + Builder &SetNumWorkers(int32_t num_workers) { + builder_num_workers_ = num_workers; + return *this; + } + + // Setter method + // @param int64_t top_k + // @return Builder setter method returns reference to the builder. + Builder &SetTopK(int64_t top_k) { + builder_top_k_ = top_k; + return *this; + } + + // Setter method + // @param int64_t min_freq + // @return Builder setter method returns reference to the builder. + Builder &SetMinFreq(int64_t min_freq) { + builder_min_freq_ = min_freq; + return *this; + } + + // Setter method + // @param int64_t max_freq + // @return Builder setter method returns reference to the builder. + Builder &SetMaxFreq(int64_t max_freq) { + builder_max_freq_ = max_freq; + return *this; + } + + // set columns names + // @param const std::vector & col_names - name of columns to get words + // @return Builder & reference to builder class object + Builder &SetColumnNames(const std::vector &col_names) { + builder_col_names_ = col_names; + return *this; + } + + // set special tokens + // @param const std::vector & col_names - name of columns to get words + // @return Builder & reference to builder class object + Builder &SetSpecialTokens(const std::vector &tokens) { + builder_speical_tokens_ = tokens; + return *this; + } + + // set vocab object + Builder &SetVocab(std::shared_ptr vocab) { + builder_vocab_ = vocab; + return *this; + } + + // set special tokens first (or last) + Builder &SetSpecialFirst(bool prepend) { + builder_special_first_ = prepend; + return *this; + } + + // The builder "build" method creates the final object. 
+ // @param std::shared_ptr *op - DatasetOp + // @return - The error code return + Status Build(std::shared_ptr *op); + + private: + int32_t builder_num_workers_; + int32_t builder_connector_size_; + int64_t builder_min_freq_; + int64_t builder_max_freq_; + bool builder_special_first_; + std::vector builder_col_names_; + std::vector builder_speical_tokens_; + std::shared_ptr builder_vocab_; + int64_t builder_top_k_; + }; + + BuildVocabOp(std::shared_ptr vocab, std::vector col_names, std::pair freq_range, + int64_t top_k, const std::vector &tokens, bool prepend, int32_t num_workers, + int32_t op_connector_size); + + ~BuildVocabOp() = default; + + Status WorkerEntry(int32_t worker_id) override; + + // collect the work product from each worker + Status CollectorThread(); + + Status EofReceived(int32_t) override { return Status::OK(); } + + Status EoeReceived(int32_t) override { return Status::OK(); } + + Status operator()() override; + + // Getter + // @return the number of workers + int32_t num_producers() const override { return 1; } + + // Getter + // @return the number of threads consuming from the previous Connector + int32_t num_consumers() const override { return 1; } + + Status Reset() override { RETURN_STATUS_UNEXPECTED("Reset shouldn't be called in BuildVocabOp"); } + + private: + const int32_t interval_; + bool special_first_; + std::shared_ptr vocab_; + std::vector col_names_; + std::vector col_ids_; + std::vector special_tokens_; + // pair = {min_f, max_f} + // make sure that 0<= min_f < max_f <= int32_max in the builder + std::pair freq_range_; + + int64_t top_k_; // every thing means top_k_ == int32_max + std::unique_ptr child_iterator_; // child iterator for fetching TensorRows 1 by 1 + std::unique_ptr> distributor_queue_; // master thread assigns each worker TensorRow via this + std::unique_ptr>>> collector_queue_; + std::unordered_map word_cnt_; +}; +} // namespace dataset +} // namespace mindspore +#endif // 
DATASET_ENGINE_DATASETOPS_BUILD_VOCAB_OP_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.h b/mindspore/ccsrc/dataset/engine/datasetops/concat_op.h index 9afadab39a..0fb8ec8362 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/concat_op.h @@ -40,7 +40,7 @@ class ConcatOp : public PipelineOp { ~Builder() = default; // The builder "build" method creates the final object. - // @return shared_ptr to the new StorageOp object + // @return shared_ptr to the new ConcatOp object Status Build(std::shared_ptr *); private: @@ -81,6 +81,10 @@ class ConcatOp : public PipelineOp { // before providing their own implementations. Status PrepareNodePostAction() override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "ConcatOp"; } + private: Status Verify(int32_t id, const std::unique_ptr &buf); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc index 9ee6e706aa..bf991ea7d9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include "dataset/engine/execution_tree.h" #include "dataset/engine/datasetops/device_queue_op.h" @@ -38,6 +39,7 @@ DatasetOp::DatasetOp(int32_t op_connector_size) tree_(nullptr), state_(OpState::kDeOpIdle), op_ctrl_flags_(kDeOpNone), + out_connector_(nullptr), first_fetch_(true) { // The operator starts out with an invalid operator id. The only way to // get it out of invalid state is to assign the operator to an execution tree. @@ -67,8 +69,45 @@ Status DatasetOp::AddChild(std::shared_ptr child) { return Status::OK(); } +Status DatasetOp::RemoveChild(std::shared_ptr child) { + if (operator_id_ == kInvalidOperatorId) { + std::string err_msg( + "Cannot remove child node. 
Tree node connections can only" + "be made if the node belongs to a tree."); + RETURN_STATUS_UNEXPECTED(err_msg); + } + + // disallow relationships with other trees + if (tree_ != child->tree_) { + std::string err_msg( + "Cannot remove child node. Tree node connections can only be made if both nodes belong to the same tree."); + RETURN_STATUS_UNEXPECTED(err_msg); + } + + child_.erase(std::remove(child_.begin(), child_.end(), child), child_.end()); + child->RemoveParent(this); + return Status::OK(); +} + +Status DatasetOp::InsertAsParent(std::shared_ptr to_add) { + for (auto &prev_parent : this->parent_) { + RETURN_IF_NOT_OK(prev_parent->RemoveChild(shared_from_this())); + RETURN_IF_NOT_OK(prev_parent->AddChild(to_add)); + } + RETURN_IF_NOT_OK(to_add->AddChild(shared_from_this())); + if (tree_->root()->id() == this->id()) { + tree_->AssignRoot(to_add); + } + return Status::OK(); +} + // Adds a parent operator to this operator -void DatasetOp::AddParent(const DatasetOp *parent) { parent_.push_back(parent); } +void DatasetOp::AddParent(DatasetOp *parent) { parent_.push_back(parent); } + +// Removes a parent operator from this operator +void DatasetOp::RemoveParent(DatasetOp *parent) { + parent_.erase(std::remove(parent_.begin(), parent_.end(), parent), parent_.end()); +} // Getter function to get a shared pointer to our childAdds a operator to become our child. 
std::shared_ptr DatasetOp::child(int32_t child_index) const { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h index 315dc27219..973b5be962 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h @@ -51,7 +51,7 @@ class DatasetOp : public std::enable_shared_from_this { }; // Flags that control operator runtime behaviours - enum OpState { kDeOpRunning = 0, kDeOpIdle = 1 }; + enum OpState { kDeOpRunning = 0, kDeOpIdle = 1, kDeOpTerminated }; // Constructor // @param op_connector_size - The size for the output connector of this operator. @@ -64,10 +64,19 @@ class DatasetOp : public std::enable_shared_from_this { // @param child - shared pointer to the child to add. Status AddChild(std::shared_ptr child); + // Remove a operator from our children. + // @param child - shared pointer to the child to remove. + Status RemoveChild(std::shared_ptr child); + // Getter function to get a shared pointer to our child // @param child_index - An operator can have n children. Indicates choose which child to return. std::shared_ptr child(int32_t child_index) const; + // Inserts a operator as the parent current op. + // Inserted op will become the sole parent of the current op. + // The existing parent of the current op will be transferred to the inserted op. 
+ Status InsertAsParent(std::shared_ptr to_add); + // Creates the connector within this operator // @param num_producers - number of threads that write into this connector // @param num_consumers - number of threads that read from this connector @@ -211,8 +220,36 @@ class DatasetOp : public std::enable_shared_from_this { // @return - the column name map as a string std::string ColumnNameMapAsString() const; + // Getter function + // @return connector size of current op + int32_t ConnectorSize() const { + if (!inlined()) { + return out_connector_->size(); + } + // Return child connector size for inlined op + return ChildOpConnectorSize(); + } + + // Getter function + // @return connector size of current op + int32_t ConnectorCapacity() const { + if (!inlined()) { + return out_connector_->capacity(); + } + // Return child connector capacity for inlined op + return ChildOpConnectorCapacity(); + } + + // Getter function + // @return connector size of child op + int32_t ChildOpConnectorSize(int32_t child_index = 0) const { return child_[child_index]->ConnectorSize(); } + + // Getter function + // @return connector capacity of child op + int32_t ChildOpConnectorCapacity(int32_t child_index = 0) const { return child_[child_index]->ConnectorCapacity(); } + // Children Getter - // @return Vector or Children + // @return Vector of Children std::vector> Children() const { return child_; } // Base method for NodePass visit. @@ -221,11 +258,24 @@ class DatasetOp : public std::enable_shared_from_this { // @return Statue of the node visit virtual Status Accept(NodePass *p, bool *modified); + // Op name getter + // @return Name of the current Op + virtual std::string Name() const { return "DatasetOp"; } + + // Execution Tree getter + // @return Pointer to the ExecutionTree the current op belongs to, no ownership + ExecutionTree *Tree() { return tree_; } + protected: // Adds a parent operator to this operator // @notes External callers do not have access to this function. 
// @param parent - The parent node to add - void AddParent(const DatasetOp *parent); + void AddParent(DatasetOp *parent); + + // Removes a parent operator from this operator + // @notes External callers do not have access to this function. + // @param parent - The parent node to remove + void RemoveParent(DatasetOp *parent); // A helper function for providing an assignment of the column name map. // This grabs the map from child 0 and assigns it into this op. @@ -234,7 +284,7 @@ class DatasetOp : public std::enable_shared_from_this { Status AssignColMapFromChild(); std::vector> child_; // Child nodes - std::vector parent_; // Parent nodes. No ownership and read-only + std::vector parent_; // Parent nodes. No ownership int32_t oc_queue_size_; // Capacity for each out_connector_ int32_t operator_id_; // Generated id for the node ExecutionTree *tree_; // Back pointer to our tree. diff --git a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc index bcdb58db24..84bad9db1a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc @@ -13,18 +13,20 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/device_queue_op.h" + #include #include #include - #include "dataset/core/config_manager.h" #include "dataset/core/global_context.h" +#include "dataset/engine/datasetops/device_queue_op.h" #include "dataset/engine/data_buffer.h" #include "dataset/engine/dataset_iterator.h" +#include "dataset/engine/opt/pass.h" +#include "dataset/engine/perf/profiling.h" +#include "dataset/engine/perf/device_queue_tracing.h" #include "dataset/util/status.h" #include "dataset/util/task_manager.h" -#include "dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -97,7 +99,19 @@ Status DeviceQueueOp::SendDataToAscend() { MS_LOG(INFO) << "Device queue, sending data to Ascend."; int64_t total_batch = 0; bool is_break_loop = false; - + double batch_start_time, end_time; + int32_t batch_cost, tdt_cost; + int32_t connector_size = 0; + int32_t connector_capacity; + std::shared_ptr profiling_node; + bool isProfilingEnable = tree_->GetProfilingManager()->IsProfilingEnable(); + if (isProfilingEnable) { + std::shared_ptr node; + RETURN_IF_NOT_OK(tree_->GetProfilingManager()->GetTracingNode(kDeviceQueueTracingName, &node)); + profiling_node = std::dynamic_pointer_cast(node); + batch_start_time = ProfilingTime::GetCurMilliSecond(); + connector_capacity = ChildOpConnectorCapacity(); + } std::unique_ptr current_buffer; RETURN_IF_NOT_OK(GetNextInput(¤t_buffer)); @@ -107,20 +121,43 @@ Status DeviceQueueOp::SendDataToAscend() { TensorRow currRow; for (int row_id = 0; row_id < current_buffer->NumRows() && !is_break_loop; row_id++) { RETURN_IF_NOT_OK(current_buffer->GetRow(row_id, &currRow)); - auto status = tdtInstancePtr->hostPush(currRow, true, channel_name_); + auto status = tdtInstancePtr->hostPush(currRow, true, channel_name_, isProfilingEnable, tdt_cost); if (status == TdtStatus::FAILED) { return Status(StatusCode::kTDTPushFailure, "TDT Push Failed"); } + + if (isProfilingEnable) { + end_time = ProfilingTime::GetCurMilliSecond(); + // record 
push tdt time + profiling_node->Record(TIME, TDT_PUSH_TIME, total_batch + 1, tdt_cost); + batch_cost = (int32_t)(end_time - batch_start_time); + // record batch time + profiling_node->Record(TIME, BATCH_TIME, total_batch + 1, batch_cost); + // record pipeline time + profiling_node->Record(TIME, PIPELINE_TIME, total_batch + 1, batch_cost - tdt_cost); + batch_start_time = end_time; + // record connector depth + profiling_node->Record(CONNECTOR_DEPTH, connector_capacity, total_batch + 1, connector_size); + } total_batch++; if (num_batch_ > 0 && total_batch == num_batch_) { is_break_loop = true; } } + if (isProfilingEnable) { + connector_size = ChildOpConnectorSize(); + connector_capacity = ChildOpConnectorCapacity(); + } RETURN_IF_NOT_OK(GetNextInput(¤t_buffer)); } + if (isProfilingEnable) { + connector_size = ChildOpConnectorSize(); + connector_capacity = ChildOpConnectorCapacity(); + } RETURN_IF_NOT_OK(GetNextInput(¤t_buffer)); } + tree_->SetFinished(); MS_LOG(INFO) << "Device queue total batch is " << total_batch << ", number of batches is " << num_batch_ << "."; return Status::OK(); @@ -195,13 +232,17 @@ Status DeviceQueueOp::RetryPushGPUData(const std::vector &data_size, con while (!GpuBufferMgr::GetInstance().IsClosed() && !TaskManager::FindMe()->Interrupted()) { RETURN_IF_NOT_OK(MallocForGPUData(&items, curr_row)); - auto ret = GpuBufferMgr::GetInstance().Push(handle, items, WAIT_TIME); + BlockQueueStatus_T ret = GpuBufferMgr::GetInstance().Push(handle, items, WAIT_TIME); if (ret) { for (int i = 0; i < items.size(); i++) { free(items[i].data_ptr_); } - MS_LOG(WARNING) << "Retry pushing data..."; - continue; + if (ret == BlockQueueStatus_T::ERROR_INPUT) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "invalid input Data, please check it."); + } else { + MS_LOG(WARNING) << "Retry pushing data..."; + continue; + } } else { break; } @@ -217,7 +258,7 @@ Status DeviceQueueOp::MallocForGPUData(std::vector *items, return 
Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "memory malloc failed."); } (void)memset_s(sub_item.data_ptr_, sub_item.data_len_, 0, sub_item.data_len_); - unsigned char *column_data = curr_row[i]->GetMutableBuffer(); + const unsigned char *column_data = curr_row[i]->GetBuffer(); if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data, static_cast(curr_row[i++]->SizeInBytes())) != 0) { MS_LOG(ERROR) << "memcpy_s failed!"; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.h b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.h index ebbcd16cc3..a854004593 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.h @@ -25,11 +25,11 @@ #ifdef ENABLE_TDTQUE #include "dataset/engine/tdt/tdt_plugin.h" - #endif #ifdef ENABLE_GPUQUE #include "device/gpu/gpu_buffer_mgr.h" +using mindspore::device::BlockQueueStatus_T; using mindspore::device::GpuBufferMgr; #endif @@ -140,6 +140,10 @@ class DeviceQueueOp : public PipelineOp { // @return - Status of the node visit. Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "DeviceQueueOp"; } + private: // Name: checkExceptions(DataBuffer); // Description: Check whether the dataBuffer meets the condition for performing DeviceQueueOp diff --git a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.h b/mindspore/ccsrc/dataset/engine/datasetops/filter_op.h index cd6c01da90..36f70cb82f 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/filter_op.h @@ -127,6 +127,10 @@ class FilterOp : public ParallelOp { // @return - Status of the node visit. 
Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "FilterOp"; } + private: // predicate_func python callable which returns a boolean value. py::function predicate_func_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc index 008ff09c99..9918260201 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc @@ -54,19 +54,20 @@ Status MapOp::Builder::sanityCheck() const { Status MapOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(sanityCheck()); *ptr = std::make_shared(std::move(build_in_col_names_), std::move(build_out_col_names_), - std::move(build_tensor_funcs_), build_num_workers_, build_op_connector_size_, - build_perf_mode_); + std::move(build_tensor_funcs_), std::move(build_col_order_), build_num_workers_, + build_op_connector_size_, build_perf_mode_); return Status::OK(); } // Constructor of MapOp MapOp::MapOp(const std::vector &in_col_names, const std::vector &out_col_names, - std::vector> tensor_funcs, int32_t num_workers, int32_t op_connector_size, - bool perf_mode) + std::vector> tensor_funcs, const std::vector &columns_order, + int32_t num_workers, int32_t op_connector_size, bool perf_mode) : ParallelOp(num_workers, op_connector_size), tfuncs_(std::move(tensor_funcs)), in_columns_(in_col_names), out_columns_(out_col_names), + columns_order_(columns_order), perf_mode_(perf_mode) { // If caller didn't specify the out_col_names, assume they are same as the in_columns. 
if (out_columns_.empty() || out_columns_[0].empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/map_op.h b/mindspore/ccsrc/dataset/engine/datasetops/map_op.h index f903881ca2..4d7ffd1204 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/map_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/map_op.h @@ -93,6 +93,13 @@ class MapOp : public ParallelOp { return *this; } + // Setter method. + // @return Builder setter method returns reference to the builder. + Builder &SetColOrder(const std::vector &col_order_) { + build_col_order_ = col_order_; + return *this; + } + // Setter method. // @return Builder setter method returns reference to the builder. Builder &SetNumWorkers(int32_t num_workers) { @@ -123,6 +130,7 @@ class MapOp : public ParallelOp { std::vector build_in_col_names_; std::vector build_out_col_names_; std::vector> build_tensor_funcs_; + std::vector build_col_order_; int32_t build_num_workers_; int32_t build_op_connector_size_; bool build_perf_mode_; // Default true. @@ -137,11 +145,12 @@ class MapOp : public ParallelOp { // @param in_col_names A list of input column names (should match the input/output \p tensorFuncs). // @param out_col_names A list of output column names (should match the input/output \p tensorFuncs). // @param tensor_funcs A list of TensorOp pointers for MapOp to apply to each data. + // @param columns_order names A full list of column names (should match the whole dataset view post \p tensorFuncs). // @param num_workers The number of worker threads. // @param op_connector_size The size of each queue in the connector. 
MapOp(const std::vector &in_col_names, const std::vector &out_col_names, - std::vector> tensor_funcs, int32_t num_workers, int32_t op_connector_size, - bool perf_mode); + std::vector> tensor_funcs, const std::vector &columns_order, + int32_t num_workers, int32_t op_connector_size, bool perf_mode); // Destructor ~MapOp() = default; @@ -177,6 +186,14 @@ class MapOp : public ParallelOp { // @return - Status of the node visit. Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "MapOp"; } + + // Columns order getter + // @return The post map columns order + std::vector const &ColumnsOrder() const { return columns_order_; } + private: // Local queues where worker threads can pop from. // Popping directly from the Connector can block if the previous designated threads haven't pop. @@ -198,6 +215,9 @@ class MapOp : public ParallelOp { // Indices of the columns to process. std::vector to_process_indices_; + // Variable to store the column_order of all columns post tensorOps + std::vector columns_order_; + // Performance mode is when the main thread creates local queues, pulls databuffers from the previous // op's Connector and distributes them to the local queues. Workers pull from the local queues. // If this flag is false, each worker pulls directly from the Connector. This use less resources diff --git a/mindspore/ccsrc/dataset/engine/datasetops/project_op.h b/mindspore/ccsrc/dataset/engine/datasetops/project_op.h index 3940b9adc7..ced0f9e5a9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/project_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/project_op.h @@ -40,7 +40,7 @@ class ProjectOp : public PipelineOp { ~Builder() = default; // The builder "build" method creates the final object. - // @return shared_ptr to the new StorageOp object. + // @return shared_ptr to the new ProjectOp object. 
Status Build(std::shared_ptr *); private: @@ -107,6 +107,10 @@ class ProjectOp : public PipelineOp { // @return - Status of the node visit. Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "ProjectOp"; } + private: std::vector columns_to_project_; std::vector projected_column_indices_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.h b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.h index 2bd4875fda..eaca20ccc8 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.h @@ -67,7 +67,7 @@ class RenameOp : public PipelineOp { } // The builder "build" method creates the ZipOp dataset Operator. - // @return shared_ptr to the new StorageOp object + // @return shared_ptr to the new RenameOp object Status Build(std::shared_ptr *); private: @@ -116,6 +116,10 @@ class RenameOp : public PipelineOp { // @return - Status of the node visit. Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "RenameOp"; } + protected: // Rename core functionality Status RenameColumns(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h b/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h index 718bc1922b..bba85c3bb5 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h @@ -42,7 +42,7 @@ class RepeatOp : public PipelineOp { ~Builder() = default; // The builder "build" method creates the final object. - // @return shared_ptr to the new StorageOp object + // @return shared_ptr to the new RepeatOp object Status Build(std::shared_ptr *); private: @@ -124,6 +124,10 @@ class RepeatOp : public PipelineOp { // @return - Status of the node visit. 
Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "RepeatOp"; } + private: int32_t max_repeats_; // The number of repeats that the user requested int32_t repeat_count_; // A counter for the current number of executed repeats diff --git a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.h b/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.h index baabad758c..14b1e4511e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.h @@ -101,7 +101,7 @@ class ShuffleOp : public PipelineOp { } // The builder "build" method creates the final object. - // @return shared_ptr to the new StorageOp object + // @return shared_ptr to the new ShuffleOp object Status Build(std::shared_ptr *); private: @@ -161,6 +161,10 @@ class ShuffleOp : public PipelineOp { // @return - Status of the node visit. Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "ShuffleOp"; } + private: // Private function to add a new row to the shuffle buffer. // @return Status - The error code return diff --git a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h index 40db770642..4cb658b2a7 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h @@ -37,7 +37,7 @@ class SkipOp : public PipelineOp { ~Builder() = default; // The builder "build" method creates the final object. - // @return shared_ptr to the new StorageOp object + // @return shared_ptr to the new SkipOp object Status Build(std::shared_ptr *); private: @@ -80,6 +80,10 @@ class SkipOp : public PipelineOp { // @return - Status of the node visit. 
Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "SkipOp"; } + private: int32_t max_skips_; // The number of skips that the user requested int32_t skip_count_; // A counter for the current number of executed skips diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt index a1d0b22f15..b78ddcd87b 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt @@ -5,17 +5,15 @@ add_library(engine-datasetops-source OBJECT generator_op.cc io_block.cc mindrecord_op.cc - storage_client.cc - storage_op.cc - tf_buffer.cc - tf_client.cc tf_reader_op.cc image_folder_op.cc mnist_op.cc voc_op.cc + coco_op.cc manifest_op.cc cifar_op.cc random_data_op.cc celeba_op.cc text_file_op.cc + clue_op.cc ) \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc index 8f8c57b012..4b32201d6d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc @@ -26,7 +26,7 @@ namespace mindspore { namespace dataset { -CelebAOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr), builder_num_samples_(0) { +CelebAOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { std::shared_ptr cfg = GlobalContext::config_manager(); builder_num_workers_ = cfg->num_parallel_workers(); builder_rows_per_buffer_ = cfg->rows_per_buffer(); @@ -38,7 +38,9 @@ Status CelebAOp::Builder::Build(std::shared_ptr *op) { MS_LOG(DEBUG) << "Celeba dataset type is " << builder_dataset_type_.c_str() << "."; RETURN_IF_NOT_OK(SanityCheck()); if (builder_sampler_ == nullptr) { - builder_sampler_ = std::make_shared(); + const 
int64_t num_samples = 0; + const int64_t start_index = 0; + builder_sampler_ = std::make_shared(start_index, num_samples); } builder_schema_ = std::make_unique(); @@ -47,10 +49,9 @@ Status CelebAOp::Builder::Build(std::shared_ptr *op) { // label is like this:0 1 0 0 1...... RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("attr", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); - *op = - std::make_shared(builder_num_workers_, builder_rows_per_buffer_, builder_dir_, builder_op_connector_size_, - builder_decode_, builder_dataset_type_, builder_extensions_, std::move(builder_schema_), - std::move(builder_sampler_), builder_num_samples_); + *op = std::make_shared(builder_num_workers_, builder_rows_per_buffer_, builder_dir_, + builder_op_connector_size_, builder_decode_, builder_dataset_type_, + builder_extensions_, std::move(builder_schema_), std::move(builder_sampler_)); if (*op == nullptr) { return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "CelebAOp is null"); } @@ -68,7 +69,7 @@ Status CelebAOp::Builder::SanityCheck() { CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size, bool decode, const std::string &dataset_type, const std::set &exts, - std::unique_ptr schema, std::shared_ptr sampler, int64_t num_samples) + std::unique_ptr schema, std::shared_ptr sampler) : ParallelOp(num_workers, queue_size), rows_per_buffer_(rows_per_buffer), folder_path_(dir), @@ -77,8 +78,6 @@ CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::stri data_schema_(std::move(schema)), sampler_(std::move(sampler)), num_rows_in_attr_file_(0), - num_rows_exact_(0), - num_samples_(num_samples), dataset_type_(dataset_type) { // Set the column name map (base class field) for (int32_t index = 0; index < data_schema_->NumColumns(); index++) { @@ -202,13 +201,6 @@ Status CelebAOp::ParseImageAttrInfo() { RETURN_IF_NOT_OK(attr_info_queue_->PopFront(&image_infos)); while (!image_infos.empty() 
&& needMoreData) { for (uint32_t index = 0; index < image_infos.size(); index++) { - if (num_samples_ != 0 && image_labels_vec_.size() >= num_samples_) { - MS_LOG(WARNING) << "Image number(" << image_labels_vec_.size() << " is more than" - << " rows num eval attr file(" << num_rows_in_attr_file_ << ") or num samples(" << num_samples_ - << ")."; - needMoreData = false; - break; - } std::string image_info = image_infos[index]; std::vector split = Split(image_info); std::pair> image_labels; @@ -239,14 +231,13 @@ Status CelebAOp::ParseImageAttrInfo() { RETURN_IF_NOT_OK(attr_info_queue_->PopFront(&image_infos)); } - num_rows_exact_ = image_labels_vec_.size(); - num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_exact_) ? num_rows_exact_ : num_samples_; - if (num_rows_exact_ == 0) { + num_rows_ = image_labels_vec_.size(); + if (num_rows_ == 0) { RETURN_STATUS_UNEXPECTED( "There is no valid data matching the dataset API CelebADataset.Please check file path or dataset API " "validation first."); } - MS_LOG(DEBUG) << "Celeba dataset rows number is " << num_rows_exact_ << "."; + MS_LOG(DEBUG) << "Celeba dataset rows number is " << num_rows_ << "."; return Status::OK(); } @@ -268,33 +259,11 @@ std::vector CelebAOp::Split(const std::string &line) { return split; } -// Derived from RandomAccessOp -Status CelebAOp::GetNumSamples(int64_t *num) const { - if (num == nullptr || num_samples_ == 0) { - RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API CelebADataset.Please check file path or dataset API " - "validation first."); - } - (*num) = num_samples_; - return Status::OK(); -} - -Status CelebAOp::GetNumRowsInDataset(int64_t *num) const { - if (num == nullptr || num_rows_exact_ == 0) { - RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API CelebADataset.Please check file path or dataset API " - "validation first."); - } - - *num = num_rows_exact_; - return Status::OK(); -} - // Main logic, Register Queue with TaskGroup, 
launch all threads and do the functor's work Status CelebAOp::operator()() { RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); std::unique_ptr data_buffer; - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&data_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&data_buffer)); RETURN_IF_NOT_OK(AddIOBlock(&data_buffer)); return Status::OK(); } @@ -310,9 +279,8 @@ Status CelebAOp::AddIOBlock(std::unique_ptr *data_buffer) { RETURN_IF_NOT_OK((*data_buffer)->PopRow(&sample_row)); std::shared_ptr sample_ids = sample_row[0]; for (auto itr = sample_ids->begin(); itr != sample_ids->end(); ++itr) { - if ((*itr) >= num_rows_exact_) { - MS_LOG(WARNING) << "Sample Id (" << *itr << ") is out of bounds, skipping. Max id is " << num_rows_exact_ - << "."; + if ((*itr) >= num_rows_) { + MS_LOG(WARNING) << "Sample Id (" << *itr << ") is out of bounds, skipping. Max id is " << num_rows_ << "."; continue; } keys.push_back(*itr); @@ -323,7 +291,7 @@ Status CelebAOp::AddIOBlock(std::unique_ptr *data_buffer) { keys.clear(); } } - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(data_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(data_buffer)); } if (!keys.empty()) { @@ -345,7 +313,7 @@ Status CelebAOp::AddIOBlock(std::unique_ptr *data_buffer) { io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(data_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(data_buffer)); } } } @@ -381,7 +349,7 @@ Status CelebAOp::LoadBuffer(const std::vector &keys, std::unique_ptr deq = std::make_unique(); for (const auto &key : keys) { TensorRow row; - RETURN_IF_NOT_OK(LoadTensorRow(image_labels_vec_[key], &row)); + RETURN_IF_NOT_OK(LoadTensorRow(key, image_labels_vec_[key], &row)); deq->push_back(std::move(row)); } @@ -389,25 +357,14 @@ Status CelebAOp::LoadBuffer(const std::vector &keys, std::unique_ptr> &image_label, 
TensorRow *row) { +Status CelebAOp::LoadTensorRow(row_id_type row_id, const std::pair> &image_label, + TensorRow *row) { std::shared_ptr image; std::shared_ptr label; Path path(folder_path_); Path image_path = path / image_label.first; - std::ifstream handle(image_path.toString(), std::ios::binary | std::ios::in); - if (handle.fail()) { - std::string err_msg = "Fail to open file: " + image_path.toString(); - return Status(StatusCode::kFileNotExist, __LINE__, __FILE__, err_msg); - } - - (void)handle.seekg(0, std::ios::end); - int64_t num_elements = handle.tellg(); - (void)handle.seekg(0, std::ios::beg); - RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), - TensorShape(std::vector(1, num_elements)), - data_schema_->column(0).type())); - (void)handle.read(reinterpret_cast(image->GetMutableBuffer()), num_elements); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, image_path.toString())); if (decode_ == true) { Status rc = Decode(image, &image); if (rc.IsError()) { @@ -430,7 +387,7 @@ Status CelebAOp::LoadTensorRow(const std::pair } label->Squeeze(); - (*row) = {std::move(image), std::move(label)}; + (*row) = TensorRow(row_id, {std::move(image), std::move(label)}); return Status::OK(); } @@ -446,13 +403,13 @@ void CelebAOp::Print(std::ostream &out, bool show_all) const { // Call the super class for displaying any common detailed info ParallelOp::Print(out, show_all); // Then show any custom derived-internal stuff - out << "\nNumber of rows:" << num_rows_exact_ << "\nceleba dir: " << folder_path_ << "\n\n"; + out << "\nNumber of rows:" << num_rows_ << "\nceleba dir: " << folder_path_ << "\n\n"; } } // Reset Sampler and wakeup Master thread (functor) Status CelebAOp::Reset() { - RETURN_IF_NOT_OK(sampler_->Reset()); + RETURN_IF_NOT_OK(sampler_->ResetSampler()); wp_.Set(); // wake up master thread after reset is done return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.h 
b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.h index e0055441ef..f4b5d040ca 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.h @@ -108,14 +108,6 @@ class CelebAOp : public ParallelOp, RandomAccessOp { return *this; } - // Setter method - // @param int64_t num_samples - // @return Builder setter method returns reference to the builder. - Builder &SetNumSamples(int64_t num_samples) { - builder_num_samples_ = num_samples; - return *this; - } - // Setter method // @param const std::string dataset_type: type to be read // @return Builder setter method returns reference to the builder. @@ -141,7 +133,6 @@ class CelebAOp : public ParallelOp, RandomAccessOp { std::set builder_extensions_; std::shared_ptr builder_sampler_; std::unique_ptr builder_schema_; - int64_t builder_num_samples_; std::string builder_dataset_type_; }; @@ -153,7 +144,7 @@ class CelebAOp : public ParallelOp, RandomAccessOp { // @param std::unique_ptr sampler - sampler tells CelebAOp what to read CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size, bool decode, const std::string &dataset_type, const std::set &exts, std::unique_ptr schema, - std::shared_ptr sampler, int64_t num_samples); + std::shared_ptr sampler); ~CelebAOp() override = default; @@ -163,16 +154,6 @@ class CelebAOp : public ParallelOp, RandomAccessOp { // @return Status - The error code return Status operator()() override; - // Method derived from RandomAccess Op, enable Sampler to get numRows - // @param int64_t num - to return numRows - // @return Status - The error code return - Status GetNumSamples(int64_t *num) const override; - - // Method derived from RandomAccess Op, enable Sampler to get numRows - // @param int64_t num - to return numRows - // @return Status - The error code return - Status GetNumRowsInDataset(int64_t *num) const override; - // Worker thread pulls a number of 
IOBlock from IOBlock Queue, make a buffer and push it to Connector // @param int32_t worker_id - id of each worker // @return Status - The error code return @@ -188,6 +169,10 @@ class CelebAOp : public ParallelOp, RandomAccessOp { // @return Status - The error code return Status AddIOBlock(std::unique_ptr *data_buffer); + // Op name getter + // @return Name of the current Op + std::string Name() const { return "CelebAOp"; } + private: // Called first when function is called // @return @@ -212,10 +197,12 @@ class CelebAOp : public ParallelOp, RandomAccessOp { Status LoadBuffer(const std::vector &keys, std::unique_ptr *db); // Load a tensor row according to a pair + // @param row_id_type row_id - id for this tensor row // @param std::pair - > // @param TensorRow row - image & label read into this tensor row // @return Status - The error code return - Status LoadTensorRow(const std::pair> &image_label, TensorRow *row); + Status LoadTensorRow(row_id_type row_id, const std::pair> &image_label, + TensorRow *row); // Check if need read according to dataset type // @return bool - if need read @@ -233,11 +220,9 @@ class CelebAOp : public ParallelOp, RandomAccessOp { std::shared_ptr sampler_; std::unique_ptr>> attr_info_queue_; int64_t num_rows_in_attr_file_; // rows number specified in attr file - int64_t num_rows_exact_; // exact rows number,maybe is less than rows_num_in_attr_file_ QueueList> io_block_queues_; WaitPost wp_; std::vector>> image_labels_vec_; - int64_t num_samples_; std::string dataset_type_; std::ifstream partition_file_; }; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc index d0a17b56f9..ad87e394eb 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc @@ -35,7 +35,7 @@ constexpr uint32_t kCifarImageChannel = 3; constexpr uint32_t kCifarBlockImageNum = 5; constexpr uint32_t 
kCifarImageSize = kCifarImageHeight * kCifarImageWidth * kCifarImageChannel; -CifarOp::Builder::Builder() : num_samples_(0), sampler_(nullptr) { +CifarOp::Builder::Builder() : sampler_(nullptr) { std::shared_ptr cfg = GlobalContext::config_manager(); num_workers_ = cfg->num_parallel_workers(); rows_per_buffer_ = cfg->rows_per_buffer(); @@ -46,7 +46,9 @@ CifarOp::Builder::Builder() : num_samples_(0), sampler_(nullptr) { Status CifarOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); if (sampler_ == nullptr) { - sampler_ = std::make_shared(); + const int64_t num_samples = 0; + const int64_t start_index = 0; + sampler_ = std::make_shared(start_index, num_samples); } schema_ = std::make_unique(); TensorShape scalar = TensorShape::CreateScalar(); @@ -62,7 +64,7 @@ Status CifarOp::Builder::Build(std::shared_ptr *ptr) { ColDescriptor("fine_label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &another_scalar))); } - *ptr = std::make_shared(cifar_type_, num_workers_, rows_per_buffer_, dir_, op_connect_size_, num_samples_, + *ptr = std::make_shared(cifar_type_, num_workers_, rows_per_buffer_, dir_, op_connect_size_, std::move(schema_), std::move(sampler_)); return Status::OK(); } @@ -76,16 +78,13 @@ Status CifarOp::Builder::SanityCheck() { } CifarOp::CifarOp(CifarType type, int32_t num_works, int32_t rows_per_buf, const std::string &file_dir, - int32_t queue_size, int64_t num_samples, std::unique_ptr data_schema, - std::shared_ptr sampler) + int32_t queue_size, std::unique_ptr data_schema, std::shared_ptr sampler) : ParallelOp(num_works, queue_size), cifar_type_(type), rows_per_buffer_(rows_per_buf), folder_path_(file_dir), - num_samples_(num_samples), data_schema_(std::move(data_schema)), sampler_(std::move(sampler)), - num_rows_(0), row_cnt_(0), buf_cnt_(0) { // set the column name map (base class field) @@ -101,7 +100,7 @@ CifarOp::CifarOp(CifarType type, int32_t num_works, int32_t rows_per_buf, const Status CifarOp::operator()() { 
RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); std::unique_ptr sampler_buffer; - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); while (true) { // each iterator is 1 epoch std::vector keys; keys.reserve(rows_per_buffer_); @@ -112,15 +111,14 @@ Status CifarOp::operator()() { for (auto itr = sample_ids->begin(); itr != sample_ids->end(); itr++) { keys.push_back(*itr); row_cnt_++; - if ((*itr) >= num_rows_) continue; // index out of bound, skipping - if (row_cnt_ >= num_samples_) break; // enough row read, break for loop + if ((*itr) >= num_rows_) continue; // index out of bound, skipping if (row_cnt_ % rows_per_buffer_ == 0) { RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); keys.clear(); } } - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); } if (keys.empty() == false) { RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( @@ -141,7 +139,7 @@ Status CifarOp::operator()() { io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); } } } @@ -197,7 +195,7 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) { std::shared_ptr fine_label; std::shared_ptr ori_image = cifar_image_label_pairs_[index].first; std::shared_ptr copy_image = - std::make_shared(ori_image->shape(), ori_image->type(), ori_image->GetMutableBuffer()); + std::make_shared(ori_image->shape(), ori_image->type(), ori_image->GetBuffer()); RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(), data_schema_->column(1).type(), 
reinterpret_cast(&cifar_image_label_pairs_[index].second[0]))); @@ -205,9 +203,9 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) { RETURN_IF_NOT_OK(Tensor::CreateTensor( &fine_label, data_schema_->column(2).tensorImpl(), data_schema_->column(2).shape(), data_schema_->column(2).type(), reinterpret_cast(&cifar_image_label_pairs_[index].second[1]))); - (*trow) = {copy_image, std::move(label), std::move(fine_label)}; + (*trow) = TensorRow(index, {copy_image, std::move(label), std::move(fine_label)}); } else { - (*trow) = {copy_image, std::move(label)}; + (*trow) = TensorRow(index, {copy_image, std::move(label)}); } return Status::OK(); @@ -243,7 +241,7 @@ void CifarOp::Print(std::ostream &out, bool show_all) const { // Reset Sampler and wakeup Master thread (functor) Status CifarOp::Reset() { - RETURN_IF_NOT_OK(sampler_->Reset()); + RETURN_IF_NOT_OK(sampler_->ResetSampler()); row_cnt_ = 0; wp_.Set(); // wake up master thread after reset is done return Status::OK(); @@ -255,30 +253,6 @@ Status CifarOp::InitSampler() { return Status::OK(); } -// Derived from RandomAccessOp -Status CifarOp::GetNumSamples(int64_t *num) const { - if (num == nullptr || num_rows_ == 0) { - std::string api = cifar_type_ == kCifar10 ? "Cifar10Dataset" : "Cifar100Dataset"; - std::string err_msg = "There is no valid data matching the dataset API " + api + - ".Please check file path or dataset API validation first."; - RETURN_STATUS_UNEXPECTED(err_msg); - } - (*num) = num_samples_; - return Status::OK(); -} - -// Derived from RandomAccessOp -Status CifarOp::GetNumRowsInDataset(int64_t *num) const { - if (num == nullptr || num_rows_ == 0) { - std::string api = cifar_type_ == kCifar10 ? 
"Cifar10Dataset" : "Cifar100Dataset"; - std::string err_msg = "There is no valid data matching the dataset API " + api + - ".Please check file path or dataset API validation first."; - RETURN_STATUS_UNEXPECTED(err_msg); - } - (*num) = num_rows_; - return Status::OK(); -} - Status CifarOp::ReadCifarBlockDataAsync() { TaskManager::FindMe()->Post(); RETURN_IF_NOT_OK(GetCifarFiles()); @@ -392,11 +366,15 @@ Status CifarOp::ParseCifarData() { RETURN_IF_NOT_OK(Tensor::CreateTensor(&image_tensor, data_schema_->column(0).tensorImpl(), TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}), data_schema_->column(0).type())); - for (int ch = 0; ch < kCifarImageChannel; ++ch) { - for (int pix = 0; pix < kCifarImageHeight * kCifarImageWidth; ++pix) { - (image_tensor->GetMutableBuffer())[pix * kCifarImageChannel + ch] = block[cur_block_index++]; + auto itr = image_tensor->begin(); + uint32_t total_pix = kCifarImageHeight * kCifarImageWidth; + for (int pix = 0; pix < total_pix; ++pix) { + for (int ch = 0; ch < kCifarImageChannel; ++ch) { + *itr = block[cur_block_index + ch * total_pix + pix]; + itr++; } } + cur_block_index += total_pix * kCifarImageChannel; cifar_image_label_pairs_.emplace_back(std::make_pair(image_tensor, labels)); } RETURN_IF_NOT_OK(cifar_raw_data_block_->PopFront(&block)); @@ -404,7 +382,6 @@ Status CifarOp::ParseCifarData() { } cifar_image_label_pairs_.shrink_to_fit(); num_rows_ = cifar_image_label_pairs_.size(); - num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_) ? num_rows_ : num_samples_; if (num_rows_ == 0) { std::string api = cifar_type_ == kCifar10 ? 
"Cifar10Dataset" : "Cifar100Dataset"; std::string err_msg = "There is no valid data matching the dataset API " + api + @@ -432,11 +409,11 @@ Status CifarOp::GetClassIds(std::map> *cls_ids) co return Status::OK(); } -Status CifarOp::CountTotalRows(const std::string &dir, int64_t numSamples, bool isCIFAR10, int64_t *count) { +Status CifarOp::CountTotalRows(const std::string &dir, bool isCIFAR10, int64_t *count) { // the logic of counting the number of samples is copied from ReadCifar100Block() and ReadCifar10Block() std::shared_ptr op; *count = 0; - RETURN_IF_NOT_OK(Builder().SetCifarDir(dir).SetNumSamples(numSamples).SetCifarType(isCIFAR10).Build(&op)); + RETURN_IF_NOT_OK(Builder().SetCifarDir(dir).SetCifarType(isCIFAR10).Build(&op)); RETURN_IF_NOT_OK(op->GetCifarFiles()); if (op->cifar_type_ == kCifar10) { constexpr int64_t num_cifar10_records = 10000; @@ -448,7 +425,6 @@ Status CifarOp::CountTotalRows(const std::string &dir, int64_t numSamples, bool } *count = *count + num_cifar10_records; } - *count = *count < numSamples || numSamples == 0 ? *count : numSamples; return Status::OK(); } else { int64_t num_cifar100_records = 0; @@ -458,7 +434,11 @@ Status CifarOp::CountTotalRows(const std::string &dir, int64_t numSamples, bool std::string err_msg = "Invalid cifar100 file path"; RETURN_STATUS_UNEXPECTED(err_msg); } - std::string file_name(file.substr(pos + 1)); + std::string file_name; + if (file.size() > 0) + file_name = file.substr(pos + 1); + else + RETURN_STATUS_UNEXPECTED("Invalid string length!"); if (file_name.find("test") != std::string::npos) { num_cifar100_records = 10000; } else if (file_name.find("train") != std::string::npos) { @@ -470,7 +450,7 @@ Status CifarOp::CountTotalRows(const std::string &dir, int64_t numSamples, bool RETURN_STATUS_UNEXPECTED(err_msg); } } - *count = num_cifar100_records < numSamples || numSamples == 0 ? 
num_cifar100_records : numSamples; + *count = num_cifar100_records; return Status::OK(); } } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.h index ade0998c30..62c20ac401 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.h @@ -73,14 +73,6 @@ class CifarOp : public ParallelOp, public RandomAccessOp { return *this; } - // Setter method - // @param uint64_t num_samples - // @return Builder setter method returns reference to the builder. - Builder &SetNumSamples(uint64_t num_samples) { - num_samples_ = num_samples; - return *this; - } - // Setter method // @param std::shared_ptr sampler // @return Builder setter method returns reference to the builder. @@ -121,7 +113,6 @@ class CifarOp : public ParallelOp, public RandomAccessOp { private: std::string dir_; int32_t num_workers_; - uint64_t num_samples_; int32_t rows_per_buffer_; int32_t op_connect_size_; std::shared_ptr sampler_; @@ -137,7 +128,7 @@ class CifarOp : public ParallelOp, public RandomAccessOp { // @param uint32_t - queueSize - connector queue size // @param std::unique_ptr sampler - sampler tells ImageFolderOp what to read CifarOp(CifarType type, int32_t num_works, int32_t rows_per_buf, const std::string &file_dir, int32_t queue_size, - int64_t num_samples, std::unique_ptr data_schema, std::shared_ptr sampler); + std::unique_ptr data_schema, std::shared_ptr sampler); // Destructor. 
~CifarOp() = default; @@ -152,16 +143,6 @@ class CifarOp : public ParallelOp, public RandomAccessOp { // @return Status - The error code return Status operator()() override; - // Method derived from RandomAccess Op, enable Sampler to get numRows - // @param uint64_t num - to return numRows - // @return Status - The error code return - Status GetNumSamples(int64_t *num) const override; - - // Method derived from RandomAccess Op, enable Sampler to get total numRows in dataset - // @param uint64_t num - to return numRows - // @return Status - The error code return - Status GetNumRowsInDataset(int64_t *num) const override; - // A print method typically used for debugging // @param out // @param show_all @@ -169,11 +150,14 @@ class CifarOp : public ParallelOp, public RandomAccessOp { // Function to count the number of samples in the CIFAR dataset // @param dir path to the CIFAR directory - // @param numSamples maximum number of samples requested // @param isCIFAR10 true if CIFAR10 and false if CIFAR100 - // @param count output arg that will hold the minimum of the actual dataset size and numSamples + // @param count output arg that will hold the actual dataset size // @return - static Status CountTotalRows(const std::string &dir, int64_t numSamples, bool isCIFAR10, int64_t *count); + static Status CountTotalRows(const std::string &dir, bool isCIFAR10, int64_t *count); + + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "CifarOp"; } private: // Initialize Sampler, calls sampler->Init() within @@ -227,10 +211,8 @@ class CifarOp : public ParallelOp, public RandomAccessOp { CifarType cifar_type_; int32_t rows_per_buffer_; std::string folder_path_; - int64_t num_samples_; std::unique_ptr data_schema_; std::shared_ptr sampler_; - int64_t num_rows_; int64_t row_cnt_; int64_t buf_cnt_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc new file 
mode 100644 index 0000000000..e92ca0d26c --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc @@ -0,0 +1,553 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "dataset/engine/datasetops/source/clue_op.h" + +#include +#include +#include +#include +#include + +#include "dataset/core/config_manager.h" +#include "dataset/util/task_manager.h" +#include "dataset/engine/jagged_connector.h" +#include "dataset/engine/execution_tree.h" +#include "dataset/engine/datasetops/source/io_block.h" +#include "dataset/util/random.h" + +namespace mindspore { +namespace dataset { +ClueOp::Builder::Builder() + : builder_device_id_(0), + builder_num_devices_(1), + builder_num_samples_(0), + builder_shuffle_files_(false), + builder_shuffle_global_(false) { + std::shared_ptr config_manager = GlobalContext::config_manager(); + builder_num_workers_ = config_manager->num_parallel_workers(); + builder_op_connector_size_ = config_manager->op_connector_size(); + builder_rows_per_buffer_ = config_manager->rows_per_buffer(); + builder_worker_connector_size_ = config_manager->worker_connector_size(); +} + +Status ClueOp::Builder::ValidateInputs() const { + std::string err; + err += builder_num_workers_ <= 0 ? "Number of parallel workers should be greater than 0\n" : ""; + err += (builder_device_id_ >= builder_num_devices_ || builder_num_devices_ < 1) ? 
"Wrong sharding configs\n" : ""; + return err.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err); +} + +Status ClueOp::Builder::Build(std::shared_ptr *op) { + RETURN_IF_NOT_OK(ValidateInputs()); + + // Throttle the number of workers if we have more workers than files! + if (static_cast(builder_num_workers_) > builder_clue_files_list_.size()) { + builder_num_workers_ = builder_clue_files_list_.size(); + MS_LOG(WARNING) << "ClueOp operator parallelism reduced to " << builder_num_workers_ << " workers."; + } + + ColKeyMap ck_map; + for (auto &p : builder_cols_to_keyword_) { + ck_map.insert({p.first, split(p.second, '/')}); + } + + std::shared_ptr clue_op = std::make_shared( + builder_num_workers_, builder_rows_per_buffer_, builder_num_samples_, builder_worker_connector_size_, ck_map, + builder_clue_files_list_, builder_op_connector_size_, builder_shuffle_files_, builder_shuffle_global_, + builder_num_devices_, builder_device_id_); + RETURN_IF_NOT_OK(clue_op->Init()); + *op = std::move(clue_op); + + return Status::OK(); +} + +std::vector ClueOp::Builder::split(const std::string &s, char delim) { + std::vector res; + std::stringstream ss(s); + std::string item; + + while (getline(ss, item, delim)) { + res.push_back(item); + } + return res; +} + +ClueOp::ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size, + ColKeyMap cols_to_keyword, std::vector clue_files_list, int32_t op_connector_size, + bool shuffle_files, bool shuffle_global, int32_t num_device, int32_t device_id) + : ParallelOp(num_workers, op_connector_size), + rows_per_buffer_(rows_per_buffer), + num_rows_per_shard_(0), + all_num_rows_(0), + num_samples_(num_samples), + filename_index_(std::make_unique()), + clue_files_list_(std::move(clue_files_list)), + load_jagged_connector_(true), + cols_to_keyword_(cols_to_keyword), + shuffle_files_(shuffle_files), + shuffle_global_(shuffle_global), + finished_reading_dataset_(false), + 
num_devices_(num_device), + device_id_(device_id), + load_io_block_queue_(true) { + worker_connector_size_ = worker_connector_size; +} + +Status ClueOp::Init() { + RETURN_IF_NOT_OK(filename_index_->insert(clue_files_list_)); + + int32_t safe_queue_size = static_cast(std::ceil(clue_files_list_.size() / num_workers_) + 1); + io_block_queues_.Init(num_workers_, safe_queue_size); + + // Set the column name mapping (base class field) + int count = 0; + for (auto &p : cols_to_keyword_) { + column_name_id_map_[p.first] = count; + count++; + } + + RETURN_IF_NOT_OK(ParallelOp::CreateWorkerConnector(worker_connector_size_)); + jagged_buffer_connector_ = std::make_unique(num_workers_, 1, worker_connector_size_); + + return Status::OK(); +} + +Status ClueOp::Reset() { + load_jagged_connector_ = true; + load_io_block_queue_ = true; + + RETURN_IF_NOT_OK(ParallelOp::Reset()); + NotifyToFillIOBlockQueue(); + return Status::OK(); +} + +Status ClueOp::LoadTensor(const std::string &line, std::unique_ptr *tensor_table, int64_t row) { + TensorRow tRow(1, nullptr); + (*tensor_table)->push_back(std::move(tRow)); + + std::shared_ptr tensor; + RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {line}, TensorShape::CreateScalar())); + (**tensor_table)[row][0] = std::move(tensor); + return Status::OK(); +} + +Status ClueOp::GetValue(const nlohmann::json &js, std::vector key_chain, std::shared_ptr *t) { + nlohmann::json cursor = js; + for (int i = 0; i < key_chain.size(); i++) { + if (cursor.find(key_chain[i]) != cursor.end()) { + cursor = cursor[key_chain[i]]; + } else { + RETURN_STATUS_UNEXPECTED("Failed to find key: " + key_chain[i]); + } + } + std::string final_str = key_chain.back(); + switch (cursor.type()) { + case nlohmann::detail::value_t::string: + RETURN_IF_NOT_OK(Tensor::CreateTensor(t, {cursor.get()}, TensorShape::CreateScalar())); + break; + + case nlohmann::detail::value_t::number_integer: + RETURN_IF_NOT_OK( + Tensor::CreateTensor(t, TensorImpl::kFlexible, 
TensorShape::CreateScalar(), DataType(DataType::DE_INT32))); + (*t)->SetItemAt({0}, cursor.get()); + break; + case nlohmann::detail::value_t::number_unsigned: + RETURN_IF_NOT_OK( + Tensor::CreateTensor(t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_INT32))); + (*t)->SetItemAt({0}, cursor.get()); + break; + case nlohmann::detail::value_t::number_float: + RETURN_IF_NOT_OK( + Tensor::CreateTensor(t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32))); + (*t)->SetItemAt({0}, cursor.get()); + break; + case nlohmann::detail::value_t::array: + RETURN_IF_NOT_OK(Tensor::CreateTensor(t, {cursor.get>()}, TensorShape::CreateScalar())); + break; + default: + break; + } + return Status::OK(); +} + +Status ClueOp::LoadFile(const std::string &file, const int64_t start_offset, const int64_t end_offset, + const int32_t worker_id) { + std::ifstream handle(file); + if (!handle.is_open()) { + RETURN_STATUS_UNEXPECTED("Failed to open file " + file); + } + + int64_t rows_each_buffer = 0; + int64_t rows_total = 0; + std::string line; + std::unique_ptr cur_buffer = std::make_unique(0, DataBuffer::BufferFlags::kDeBFlagNone); + std::unique_ptr tensor_table = std::make_unique(); + + while (getline(handle, line)) { + if (line.empty()) { + continue; + } + // If read to the end offset of this file, break. + if (rows_total >= end_offset) { + break; + } + // Skip line before start offset. 
+ if (rows_total < start_offset) { + rows_total++; + continue; + } + + try { + nlohmann::json js = nlohmann::json::parse(line); + int cols_count = cols_to_keyword_.size(); + TensorRow tRow(cols_count, nullptr); + tensor_table->push_back(std::move(tRow)); + + int cout = 0; + for (auto &p : cols_to_keyword_) { + std::shared_ptr tensor; + RETURN_IF_NOT_OK(GetValue(js, p.second, &tensor)); + (*tensor_table)[rows_each_buffer][cout] = std::move(tensor); + cout++; + } + } catch (const std::exception &err) { + // Catch any exception and convert to Status return code + RETURN_STATUS_UNEXPECTED("Failed to load json file"); + } + + // RETURN_IF_NOT_OK(LoadTensor(line, &tensor_table, rows_each_buffer)); + rows_each_buffer++; + rows_total++; + if (rows_each_buffer == rows_per_buffer_) { + cur_buffer->set_tensor_table(std::move(tensor_table)); + RETURN_IF_NOT_OK(jagged_buffer_connector_->Add(worker_id, std::move(cur_buffer))); + + cur_buffer = std::make_unique(0, DataBuffer::BufferFlags::kDeBFlagNone); + tensor_table = std::make_unique(); + rows_each_buffer = 0; + } + } + + if (rows_each_buffer > 0) { + cur_buffer->set_tensor_table(std::move(tensor_table)); + RETURN_IF_NOT_OK(jagged_buffer_connector_->Add(worker_id, std::move(cur_buffer))); + } + return Status::OK(); +} + +Status ClueOp::operator()() { + RETURN_IF_NOT_OK(CalculateNumRowsPerShard()); + + // launch one thread, responsible for filling IoBlockQueue + RETURN_IF_NOT_OK(tree_->LaunchWorkers(1, std::bind(&ClueOp::WaitToFillIOBlockQueue, this))); + + RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&ClueOp::WorkerEntry, this, std::placeholders::_1))); + + // must be called after launching workers. 
+ TaskManager::FindMe()->Post(); + RETURN_IF_NOT_OK(io_block_queue_wait_post_.Register(tree_->AllTasks())); + NotifyToFillIOBlockQueue(); + + while (!finished_reading_dataset_) { + int64_t buffer_id = 0; + int32_t workers_done = 0; + int64_t rows_read = 0; + load_io_block_queue_ = true; + + while (workers_done < num_workers_) { + std::unique_ptr buffer; + RETURN_IF_NOT_OK(jagged_buffer_connector_->Pop(0, &buffer)); + if (buffer->eoe()) { + workers_done++; + } else if (num_samples_ == 0 || rows_read < num_samples_) { + if ((num_samples_ > 0) && (rows_read + buffer->NumRows() > num_samples_)) { + int64_t rowsToRemove = buffer->NumRows() - (num_samples_ - rows_read); + RETURN_IF_NOT_OK(buffer->SliceOff(rowsToRemove)); + } + rows_read += buffer->NumRows(); + buffer->set_id(buffer_id++); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(buffer))); + } else { + // end of epoch + load_jagged_connector_ = false; + load_io_block_queue_ = false; + } + } + + std::unique_ptr eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); + + if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { + finished_reading_dataset_ = true; + NotifyToFillIOBlockQueue(); + } else { + jagged_buffer_connector_->DoReset(); + buffer_id = 0; + } + } + std::unique_ptr eof_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eof_buffer))); + + RETURN_IF_NOT_OK(PostEndOfData()); + return Status::OK(); +} + +Status ClueOp::WorkerEntry(int32_t worker_id) { + TaskManager::FindMe()->Post(); + std::unique_ptr io_block; + RETURN_IF_NOT_OK(PopIoBlockQueue(worker_id, &io_block)); + while (!io_block->eof()) { + if (!io_block->eoe()) { + if (load_jagged_connector_) { + std::string filename; + RETURN_IF_NOT_OK(io_block->GetFilename(&filename, *filename_index_)); + int64_t start_offset = io_block->GetStartOffset(); + int64_t end_offset = 
io_block->GetEndOffset(); + RETURN_IF_NOT_OK(LoadFile(filename, start_offset, end_offset, worker_id)); + } + } else { + std::unique_ptr eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); + RETURN_IF_NOT_OK(jagged_buffer_connector_->Add(worker_id, std::move(eoe_buffer))); + } + + RETURN_IF_NOT_OK(PopIoBlockQueue(worker_id, &io_block)); + } + return Status::OK(); +} + +// A print method typically used for debugging +void ClueOp::Print(std::ostream &out, bool show_all) const { + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nRows per buffer: " << rows_per_buffer_ << "\nSample count: " << num_samples_ + << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ + << "\nShuffle files: " << ((shuffle_files_) ? 
"yes" : "no") << "\nClue files list:\n"; + for (int i = 0; i < clue_files_list_.size(); ++i) { + out << " " << clue_files_list_[i]; + } + out << "\n\n"; + } +} + +// Pops an element from a queue in io_block_queues +Status ClueOp::PopIoBlockQueue(int32_t index, std::unique_ptr *out_block) { + RETURN_IF_NOT_OK(io_block_queues_[index]->PopFront(out_block)); + + return Status::OK(); +} + +// Pushes an element to a queue in io_block_queues +Status ClueOp::PushIoBlockQueue(int32_t index, std::unique_ptr &&io_block) { + RETURN_IF_NOT_OK(io_block_queues_[index]->Add(std::move(io_block))); + + return Status::OK(); +} + +static void ShuffleKeys(std::vector *i_keys, uint32_t seed) { + std::mt19937 rng(seed); + std::shuffle(i_keys->begin(), i_keys->end(), rng); +} + +Status ClueOp::WaitToFillIOBlockQueue() { + // must be called first if called by worker spanwed by taskgroup + TaskManager::FindMe()->Post(); + + std::vector i_keys; + if (shuffle_files_) { + for (auto it = filename_index_->begin(); it != filename_index_->end(); ++it) { + i_keys.push_back(it.key()); + } + } + uint32_t seed = 0; + while (true) { + RETURN_IF_NOT_OK(io_block_queue_wait_post_.Wait()); + io_block_queue_wait_post_.Clear(); + + if (finished_reading_dataset_) { + break; + } + + if (shuffle_files_) { + ShuffleKeys(&i_keys, num_devices_ == 1 ? 
GetSeed() : ++seed); + } + RETURN_IF_NOT_OK(FillIOBlockQueue(i_keys)); + } + return Status::OK(); +} + +Status ClueOp::FillIOBlockQueue(const std::vector &i_keys) { + int32_t queue_index = 0; + int64_t pre_count = 0; + int64_t start_offset = 0; + int64_t end_offset = 0; + bool finish = false; + while (!finish) { + std::vector> file_index; + if (!i_keys.empty()) { + for (auto it = i_keys.begin(); it != i_keys.end(); ++it) { + { + if (!load_io_block_queue_) { + break; + } + } + file_index.emplace_back(std::pair((*filename_index_)[*it], *it)); + } + } else { + for (auto it = filename_index_->begin(); it != filename_index_->end(); ++it) { + { + if (!load_io_block_queue_) { + break; + } + } + file_index.emplace_back(std::pair(it.value(), it.key())); + } + } + for (auto file_info : file_index) { + if (NeedPushFileToBlockQueue(file_info.first, &start_offset, &end_offset, pre_count)) { + auto ioBlock = + std::make_unique(file_info.second, start_offset, end_offset, IOBlock::kDeIoBlockNone); + RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); + queue_index = (queue_index + 1) % num_workers_; + } + + pre_count += filename_numrows_[file_info.first]; + } + + if (pre_count < (static_cast(device_id_) + 1) * num_rows_per_shard_) { + finish = false; + } else { + finish = true; + } + } + + RETURN_IF_NOT_OK(PostEndOfEpoch(queue_index)); + return Status::OK(); +} + +void ClueOp::NotifyToFillIOBlockQueue() { io_block_queue_wait_post_.Set(); } + +bool ClueOp::NeedPushFileToBlockQueue(const std::string &file_name, int64_t *start_offset, int64_t *end_offset, + const int64_t &pre_count) { + *start_offset = 0; + *end_offset = 0; + bool push = false; + int64_t start_index = device_id_ * num_rows_per_shard_; + if (device_id_ + 1 < 0) { + MS_LOG(ERROR) << "Device id is invalid"; + return false; + } + + int64_t end_index = (static_cast(device_id_) + 1) * num_rows_per_shard_; + if (pre_count <= start_index && pre_count + filename_numrows_[file_name] > start_index) { + 
*start_offset = start_index - pre_count; + push = true; + if (pre_count < end_index && pre_count + filename_numrows_[file_name] >= end_index) { + *end_offset = end_index - pre_count; + } else { + *end_offset = filename_numrows_[file_name]; + } + } + + if (pre_count >= start_index && pre_count < end_index) { + *start_offset = 0; + push = true; + if (pre_count + filename_numrows_[file_name] >= end_index) { + *end_offset = end_index - pre_count; + } else { + *end_offset = filename_numrows_[file_name]; + } + } + + return push; +} + +// Pushes a control indicator onto the IOBlockQueue for each worker to consume. When the worker +// pops this control indicator, it will wait until the next epoch starts and then resume execution. +Status ClueOp::PostEndOfEpoch(int32_t queue_index) { + for (int i = 0; i < num_workers_; ++i) { + std::unique_ptr eoe = std::make_unique(IOBlock::kDeIoBlockFlagEoe); + RETURN_IF_NOT_OK(PushIoBlockQueue((queue_index + i) % num_workers_, std::move(eoe))); + } + + return Status::OK(); +} + +Status ClueOp::CalculateNumRowsPerShard() { + for (auto it = filename_index_->begin(); it != filename_index_->end(); ++it) { + int64_t count = CountTotalRows(it.value()); + filename_numrows_[it.value()] = count; + all_num_rows_ += count; + } + if (all_num_rows_ == 0) { + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API CLUEDataset. 
Please check file path or dataset API " + "validation first."); + } + + num_rows_per_shard_ = static_cast(std::ceil(all_num_rows_ * 1.0 / num_devices_)); + MS_LOG(DEBUG) << "Number rows per shard is " << num_rows_per_shard_; + return Status::OK(); +} + +int64_t ClueOp::CountTotalRows(const std::string &file) { + std::ifstream handle(file); + if (!handle.is_open()) { + MS_LOG(ERROR) << "Failed to open file: " << file; + return 0; + } + + std::string line; + int64_t count = 0; + while (getline(handle, line)) { + if (!line.empty()) { + count++; + } + } + + return count; +} + +// Pushes a control indicator onto the IOBlockQueue for each worker to consume. +// When the worker pops this control indicator, it will shut itself down gracefully. +Status ClueOp::PostEndOfData() { + for (int i = 0; i < num_workers_; ++i) { + std::unique_ptr eof = std::make_unique(IOBlock::kDeIoBlockFlagEof); + RETURN_IF_NOT_OK(PushIoBlockQueue(i, std::move(eof))); + } + + return Status::OK(); +} + +Status ClueOp::CountAllFileRows(const std::vector &files, int64_t *count) { + std::shared_ptr op; + *count = 0; + RETURN_IF_NOT_OK(Builder().SetClueFilesList(files).Build(&op)); + for (auto file : files) { + *count += op->CountTotalRows(file); + } + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h new file mode 100644 index 0000000000..b6a797d3f4 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h @@ -0,0 +1,287 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_ENGINE_DATASETOPS_SOURCE_CLUE_OP_H_ +#define DATASET_ENGINE_DATASETOPS_SOURCE_CLUE_OP_H_ + +#include +#include +#include +#include +#include +#include + +#include "dataset/util/auto_index.h" +#include "dataset/engine/datasetops/parallel_op.h" +#include "dataset/engine/datasetops/source/io_block.h" + +namespace mindspore { +namespace dataset { +using StringIndex = AutoIndexObj; +using ColKeyMap = std::map>; + +class JaggedConnector; + +class ClueOp : public ParallelOp { + public: + class Builder { + public: + // Builder constructor. Creates the builder object. + // @note No default args + // @return This is a constructor. + Builder(); + + // Default destructor + ~Builder() = default; + + // Checks if the inputs of the builder is valid. + // @return Status - the error code returned. + Status ValidateInputs() const; + + // Create the final object. + // @param op - dataset op. + // @return - the error code return. + Status Build(std::shared_ptr *op); + + // Setter method. + // @return Builder - setter method returns reference to the builder. + Builder &SetNumWorkers(int32_t num_workers) { + builder_num_workers_ = num_workers; + return *this; + } + + // Setter method. + // @return Builder - setter method returns reference to the builder. + Builder &SetOpConnectorSize(int32_t op_connector_size) { + builder_op_connector_size_ = op_connector_size; + return *this; + } + + // Setter method. + // @return Builder - setter method returns reference to the builder. 
+ Builder &SetRowsPerBuffer(int64_t rows_per_buffer) { + builder_rows_per_buffer_ = rows_per_buffer; + return *this; + } + + // Setter method. + // @return Builder - setter method returns reference to the builder. + Builder &SetNumDevices(int64_t num_dev) { + builder_num_devices_ = num_dev; + return *this; + } + + // Setter method. + // @return Builder - setter method returns reference to the builder. + Builder &SetDeviceId(int64_t dev_id) { + builder_device_id_ = dev_id; + return *this; + } + + // Setter method. + // @return Builder - setter method returns reference to the builder. + Builder &SetClueFilesList(const std::vector &files_list) { + builder_clue_files_list_ = files_list; + return *this; + } + + // Setter method. + // @return Builder - setter method returns reference to the builder. + Builder &SetShuffleFiles(bool shuffle_files) { + builder_shuffle_files_ = shuffle_files; + return *this; + } + + // Setter method. + // @return Builder - setter method returns reference to the builder. + Builder &SetShuffleGlobal(bool shuffle_global) { + builder_shuffle_global_ = shuffle_global; + return *this; + } + + // Setter method. + // @return Builder - setter method returns reference to the builder. + Builder &SetNumSamples(int64_t num_samples) { + builder_num_samples_ = num_samples; + return *this; + } + + // Setter method. + // @return Builder - setter method returns reference to the builder. 
+ Builder &SetColsKeyMap(const std::map &cols_to_key) { + builder_cols_to_keyword_ = cols_to_key; + return *this; + } + + // Split string based on a character delimiter + // @return - the a string vector + std::vector split(const std::string &s, char delim); + + private: + int32_t builder_device_id_; + int32_t builder_num_devices_; + int32_t builder_num_workers_; + int32_t builder_op_connector_size_; + int64_t builder_rows_per_buffer_; + int64_t builder_num_samples_; + int32_t builder_worker_connector_size_; + std::vector builder_clue_files_list_; + bool builder_shuffle_files_; + bool builder_shuffle_global_; + std::map builder_cols_to_keyword_; + }; + + // Constructor of ClueOp + // @param shuffle_global - whether or not to shuffle the entire dataset. + ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size, + ColKeyMap cols_to_keyword, std::vector clue_files_list, int32_t op_connector_size, + bool shuffle_files, bool shuffle_global, int32_t num_devices, int32_t device_id); + + // Default destructor + ~ClueOp() = default; + + // A print method typically used for debugging + // @param out - The output stream to write output to + // @param show_all - A bool to control if you want to show all info or just a summary + void Print(std::ostream &out, bool show_all) const override; + + // Instantiates the internal queues and connectors + // @return Status - the error code returned + Status Init(); + + // Class functor operator () override. + // All dataset operators operate by launching a thread (see ExecutionTree). This class functor will + // provide the master loop that drives the logic for performing the work + // @return Status - the error code returned. + Status operator()() override; + + // Overrides base class reset method. Cleans up any state info from it's previous execution + // reinitializes itself so that it can be executed again, as if it was just created. + // @return Status - the error code returned. 
+ Status Reset() override; + + // Get total rows in files. + // @param files - all clue files. + // @param count - number of rows. + // @return Status - the error coed returned. + static Status CountAllFileRows(const std::vector &files, int64_t *count); + + // File names getter + // @return Vector of the input file names + std::vector FileNames() { return clue_files_list_; } + + // Global shuffle flag getter + // @return Bool - whether this Op requires global shuffle + bool RequireGlobalShuffle() { return shuffle_global_; } + + private: + // The entry point for when workers are launched. + // @param worker_id - the id of the worker that is executing this function. + // @return Status - the error code returned. + Status WorkerEntry(int32_t worker_id) override; + + // Parses a single row and puts the data into a tensor table. + // @param line - the content of the row. + // @param tensor_table - the tensor table to put the parsed data in. + // @param row - the id of the row filled in the tensor table. + // @return Status - the error code returned. + Status LoadTensor(const std::string &line, std::unique_ptr *tensor_table, int64_t row); + + // Reads a clue file and loads the data into multiple buffers. + // @param file - the file to read. + // @param start_offset - the start offset of file. + // @param end_offset - the end offset of file. + // @param worker_id - the id of the worker that is executing this function. + // @return Status - the error code returned. + Status LoadFile(const std::string &file, const int64_t start_offset, const int64_t end_offset, + const int32_t worker_id); + + // Pops an element from a queue in IOBlockQueue. + // @param index - the index of the queue to pop from. + // @param out_block - the popped element. + // @return Status - the error code returned. + Status PopIoBlockQueue(int32_t index, std::unique_ptr *out_block); + + // Pushes an element to a queue in IOBlockQueue. + // @param index - the index of the queue to push to. 
+ // @param io_block - the element to push onto the queue. + // @return Status - the error code returned. + Status PushIoBlockQueue(int32_t index, std::unique_ptr &&io_block); + + // Called asynchronously by another thread. Will wait until notified to fill the IOBlockQueue. + // @return Status - the error code returned. + Status WaitToFillIOBlockQueue(); + + // Fill the IOBlockQueue. + // @para i_keys - keys of file to fill to the IOBlockQueue + // @return Status - the error code returned. + Status FillIOBlockQueue(const std::vector &i_keys); + + // Notifies the thread which called FillIoBlockQueue to resume execution + void NotifyToFillIOBlockQueue(); + + // Select file and push it to the block queue. + // @param file_name - File name. + // @param start_file - If file contains the first sample of data. + // @param end_file - If file contains the end sample of data. + // @param pre_count - Total rows of previous files. + // @return Status - the error code returned. + bool NeedPushFileToBlockQueue(const std::string &file_name, int64_t *start_offset, int64_t *end_offset, + const int64_t &pre_count); + + // Pushes a control indicator onto the IOBlockQueue for each worker to consume. When the worker + // pops this control indicator, it will wait until the next epoch starts and then resume execution. + // @return Status - the error code returned. + Status PostEndOfEpoch(int32_t queue_index); + + // Calculate number of rows in each shard. + // @return Status - the error code returned. + Status CalculateNumRowsPerShard(); + + // Count number of rows in each file. + // @param filename - clue file name. + // @return int64_t - the total number of rows in file. + int64_t CountTotalRows(const std::string &file); + + // Pushes a control indicator onto the IOBlockQueue for each worker to consume. + // When the worker pops this control indicator, it will shut itself down gracefully. + // @return Status - the error code returned. 
+ Status PostEndOfData(); + + // @return Status - the error code returned. + Status GetValue(const nlohmann::json &js, std::vector key_chain, std::shared_ptr *t); + + int32_t device_id_; + bool shuffle_files_; + bool shuffle_global_; + bool finished_reading_dataset_; + int32_t num_devices_; + int64_t rows_per_buffer_; + bool load_io_block_queue_; + int64_t num_rows_per_shard_; + int64_t all_num_rows_; + int64_t num_samples_; + std::map filename_numrows_; + std::unique_ptr filename_index_; + std::vector clue_files_list_; + WaitPost io_block_queue_wait_post_; + std::unique_ptr jagged_buffer_connector_; + QueueList> io_block_queues_; + bool load_jagged_connector_; + ColKeyMap cols_to_keyword_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_ENGINE_DATASETOPS_SOURCE_CLUE_OP_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc new file mode 100644 index 0000000000..8d352bbd6c --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc @@ -0,0 +1,631 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "dataset/engine/datasetops/source/coco_op.h" + +#include +#include +#include +#include "common/utils.h" +#include "dataset/core/config_manager.h" +#include "dataset/core/tensor_shape.h" +#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "dataset/engine/db_connector.h" +#include "dataset/engine/execution_tree.h" + +namespace mindspore { +namespace dataset { +const char kColumnImage[] = "image"; +const char kJsonImages[] = "images"; +const char kJsonImagesFileName[] = "file_name"; +const char kJsonId[] = "id"; +const char kJsonAnnotations[] = "annotations"; +const char kJsonAnnoSegmentation[] = "segmentation"; +const char kJsonAnnoCounts[] = "counts"; +const char kJsonAnnoSegmentsInfo[] = "segments_info"; +const char kJsonAnnoIscrowd[] = "iscrowd"; +const char kJsonAnnoBbox[] = "bbox"; +const char kJsonAnnoArea[] = "area"; +const char kJsonAnnoImageId[] = "image_id"; +const char kJsonAnnoNumKeypoints[] = "num_keypoints"; +const char kJsonAnnoKeypoints[] = "keypoints"; +const char kJsonAnnoCategoryId[] = "category_id"; +const char kJsonCategories[] = "categories"; +const char kJsonCategoriesIsthing[] = "isthing"; +const char kJsonCategoriesName[] = "name"; +const float kDefaultPadValue = -1.0; +const unsigned int kPadValueZero = 0; + +CocoOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { + std::shared_ptr cfg = GlobalContext::config_manager(); + builder_num_workers_ = cfg->num_parallel_workers(); + builder_rows_per_buffer_ = cfg->rows_per_buffer(); + builder_op_connector_size_ = cfg->op_connector_size(); + builder_task_type_ = TaskType::Detection; +} + +Status CocoOp::Builder::Build(std::shared_ptr *ptr) { + RETURN_IF_NOT_OK(SanityCheck()); + if (builder_sampler_ == nullptr) { + const int64_t num_samples = 0; + const int64_t start_index = 0; + builder_sampler_ = std::make_shared(start_index, num_samples); + } + builder_schema_ = std::make_unique(); + RETURN_IF_NOT_OK(builder_schema_->AddColumn( 
+ ColDescriptor(std::string(kColumnImage), DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); + switch (builder_task_type_) { + case TaskType::Detection: + RETURN_IF_NOT_OK(builder_schema_->AddColumn( + ColDescriptor(std::string(kJsonAnnoBbox), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); + RETURN_IF_NOT_OK(builder_schema_->AddColumn( + ColDescriptor(std::string(kJsonAnnoCategoryId), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); + RETURN_IF_NOT_OK(builder_schema_->AddColumn( + ColDescriptor(std::string(kJsonAnnoIscrowd), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); + break; + case TaskType::Stuff: + RETURN_IF_NOT_OK(builder_schema_->AddColumn( + ColDescriptor(std::string(kJsonAnnoSegmentation), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); + RETURN_IF_NOT_OK(builder_schema_->AddColumn( + ColDescriptor(std::string(kJsonAnnoIscrowd), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); + break; + case TaskType::Keypoint: + RETURN_IF_NOT_OK(builder_schema_->AddColumn( + ColDescriptor(std::string(kJsonAnnoKeypoints), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); + RETURN_IF_NOT_OK(builder_schema_->AddColumn( + ColDescriptor(std::string(kJsonAnnoNumKeypoints), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); + break; + case TaskType::Panoptic: + RETURN_IF_NOT_OK(builder_schema_->AddColumn( + ColDescriptor(std::string(kJsonAnnoBbox), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); + RETURN_IF_NOT_OK(builder_schema_->AddColumn( + ColDescriptor(std::string(kJsonAnnoCategoryId), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); + RETURN_IF_NOT_OK(builder_schema_->AddColumn( + ColDescriptor(std::string(kJsonAnnoIscrowd), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); + RETURN_IF_NOT_OK(builder_schema_->AddColumn( + ColDescriptor(std::string(kJsonAnnoArea), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); + break; + default: + 
RETURN_STATUS_UNEXPECTED("Invalid task type"); + } + *ptr = std::make_shared(builder_task_type_, builder_dir_, builder_file_, builder_num_workers_, + builder_rows_per_buffer_, builder_op_connector_size_, builder_decode_, + std::move(builder_schema_), std::move(builder_sampler_)); + return Status::OK(); +} + +Status CocoOp::Builder::SanityCheck() { + Path dir(builder_dir_); + Path file(builder_file_); + std::string err_msg; + err_msg += dir.IsDirectory() == false ? "Coco image folder path is invalid or not set\n" : ""; + err_msg += file.Exists() == false ? "Coco annotation json path is invalid or not set\n" : ""; + err_msg += builder_num_workers_ <= 0 ? "Num of parallel workers is set to 0 or negative\n" : ""; + return err_msg.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); +} + +CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, + int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode, + std::unique_ptr data_schema, std::shared_ptr sampler) + : ParallelOp(num_workers, queue_size), + decode_(decode), + row_cnt_(0), + buf_cnt_(0), + task_type_(task_type), + image_folder_path_(image_folder_path), + annotation_path_(annotation_path), + rows_per_buffer_(rows_per_buffer), + sampler_(std::move(sampler)), + data_schema_(std::move(data_schema)) { + // Set the column name map (base class field) + for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { + column_name_id_map_[data_schema_->column(i).name()] = i; + } + io_block_queues_.Init(num_workers_, queue_size); +} + +Status CocoOp::TraverseSampleIds(const std::shared_ptr &sample_ids, std::vector *keys) { + for (auto itr = sample_ids->begin(); itr != sample_ids->end(); ++itr) { + if ((*itr) > num_rows_) continue; + keys->push_back(*itr); + row_cnt_++; + if (row_cnt_ % rows_per_buffer_ == 0) { + RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( + 
std::make_unique(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); + keys->clear(); + } + } + return Status::OK(); +} + +Status CocoOp::operator()() { + RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); + std::unique_ptr sampler_buffer; + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); + while (true) { + std::vector keys; + keys.reserve(rows_per_buffer_); + while (sampler_buffer->eoe() == false) { + std::shared_ptr sample_ids; + RETURN_IF_NOT_OK(sampler_buffer->GetTensor(&sample_ids, 0, 0)); + if (sample_ids->type() != DataType(DataType::DE_INT64)) { + RETURN_STATUS_UNEXPECTED("Sampler Tensor isn't int64"); + } + RETURN_IF_NOT_OK(TraverseSampleIds(sample_ids, &keys)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); + } + if (keys.empty() == false) { + RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( + std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)))); + } + if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { + std::unique_ptr eoe_block = std::make_unique(IOBlock::kDeIoBlockFlagEoe); + std::unique_ptr eof_block = std::make_unique(IOBlock::kDeIoBlockFlagEof); + RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); + RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); + for (int32_t i = 0; i < num_workers_; i++) { + RETURN_IF_NOT_OK( + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + } + return Status::OK(); + } else { + RETURN_IF_NOT_OK( + io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); + RETURN_IF_NOT_OK(wp_.Wait()); + wp_.Clear(); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); + } + } +} + +void CocoOp::Print(std::ostream &out, bool show_all) const { + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if 
(!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nNumber of rows: " << num_rows_ << "\nCOCO Directory: " << image_folder_path_ << "\n\n"; + } +} + +Status CocoOp::Reset() { + RETURN_IF_NOT_OK(sampler_->ResetSampler()); + row_cnt_ = 0; + wp_.Set(); + return Status::OK(); +} + +Status CocoOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, TensorRow *trow) { + std::shared_ptr image, coordinate; + auto itr = coordinate_map_.find(image_id); + if (itr == coordinate_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); + + std::string kImageFile = image_folder_path_ + image_id; + RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); + + auto bboxRow = itr->second; + std::vector bbox_row; + dsize_t bbox_row_num = static_cast(bboxRow.size()); + dsize_t bbox_column_num = 0; + for (auto bbox : bboxRow) { + if (static_cast(bbox.size()) > bbox_column_num) { + bbox_column_num = static_cast(bbox.size()); + } + } + + for (auto bbox : bboxRow) { + bbox_row.insert(bbox_row.end(), bbox.begin(), bbox.end()); + dsize_t pad_len = bbox_column_num - static_cast(bbox.size()); + if (pad_len > 0) { + for (dsize_t i = 0; i < pad_len; i++) { + bbox_row.push_back(kDefaultPadValue); + } + } + } + + std::vector bbox_dim = {bbox_row_num, bbox_column_num}; + RETURN_IF_NOT_OK(Tensor::CreateTensor(&coordinate, data_schema_->column(1).tensorImpl(), TensorShape(bbox_dim), + data_schema_->column(1).type(), + reinterpret_cast(&bbox_row[0]))); + if (task_type_ == TaskType::Detection) { + RETURN_IF_NOT_OK(LoadDetectionTensorRow(row_id, image_id, image, coordinate, trow)); + } else if (task_type_ == TaskType::Stuff || 
task_type_ == TaskType::Keypoint) { + RETURN_IF_NOT_OK(LoadSimpleTensorRow(row_id, image_id, image, coordinate, trow)); + } else if (task_type_ == TaskType::Panoptic) { + RETURN_IF_NOT_OK(LoadMixTensorRow(row_id, image_id, image, coordinate, trow)); + } else { + RETURN_STATUS_UNEXPECTED("Invalid task type."); + } + + return Status::OK(); +} + +// When task is Detection, user can get data with four columns: +// column ["image"] with datatype=uint8 +// column ["bbox"] with datatype=float32 +// column ["category_id"] with datatype=uint32 +// column ["iscrowd"] with datatype=uint32 +// By the way, column ["iscrowd"] is used for some testcases, like fasterRcnn. +// If "iscrowd" is not existed, user will get default value 0. +Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr image, + std::shared_ptr coordinate, TensorRow *trow) { + std::shared_ptr category_id, iscrowd; + std::vector category_id_row; + std::vector iscrowd_row; + auto itr_item = simple_item_map_.find(image_id); + if (itr_item == simple_item_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); + + std::vector annotation = itr_item->second; + for (int64_t i = 0; i < annotation.size(); i++) { + if (i % 2 == 0) { + category_id_row.push_back(annotation[i]); + } else if (i % 2 == 1) { + iscrowd_row.push_back(annotation[i]); + } + } + RETURN_IF_NOT_OK(Tensor::CreateTensor( + &category_id, data_schema_->column(2).tensorImpl(), TensorShape({static_cast(category_id_row.size()), 1}), + data_schema_->column(2).type(), reinterpret_cast(&category_id_row[0]))); + + RETURN_IF_NOT_OK(Tensor::CreateTensor( + &iscrowd, data_schema_->column(3).tensorImpl(), TensorShape({static_cast(iscrowd_row.size()), 1}), + data_schema_->column(3).type(), reinterpret_cast(&iscrowd_row[0]))); + (*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd)}); + return Status::OK(); +} + +// When task is 
"Stuff"/"Keypoint", user can get data with three columns: +// column ["image"] with datatype=uint8 +// column ["segmentation"]/["keypoints"] with datatype=float32 +// column ["iscrowd"]/["num_keypoints"] with datatype=uint32 +Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr image, + std::shared_ptr coordinate, TensorRow *trow) { + std::shared_ptr item; + std::vector item_queue; + auto itr_item = simple_item_map_.find(image_id); + if (itr_item == simple_item_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); + + item_queue = itr_item->second; + std::vector bbox_dim = {static_cast(item_queue.size()), 1}; + RETURN_IF_NOT_OK(Tensor::CreateTensor(&item, data_schema_->column(2).tensorImpl(), TensorShape(bbox_dim), + data_schema_->column(2).type(), + reinterpret_cast(&item_queue[0]))); + (*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(item)}); + return Status::OK(); +} + +// When task is "Panoptic", user can get data with five columns: +// column ["image"] with datatype=uint8 +// column ["bbox"] with datatype=float32 +// column ["category_id"] with datatype=uint32 +// column ["iscrowd"] with datatype=uint32 +// column ["area"] with datattype=uint32 +Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr image, + std::shared_ptr coordinate, TensorRow *trow) { + std::shared_ptr category_id, iscrowd, area; + std::vector category_id_row; + std::vector iscrowd_row; + std::vector area_row; + auto itr_item = simple_item_map_.find(image_id); + if (itr_item == simple_item_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); + + std::vector annotation = itr_item->second; + for (int64_t i = 0; i < annotation.size(); i++) { + if (i % 3 == 0) { + category_id_row.push_back(annotation[i]); + } else if (i % 3 == 1) { + iscrowd_row.push_back(annotation[i]); + } else if (i % 3 == 2) { + 
area_row.push_back(annotation[i]); + } + } + + RETURN_IF_NOT_OK(Tensor::CreateTensor( + &category_id, data_schema_->column(2).tensorImpl(), TensorShape({static_cast(category_id_row.size()), 1}), + data_schema_->column(2).type(), reinterpret_cast(&category_id_row[0]))); + + RETURN_IF_NOT_OK(Tensor::CreateTensor( + &iscrowd, data_schema_->column(3).tensorImpl(), TensorShape({static_cast(iscrowd_row.size()), 1}), + data_schema_->column(3).type(), reinterpret_cast(&iscrowd_row[0]))); + + RETURN_IF_NOT_OK(Tensor::CreateTensor( + &area, data_schema_->column(4).tensorImpl(), TensorShape({static_cast(area_row.size()), 1}), + data_schema_->column(4).type(), reinterpret_cast(&area_row[0]))); + (*trow) = TensorRow( + row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd), std::move(area)}); + return Status::OK(); +} + +Status CocoOp::LoadBuffer(const std::vector &keys, std::unique_ptr *db) { + std::unique_ptr deq = std::make_unique(); + TensorRow trow; + for (const int64_t &key : keys) { + RETURN_IF_NOT_OK(this->LoadTensorRow(key, image_ids_[key], &trow)); + deq->push_back(std::move(trow)); + } + (*db)->set_tensor_table(std::move(deq)); + return Status::OK(); +} + +Status CocoOp::WorkerEntry(int32_t worker_id) { + TaskManager::FindMe()->Post(); + int64_t buffer_id = worker_id; + std::unique_ptr io_block; + RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); + while (io_block != nullptr) { + if (io_block->eoe() == true) { + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); + buffer_id = worker_id; + } else if (io_block->eof() == true) { + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, (std::make_unique(0, DataBuffer::kDeBFlagEOF)))); + } else { + std::vector keys; + RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); + if (keys.empty() == true) return Status::OK(); + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); + RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); + 
RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); + buffer_id += num_workers_; + } + RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); + } + RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker"); +} + +template +Status CocoOp::SearchNodeInJson(nlohmann::json input_tree, std::string node_name, T *output_node) { + auto node = input_tree.find(node_name); + if (node == input_tree.end()) RETURN_STATUS_UNEXPECTED("Invalid node found in json : " + node_name); + (*output_node) = *node; + return Status::OK(); +} + +Status CocoOp::ParseAnnotationIds() { + std::ifstream in(annotation_path_); + nlohmann::json js; + in >> js; + + std::vector image_que; + nlohmann::json image_list; + RETURN_IF_NOT_OK(SearchNodeInJson(js, std::string(kJsonImages), &image_list)); + RETURN_IF_NOT_OK(ImageColumnLoad(image_list, &image_que)); + if (task_type_ == TaskType::Detection || task_type_ == TaskType::Panoptic) { + nlohmann::json node_categories; + RETURN_IF_NOT_OK(SearchNodeInJson(js, std::string(kJsonCategories), &node_categories)); + RETURN_IF_NOT_OK(CategoriesColumnLoad(node_categories)); + } + nlohmann::json annotations_list; + RETURN_IF_NOT_OK(SearchNodeInJson(js, std::string(kJsonAnnotations), &annotations_list)); + for (auto annotation : annotations_list) { + int32_t image_id = 0, id = 0; + std::string file_name; + RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonAnnoImageId), &image_id)); + auto itr_file = image_index_.find(image_id); + if (itr_file == image_index_.end()) + RETURN_STATUS_UNEXPECTED("Invalid image id of annotations : " + std::to_string(image_id)); + file_name = itr_file->second; + switch (task_type_) { + case TaskType::Detection: + RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonId), &id)); + RETURN_IF_NOT_OK(DetectionColumnLoad(annotation, file_name, id)); + break; + case TaskType::Stuff: + RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonId), &id)); + 
RETURN_IF_NOT_OK(StuffColumnLoad(annotation, file_name, id)); + break; + case TaskType::Keypoint: + RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonId), &id)); + RETURN_IF_NOT_OK(KeypointColumnLoad(annotation, file_name, id)); + break; + case TaskType::Panoptic: + RETURN_IF_NOT_OK(PanopticColumnLoad(annotation, file_name, image_id)); + break; + default: + RETURN_STATUS_UNEXPECTED("Invalid task type"); + } + } + for (auto img : image_que) { + if (coordinate_map_.find(img) != coordinate_map_.end()) image_ids_.push_back(img); + } + num_rows_ = image_ids_.size(); + return Status::OK(); +} + +Status CocoOp::ImageColumnLoad(nlohmann::json image_tree, std::vector *image_vec) { + if (image_tree.size() == 0) { + RETURN_STATUS_UNEXPECTED("No images found in " + annotation_path_); + } + for (auto img : image_tree) { + std::string file_name; + int32_t id = 0; + RETURN_IF_NOT_OK(SearchNodeInJson(img, std::string(kJsonImagesFileName), &file_name)); + RETURN_IF_NOT_OK(SearchNodeInJson(img, std::string(kJsonId), &id)); + + image_index_[id] = file_name; + image_vec->push_back(file_name); + } + return Status::OK(); +} + +Status CocoOp::DetectionColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, + const int32_t &unique_id) { + std::vector bbox; + nlohmann::json node_bbox; + uint32_t category_id = 0, iscrowd = 0; + RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoBbox), &node_bbox)); + RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoCategoryId), &category_id)); + auto search_category = category_set_.find(category_id); + if (search_category == category_set_.end()) + RETURN_STATUS_UNEXPECTED("category_id can't find in categories where category_id: " + std::to_string(category_id)); + auto node_iscrowd = annotation_tree.find(kJsonAnnoIscrowd); + if (node_iscrowd != annotation_tree.end()) iscrowd = *node_iscrowd; + bbox.insert(bbox.end(), node_bbox.begin(), node_bbox.end()); + 
coordinate_map_[image_file].push_back(bbox); + simple_item_map_[image_file].push_back(category_id); + simple_item_map_[image_file].push_back(iscrowd); + return Status::OK(); +} + +Status CocoOp::StuffColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, + const int32_t &unique_id) { + uint32_t iscrowd = 0; + std::vector bbox; + RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoIscrowd), &iscrowd)); + simple_item_map_[image_file].push_back(iscrowd); + nlohmann::json segmentation; + RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoSegmentation), &segmentation)); + if (iscrowd == 0) { + for (auto item : segmentation) { + if (bbox.size() > 0) bbox.clear(); + bbox.insert(bbox.end(), item.begin(), item.end()); + coordinate_map_[image_file].push_back(bbox); + } + } else if (iscrowd == 1) { + nlohmann::json segmentation_count; + RETURN_IF_NOT_OK(SearchNodeInJson(segmentation, std::string(kJsonAnnoCounts), &segmentation_count)); + bbox.insert(bbox.end(), segmentation_count.begin(), segmentation_count.end()); + coordinate_map_[image_file].push_back(bbox); + } + return Status::OK(); +} + +Status CocoOp::KeypointColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, + const int32_t &unique_id) { + auto itr_num_keypoint = annotation_tree.find(kJsonAnnoNumKeypoints); + if (itr_num_keypoint == annotation_tree.end()) + RETURN_STATUS_UNEXPECTED("No num_keypoint found in annotations where id: " + std::to_string(unique_id)); + simple_item_map_[image_file].push_back(*itr_num_keypoint); + auto itr_keypoint = annotation_tree.find(kJsonAnnoKeypoints); + if (itr_keypoint == annotation_tree.end()) + RETURN_STATUS_UNEXPECTED("No keypoint found in annotations where id: " + std::to_string(unique_id)); + coordinate_map_[image_file].push_back(*itr_keypoint); + return Status::OK(); +} + +Status CocoOp::PanopticColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, + const int32_t &image_id) { + 
auto itr_segments = annotation_tree.find(kJsonAnnoSegmentsInfo); + if (itr_segments == annotation_tree.end()) + RETURN_STATUS_UNEXPECTED("No segments_info found in annotations where image_id: " + std::to_string(image_id)); + for (auto info : *itr_segments) { + std::vector bbox; + uint32_t category_id = 0; + auto itr_bbox = info.find(kJsonAnnoBbox); + if (itr_bbox == info.end()) + RETURN_STATUS_UNEXPECTED("No bbox found in segments_info where image_id: " + std::to_string(image_id)); + bbox.insert(bbox.end(), itr_bbox->begin(), itr_bbox->end()); + coordinate_map_[image_file].push_back(bbox); + + RETURN_IF_NOT_OK(SearchNodeInJson(info, std::string(kJsonAnnoCategoryId), &category_id)); + auto search_category = category_set_.find(category_id); + if (search_category == category_set_.end()) + RETURN_STATUS_UNEXPECTED("category_id can't find in categories where category_id: " + + std::to_string(category_id)); + auto itr_iscrowd = info.find(kJsonAnnoIscrowd); + if (itr_iscrowd == info.end()) + RETURN_STATUS_UNEXPECTED("No iscrowd found in segments_info where image_id: " + std::to_string(image_id)); + auto itr_area = info.find(kJsonAnnoArea); + if (itr_area == info.end()) + RETURN_STATUS_UNEXPECTED("No area found in segments_info where image_id: " + std::to_string(image_id)); + simple_item_map_[image_file].push_back(category_id); + simple_item_map_[image_file].push_back(*itr_iscrowd); + simple_item_map_[image_file].push_back(*itr_area); + } + return Status::OK(); +} + +Status CocoOp::CategoriesColumnLoad(nlohmann::json categories_tree) { + if (categories_tree.size() == 0) RETURN_STATUS_UNEXPECTED("No categories found in " + annotation_path_); + for (auto category : categories_tree) { + int32_t id = 0; + std::string name; + std::vector label_info; + auto itr_id = category.find(kJsonId); + if (itr_id == category.end()) RETURN_STATUS_UNEXPECTED("No id found in categories of " + annotation_path_); + id = *itr_id; + label_info.push_back(id); + category_set_.insert(id); + + auto 
itr_name = category.find(kJsonCategoriesName); + if (itr_name == category.end()) + RETURN_STATUS_UNEXPECTED("No name found in categories where id: " + std::to_string(id)); + name = *itr_name; + + if (task_type_ == TaskType::Panoptic) { + auto itr_isthing = category.find(kJsonCategoriesIsthing); + if (itr_isthing == category.end()) + RETURN_STATUS_UNEXPECTED("No isthing found in categories of " + annotation_path_); + label_info.push_back(*itr_isthing); + } + label_index_.emplace_back(std::make_pair(name, label_info)); + } + return Status::OK(); +} + +Status CocoOp::InitSampler() { + RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this)); + return Status::OK(); +} + +Status CocoOp::LaunchThreadsAndInitOp() { + if (tree_ == nullptr) { + RETURN_STATUS_UNEXPECTED("tree_ not set"); + } + RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); + RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); + RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CocoOp::WorkerEntry, this, std::placeholders::_1))); + TaskManager::FindMe()->Post(); + RETURN_IF_NOT_OK(this->ParseAnnotationIds()); + RETURN_IF_NOT_OK(this->InitSampler()); + return Status::OK(); +} + +Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr *tensor) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, path)); + + if (decode_ == true) { + Status rc = Decode(*tensor, tensor); + if (rc.IsError()) { + RETURN_STATUS_UNEXPECTED("fail to decode file: " + path); + } + } + return Status::OK(); +} + +Status CocoOp::CountTotalRows(const std::string &dir, const std::string &file, const std::string &task, + int64_t *count) { + std::shared_ptr op; + RETURN_IF_NOT_OK(Builder().SetDir(dir).SetFile(file).SetTask(task).Build(&op)); + RETURN_IF_NOT_OK(op->ParseAnnotationIds()); + *count = static_cast(op->image_ids_.size()); + return Status::OK(); +} + +Status CocoOp::GetClassIndexing(const std::string &dir, const std::string &file, const std::string &task, + 
std::vector>> *output_class_indexing) { + std::shared_ptr op; + RETURN_IF_NOT_OK(Builder().SetDir(dir).SetFile(file).SetTask(task).Build(&op)); + RETURN_IF_NOT_OK(op->ParseAnnotationIds()); + *output_class_indexing = op->label_index_; + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.h new file mode 100644 index 0000000000..f5abeed72e --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.h @@ -0,0 +1,330 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_ENGINE_DATASETOPS_SOURCE_COCO_OP_H_ +#define DATASET_ENGINE_DATASETOPS_SOURCE_COC0_OP_H_ + +#include +#include +#include +#include +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/engine/data_buffer.h" +#include "dataset/engine/data_schema.h" +#include "dataset/engine/datasetops/parallel_op.h" +#include "dataset/engine/datasetops/source/io_block.h" +#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "dataset/kernels/image/image_utils.h" +#include "dataset/util/path.h" +#include "dataset/util/queue.h" +#include "dataset/util/status.h" +#include "dataset/util/wait_post.h" + +namespace mindspore { +namespace dataset { +// Forward declares +template +class Queue; + +using CoordinateRow = std::vector>; + +class CocoOp : public ParallelOp, public RandomAccessOp { + public: + enum class TaskType { Detection = 0, Stuff = 1, Panoptic = 2, Keypoint = 3 }; + + class Builder { + public: + // Constructor for Builder class of ImageFolderOp + // @param uint32_t numWrks - number of parallel workers + // @param dir - directory folder got ImageNetFolder + Builder(); + + // Destructor. + ~Builder() = default; + + // Setter method. + // @param const std::string & build_dir + // @return Builder setter method returns reference to the builder. + Builder &SetDir(const std::string &build_dir) { + builder_dir_ = build_dir; + return *this; + } + + // Setter method. + // @param const std::string & build_file + // @return Builder setter method returns reference to the builder. + Builder &SetFile(const std::string &build_file) { + builder_file_ = build_file; + return *this; + } + + // Setter method. + // @param const std::string & task_type + // @return Builder setter method returns reference to the builder. 
+ Builder &SetTask(const std::string &task_type) { + if (task_type == "Detection") { + builder_task_type_ = TaskType::Detection; + } else if (task_type == "Stuff") { + builder_task_type_ = TaskType::Stuff; + } else if (task_type == "Panoptic") { + builder_task_type_ = TaskType::Panoptic; + } else if (task_type == "Keypoint") { + builder_task_type_ = TaskType::Keypoint; + } + return *this; + } + + // Setter method. + // @param int32_t num_workers + // @return Builder setter method returns reference to the builder. + Builder &SetNumWorkers(int32_t num_workers) { + builder_num_workers_ = num_workers; + return *this; + } + + // Setter method. + // @param int32_t op_connector_size + // @return Builder setter method returns reference to the builder. + Builder &SetOpConnectorSize(int32_t op_connector_size) { + builder_op_connector_size_ = op_connector_size; + return *this; + } + + // Setter method. + // @param int32_t rows_per_buffer + // @return Builder setter method returns reference to the builder. + Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { + builder_rows_per_buffer_ = rows_per_buffer; + return *this; + } + + // Setter method. + // @param std::shared_ptr sampler + // @return Builder setter method returns reference to the builder. + Builder &SetSampler(std::shared_ptr sampler) { + builder_sampler_ = std::move(sampler); + return *this; + } + + // Setter method. + // @param bool do_decode + // @return Builder setter method returns reference to the builder. + Builder &SetDecode(bool do_decode) { + builder_decode_ = do_decode; + return *this; + } + + // Check validity of input args + // @return = The error code return + Status SanityCheck(); + + // The builder "Build" method creates the final object. 
+ // @param std::shared_ptr *op - DatasetOp + // @return - The error code return + Status Build(std::shared_ptr *op); + + private: + bool builder_decode_; + std::string builder_dir_; + std::string builder_file_; + TaskType builder_task_type_; + int32_t builder_num_workers_; + int32_t builder_op_connector_size_; + int32_t builder_rows_per_buffer_; + std::shared_ptr builder_sampler_; + std::unique_ptr builder_schema_; + }; + + // Constructor + // @param TaskType task_type - task type of Coco + // @param std::string image_folder_path - image folder path of Coco + // @param std::string annotation_path - annotation json path of Coco + // @param int32_t num_workers - number of workers reading images in parallel + // @param int32_t rows_per_buffer - number of images (rows) in each buffer + // @param int32_t queue_size - connector queue size + // @param int64_t num_samples - number of samples to read + // @param bool decode - whether to decode images + // @param std::unique_ptr data_schema - the schema of the Coco dataset + // @param std::shared_ptr sampler - sampler tells CocoOp what to read + CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, + int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode, + std::unique_ptr data_schema, std::shared_ptr sampler); + + // Destructor + ~CocoOp() = default; + + // Worker thread pulls a number of IOBlock from IOBlock Queue, make a buffer and push it to Connector + // @param int32_t workerId - id of each worker + // @return Status - The error code return + Status WorkerEntry(int32_t worker_id) override; + + // Main Loop of CocoOp + // Master thread: Fill IOBlockQueue, then goes to sleep + // Worker thread: pulls IOBlock from IOBlockQueue, work on it the put buffer to mOutConnector + // @return Status - The error code return + Status operator()() override; + + // A print method typically used for debugging + // @param out + // @param show_all + void 
Print(std::ostream &out, bool show_all) const override; + + // @param const std::string &dir - Coco image dir path + // @param const std::string &file - Coco json file path + // @param const std::string &task - task mode of Coco task + // @param int64_t numSamples - samples number of CocoDataset + // @param int64_t *count - output rows number of CocoDataset + static Status CountTotalRows(const std::string &dir, const std::string &task_type, const std::string &task_mode, + int64_t *count); + + // @param const std::string &dir - Coco image dir path + // @param const std::string &file - Coco json file path + // @param const std::string &task - task mode of Coco task + // @param int64_t numSamples - samples number of CocoDataset + // @param std::map *output_class_indexing - output class index of CocoDataset + static Status GetClassIndexing(const std::string &dir, const std::string &task_type, const std::string &task_mode, + std::vector>> *output_class_indexing); + + private: + // Initialize Sampler, calls sampler->Init() within + // @return Status - The error code return + Status InitSampler(); + + // Load a tensor row according to image id + // @param row_id_type row_id - id for this tensor row + // @param std::string image_id - image id + // @param TensorRow row - image & target read into this tensor row + // @return Status - The error code return + Status LoadTensorRow(row_id_type row_id, const std::string &image_id, TensorRow *row); + + // Load a tensor row with vector which a vector to a tensor + // @param row_id_type row_id - id for this tensor row + // @param const std::string &image_id - image is + // @param std::shared_ptr image - image tensor + // @param std::shared_ptr coordinate - coordinate tensor + // @param TensorRow row - image & target read into this tensor row + // @return Status - The error code return + Status LoadDetectionTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr image, + std::shared_ptr coordinate, TensorRow 
*trow); + + // Load a tensor row with vector which a vector to a tensor + // @param row_id_type row_id - id for this tensor row + // @param const std::string &image_id - image is + // @param std::shared_ptr image - image tensor + // @param std::shared_ptr coordinate - coordinate tensor + // @param TensorRow row - image & target read into this tensor row + // @return Status - The error code return + Status LoadSimpleTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr image, + std::shared_ptr coordinate, TensorRow *trow); + + // Load a tensor row with vector which a vector to multi-tensor + // @param row_id_type row_id - id for this tensor row + // @param const std::string &image_id - image is + // @param std::shared_ptr image - image tensor + // @param std::shared_ptr coordinate - coordinate tensor + // @param TensorRow row - image & target read into this tensor row + // @return Status - The error code return + Status LoadMixTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr image, + std::shared_ptr coordinate, TensorRow *trow); + + // @param const std::string &path - path to the image file + // @param const ColDescriptor &col - contains tensor implementation and datatype + // @param std::shared_ptr tensor - return + // @return Status - The error code return + Status ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr *tensor); + + // @param const std::vector &keys - keys in ioblock + // @param std::unique_ptr db + // @return Status - The error code return + Status LoadBuffer(const std::vector &keys, std::unique_ptr *db); + + // Read annotation from Annotation folder + // @return Status - The error code return + Status ParseAnnotationIds(); + + // @param const std::shared_ptr &sample_ids - sample ids of tensor + // @param std::vector *keys - image id + // @return Status - The error code return + Status TraverseSampleIds(const std::shared_ptr &sample_ids, std::vector *keys); + + // Called 
first when function is called + // @return Status - The error code return + Status LaunchThreadsAndInitOp(); + + // Reset dataset state + // @return Status - The error code return + Status Reset() override; + + // @param nlohmann::json image_tree - image tree of json + // @param std::vector *image_vec - image id list of json + // @return Status - The error code return + Status ImageColumnLoad(nlohmann::json image_tree, std::vector *image_vec); + + // @param nlohmann::json categories_tree - categories tree of json + // return Status - The error code return + Status CategoriesColumnLoad(nlohmann::json categories_tree); + + // @param nlohmann::json categories_tree - categories tree of json + // @param const std::string &image_file - current image name in annotation + // @param const int32_t &id - current unique id of annotation + // @return Status - The error code return + Status DetectionColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, const int32_t &id); + + // @param nlohmann::json categories_tree - categories tree of json + // @param const std::string &image_file - current image name in annotation + // @param const int32_t &id - current unique id of annotation + // @return Status - The error code return + Status StuffColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, const int32_t &id); + + // @param nlohmann::json categories_tree - categories tree of json + // @param const std::string &image_file - current image name in annotation + // @param const int32_t &id - current unique id of annotation + // @return Status - The error code return + Status KeypointColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, const int32_t &id); + + // @param nlohmann::json categories_tree - categories tree of json + // @param const std::string &image_file - current image name in annotation + // @param const int32_t &image_id - current unique id of annotation + // @return Status - The error code return + Status 
PanopticColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, const int32_t &image_id); + + template + Status SearchNodeInJson(nlohmann::json input_tree, std::string node_name, T *output_node); + + bool decode_; + int64_t row_cnt_; + int64_t buf_cnt_; + std::string image_folder_path_; + std::string annotation_path_; + TaskType task_type_; + int32_t rows_per_buffer_; + std::shared_ptr sampler_; + std::unique_ptr data_schema_; + + WaitPost wp_; + std::vector image_ids_; + std::map image_index_; + QueueList> io_block_queues_; + std::vector>> label_index_; + std::map coordinate_map_; + std::map> simple_item_map_; + std::set category_set_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_ENGINE_DATASETOPS_SOURCE_Coco_OP_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc index fe0763c8b7..d316524c04 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc @@ -25,7 +25,7 @@ namespace mindspore { namespace dataset { GeneratorOp::Builder::Builder() { - // Some arguments to the StorageOp constructor have a default argument that is taken + // Some arguments to the GeneratorOp constructor have a default argument that is taken // from the client config. build_buffer_size_ = kCfgRowsPerBuffer; build_op_connector_size_ = kCfgOpConnectorSize; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h index afeff29b86..82b395d6de 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h @@ -72,7 +72,7 @@ class GeneratorOp : public PipelineOp { } // The builder "build" method creates the final object. 
- // @return shared_ptr to the new StorageOp object + // @return shared_ptr to the new GeneratorOp object Status Build(std::shared_ptr *); private: @@ -127,6 +127,10 @@ class GeneratorOp : public PipelineOp { // @return - Status of the node visit. Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "GeneratorOp"; } + private: py::function generator_function_; std::vector column_names_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc index ce8fef7404..5cdfa8bb76 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc @@ -26,8 +26,7 @@ namespace mindspore { namespace dataset { -ImageFolderOp::Builder::Builder() - : builder_decode_(false), builder_recursive_(false), builder_num_samples_(0), builder_sampler_(nullptr) { +ImageFolderOp::Builder::Builder() : builder_decode_(false), builder_recursive_(false), builder_sampler_(nullptr) { std::shared_ptr cfg = GlobalContext::config_manager(); builder_num_workers_ = cfg->num_parallel_workers(); builder_rows_per_buffer_ = cfg->rows_per_buffer(); @@ -37,7 +36,9 @@ ImageFolderOp::Builder::Builder() Status ImageFolderOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); if (builder_sampler_ == nullptr) { - builder_sampler_ = std::make_shared(); + const int64_t num_samples = 0; // default num samples of 0 means to sample entire set of data + const int64_t start_index = 0; + builder_sampler_ = std::make_shared(start_index, num_samples); } builder_schema_ = std::make_unique(); TensorShape scalar = TensorShape::CreateScalar(); @@ -46,9 +47,9 @@ Status ImageFolderOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(builder_schema_->AddColumn( ColDescriptor("label", DataType(DataType::DE_INT32), 
TensorImpl::kFlexible, 0, &scalar))); *ptr = std::make_shared(builder_num_workers_, builder_rows_per_buffer_, builder_dir_, - builder_op_connector_size_, builder_num_samples_, builder_recursive_, - builder_decode_, builder_extensions_, builder_labels_to_read_, - std::move(builder_schema_), std::move(builder_sampler_)); + builder_op_connector_size_, builder_recursive_, builder_decode_, + builder_extensions_, builder_labels_to_read_, std::move(builder_schema_), + std::move(builder_sampler_)); return Status::OK(); } @@ -61,20 +62,18 @@ Status ImageFolderOp::Builder::SanityCheck() { } ImageFolderOp::ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, - int64_t num_samples, bool recursive, bool do_decode, const std::set &exts, + bool recursive, bool do_decode, const std::set &exts, const std::map &map, std::unique_ptr data_schema, std::shared_ptr sampler) : ParallelOp(num_wkrs, queue_size), rows_per_buffer_(rows_per_buffer), folder_path_(file_dir), - num_samples_(num_samples), recursive_(recursive), decode_(do_decode), extensions_(exts), class_index_(map), data_schema_(std::move(data_schema)), sampler_(std::move(sampler)), - num_rows_(0), row_cnt_(0), buf_cnt_(0), sampler_ind_(0), @@ -117,7 +116,11 @@ Status ImageFolderOp::PrescanMasterEntry(const std::string &filedir) { } image_label_pairs_.shrink_to_fit(); num_rows_ = image_label_pairs_.size(); - num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_) ? 
num_rows_ : num_samples_; + if (num_rows_ == 0) { + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API ImageFolderDatasetV2.Please check file path or dataset " + "API validation first."); + } // free memory of two queues used for pre-scan folder_name_queue_->Reset(); image_name_queue_->Reset(); @@ -128,7 +131,7 @@ Status ImageFolderOp::PrescanMasterEntry(const std::string &filedir) { Status ImageFolderOp::operator()() { RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); std::unique_ptr sampler_buffer; - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); while (true) { // each iterator is 1 epoch std::vector keys; keys.reserve(rows_per_buffer_); @@ -138,8 +141,7 @@ Status ImageFolderOp::operator()() { std::shared_ptr sample_ids = sample_row[0]; if (sample_ids->type() != DataType(DataType::DE_INT64)) RETURN_STATUS_UNEXPECTED("Sampler Tensor isn't int64"); for (auto itr = sample_ids->begin(); itr != sample_ids->end(); ++itr) { - if ((*itr) >= num_rows_) continue; // index out of bound, skipping - if (row_cnt_ >= num_samples_) break; // enough row read, break for loop + if ((*itr) >= num_rows_) continue; // index out of bound, skipping keys.push_back(*itr); row_cnt_++; if (row_cnt_ % rows_per_buffer_ == 0) { @@ -148,7 +150,7 @@ Status ImageFolderOp::operator()() { keys.clear(); } } - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); } if (keys.empty() == false) { RETURN_IF_NOT_OK( @@ -169,7 +171,7 @@ Status ImageFolderOp::operator()() { io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); } } } @@ -202,23 +204,13 @@ Status 
ImageFolderOp::WorkerEntry(int32_t worker_id) { } // Load 1 TensorRow (image,label) using 1 ImageLabelPair. 1 function call produces 1 TensorTow in a DataBuffer -Status ImageFolderOp::LoadTensorRow(ImageLabelPair pairPtr, TensorRow *trow) { +Status ImageFolderOp::LoadTensorRow(row_id_type row_id, ImageLabelPair pairPtr, TensorRow *trow) { std::shared_ptr image, label; RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(), data_schema_->column(1).type(), reinterpret_cast(&pairPtr->second))); - std::ifstream fs; - fs.open(folder_path_ + (pairPtr->first), std::ios::binary | std::ios::in); - if (fs.fail()) { - RETURN_STATUS_UNEXPECTED("Fail to open file: " + pairPtr->first); - } - int64_t num_elements = fs.seekg(0, std::ios::end).tellg(); - (void)fs.seekg(0, std::ios::beg); - RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), - TensorShape(std::vector(1, num_elements)), - data_schema_->column(0).type(), nullptr)); - (void)fs.read(reinterpret_cast(image->GetMutableBuffer()), num_elements); - fs.close(); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, folder_path_ + (pairPtr->first))); + if (decode_ == true) { Status rc = Decode(image, &image); if (rc.IsError()) { @@ -226,7 +218,7 @@ Status ImageFolderOp::LoadTensorRow(ImageLabelPair pairPtr, TensorRow *trow) { RETURN_STATUS_UNEXPECTED(err); } } - (*trow) = {std::move(image), std::move(label)}; + (*trow) = TensorRow(row_id, {std::move(image), std::move(label)}); return Status::OK(); } @@ -235,7 +227,7 @@ Status ImageFolderOp::LoadBuffer(const std::vector &keys, std::unique_p std::unique_ptr deq = std::make_unique(); TensorRow trow; for (const int64_t &key : keys) { - RETURN_IF_NOT_OK(this->LoadTensorRow(image_label_pairs_[key], &trow)); + RETURN_IF_NOT_OK(this->LoadTensorRow(key, image_label_pairs_[key], &trow)); deq->push_back(std::move(trow)); } (*db)->set_tensor_table(std::move(deq)); @@ -260,7 +252,7 @@ void 
ImageFolderOp::Print(std::ostream &out, bool show_all) const { // Reset Sampler and wakeup Master thread (functor) Status ImageFolderOp::Reset() { - RETURN_IF_NOT_OK(sampler_->Reset()); + RETURN_IF_NOT_OK(sampler_->ResetSampler()); row_cnt_ = 0; wp_.Set(); // wake up master thread after reset is done return Status::OK(); @@ -272,28 +264,6 @@ Status ImageFolderOp::InitSampler() { return Status::OK(); } -// Derived from RandomAccessOp -Status ImageFolderOp::GetNumSamples(int64_t *num) const { - if (num == nullptr || num_samples_ == 0) { - RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API ImageFolderDatasetV2.Please check file path or dataset API " - "validation first."); - } - (*num) = num_samples_; - return Status::OK(); -} - -// Derived from RandomAccessOp -Status ImageFolderOp::GetNumRowsInDataset(int64_t *num) const { - if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API ImageFolderDatasetV2.Please check file path or dataset API " - "validation first."); - } - (*num) = num_rows_; - return Status::OK(); -} - // Derived from RandomAccessOp Status ImageFolderOp::GetClassIds(std::map> *cls_ids) const { if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) { @@ -353,9 +323,7 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) { // if mRecursive == false, don't go into folder of folders Status ImageFolderOp::RecursiveWalkFolder(Path *dir) { std::shared_ptr dir_itr = Path::DirIterator::OpenDirectory(dir); - if (dir_itr == nullptr) { - RETURN_STATUS_UNEXPECTED("Error encountered when indexing files"); - } + RETURN_UNEXPECTED_IF_NULL(dir_itr); while (dir_itr->hasNext()) { Path subdir = dir_itr->next(); if (subdir.IsDirectory()) { @@ -389,9 +357,7 @@ Status ImageFolderOp::startAsyncWalk() { } Status ImageFolderOp::LaunchThreadsAndInitOp() { - if (tree_ == nullptr) { - RETURN_STATUS_UNEXPECTED("tree_ not set"); - } + RETURN_UNEXPECTED_IF_NULL(tree_); 
// Registers QueueList and individual Queues for interrupt services RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(folder_name_queue_->Register(tree_->AllTasks())); @@ -413,16 +379,14 @@ Status ImageFolderOp::LaunchThreadsAndInitOp() { return Status::OK(); } -Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const int64_t &num_samples, - const std::set &exts, int64_t *num_rows, int64_t *num_classes, - int64_t dev_id, int64_t num_dev) { +Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::set &exts, int64_t *num_rows, + int64_t *num_classes, int64_t dev_id, int64_t num_dev) { Path dir(path); std::string err_msg = ""; int64_t row_cnt = 0; err_msg += (dir.Exists() == false || dir.IsDirectory() == false) ? "unable to open dir " + path : ""; err_msg += (num_classes == nullptr || num_rows == nullptr) ? "num_class/num_rows is null\n" : ""; err_msg += (dev_id >= num_dev || num_dev <= 0) ? "invalid sharding config\n" : ""; - err_msg += num_samples < 0 ? "num_samples can't be negative! set it to 0 to use all samples\n" : ""; if (err_msg.empty() == false) { RETURN_STATUS_UNEXPECTED(err_msg); } @@ -441,10 +405,6 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const int64_t while (dir_itr->hasNext()) { if (exts.empty() || exts.find(subdir.Extension()) != exts.end()) { ++row_cnt; - if (row_cnt == num_samples * num_dev) { - (*num_rows) = (row_cnt / num_dev) + (row_cnt % num_dev == 0 ? 
0 : 1); - return Status::OK(); - } } } foldernames.pop(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h index 72d47224fb..e1d578e034 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h @@ -107,14 +107,6 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp { return *this; } - // Setter method - // @param int64_t num_samples - // @return Builder setter method returns reference to the builder. - Builder &SetNumSamples(int64_t num_samples) { - builder_num_samples_ = num_samples; - return *this; - } - // Setter method // @param std::shared_ptr sampler // @return Builder setter method returns reference to the builder. @@ -153,7 +145,6 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp { bool builder_recursive_; std::string builder_dir_; int32_t builder_num_workers_; - int64_t builder_num_samples_; int32_t builder_rows_per_buffer_; int32_t builder_op_connector_size_; std::set builder_extensions_; @@ -169,10 +160,9 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp { // @param int32_t queue_size - connector queue size // @param std::set exts - set of file extensions to read, if empty, read everything under the dir // @param td::unique_ptr sampler - sampler tells ImageFolderOp what to read - ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, - int64_t num_samples, bool recursive, bool do_decode, const std::set &exts, - const std::map &map, std::unique_ptr, - std::shared_ptr sampler); + ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool recursive, + bool do_decode, const std::set &exts, const std::map &map, + std::unique_ptr, std::shared_ptr sampler); // Destructor. 
~ImageFolderOp() = default; @@ -198,16 +188,6 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp { // @return Status - The error code return Status operator()() override; - // Method derived from RandomAccess Op, enable Sampler to get numRows - // @param int64_t num - to return numRows - // @return Status - The error code return - Status GetNumSamples(int64_t *num) const override; - - // Method derived from RandomAccess Op, enable Sampler to get total numRows in dataset - // @param int64_t num - to return numRows - // @return Status - The error code return - Status GetNumRowsInDataset(int64_t *num) const override; - // Method derived from RandomAccess Op, enable Sampler to get all ids for each class // @param (std::map> * map - key label, val all ids for this class // @return Status - The error code return @@ -218,12 +198,11 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp { // @param show_all void Print(std::ostream &out, bool show_all) const override; - // This function is a hack! It is to return the num_class and num_rows the old storageOp does. The result + // This function is a hack! It is to return the num_class and num_rows. The result // returned by this function may not be consistent with what image_folder_op is going to return // user this at your own risk! - static Status CountRowsAndClasses(const std::string &path, const int64_t &num_samples, - const std::set &exts, int64_t *num_rows, int64_t *num_classes, - int64_t dev_id = 0, int64_t num_dev = 1); + static Status CountRowsAndClasses(const std::string &path, const std::set &exts, int64_t *num_rows, + int64_t *num_classes, int64_t dev_id = 0, int64_t num_dev = 1); // Base-class override for NodePass visitor acceptor. // @param p - Pointer to the NodePass to be accepted. @@ -231,16 +210,21 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp { // @return - Status of the node visit. 
Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "ImageFolderOp"; } + private: // Initialize Sampler, calls sampler->Init() within // @return Status - The error code return Status InitSampler(); // Load a tensor row according to a pair + // @param row_id_type row_id - id for this tensor row // @param ImageLabelPair pair - // @param TensorRow row - image & label read into this tensor row // @return Status - The error code return - Status LoadTensorRow(ImageLabelPair pair, TensorRow *row); + Status LoadTensorRow(row_id_type row_id, ImageLabelPair pair, TensorRow *row); // @param const std::vector &keys - keys in ioblock // @param std::unique_ptr db @@ -266,14 +250,12 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp { int32_t rows_per_buffer_; std::string folder_path_; // directory of image folder - int64_t num_samples_; bool recursive_; bool decode_; std::set extensions_; // extensions allowed std::map class_index_; std::unique_ptr data_schema_; std::shared_ptr sampler_; - int64_t num_rows_; // total number of images in ImageFolder int64_t row_cnt_; int64_t buf_cnt_; int64_t sampler_ind_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/io_block.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/io_block.cc index 9f45e2179f..0963f1a67a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/io_block.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/io_block.cc @@ -72,8 +72,9 @@ Status FilenameBlock::GetFilename(std::string *out_filename, const AutoIndexObj< RETURN_IF_NOT_OK(IOBlock::GetKey(&fetched_key)); // Do an index lookup using that key to get the filename. 
- auto it = index.Search(fetched_key); - if (it != index.end()) { + auto r = index.Search(fetched_key); + if (r.second) { + auto &it = r.first; *out_filename = it.value(); } else { RETURN_STATUS_UNEXPECTED("Could not find filename from index"); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc index 5892b10701..0762f36d5a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc @@ -29,7 +29,7 @@ namespace mindspore { namespace dataset { -ManifestOp::Builder::Builder() : builder_sampler_(nullptr), builder_num_samples_(0), builder_decode_(false) { +ManifestOp::Builder::Builder() : builder_sampler_(nullptr), builder_decode_(false) { std::shared_ptr cfg = GlobalContext::config_manager(); builder_num_workers_ = cfg->num_parallel_workers(); builder_rows_per_buffer_ = cfg->rows_per_buffer(); @@ -39,16 +39,18 @@ ManifestOp::Builder::Builder() : builder_sampler_(nullptr), builder_num_samples_ Status ManifestOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); if (builder_sampler_ == nullptr) { - builder_sampler_ = std::make_shared(); + const int64_t num_samples = 0; + const int64_t start_index = 0; + builder_sampler_ = std::make_shared(start_index, num_samples); } builder_schema_ = std::make_unique(); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); - *ptr = std::make_shared( - builder_num_workers_, builder_rows_per_buffer_, builder_file_, builder_op_connector_size_, builder_num_samples_, - builder_decode_, builder_labels_to_read_, std::move(builder_schema_), std::move(builder_sampler_), builder_usage_); + *ptr = std::make_shared(builder_num_workers_, 
builder_rows_per_buffer_, builder_file_, + builder_op_connector_size_, builder_decode_, builder_labels_to_read_, + std::move(builder_schema_), std::move(builder_sampler_), builder_usage_); return Status::OK(); } @@ -59,9 +61,9 @@ Status ManifestOp::Builder::SanityCheck() { return err_msg.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } -ManifestOp::ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, - int64_t num_samples, bool decode, const std::map &class_index, - std::unique_ptr data_schema, std::shared_ptr sampler, std::string usage) +ManifestOp::ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode, + const std::map &class_index, std::unique_ptr data_schema, + std::shared_ptr sampler, std::string usage) : ParallelOp(num_works, queue_size), rows_per_buffer_(rows_per_buffer), io_block_pushed_(0), @@ -71,8 +73,6 @@ ManifestOp::ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string f file_(file), class_index_(class_index), sampler_(std::move(sampler)), - num_samples_(num_samples), - num_rows_(0), decode_(decode), usage_(usage), buf_cnt_(0) { @@ -88,7 +88,7 @@ ManifestOp::ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string f Status ManifestOp::operator()() { RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); std::unique_ptr sampler_buffer; - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); return AddIoBlock(&sampler_buffer); } @@ -101,8 +101,7 @@ Status ManifestOp::AddIoBlock(std::unique_ptr *sampler_buffer) { RETURN_IF_NOT_OK((*sampler_buffer)->PopRow(&sample_row)); std::shared_ptr sample_ids = sample_row[0]; for (auto itr = sample_ids->begin(); itr != sample_ids->end(); ++itr) { - if ((*itr) >= num_rows_) continue; // index out of bound, skipping - if (row_cnt_ >= num_samples_) break; // enough row read, break for loop + if ((*itr) >= 
num_rows_) continue; // index out of bound, skipping keys.push_back(*itr); row_cnt_++; if (row_cnt_ % rows_per_buffer_ == 0) { @@ -111,7 +110,7 @@ Status ManifestOp::AddIoBlock(std::unique_ptr *sampler_buffer) { keys.clear(); } } - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(sampler_buffer)); } if (keys.empty() == false) { RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( @@ -132,7 +131,7 @@ Status ManifestOp::AddIoBlock(std::unique_ptr *sampler_buffer) { io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(sampler_buffer)); } } } @@ -183,7 +182,8 @@ Status ManifestOp::WorkerEntry(int32_t worker_id) { } // Load 1 TensorRow (image,label) using 1 ImageLabelPair. 
1 function call produces 1 TensorTow in a DataBuffer -Status ManifestOp::LoadTensorRow(const std::pair> &data, TensorRow *trow) { +Status ManifestOp::LoadTensorRow(row_id_type row_id, const std::pair> &data, + TensorRow *trow) { std::shared_ptr image; std::shared_ptr label; std::vector label_index(data.second.size()); @@ -199,23 +199,7 @@ Status ManifestOp::LoadTensorRow(const std::paircolumn(1).type(), reinterpret_cast(&label_index[0]))); } - std::ifstream fs; - fs.open(data.first, std::ios::binary | std::ios::in); - if (!fs.is_open()) { - RETURN_STATUS_UNEXPECTED("Fail to open file: " + data.first); - } - - int64_t num_elements = fs.seekg(0, std::ios::end).tellg(); - (void)fs.seekg(0, std::ios::beg); - RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), - TensorShape(std::vector(1, num_elements)), - data_schema_->column(0).type(), nullptr)); - (void)fs.read(reinterpret_cast(image->GetMutableBuffer()), num_elements); - if (fs.fail()) { - fs.close(); - RETURN_STATUS_UNEXPECTED("Fail to read file: " + data.first); - } - fs.close(); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data.first)); if (decode_ == true) { Status rc = Decode(image, &image); if (rc.IsError()) { @@ -223,7 +207,7 @@ Status ManifestOp::LoadTensorRow(const std::pair &keys, std::unique_ptr< std::unique_ptr deq = std::make_unique(); for (const auto &key : keys) { TensorRow trow; - RETURN_IF_NOT_OK(LoadTensorRow(image_labelname_[static_cast(key)], &trow)); + RETURN_IF_NOT_OK(LoadTensorRow(key, image_labelname_[static_cast(key)], &trow)); deq->push_back(std::move(trow)); } (*db)->set_tensor_table(std::move(deq)); @@ -257,7 +241,7 @@ void ManifestOp::Print(std::ostream &out, bool show_all) const { // Reset Sampler and wakeup Master thread (functor) Status ManifestOp::Reset() { - RETURN_IF_NOT_OK(sampler_->Reset()); + RETURN_IF_NOT_OK(sampler_->ResetSampler()); row_cnt_ = 0; wp_.Set(); // wake up master thread after reset is done return Status::OK(); @@ -269,28 +253,6 
@@ Status ManifestOp::InitSampler() { return Status::OK(); } -// Derived from RandomAccessOp -Status ManifestOp::GetNumSamples(int64_t *num) const { - if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API ManifestDataset.Please check file path or dataset API " - "validation first."); - } - (*num) = num_samples_; - return Status::OK(); -} - -// Derived from RandomAccessOp -Status ManifestOp::GetNumRowsInDataset(int64_t *num) const { - if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API ManifestDataset.Please check file path or dataset API " - "validation first."); - } - (*num) = num_rows_; - return Status::OK(); -} - // Derived from RandomAccessOp Status ManifestOp::GetClassIds(std::map> *cls_ids) const { if (cls_ids == nullptr || !cls_ids->empty() || image_labelname_.empty()) { @@ -408,7 +370,6 @@ Status ManifestOp::CountDatasetInfo() { } num_rows_ = static_cast(image_labelname_.size()); - num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_) ? 
num_rows_ : num_samples_; if (num_rows_ == 0) { RETURN_STATUS_UNEXPECTED( "There is no valid data matching the dataset API ManifestDataset.Please check file path or dataset API " @@ -417,8 +378,8 @@ Status ManifestOp::CountDatasetInfo() { return Status::OK(); } -Status ManifestOp::CountTotalRows(const std::string &file, int64_t numSamples, const py::dict &dict, - const std::string &usage, int64_t *count, int64_t *numClasses) { +Status ManifestOp::CountTotalRows(const std::string &file, const py::dict &dict, const std::string &usage, + int64_t *count, int64_t *numClasses) { // the logic of counting the number of samples is copied from ParseManifestFile() std::map map; for (auto p : dict) { @@ -428,17 +389,15 @@ Status ManifestOp::CountTotalRows(const std::string &file, int64_t numSamples, c std::shared_ptr op; *count = 0; - RETURN_IF_NOT_OK( - Builder().SetManifestFile(file).SetNumSamples(numSamples).SetClassIndex(map).SetUsage(usage).Build(&op)); + RETURN_IF_NOT_OK(Builder().SetManifestFile(file).SetClassIndex(map).SetUsage(usage).Build(&op)); RETURN_IF_NOT_OK(op->ParseManifestFile()); *numClasses = static_cast(op->label_index_.size()); *count = static_cast(op->image_labelname_.size()); - *count = (*count < numSamples || numSamples == 0) ? 
*count : numSamples; return Status::OK(); } -Status ManifestOp::GetClassIndexing(const std::string &file, int64_t numSamples, const py::dict &dict, - const std::string &usage, std::map *output_class_indexing) { +Status ManifestOp::GetClassIndexing(const std::string &file, const py::dict &dict, const std::string &usage, + std::map *output_class_indexing) { std::map input_class_indexing; for (auto p : dict) { (void)input_class_indexing.insert(std::pair(py::reinterpret_borrow(p.first), @@ -449,12 +408,7 @@ Status ManifestOp::GetClassIndexing(const std::string &file, int64_t numSamples, *output_class_indexing = input_class_indexing; } else { std::shared_ptr op; - RETURN_IF_NOT_OK(Builder() - .SetManifestFile(file) - .SetNumSamples(numSamples) - .SetClassIndex(input_class_indexing) - .SetUsage(usage) - .Build(&op)); + RETURN_IF_NOT_OK(Builder().SetManifestFile(file).SetClassIndex(input_class_indexing).SetUsage(usage).Build(&op)); RETURN_IF_NOT_OK(op->ParseManifestFile()); RETURN_IF_NOT_OK(op->CountDatasetInfo()); uint32_t count = 0; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.h index e015496acc..edfdbb51ae 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.h @@ -86,14 +86,6 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { return *this; } - // Setter method - // @param int64_t num_samples - // @return Builder setter method returns reference to the builder. - Builder &SetNumSamples(int64_t num_samples) { - builder_num_samples_ = num_samples; - return *this; - } - // Setter method // @param std::shared_ptr sampler // @return Builder setter method returns reference to the builder. 
@@ -129,7 +121,6 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { private: std::shared_ptr builder_sampler_; - int64_t builder_num_samples_; bool builder_decode_; std::string builder_file_; @@ -147,8 +138,8 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { // @param std::string - file list of Manifest // @param int32_t queue_size - connector queue size // @param td::unique_ptr sampler - sampler tells ImageFolderOp what to read - ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, int64_t num_samples, - bool decode, const std::map &class_index, std::unique_ptr data_schema, + ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode, + const std::map &class_index, std::unique_ptr data_schema, std::shared_ptr sampler, std::string usage); // Destructor. ~ManifestOp() = default; @@ -164,16 +155,6 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { // @return Status - The error code return Status operator()() override; - // Method derived from RandomAccess Op, enable Sampler to get numRows - // @param int64_t num - to return numRows - // @return Status - The error code return - Status GetNumSamples(int64_t *num) const override; - - // Method derived from RandomAccess Op, enable Sampler to get total number of Rows in dataset - // @param int64_t num - to return numRows - // @return Status - The error code return - Status GetNumRowsInDataset(int64_t *num) const override; - // Method derived from RandomAccess Op, enable Sampler to get all ids for each class // @param (std::map> * map - key label, val all ids for this class // @return Status - The error code return @@ -184,12 +165,16 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { // @param show_all void Print(std::ostream &out, bool show_all) const override; - static Status CountTotalRows(const std::string &file, int64_t numSamples, const py::dict &dict, - const std::string 
&usage, int64_t *count, int64_t *numClasses); + static Status CountTotalRows(const std::string &file, const py::dict &dict, const std::string &usage, int64_t *count, + int64_t *numClasses); // Get str-to-int mapping from label name to index - static Status GetClassIndexing(const std::string &file, int64_t numSamples, const py::dict &dict, - const std::string &usage, std::map *output_class_indexing); + static Status GetClassIndexing(const std::string &file, const py::dict &dict, const std::string &usage, + std::map *output_class_indexing); + + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "ManifestOp"; } private: // Initialize Sampler, calls sampler->Init() within @@ -202,10 +187,12 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { Status AddIoBlock(std::unique_ptr *sampler_buffer); // Load a tensor row according to a pair + // @param row_id_type row_id - id for this tensor row // @param std::pair> - > // @param TensorRow row - image & label read into this tensor row // @return Status - The error code return - Status LoadTensorRow(const std::pair> &data, TensorRow *row); + Status LoadTensorRow(row_id_type row_id, const std::pair> &data, + TensorRow *row); // @param const std::vector &keys - keys in ioblock // @param std::unique_ptr db @@ -240,8 +227,6 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { std::string file_; // file that store the information of images std::map class_index_; std::shared_ptr sampler_; - int64_t num_samples_; - int64_t num_rows_; bool decode_; std::string usage_; int64_t buf_cnt_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc index 358dd07872..0f762386af 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc @@ -44,7 +44,7 @@ using mindrecord::ShardReader; 
MindRecordOp::Builder::Builder() : build_dataset_file_({}) { // Some arguments to the MindRecordOp constructor have a default argument that is taken // from the client config. - // The user may choose to change these values for the construction of the StorageOp by + // The user may choose to change these values for the construction of the MindRecordOp by // using the various builder set methods. std::shared_ptr cfg = GlobalContext::config_manager(); @@ -53,6 +53,8 @@ MindRecordOp::Builder::Builder() : build_dataset_file_({}) { build_op_connector_queue_size_ = cfg->op_connector_size(); build_block_reader_ = false; builder_num_workers_ = 0; + build_num_padded_ = 0; + build_sample_ = nullptr; } // The builder "build" method creates the final object. @@ -63,24 +65,57 @@ Status MindRecordOp::Builder::Build(std::shared_ptr *ptr) { return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Building a MindRecordOp that has not provided a file."); } - + mindrecord::json sample_json; + if (build_num_padded_ > 0) { + sample_json = ToJson(build_sample_); + } new_mind_record_op = std::make_shared( build_num_mind_record_workers_, build_rows_per_buffer_, build_dataset_file_, build_load_dataset_, - build_op_connector_queue_size_, build_columns_to_load_, build_operators_, build_block_reader_); + build_op_connector_queue_size_, build_columns_to_load_, build_operators_, build_block_reader_, build_num_padded_, + sample_json, build_sample_bytes_); RETURN_IF_NOT_OK(new_mind_record_op->Init()); - *ptr = std::move(new_mind_record_op); return Status::OK(); } Status MindRecordOp::Builder::SanityCheck() const { return Status::OK(); } +mindrecord::json MindRecordOp::Builder::ToJson(const py::handle &obj) { + if (obj.is_none()) { + return nullptr; + } + if (py::isinstance(obj)) { + return obj.cast(); + } + if (py::isinstance(obj)) { + return obj.cast(); + } + if (py::isinstance(obj)) { // also catch py::bytes + return obj.cast(); + } + if (py::isinstance(obj)) { + auto out = 
mindrecord::json::object(); + for (const py::handle &key : obj) { + if (py::isinstance(obj[key])) { + build_sample_bytes_[py::str(key).cast()] = obj[key].cast(); + } else { + out[py::str(key).cast()] = ToJson(obj[key]); + } + } + return out; + } + MS_LOG(ERROR) << "Python object convert to json failed, object is: " << py::cast(obj); + return mindrecord::json(); +} + // Constructor of the MindRecordOp. MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buffer, std::vector dataset_file, bool load_dataset, int32_t op_connector_queue_size, const std::vector &columns_to_load, - const std::vector> &operators, const bool &block_reader) + const std::vector> &operators, const bool &block_reader, + int64_t num_padded, const mindrecord::json &sample_json, + const std::map &sample_bytes) : ParallelOp(num_mind_record_workers, op_connector_queue_size), rows_per_buffer_(rows_per_buffer), dataset_file_(dataset_file), @@ -89,11 +124,14 @@ MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buf operators_(operators), num_mind_record_workers_(num_mind_record_workers), block_reader_(block_reader), + num_rows_(0), buffers_needed_(0), buf_cnt_(0), - num_rows_(0), ended_worker_(0), - buffer_water_mark_(0) { + buffer_water_mark_(0), + num_padded_(num_padded), + sample_json_(sample_json), + sample_bytes_(sample_bytes) { io_blk_queues_.Init(num_workers_, op_connector_queue_size); if (!block_reader_) return; for (int32_t i = 0; i < num_workers_; ++i) { @@ -105,7 +143,7 @@ MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buf Status MindRecordOp::Init() { shard_reader_ = std::make_unique(); auto rc = shard_reader_->Open(dataset_file_, load_dataset_, num_mind_record_workers_, columns_to_load_, operators_, - block_reader_); + block_reader_, num_padded_); CHECK_FAIL_RETURN_UNEXPECTED(rc == MSRStatus::SUCCESS, "MindRecordOp init failed. 
Error message: " + ErrnoToMessage(rc)); @@ -162,10 +200,6 @@ Status MindRecordOp::Init() { column_name_id_map_[columns_to_load_[i]] = i; } - num_rows_ = shard_reader_->GetNumRows(); - // Compute how many buffers we would need to accomplish rowsPerBuffer - buffers_needed_ = (num_rows_ + rows_per_buffer_ - 1) / rows_per_buffer_; - return Status::OK(); } @@ -262,20 +296,30 @@ Status MindRecordOp::GetBufferFromReader(std::unique_ptr *fetched_bu std::unique_ptr tensor_table = std::make_unique(); for (int32_t i = 0; i < rows_per_buffer_; ++i) { ShardTuple tupled_buffer; + mindrecord::TaskType task_type = mindrecord::TaskType::kCommonTask; if (block_reader_) { if (i >= block_buffer_[buffer_id % num_workers_]->size()) break; tupled_buffer = block_buffer_[buffer_id % num_workers_]->at(i); } else { int32_t row_id = buffer_id * rows_per_buffer_ + i; - tupled_buffer = shard_reader_->GetNextById(row_id, worker_id); + auto rc = shard_reader_->GetNextById(row_id, worker_id); + task_type = rc.first; + tupled_buffer = rc.second; + if (task_type == mindrecord::TaskType::kPaddedTask) { + TensorRow tensor_row; + RETURN_IF_NOT_OK(LoadTensorRow(&tensor_row, {}, mindrecord::json(), task_type)); + tensor_table->push_back(std::move(tensor_row)); + } if (tupled_buffer.empty()) break; } - for (const auto &tupled_row : tupled_buffer) { - std::vector columns_blob = std::get<0>(tupled_row); - mindrecord::json columns_json = std::get<1>(tupled_row); - TensorRow tensor_row; - RETURN_IF_NOT_OK(LoadTensorRow(&tensor_row, columns_blob, columns_json)); - tensor_table->push_back(std::move(tensor_row)); + if (task_type == mindrecord::TaskType::kCommonTask) { + for (const auto &tupled_row : tupled_buffer) { + std::vector columns_blob = std::get<0>(tupled_row); + mindrecord::json columns_json = std::get<1>(tupled_row); + TensorRow tensor_row; + RETURN_IF_NOT_OK(LoadTensorRow(&tensor_row, columns_blob, columns_json, task_type)); + tensor_table->push_back(std::move(tensor_row)); + } } } @@ -285,7 +329,7 @@ 
Status MindRecordOp::GetBufferFromReader(std::unique_ptr *fetched_bu } Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector &columns_blob, - const mindrecord::json &columns_json) { + const mindrecord::json &columns_json, const mindrecord::TaskType task_type) { for (uint32_t i_col = 0; i_col < columns_to_load_.size(); i_col++) { auto column_name = columns_to_load_[i_col]; @@ -298,11 +342,39 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector column_shape; // Get column data - auto has_column = shard_reader_->GetShardColumn()->GetColumnValueByName( - column_name, columns_blob, columns_json, &data, &data_ptr, &n_bytes, &column_data_type, &column_data_type_size, - &column_shape); - if (has_column == MSRStatus::FAILED) { - RETURN_STATUS_UNEXPECTED("Failed to retrieve data from mindrecord reader."); + auto shard_column = shard_reader_->GetShardColumn(); + if (num_padded_ > 0 && task_type == mindrecord::TaskType::kPaddedTask) { + auto rc = + shard_column->GetColumnTypeByName(column_name, &column_data_type, &column_data_type_size, &column_shape); + if (rc.first != MSRStatus::SUCCESS) { + RETURN_STATUS_UNEXPECTED("Failed to retrieve data type."); + } + if (rc.second == mindrecord::ColumnInRaw) { + auto has_column = shard_column->GetColumnFromJson(column_name, sample_json_, &data_ptr, &n_bytes); + if (has_column == MSRStatus::FAILED) { + RETURN_STATUS_UNEXPECTED("Failed to retrieve raw data from padding sample."); + } + } else if (rc.second == mindrecord::ColumnInBlob) { + if (sample_bytes_.find(column_name) == sample_bytes_.end()) { + RETURN_STATUS_UNEXPECTED("Failed to retrieve blob data from padding sample."); + } + std::string ss(sample_bytes_[column_name]); + n_bytes = ss.size(); + data_ptr = std::make_unique(n_bytes); + std::copy(ss.begin(), ss.end(), data_ptr.get()); + } else { + RETURN_STATUS_UNEXPECTED("Retrieved data type is unknown."); + } + if (data == nullptr) { + data = reinterpret_cast(data_ptr.get()); + } + } 
else { + auto has_column = + shard_column->GetColumnValueByName(column_name, columns_blob, columns_json, &data, &data_ptr, &n_bytes, + &column_data_type, &column_data_type_size, &column_shape); + if (has_column == MSRStatus::FAILED) { + RETURN_STATUS_UNEXPECTED("Failed to retrieve data from mindrecord reader."); + } } std::shared_ptr tensor; @@ -335,7 +407,8 @@ Status MindRecordOp::FetchBlockBuffer(const int32_t &buffer_id) { } for (int32_t i = 0; i < rows_per_buffer_; i++) { // Block reader does NOT care about argument - ShardTuple tuple_buffer = shard_reader_->GetNextById(i, i); + auto rc = shard_reader_->GetNextById(i, i); + ShardTuple tuple_buffer = rc.second; if (tuple_buffer.empty()) break; block_buffer_[buffer_id % num_workers_]->push_back(std::move(tuple_buffer)); } @@ -349,11 +422,8 @@ Status MindRecordOp::FetchBlockBuffer(const int32_t &buffer_id) { Status MindRecordOp::operator()() { RETURN_IF_NOT_OK(LaunchThreadAndInitOp()); num_rows_ = shard_reader_->GetNumRows(); - - buffers_needed_ = num_rows_ / rows_per_buffer_; - if (num_rows_ % rows_per_buffer_ != 0) { - buffers_needed_++; - } + // Compute how many buffers we would need to accomplish rowsPerBuffer + buffers_needed_ = (num_rows_ + rows_per_buffer_ - 1) / rows_per_buffer_; while (true) { // each iterator is 1 epoch for (int32_t i = 0; i < buffers_needed_; ++i) { @@ -418,9 +488,9 @@ Status MindRecordOp::LaunchThreadAndInitOp() { } Status MindRecordOp::CountTotalRows(const std::vector dataset_path, bool load_dataset, - const std::shared_ptr &op, int64_t *count) { + const std::shared_ptr &op, int64_t *count, int64_t num_padded) { std::unique_ptr shard_reader = std::make_unique(); - MSRStatus rc = shard_reader->CountTotalRows(dataset_path, load_dataset, op, count); + MSRStatus rc = shard_reader->CountTotalRows(dataset_path, load_dataset, op, count, num_padded); if (rc == MSRStatus::FAILED) { RETURN_STATUS_UNEXPECTED("MindRecordOp count total rows failed."); } diff --git 
a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h index 251b4f9130..b704240aaa 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h @@ -104,10 +104,22 @@ class MindRecordOp : public ParallelOp { return *this; } + Builder &SetNumToPadSamples(int64_t num_padded) { + build_num_padded_ = num_padded; + return *this; + } + + Builder &SetPaddedSample(const py::handle &sample) { + build_sample_ = sample; + return *this; + } + Status SanityCheck() const; static int32_t num_mind_record_workers() { return kDefaultMindRecordWorkers; } + mindrecord::json ToJson(const py::handle &obj); + private: static constexpr int32_t kDefaultMindRecordWorkers = 4; // The builder saves all MindRecordOp construction arguments internally. @@ -121,6 +133,9 @@ class MindRecordOp : public ParallelOp { std::vector build_columns_to_load_; std::vector> build_operators_; bool build_block_reader_; + int64_t build_num_padded_; + py::handle build_sample_; + std::map build_sample_bytes_; }; // Constructor of the MindRecordOp. 
@@ -133,7 +148,9 @@ class MindRecordOp : public ParallelOp { // @param operators - ShardOperators for Shuffle, Category, Sample MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buffer, std::vector dataset_file, bool load_dataset, int32_t op_connector_queue_size, const std::vector &columns_to_load, - const std::vector> &operators, const bool &block_reader); + const std::vector> &operators, const bool &block_reader, + int64_t num_padded_, const mindrecord::json &sample_json, + const std::map &sample_bytes_); // Destructor ~MindRecordOp() override; @@ -178,7 +195,7 @@ class MindRecordOp : public ParallelOp { int32_t num_rows() const { return num_rows_; } static Status CountTotalRows(const std::vector dataset_path, bool load_dataset, - const std::shared_ptr &op, int64_t *count); + const std::shared_ptr &op, int64_t *count, int64_t num_padded); // Getter method int32_t rows_per_buffer() const { return rows_per_buffer_; } @@ -201,6 +218,10 @@ class MindRecordOp : public ParallelOp { // @return - Status of the node visit. 
Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "MindRecordOp"; } + private: Status GetBufferFromReader(std::unique_ptr *fetched_buffer, int64_t buffer_id, int32_t worker_id); @@ -209,7 +230,7 @@ class MindRecordOp : public ParallelOp { // @param columns_blob - the blob data received from the reader // @param columns_json - the data for fields received from the reader Status LoadTensorRow(TensorRow *tensor_row, const std::vector &columns_blob, - const mindrecord::json &columns_json); + const mindrecord::json &columns_json, const mindrecord::TaskType task_type); Status FetchBlockBuffer(const int32_t &buffer_id); @@ -226,6 +247,10 @@ class MindRecordOp : public ParallelOp { std::atomic ended_worker_; std::atomic buffer_water_mark_; + int64_t num_padded_; + mindrecord::json sample_json_; + std::map sample_bytes_; + std::unique_ptr data_schema_; // Data schema for column typing std::vector columns_blob_; // Blob Columns to load from dataset std::vector columns_blob_index_; // Blob Columns to load from dataset diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc index 53c32b1904..eacd9daf75 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc @@ -31,7 +31,7 @@ const int32_t kMnistLabelFileMagicNumber = 2049; const int32_t kMnistImageRows = 28; const int32_t kMnistImageCols = 28; -MnistOp::Builder::Builder() : builder_num_samples_(0), builder_sampler_(nullptr) { +MnistOp::Builder::Builder() : builder_sampler_(nullptr) { std::shared_ptr cfg = GlobalContext::config_manager(); builder_num_workers_ = cfg->num_parallel_workers(); builder_rows_per_buffer_ = cfg->rows_per_buffer(); @@ -41,7 +41,9 @@ MnistOp::Builder::Builder() : builder_num_samples_(0), builder_sampler_(nullptr) Status 
MnistOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); if (builder_sampler_ == nullptr) { - builder_sampler_ = std::make_shared(); + const int64_t num_samples = 0; + const int64_t start_index = 0; + builder_sampler_ = std::make_shared(start_index, num_samples); } builder_schema_ = std::make_unique(); RETURN_IF_NOT_OK( @@ -49,9 +51,8 @@ Status MnistOp::Builder::Build(std::shared_ptr *ptr) { TensorShape scalar = TensorShape::CreateScalar(); RETURN_IF_NOT_OK(builder_schema_->AddColumn( ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); - *ptr = - std::make_shared(builder_num_workers_, builder_rows_per_buffer_, builder_dir_, builder_op_connector_size_, - builder_num_samples_, std::move(builder_schema_), std::move(builder_sampler_)); + *ptr = std::make_shared(builder_num_workers_, builder_rows_per_buffer_, builder_dir_, + builder_op_connector_size_, std::move(builder_schema_), std::move(builder_sampler_)); return Status::OK(); } @@ -60,17 +61,14 @@ Status MnistOp::Builder::SanityCheck() { std::string err_msg; err_msg += dir.IsDirectory() == false ? "MNIST path is invalid or not set\n" : ""; err_msg += builder_num_workers_ <= 0 ? "Number of parallel workers is set to 0 or negative\n" : ""; - err_msg += builder_num_samples_ < 0 ? "Number of samples is set to negative\n" : ""; return err_msg.empty() ? 
Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } MnistOp::MnistOp(int32_t num_workers, int32_t rows_per_buffer, std::string folder_path, int32_t queue_size, - int64_t num_samples, std::unique_ptr data_schema, std::shared_ptr sampler) + std::unique_ptr data_schema, std::shared_ptr sampler) : ParallelOp(num_workers, queue_size), buf_cnt_(0), row_cnt_(0), - num_rows_(0), - num_samples_(num_samples), folder_path_(folder_path), rows_per_buffer_(rows_per_buffer), sampler_(std::move(sampler)), @@ -84,8 +82,7 @@ MnistOp::MnistOp(int32_t num_workers, int32_t rows_per_buffer, std::string folde Status MnistOp::TraversalSampleIds(const std::shared_ptr &sample_ids, std::vector *keys) { for (auto itr = sample_ids->begin(); itr != sample_ids->end(); ++itr) { - if ((*itr) >= num_rows_) continue; // index out of bound, skipping - if (row_cnt_ >= num_samples_) break; // enough row read, break for loop + if ((*itr) >= num_rows_) continue; // index out of bound, skipping keys->push_back(*itr); row_cnt_++; if (row_cnt_ % rows_per_buffer_ == 0) { @@ -101,7 +98,7 @@ Status MnistOp::TraversalSampleIds(const std::shared_ptr &sample_ids, st Status MnistOp::operator()() { RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); std::unique_ptr sampler_buffer; - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); while (true) { // each iterator is 1 epoch std::vector keys; keys.reserve(rows_per_buffer_); @@ -112,7 +109,7 @@ Status MnistOp::operator()() { RETURN_STATUS_UNEXPECTED("Sampler Tensor isn't UINT64"); } RETURN_IF_NOT_OK(TraversalSampleIds(sample_ids, &keys)); - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); } if (keys.empty() == false) { RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( @@ -133,7 +130,7 @@ Status MnistOp::operator()() { io_block_queues_[(buf_cnt_++) % 
num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks wp_.Clear(); - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); } } } @@ -165,15 +162,15 @@ Status MnistOp::WorkerEntry(int32_t worker_id) { } // Load 1 TensorRow (image,label) using 1 MnistLabelPair. -Status MnistOp::LoadTensorRow(const MnistLabelPair &mnist_pair, TensorRow *trow) { +Status MnistOp::LoadTensorRow(row_id_type row_id, const MnistLabelPair &mnist_pair, TensorRow *trow) { std::shared_ptr image, label; int32_t l = mnist_pair.second; // make a copy of cached tensor RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), mnist_pair.first->shape(), - mnist_pair.first->type(), mnist_pair.first->GetMutableBuffer())); + mnist_pair.first->type(), mnist_pair.first->GetBuffer())); RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(), data_schema_->column(1).type(), reinterpret_cast(&l))); - (*trow) = {std::move(image), std::move(label)}; + (*trow) = TensorRow(row_id, {std::move(image), std::move(label)}); return Status::OK(); } @@ -182,7 +179,7 @@ Status MnistOp::LoadBuffer(const std::vector &keys, std::unique_ptr deq = std::make_unique(); TensorRow trow; for (const int64_t &key : keys) { - RETURN_IF_NOT_OK(this->LoadTensorRow(image_label_pairs_[key], &trow)); + RETURN_IF_NOT_OK(this->LoadTensorRow(key, image_label_pairs_[key], &trow)); deq->push_back(std::move(trow)); } (*db)->set_tensor_table(std::move(deq)); @@ -207,7 +204,7 @@ void MnistOp::Print(std::ostream &out, bool show_all) const { // Reset Sampler and wakeup Master thread (functor) Status MnistOp::Reset() { - RETURN_IF_NOT_OK(sampler_->Reset()); + RETURN_IF_NOT_OK(sampler_->ResetSampler()); row_cnt_ = 0; wp_.Set(); // wake up master thread after reset is done return Status::OK(); 
@@ -219,17 +216,6 @@ Status MnistOp::InitSampler() { return Status::OK(); } -// Derived from RandomAccessOp -Status MnistOp::GetNumSamples(int64_t *num) const { - if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API MnistDataset.Please check file path or dataset API " - "validation first."); - } - (*num) = num_samples_; - return Status::OK(); -} - // Derived from RandomAccessOp Status MnistOp::GetClassIds(std::map> *cls_ids) const { if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) { @@ -364,7 +350,11 @@ Status MnistOp::ParseMnistData() { } image_label_pairs_.shrink_to_fit(); num_rows_ = image_label_pairs_.size(); - num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_) ? num_rows_ : num_samples_; + if (num_rows_ == 0) { + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API MnistDataset.Please check file path or dataset API " + "validation first."); + } return Status::OK(); } @@ -414,11 +404,11 @@ Status MnistOp::LaunchThreadsAndInitOp() { return Status::OK(); } -Status MnistOp::CountTotalRows(const std::string &dir, int64_t numSamples, int64_t *count) { +Status MnistOp::CountTotalRows(const std::string &dir, int64_t *count) { // the logic of counting the number of samples is copied from ParseMnistData() and uses CheckReader() std::shared_ptr op; *count = 0; - RETURN_IF_NOT_OK(Builder().SetDir(dir).SetNumSamples(numSamples).Build(&op)); + RETURN_IF_NOT_OK(Builder().SetDir(dir).Build(&op)); RETURN_IF_NOT_OK(op->WalkAllFiles()); @@ -440,19 +430,6 @@ Status MnistOp::CountTotalRows(const std::string &dir, int64_t numSamples, int64 label_reader.close(); } - *count = (numSamples == 0 || *count < numSamples) ? 
*count : numSamples; - - return Status::OK(); -} - -// Derived from RandomAccessOp -Status MnistOp::GetNumRowsInDataset(int64_t *num) const { - if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API MnistDataset.Please check file path or dataset API " - "validation first."); - } - (*num) = num_rows_; return Status::OK(); } } // namespace dataset diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.h index 397a51710e..909ac22124 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.h @@ -78,14 +78,6 @@ class MnistOp : public ParallelOp, public RandomAccessOp { return *this; } - // Setter method - // @param int64_t num_samples - // @return Builder setter method returns reference to the builder. - Builder &SetNumSamples(int64_t num_samples) { - builder_num_samples_ = num_samples; - return *this; - } - // Setter method // @param std::shared_ptr sampler // @return Builder setter method returns reference to the builder. 
@@ -114,7 +106,6 @@ class MnistOp : public ParallelOp, public RandomAccessOp { private: std::string builder_dir_; int32_t builder_num_workers_; - int64_t builder_num_samples_; int32_t builder_rows_per_buffer_; int32_t builder_op_connector_size_; std::shared_ptr builder_sampler_; @@ -126,11 +117,10 @@ class MnistOp : public ParallelOp, public RandomAccessOp { // @param int32_t rows_per_buffer - number of images (rows) in each buffer // @param std::string folder_path - dir directory of mnist // @param int32_t queue_size - connector queue size - // @param int64_t num_samples - number of samples to read // @param std::unique_ptr data_schema - the schema of the mnist dataset // @param td::unique_ptr sampler - sampler tells MnistOp what to read MnistOp(int32_t num_workers, int32_t rows_per_buffer, std::string folder_path, int32_t queue_size, - int64_t num_samples, std::unique_ptr data_schema, std::shared_ptr sampler); + std::unique_ptr data_schema, std::shared_ptr sampler); // Destructor. ~MnistOp() = default; @@ -146,16 +136,6 @@ class MnistOp : public ParallelOp, public RandomAccessOp { // @return Status - The error code return Status operator()() override; - // Method derived from RandomAccess Op, enable Sampler to get numRows - // @param int64_t num - to return numRows - // @return Status - The error code return - Status GetNumSamples(int64_t *num) const override; - - // Method derived from RandomAccess Op, enable Sampler to get total numRows in dataset - // @param int64_t num - to return numRows - // @return Status - The error code return - Status GetNumRowsInDataset(int64_t *num) const override; - // Method derived from RandomAccess Op, enable Sampler to get all ids for each class // @param (std::map> * map - key label, val all ids for this class // @return Status - The error code return @@ -167,11 +147,14 @@ class MnistOp : public ParallelOp, public RandomAccessOp { void Print(std::ostream &out, bool show_all) const override; // Function to count the number of 
samples in the MNIST dataset - // @param dir path to the MNSIT directory - // @param numSamples maximum number of samples requested + // @param dir path to the MNIST directory // @param count output arg that will hold the minimum of the actual dataset size and numSamples // @return - static Status CountTotalRows(const std::string &dir, int64_t numSamples, int64_t *count); + static Status CountTotalRows(const std::string &dir, int64_t *count); + + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "MnistOp"; } private: // Initialize Sampler, calls sampler->Init() within @@ -179,10 +162,11 @@ class MnistOp : public ParallelOp, public RandomAccessOp { Status InitSampler(); // Load a tensor row according to a pair + // @param row_id_type row_id - id for this tensor row // @param ImageLabelPair pair - // @param TensorRow row - image & label read into this tensor row // @return Status - The error code return - Status LoadTensorRow(const MnistLabelPair &mnist_pair, TensorRow *row); + Status LoadTensorRow(row_id_type row_id, const MnistLabelPair &mnist_pair, TensorRow *row); // @param const std::vector &keys - keys in ioblock // @param std::unique_ptr db @@ -244,9 +228,7 @@ class MnistOp : public ParallelOp, public RandomAccessOp { int64_t buf_cnt_; int64_t row_cnt_; - int64_t num_rows_; // total number of images in Mnist WaitPost wp_; - int64_t num_samples_; std::string folder_path_; // directory of image folder int32_t rows_per_buffer_; std::shared_ptr sampler_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h index 92d05d7318..48cfb0be51 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h @@ -189,6 +189,10 @@ class RandomDataOp : public ParallelOp { */ int64_t GetTotalRows() const { return total_rows_; } + // Op name getter + // 
@return Name of the current Op + std::string Name() const override { return "RandomDataOp"; } + private: /** * The entry point code for when workers are launched diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt index 152b887ef4..5209d9ba4a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt @@ -8,6 +8,5 @@ add_library(engine-datasetops-source-sampler OBJECT sampler.cc sequential_sampler.cc subset_random_sampler.cc - subset_sampler.cc weighted_random_sampler.cc ) diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc index d4e5a732db..226647df14 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc @@ -23,8 +23,9 @@ namespace mindspore { namespace dataset { -DistributedSampler::DistributedSampler(int64_t num_dev, int64_t dev_id, bool shuffle, uint32_t seed) - : Sampler(), +DistributedSampler::DistributedSampler(int64_t num_samples, int64_t num_dev, int64_t dev_id, bool shuffle, + uint32_t seed) + : Sampler(num_samples, std::numeric_limits::max()), cnt_(0), seed_(seed == std::numeric_limits::max() ? GetSeed() : seed), device_id_(dev_id), @@ -32,6 +33,11 @@ DistributedSampler::DistributedSampler(int64_t num_dev, int64_t dev_id, bool shu shuffle_(shuffle) {} Status DistributedSampler::InitSampler() { + // Special value of 0 for num_samples means that the user wants to sample the entire set of data. + // If the user asked to sample more rows than exists in the dataset, adjust the num_samples accordingly. 
+ if (num_samples_ == 0 || num_samples_ > num_rows_) { + num_samples_ = num_rows_; + } CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0, "num_samples <= 0\n"); CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "num_rows <= 0\n"); CHECK_FAIL_RETURN_UNEXPECTED(device_id_ < num_devices_ && device_id_ >= 0 && num_rows_ > 0 && num_samples_ > 0, @@ -49,21 +55,21 @@ Status DistributedSampler::InitSampler() { return Status::OK(); } -Status DistributedSampler::GetNextBuffer(std::unique_ptr *out_buffer) { +Status DistributedSampler::GetNextSample(std::unique_ptr *out_buffer) { if (cnt_ > samples_per_buffer_) { RETURN_STATUS_UNEXPECTED("Distributed Sampler Error"); } else if (cnt_ == samples_per_buffer_) { (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); } else { if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&child_ids_)); + RETURN_IF_NOT_OK(child_[0]->GetNextSample(&child_ids_)); } (*out_buffer) = std::make_unique(cnt_, DataBuffer::kDeBFlagNone); std::shared_ptr sample_ids; RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, samples_per_buffer_)); - int64_t *id_ptr = reinterpret_cast(sample_ids->GetMutableBuffer()); - while (cnt_ < samples_per_buffer_) { + auto id_ptr = sample_ids->begin(); + while (cnt_ < samples_per_buffer_ && id_ptr != sample_ids->end()) { int64_t sampled_id = (num_devices_ * cnt_ + device_id_) % num_rows_; if (shuffle_) { sampled_id = shuffle_vec_[static_cast(sampled_id)]; @@ -83,7 +89,7 @@ Status DistributedSampler::GetNextBuffer(std::unique_ptr *out_buffer return Status::OK(); } -Status DistributedSampler::Reset() { +Status DistributedSampler::ResetSampler() { CHECK_FAIL_RETURN_UNEXPECTED(cnt_ == samples_per_buffer_, "ERROR Reset() called early/late"); cnt_ = 0; @@ -94,7 +100,7 @@ Status DistributedSampler::Reset() { } if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->Reset()); + RETURN_IF_NOT_OK(child_[0]->ResetSampler()); } return Status::OK(); diff --git 
a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.h b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.h index 29b5cda0da..7083580c6c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.h @@ -27,10 +27,11 @@ namespace mindspore { namespace dataset { class DistributedSampler : public Sampler { public: - // @param int64_t numDev - // @param int64_t devId + // @param num_samples + // @param int64_t num_dev + // @param int64_t dev_id // @param bool shuffle - DistributedSampler(int64_t num_dev, int64_t dev_id, bool shuffle = true, + DistributedSampler(int64_t num_samples, int64_t num_dev, int64_t dev_id, bool shuffle, uint32_t seed = std::numeric_limits::max()); // default destructor @@ -39,14 +40,14 @@ class DistributedSampler : public Sampler { // @param std::unique_ptr * pBuffer // @param int32_t workerId // @return - The error code return - Status GetNextBuffer(std::unique_ptr *out_buffer) override; + Status GetNextSample(std::unique_ptr *out_buffer) override; // Init sampler, called by base class or python Status InitSampler() override; // for next epoch of sampleIds // @return - The error code return - Status Reset() override; + Status ResetSampler() override; void Print(std::ostream &out, bool show_all) const override; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc index 72c2cc1874..92a880d599 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc @@ -20,12 +20,11 @@ namespace mindspore { namespace dataset { -PKSampler::PKSampler(int64_t val, bool shuffle, int64_t samples_per_buffer) - : Sampler(samples_per_buffer), +PKSampler::PKSampler(int64_t num_samples, int64_t val, 
bool shuffle, int64_t samples_per_buffer) + : Sampler(num_samples, samples_per_buffer), shuffle_(shuffle), seed_(GetSeed()), next_id_(0), - num_pk_samples_(0), samples_per_class_(val) {} Status PKSampler::InitSampler() { @@ -36,35 +35,46 @@ Status PKSampler::InitSampler() { } } rnd_.seed(seed_++); - num_pk_samples_ = samples_per_class_ * static_cast(labels_.size()); - samples_per_buffer_ = (samples_per_buffer_ > num_pk_samples_) ? num_pk_samples_ : samples_per_buffer_; - num_samples_ = num_pk_samples_; + + // The special handshake gives the list of classes and id's, but it did not set the num_rows_ to + // capture the total number of possible sample ids. + // Compute that here for this case to find the total number of samples that are available to return. + // (in this case, samples per class * total classes). + num_rows_ = samples_per_class_ * static_cast(labels_.size()); + + // The user may have chosen to sample less than the total amount. + // Special value of 0 for num_samples means that the user wants to sample the entire set of data. + // If the user asked to sample more rows than exists in the dataset, adjust the num_samples accordingly. + if (num_samples_ == 0 || num_samples_ > num_rows_) { + num_samples_ = num_rows_; + } + + samples_per_buffer_ = (samples_per_buffer_ > num_samples_) ? 
num_samples_ : samples_per_buffer_; if (shuffle_ == true) { std::shuffle(labels_.begin(), labels_.end(), rnd_); } else { std::sort(labels_.begin(), labels_.end()); } - CHECK_FAIL_RETURN_UNEXPECTED(num_pk_samples_ > 0, "num_class or K (num samples per class) is not positive"); + CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0, "num_class or K (num samples per class) is not positive"); return Status::OK(); } -Status PKSampler::GetNextBuffer(std::unique_ptr *out_buffer) { - if (next_id_ > num_pk_samples_ || num_pk_samples_ == 0) { +Status PKSampler::GetNextSample(std::unique_ptr *out_buffer) { + if (next_id_ > num_samples_ || num_samples_ == 0) { RETURN_STATUS_UNEXPECTED("Index out of bound in PKSampler"); - } else if (next_id_ == num_pk_samples_) { + } else if (next_id_ == num_samples_) { (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); } else { if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&child_ids_)); + RETURN_IF_NOT_OK(child_[0]->GetNextSample(&child_ids_)); } (*out_buffer) = std::make_unique(next_id_, DataBuffer::kDeBFlagNone); std::shared_ptr sample_ids; - int64_t last_id = - (samples_per_buffer_ + next_id_ > num_pk_samples_) ? num_pk_samples_ : samples_per_buffer_ + next_id_; + int64_t last_id = (samples_per_buffer_ + next_id_ > num_samples_) ? 
num_samples_ : samples_per_buffer_ + next_id_; RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, last_id - next_id_)); - int64_t *id_ptr = reinterpret_cast(sample_ids->GetMutableBuffer()); - while (next_id_ < last_id) { + auto id_ptr = sample_ids->begin(); + while (next_id_ < last_id && id_ptr != sample_ids->end()) { int64_t cls_id = next_id_++ / samples_per_class_; const std::vector &samples = label_to_ids_[labels_[cls_id]]; int64_t rnd_ind = std::uniform_int_distribution(0, samples.size() - 1)(rnd_); @@ -84,13 +94,13 @@ Status PKSampler::GetNextBuffer(std::unique_ptr *out_buffer) { return Status::OK(); } -Status PKSampler::Reset() { - CHECK_FAIL_RETURN_UNEXPECTED(next_id_ == num_pk_samples_, "ERROR Reset() called early/late"); +Status PKSampler::ResetSampler() { + CHECK_FAIL_RETURN_UNEXPECTED(next_id_ == num_samples_, "ERROR Reset() called early/late"); next_id_ = 0; rnd_.seed(seed_++); if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->Reset()); + RETURN_IF_NOT_OK(child_[0]->ResetSampler()); } return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.h b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.h index 14f598a9ce..7b1423326a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.h @@ -28,10 +28,11 @@ namespace mindspore { namespace dataset { class PKSampler : public Sampler { // NOT YET FINISHED public: - // @param int64_t kVal + // @param num_samples - the number of samples to draw. 
value of 0 means to take the full amount + // @param int64_t val // @param bool shuffle - shuffle all classIds or not, if true, classes may be 5,1,4,3,2 // @param int64_t samplesPerBuffer - Num of Sampler Ids to fetch via 1 GetNextBuffer call - explicit PKSampler(int64_t val, bool shuffle = false, + explicit PKSampler(int64_t num_samples, int64_t val, bool shuffle, int64_t samples_per_buffer = std::numeric_limits::max()); // default destructor @@ -40,10 +41,11 @@ class PKSampler : public Sampler { // NOT YET FINISHED // @param std::unique_ptr *out_buffer) override; + Status GetNextSample(std::unique_ptr *out_buffer) override; - // first handshake between StorageOp and Sampler - // @param op - StorageOp pointer, pass in so Sampler can call GetNumSamples() and get ClassIds() + // first handshake between leaf source op and Sampler. This func will determine the amount of data + // in the dataset that we can sample from. + // @param op - leaf op pointer, pass in so Sampler can ask it about how much data there is // @return Status HandshakeRandomAccessOp(const RandomAccessOp *op) override; @@ -52,13 +54,12 @@ class PKSampler : public Sampler { // NOT YET FINISHED // for next epoch of sampleIds // @return - The error code return - Status Reset() override; + Status ResetSampler() override; private: bool shuffle_; uint32_t seed_; int64_t next_id_; - int64_t num_pk_samples_; int64_t samples_per_class_; std::mt19937 rnd_; std::vector labels_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.cc index ca999e31a5..af4aa20bb2 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.cc @@ -20,15 +20,15 @@ namespace mindspore { namespace dataset { -PythonSampler::PythonSampler(py::object py_sampler_instance, int64_t samples_per_buffer) - : 
Sampler(samples_per_buffer), py_sampler_instance(py_sampler_instance), need_to_reset_(false) {} +PythonSampler::PythonSampler(int64_t num_samples, py::object py_sampler_instance, int64_t samples_per_buffer) + : Sampler(num_samples, samples_per_buffer), py_sampler_instance(py_sampler_instance), need_to_reset_(false) {} -Status PythonSampler::GetNextBuffer(std::unique_ptr *out_buffer) { +Status PythonSampler::GetNextSample(std::unique_ptr *out_buffer) { if (need_to_reset_) { (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); } else { if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&child_ids_)); + RETURN_IF_NOT_OK(child_[0]->GetNextSample(&child_ids_)); } std::shared_ptr sample_ids; @@ -65,6 +65,11 @@ Status PythonSampler::GetNextBuffer(std::unique_ptr *out_buffer) { Status PythonSampler::InitSampler() { CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "ERROR num_rows_ should be greater than 0"); + // Special value of 0 for num_samples means that the user wants to sample the entire set of data. + // If the user asked to sample more rows than exists in the dataset, adjust the num_samples accordingly. 
+ if (num_samples_ == 0 || num_samples_ > num_rows_) { + num_samples_ = num_rows_; + } { py::gil_scoped_acquire gil_acquire; if (Py_IsInitialized() == 0) { @@ -79,7 +84,7 @@ Status PythonSampler::InitSampler() { return Status::OK(); } -Status PythonSampler::Reset() { +Status PythonSampler::ResetSampler() { CHECK_FAIL_RETURN_UNEXPECTED(need_to_reset_, "ERROR Reset() called not at end of an epoch"); need_to_reset_ = false; py::gil_scoped_acquire gil_acquire; @@ -93,7 +98,7 @@ Status PythonSampler::Reset() { } if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->Reset()); + RETURN_IF_NOT_OK(child_[0]->ResetSampler()); } return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.h b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.h index b8734fee6a..49ff12878d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.h @@ -26,8 +26,11 @@ namespace dataset { class PythonSampler : public Sampler { public: // Constructor - // @param int64_t samplesPerBuffer - Num of Sampler Ids to fetch via 1 GetNextBuffer call - explicit PythonSampler(py::object py_sampler_instance, + // @param num_samples - the number of samples to draw. Value of 0 means to sample all of the + // data from the dataset. + // @param py_sampler_instance - the python instance of the sampler + // @param int64_t samples_per_buffer - Num of Sampler Ids to fetch via 1 GetNextBuffer call + explicit PythonSampler(int64_t num_samples, py::object py_sampler_instance, int64_t samples_per_buffer = std::numeric_limits::max()); // Destructor. 
@@ -39,13 +42,13 @@ class PythonSampler : public Sampler { // for next epoch of sampleIds // @return - The error code return - Status Reset() override; + Status ResetSampler() override; // Op calls this to get next Buffer that contains all the sampleIds - // @param std::unique_ptr pBuffer - Buffer to be returned to StorageOp + // @param std::unique_ptr pBuffer - Buffer to be returned to corresponding Dataset Op // @param int32_t workerId - not meant to be used // @return - The error code return - Status GetNextBuffer(std::unique_ptr *out_buffer) override; + Status GetNextSample(std::unique_ptr *out_buffer) override; private: bool need_to_reset_; // Whether Reset() should be called before calling GetNextBuffer() diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc index 0de55e0fb4..b3dfaad7f7 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc @@ -22,31 +22,30 @@ namespace mindspore { namespace dataset { -RandomSampler::RandomSampler(bool replacement, bool reshuffle_each_epoch, int64_t num_samples, +RandomSampler::RandomSampler(int64_t num_samples, bool replacement, bool reshuffle_each_epoch, int64_t samples_per_buffer) - : Sampler(samples_per_buffer), + : Sampler(num_samples, samples_per_buffer), seed_(GetSeed()), replacement_(replacement), - user_num_samples_(num_samples), next_id_(0), reshuffle_each_epoch_(reshuffle_each_epoch), dist(nullptr) {} -Status RandomSampler::GetNextBuffer(std::unique_ptr *out_buffer) { +Status RandomSampler::GetNextSample(std::unique_ptr *out_buffer) { if (next_id_ > num_samples_) { RETURN_STATUS_UNEXPECTED("RandomSampler Internal Error"); } else if (next_id_ == num_samples_) { (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); } else { if (HasChildSampler()) { - 
RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&child_ids_)); + RETURN_IF_NOT_OK(child_[0]->GetNextSample(&child_ids_)); } (*out_buffer) = std::make_unique(next_id_, DataBuffer::kDeBFlagNone); std::shared_ptr sampleIds; int64_t last_id = std::min(samples_per_buffer_ + next_id_, num_samples_); RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, last_id - next_id_)); - int64_t *id_ptr = reinterpret_cast(sampleIds->GetMutableBuffer()); + auto id_ptr = sampleIds->begin(); for (int64_t i = 0; i < (last_id - next_id_); i++) { int64_t sampled_id = 0; @@ -70,31 +69,29 @@ Status RandomSampler::GetNextBuffer(std::unique_ptr *out_buffer) { } Status RandomSampler::InitSampler() { - CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "num_rows needs to be positive."); - + // Special value of 0 for num_samples means that the user wants to sample the entire set of data. + // If the user asked to sample more rows than exists in the dataset, adjust the num_samples accordingly. + if (num_samples_ == 0 || num_samples_ > num_rows_) { + num_samples_ = num_rows_; + } + CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0 && num_rows_ > 0, "both num_samples & num_rows need to be positive"); + samples_per_buffer_ = samples_per_buffer_ > num_samples_ ? num_samples_ : samples_per_buffer_; rnd_.seed(seed_); if (replacement_ == false) { - num_samples_ = std::min(num_samples_, num_rows_); - num_samples_ = std::min(num_samples_, user_num_samples_); - shuffled_ids_.reserve(num_rows_); for (int64_t i = 0; i < num_rows_; i++) { shuffled_ids_.push_back(i); } std::shuffle(shuffled_ids_.begin(), shuffled_ids_.end(), rnd_); } else { - num_samples_ = std::min(num_samples_, user_num_samples_); dist = std::make_unique>(0, num_rows_ - 1); } - CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0, "num_samples needs to be positive."); - samples_per_buffer_ = samples_per_buffer_ > num_samples_ ? 
num_samples_ : samples_per_buffer_; - return Status::OK(); } -Status RandomSampler::Reset() { +Status RandomSampler::ResetSampler() { CHECK_FAIL_RETURN_UNEXPECTED(next_id_ == num_samples_, "ERROR Reset() called early/late"); next_id_ = 0; @@ -109,7 +106,7 @@ Status RandomSampler::Reset() { } if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->Reset()); + RETURN_IF_NOT_OK(child_[0]->ResetSampler()); } return Status::OK(); @@ -119,7 +116,6 @@ void RandomSampler::Print(std::ostream &out, bool show_all) const { out << "(sampler): RandomSampler\n"; if (show_all) { - out << "user_num_samples_: " << user_num_samples_ << '\n'; out << "num_samples_: " << num_samples_ << '\n'; out << "next_id_: " << next_id_ << '\n'; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.h b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.h index 352751dbb8..b1c54eb98c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.h @@ -27,11 +27,11 @@ namespace dataset { class RandomSampler : public Sampler { public: // Constructor + // @param int64_t num_samples - number samples to draw // @param bool replacement - put he id back / or not after a sample - // @param int64_t numSamples - number samples to draw - // @param int64_t samplesPerBuffer - Num of Sampler Ids to fetch via 1 GetNextBuffer call - explicit RandomSampler(bool replacement = false, bool reshuffle_each_epoch = true, - int64_t num_samples = std::numeric_limits::max(), + // @param reshuffle_each_epoch - T/F to reshuffle after epoch + // @param int64_t samples_per_buffer - Num of Sampler Ids to fetch via 1 GetNextBuffer call + explicit RandomSampler(int64_t num_samples, bool replacement, bool reshuffle_each_epoch, int64_t samples_per_buffer = std::numeric_limits::max()); // Destructor. 
@@ -41,21 +41,20 @@ class RandomSampler : public Sampler { // @param std::unique_ptr pBuffer - Buffer to be returned to StorageOp // @param int32_t workerId - not meant to be used // @return - The error code return - Status GetNextBuffer(std::unique_ptr *out_buffer) override; + Status GetNextSample(std::unique_ptr *out_buffer) override; // meant to be called by base class or python Status InitSampler() override; // for next epoch of sampleIds // @return - The error code return - Status Reset() override; + Status ResetSampler() override; virtual void Print(std::ostream &out, bool show_all) const; private: uint32_t seed_; bool replacement_; - int64_t user_num_samples_; std::vector shuffled_ids_; // only used for NO REPLACEMENT int64_t next_id_; std::mt19937 rnd_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc index 600d8c576b..3f737c167c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc @@ -19,8 +19,21 @@ namespace mindspore { namespace dataset { -Sampler::Sampler(int64_t samples_per_buffer) - : DatasetOp(0), num_rows_(0), num_samples_(0), samples_per_buffer_(samples_per_buffer), col_desc_(nullptr) {} +Status RandomAccessOp::GetNumRowsInDataset(int64_t *num) const { + // The sampler base class itself does not compute it's own num_rows_ value. + // Instead, this value is computed by the derived leaf op during it's own initialization + // after it has interacted with it's storage layers. + // Here, it is just a getter method to return the value. However, it is invalid if there is + // not a value set for this count, so generate a failure if that is the case. 
+ if (num == nullptr || num_rows_ == 0) { + RETURN_STATUS_UNEXPECTED("RandomAccessOp has not computed it's num rows yet."); + } + (*num) = num_rows_; + return Status::OK(); +} + +Sampler::Sampler(int64_t num_samples, int64_t samples_per_buffer) + : num_rows_(0), num_samples_(num_samples), samples_per_buffer_(samples_per_buffer), col_desc_(nullptr) {} Status Sampler::HandshakeRandomAccessOp(const RandomAccessOp *op) { std::shared_ptr child_sampler; @@ -36,10 +49,10 @@ Status Sampler::HandshakeRandomAccessOp(const RandomAccessOp *op) { } CHECK_FAIL_RETURN_UNEXPECTED(op != nullptr, "RandomAccessOp is nullptr\n"); - RETURN_IF_NOT_OK(op->GetNumSamples(&num_samples_)); + + // If there's a child sampler, set the row count to be it's sample count if (HasChildSampler()) { - int64_t child_num_samples = child_sampler->num_samples(); - num_rows_ = child_num_samples; + num_rows_ = child_sampler->num_samples_; } else { RETURN_IF_NOT_OK(op->GetNumRowsInDataset(&num_rows_)); } @@ -80,7 +93,7 @@ Status Sampler::GetAllIdsThenReset(py::array *data) { std::shared_ptr sample_ids; // A call to derived class to get sample ids wrapped inside a buffer - RETURN_IF_NOT_OK(GetNextBuffer(&db)); + RETURN_IF_NOT_OK(GetNextSample(&db)); // Get the only tensor inside the buffer that contains the actual SampleIds for the entire epoch RETURN_IF_NOT_OK(db->GetTensor(&sample_ids, 0, 0)); // check this buffer is not a ctrl buffer @@ -97,15 +110,15 @@ Status Sampler::GetAllIdsThenReset(py::array *data) { } } // perform error checking! 
Next buffer supposed to be EOE since last one already contains all ids for current epoch - RETURN_IF_NOT_OK(GetNextBuffer(&db)); + RETURN_IF_NOT_OK(GetNextSample(&db)); CHECK_FAIL_RETURN_UNEXPECTED(db->eoe(), "ERROR Non EOE received"); // Reset Sampler since this is the end of the epoch - RETURN_IF_NOT_OK(Reset()); + RETURN_IF_NOT_OK(ResetSampler()); return Status::OK(); } Status Sampler::SetNumSamples(int64_t num_samples) { - CHECK_FAIL_RETURN_UNEXPECTED(num_samples > 0, "num_samples is negative or 0"); + CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "num_samples is negative"); num_samples_ = num_samples; return Status::OK(); } @@ -116,7 +129,7 @@ Status Sampler::SetNumRowsInDataset(int64_t num_rows) { return Status::OK(); } -Status Sampler::AddChild(std::shared_ptr child) { +Status Sampler::AddChild(std::shared_ptr child) { if (child == nullptr) { return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h index 936a80bb38..34c3cb7935 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h @@ -33,25 +33,12 @@ namespace dataset { // must inherit from if those leaf operator wish to support sampling. class RandomAccessOp { public: - // Sampler get numRows from StorageOp - // @param int64_t num - return number of rows, normally num of samples - // @return - The error code return - virtual Status GetNumSamples(int64_t *num_samples) const { - // CI complains num_samples not used if the following line is not added - CHECK_FAIL_RETURN_UNEXPECTED(num_samples != nullptr, "num_samples == nullptr"); - RETURN_STATUS_UNEXPECTED("function GetNumSamples needs to overridden to support this sampler"); - } - - // Sampler get number of rows in the dataset! 
+ // Sampler get number of rows in the dataset // @param int64_t num - return number of rows for this dataset // @return - The error code return - virtual Status GetNumRowsInDataset(int64_t *num_rows) const { - // CI complains num_rows not used if the following line is not added - CHECK_FAIL_RETURN_UNEXPECTED(num_rows != nullptr, "num_rows == nullptr"); - RETURN_STATUS_UNEXPECTED("function GetNumRowsInDataset needs to overridden to support this sampler"); - } + Status GetNumRowsInDataset(int64_t *num_rows) const; - // sampler gets label , imageIds from storageOp, this function is unique to PK + // sampler gets label , imageIds from corresponding Dataset Op, this function is unique to PK // @param std::map> * map // @return - The error code return virtual Status GetClassIds(std::map> *map) const { @@ -60,12 +47,22 @@ class RandomAccessOp { // default destructor virtual ~RandomAccessOp() = default; + + protected: + // The amount of rows in the dataset itself. This is the before-sampling value, the + // total count of rows. A sampler may choose to sample less than this amount. + int64_t num_rows_; }; -class Sampler : public DatasetOp { +class Sampler { public: + // Constructor + // @param int64_t num_samples: the user-requested number of samples ids to generate. A value of 0 + // indicates that the sampler should produce the complete set of ids. 
// @param int64_t samplesPerBuffer: Num of Sampler Ids to fetch via 1 GetNextBuffer call - explicit Sampler(int64_t samples_per_buffer = std::numeric_limits::max()); + explicit Sampler(int64_t num_samples, int64_t samples_per_buffer); + + Sampler(const Sampler &s) : Sampler(s.num_samples_, s.samples_per_buffer_) {} // default destructor ~Sampler() = default; @@ -75,51 +72,38 @@ class Sampler : public DatasetOp { // @param std::unique_ptr pBuffer - Buffer to be returned to StorageOp // @param int32_t workerId - not meant to be used // @return - The error code return - Status GetNextBuffer(std::unique_ptr *out_buffer) override = 0; + virtual Status GetNextSample(std::unique_ptr *out_buffer) = 0; // return all ids in one epoch as a numpy array, then call reset Status GetAllIdsThenReset(py::array *data); // for next epoch of sampleIds // @return - The error code return - Status Reset() override = 0; + virtual Status ResetSampler() = 0; - // setter function for num_rows_ - Status SetNumRowsInDataset(int64_t num_rows); - - // setter function for num_samples_ - Status SetNumSamples(int64_t num_samples); - - int64_t num_samples() { return num_samples_; } - - // first handshake between StorageOp and Sampler. This func will call getNumRows and getNumSamples - // @param op - StorageOp pointer, pass in so Sampler can call getNumSamples() and get ClassIds() + // first handshake between leaf source op and Sampler. This func will determine the amount of data + // in the dataset that we can sample from. 
+ // @param op - leaf op pointer, pass in so Sampler can ask it about how much data there is // @return virtual Status HandshakeRandomAccessOp(const RandomAccessOp *op); // initialize sampler and perform checks on certain vars virtual Status InitSampler() { return Status::OK(); } - // Not meant to be called - // @return - int32_t num_workers() const final { return 0; } - - // Not meant to be called - // @return - int32_t num_consumers() const final { return 0; } - - // Not meant to be called - // @return - int32_t num_producers() const final { return 0; } + // setter for num samples + // @param num_samples - the number of samples to assign. + // @return status error code + Status SetNumSamples(int64_t num_samples); - // Not meant to be called! - // @return - The error code return - Status operator()() final { RETURN_STATUS_UNEXPECTED("Functor not supported in Sampler"); } + // setter for num or records in the dataset + // @param num_rows - the number of records + // @return status error code + Status SetNumRowsInDataset(int64_t num_rows); // Adds a sampler to become our child. // @param std::shared_ptr - The sampler to add as a child. // @return - The error code returned. - Status AddChild(std::shared_ptr child); + Status AddChild(std::shared_ptr child); // A helper function to create a int64_t 1-D Tensor specifically used to hold sampleIds for Sampler // @param std::shared_ptr* sampleIds @@ -127,8 +111,16 @@ class Sampler : public DatasetOp { // @return - The error code returned. 
Status CreateSamplerTensor(std::shared_ptr *sample_ids, int64_t num_elements); - void Print(std::ostream &out, bool show_all) const override; + // A print method typically used for debugging + // @param out - The output stream to write output to + // @param show_all - A bool to control if you want to show all info or just a summary + virtual void Print(std::ostream &out, bool show_all) const; + // << Stream output operator overload + // @notes This allows you to write the debug print info using stream operators + // @param out - reference to the output stream being overloaded + // @param sampler - reference to teh sampler to print + // @return - the output stream must be returned friend std::ostream &operator<<(std::ostream &out, const Sampler &sampler) { sampler.Print(out, false); return out; @@ -151,12 +143,14 @@ class Sampler : public DatasetOp { // output. Otherwise, num_rows_ is the number of rows in the dataset. int64_t num_rows_; - // Number of ids this sampler will return. + // The user may want to sample less than the full amount of data. num_samples_ reduces the number + // of id's returned as request by the user. Derived classes will choose how to sample the smaller + // amount. int64_t num_samples_; - // The max number of ids a DataBuffer returned by this sampler will contain. 
int64_t samples_per_buffer_; std::unique_ptr col_desc_; + std::vector> child_; // Child nodes std::unique_ptr child_ids_; }; } // namespace dataset diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc index 789f232e1e..f0ff6a2c02 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc @@ -20,34 +20,42 @@ namespace mindspore { namespace dataset { -SequentialSampler::SequentialSampler(int64_t samples_per_buffer) : Sampler(samples_per_buffer), next_id_(0) {} +SequentialSampler::SequentialSampler(int64_t num_samples, int64_t start_index, int64_t samples_per_buffer) + : Sampler(num_samples, samples_per_buffer), start_index_(start_index), current_id_(start_index), id_count_(0) {} -Status SequentialSampler::GetNextBuffer(std::unique_ptr *out_buffer) { - if (next_id_ > num_samples_) { - RETURN_STATUS_UNEXPECTED("Sequential Sampler Internal Error"); - } else if (next_id_ == num_samples_) { +Status SequentialSampler::GetNextSample(std::unique_ptr *out_buffer) { + if (id_count_ > num_samples_) { + RETURN_STATUS_UNEXPECTED("SequentialSampler Internal Error"); + } else if (id_count_ == num_samples_) { (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); } else { if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&child_ids_)); + RETURN_IF_NOT_OK(child_[0]->GetNextSample(&child_ids_)); } - (*out_buffer) = std::make_unique(next_id_, DataBuffer::kDeBFlagNone); + (*out_buffer) = std::make_unique(current_id_, DataBuffer::kDeBFlagNone); std::shared_ptr sampleIds; - int64_t lastId = (samples_per_buffer_ + next_id_ > num_samples_) ? 
num_samples_ : samples_per_buffer_ + next_id_; - RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, lastId - next_id_)); - int64_t *idPtr = reinterpret_cast(sampleIds->GetMutableBuffer()); - while (next_id_ < lastId) { - int64_t sampled_id = next_id_; + + // Compute how many ids are left to pack, and pack this amount into a new buffer. Respect the setting for + // samples per buffer though. + int64_t remaining_ids = num_samples_ - id_count_; + int64_t num_elements = std::min(remaining_ids, samples_per_buffer_); + + RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, num_elements)); + auto idPtr = sampleIds->begin(); + for (int64_t i = 0; i < num_elements; i++) { + int64_t sampled_id = current_id_; if (HasChildSampler()) { RETURN_IF_NOT_OK(GetAssociatedChildId(&sampled_id, sampled_id)); } *idPtr = sampled_id; - next_id_++; + current_id_++; // Move the current id to the next one in the sequence idPtr++; } + id_count_ += num_elements; // Count the packed ids towards our overall sample count + TensorRow row(1, sampleIds); (*out_buffer)->set_tensor_table(std::make_unique(1, row)); } @@ -55,22 +63,27 @@ Status SequentialSampler::GetNextBuffer(std::unique_ptr *out_buffer) } Status SequentialSampler::InitSampler() { - num_samples_ = (num_samples_ <= 0) ? num_rows_ : num_samples_; // if num_samples < 0, try if num_rows is set - if (HasChildSampler()) { - num_samples_ = std::min(num_samples_, num_rows_); + CHECK_FAIL_RETURN_UNEXPECTED(start_index_ >= 0, "start_index < 0\n"); + CHECK_FAIL_RETURN_UNEXPECTED(start_index_ < num_rows_, "start_index >= num_rows\n"); + CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ >= 0, "num_samples < 0\n"); + // Adjust the num_samples count based on the range of ids we are sequencing. If num_samples is 0, we sample + // the entire set. If it's non-zero, we will implicitly cap the amount sampled based on available data. 
+ int64_t available_row_count = num_rows_ - start_index_; + if (num_samples_ == 0 || num_samples_ > available_row_count) { + num_samples_ = available_row_count; } - CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0 && samples_per_buffer_ > 0, "Fail to init Sequential Sampler"); samples_per_buffer_ = samples_per_buffer_ > num_samples_ ? num_samples_ : samples_per_buffer_; return Status::OK(); } -Status SequentialSampler::Reset() { - CHECK_FAIL_RETURN_UNEXPECTED(next_id_ == num_samples_, "ERROR Reset() called early/late"); - next_id_ = 0; +Status SequentialSampler::ResetSampler() { + CHECK_FAIL_RETURN_UNEXPECTED(id_count_ == num_samples_, "ERROR Reset() called early/late"); + current_id_ = start_index_; + id_count_ = 0; if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->Reset()); + RETURN_IF_NOT_OK(child_[0]->ResetSampler()); } return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.h b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.h index 4e195d75db..2cb7a9ff8d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.h @@ -26,8 +26,12 @@ namespace dataset { class SequentialSampler : public Sampler { public: // Constructor + // @param num_samples - The number of samples to draw. A value of 0 indicates the sampler should produce the + // full amount of ids from the dataset + // @param start_index - The starting index value // @param int64_t samplesPerBuffer - Num of Sampler Ids to fetch via 1 GetNextBuffer call - explicit SequentialSampler(int64_t samples_per_buffer = std::numeric_limits::max()); + explicit SequentialSampler(int64_t num_samples, int64_t start_index, + int64_t samples_per_buffer = std::numeric_limits::max()); // Destructor. 
~SequentialSampler() = default; @@ -37,18 +41,20 @@ class SequentialSampler : public Sampler { // for next epoch of sampleIds // @return - The error code return - Status Reset() override; + Status ResetSampler() override; // Op calls this to get next Buffer that contains all the sampleIds - // @param std::unique_ptr pBuffer - Buffer to be returned to StorageOp + // @param std::unique_ptr pBuffer - Buffer to be returned to corresponding Dataset Op // @param int32_t workerId - not meant to be used // @return - The error code return - Status GetNextBuffer(std::unique_ptr *out_buffer) override; + Status GetNextSample(std::unique_ptr *out_buffer) override; void Print(std::ostream &out, bool show_all) const override; private: - int64_t next_id_; + int64_t current_id_; // The id sequencer. Each new id increments from this + int64_t start_index_; // The starting id. current_id_ begins from here. + int64_t id_count_; // An internal counter that tracks how many ids have been produced }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc index ca1160299a..54491889fc 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc @@ -27,29 +27,35 @@ namespace mindspore { namespace dataset { // Constructor. -SubsetRandomSampler::SubsetRandomSampler(const std::vector &indices, int64_t samples_per_buffer) - : Sampler(samples_per_buffer), indices_(indices), sample_id_(0), buffer_id_(0) {} +SubsetRandomSampler::SubsetRandomSampler(int64_t num_samples, const std::vector &indices, + int64_t samples_per_buffer) + : Sampler(num_samples, samples_per_buffer), indices_(indices), sample_id_(0), buffer_id_(0) {} // Initialized this Sampler. 
Status SubsetRandomSampler::InitSampler() { CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "num_rows <= 0\n"); - num_samples_ = indices_.size(); - + // Special value of 0 for num_samples means that the user wants to sample the entire set of data. + // In this case, the id's are provided by the user. Cap the num_samples on the number of id's given. + if (num_samples_ == 0 || num_samples_ > static_cast(indices_.size())) { + num_samples_ = static_cast(indices_.size()); + } // Initialize random generator with seed from config manager rand_gen_.seed(GetSeed()); - if (static_cast(samples_per_buffer_) > indices_.size()) { - samples_per_buffer_ = static_cast(indices_.size()); + if (samples_per_buffer_ > num_samples_) { + samples_per_buffer_ = num_samples_; } + // num_samples_ could be smaller than the total number of input id's. + // We will shuffle the full set of id's, but only select the first num_samples_ of them later. std::shuffle(indices_.begin(), indices_.end(), rand_gen_); return Status::OK(); } // Reset the internal variable to the initial state. -Status SubsetRandomSampler::Reset() { +Status SubsetRandomSampler::ResetSampler() { // Reset the internal counters. sample_id_ = 0; buffer_id_ = 0; @@ -59,20 +65,20 @@ Status SubsetRandomSampler::Reset() { std::shuffle(indices_.begin(), indices_.end(), rand_gen_); if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->Reset()); + RETURN_IF_NOT_OK(child_[0]->ResetSampler()); } return Status::OK(); } // Get the sample ids. 
-Status SubsetRandomSampler::GetNextBuffer(std::unique_ptr *out_buffer) { +Status SubsetRandomSampler::GetNextSample(std::unique_ptr *out_buffer) { // All samples have been drawn - if (sample_id_ == indices_.size()) { + if (sample_id_ == num_samples_) { (*out_buffer) = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagEOE); } else { if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&child_ids_)); + RETURN_IF_NOT_OK(child_[0]->GetNextSample(&child_ids_)); } (*out_buffer) = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); @@ -80,15 +86,15 @@ Status SubsetRandomSampler::GetNextBuffer(std::unique_ptr *out_buffe int64_t last_id = sample_id_ + samples_per_buffer_; // Handling the return all samples at once, and when last draw is not a full batch. - if (static_cast(last_id) > indices_.size()) { - last_id = indices_.size(); + if (last_id > num_samples_) { + last_id = num_samples_; } // Allocate tensor RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_)); // Initialize tensor - int64_t *id_ptr = reinterpret_cast(outputIds->GetMutableBuffer()); + auto id_ptr = outputIds->begin(); while (sample_id_ < last_id) { if (indices_[sample_id_] >= num_rows_) { std::string err_msg = diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.h b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.h index 1f4c155748..980ffe578a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.h @@ -28,10 +28,11 @@ namespace dataset { class SubsetRandomSampler : public Sampler { public: // Constructor. + // @param num_samples The number of samples to draw. 0 for the full amount. // @param indices List of indices from where we will randomly draw samples. // @param samples_per_buffer The number of ids we draw on each call to GetNextBuffer(). 
// When samplesPerBuffer=0, GetNextBuffer() will draw all the sample ids and return them at once. - explicit SubsetRandomSampler(const std::vector &indices, + explicit SubsetRandomSampler(int64_t num_samples, const std::vector &indices, std::int64_t samples_per_buffer = std::numeric_limits::max()); // Destructor. @@ -43,12 +44,12 @@ class SubsetRandomSampler : public Sampler { // Reset the internal variable to the initial state and reshuffle the indices. // @return Status - Status Reset() override; + Status ResetSampler() override; // Get the sample ids. // @param[out] out_buffer The address of a unique_ptr to DataBuffer where the sample ids will be placed. // @note the sample ids (int64_t) will be placed in one Tensor and be placed into pBuffer. - Status GetNextBuffer(std::unique_ptr *out_buffer) override; + Status GetNextSample(std::unique_ptr *out_buffer) override; private: // A list of indices (already randomized in constructor). diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_sampler.cc deleted file mode 100644 index 0ae7a7d503..0000000000 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_sampler.cc +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "dataset/engine/datasetops/source/sampler/subset_sampler.h" - -#include -#include - -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" - -namespace mindspore { -namespace dataset { -// Constructor. -SubsetSampler::SubsetSampler(int64_t start_index, int64_t subset_size) - : Sampler(subset_size), start_index_(start_index), subset_size_(subset_size), current_id_(0) {} - -Status SubsetSampler::InitSampler() { - CHECK_FAIL_RETURN_UNEXPECTED(subset_size_ > 0, "subset_size <= 0\n"); - CHECK_FAIL_RETURN_UNEXPECTED(start_index_ >= 0, "start_index < 0\n"); - CHECK_FAIL_RETURN_UNEXPECTED(start_index_ < num_rows_, "start_index >= num_rows\n"); - CHECK_FAIL_RETURN_UNEXPECTED(start_index_ + subset_size_ - 1 < num_rows_, "Final index out of bounds.\n"); - - num_samples_ = subset_size_; - - return Status::OK(); -} - -Status SubsetSampler::Reset() { - current_id_ = 0; - - if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->Reset()); - } - - return Status::OK(); -} - -Status SubsetSampler::GetNextBuffer(std::unique_ptr *out_buffer) { - if (current_id_ > subset_size_) { - RETURN_STATUS_UNEXPECTED("SubsetSampler Internal Error"); - } else if (current_id_ == subset_size_) { - (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); - } else { - if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&child_ids_)); - } - - (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagNone); - std::shared_ptr sampled_ids; - RETURN_IF_NOT_OK(CreateSamplerTensor(&sampled_ids, subset_size_)); - - int64_t *sampled_ids_start_addr = reinterpret_cast(sampled_ids->GetMutableBuffer()); - - while (current_id_ < subset_size_) { - int64_t sampled_id = start_index_ + current_id_; - if (HasChildSampler()) { - RETURN_IF_NOT_OK(GetAssociatedChildId(&sampled_id, sampled_id)); - } - - *(sampled_ids_start_addr + current_id_) = sampled_id; - current_id_++; - } - - TensorRow sampled_ids_row(1, sampled_ids); - 
(*out_buffer)->set_tensor_table(std::make_unique(1, sampled_ids_row)); - } - - return Status::OK(); -} - -} // namespace dataset -} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_sampler.h b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_sampler.h deleted file mode 100644 index 5e8774f673..0000000000 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_sampler.h +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SUBSET_SAMPLER_H_ -#define DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SUBSET_SAMPLER_H_ - -#include -#include - -#include "dataset/engine/datasetops/source/sampler/sampler.h" - -namespace mindspore { -namespace dataset { - -class SubsetSampler : public Sampler { - public: - // Constructor. - // @param start_index The index we start sampling from. - explicit SubsetSampler(int64_t start_index, int64_t subset_size); - - // Destructor. - ~SubsetSampler() = default; - - // Initialize the sampler. - // @return Status - Status InitSampler() override; - - // Reset the internal variable to the initial state and reshuffle the indices. - // @return Status - Status Reset() override; - - // Get the sample ids. - // @param[out] out_buffer The address of a unique_ptr to DataBuffer where the sample ids will be placed. 
- // @note the sample ids (int64_t) will be placed in one Tensor. - Status GetNextBuffer(std::unique_ptr *out_buffer) override; - - private: - int64_t start_index_; - int64_t subset_size_; - int64_t current_id_; -}; - -} // namespace dataset -} // namespace mindspore - -#endif // DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SUBSET_SAMPLER_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc index 5027dcdd67..759af99352 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc @@ -27,25 +27,28 @@ namespace mindspore { namespace dataset { // Constructor. -WeightedRandomSampler::WeightedRandomSampler(const std::vector &weights, int64_t num_samples, bool replacement, +WeightedRandomSampler::WeightedRandomSampler(int64_t num_samples, const std::vector &weights, bool replacement, int64_t samples_per_buffer) - : Sampler(samples_per_buffer), + : Sampler(num_samples, samples_per_buffer), weights_(weights), replacement_(replacement), sample_id_(0), - buffer_id_(0), - user_num_samples_(num_samples) {} + buffer_id_(0) {} // Initialized this Sampler. Status WeightedRandomSampler::InitSampler() { - CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0 && user_num_samples_, "num_samples & num_rows need to be positive"); + // Special value of 0 for num_samples means that the user wants to sample the entire set of data. + // If the user asked to sample more rows than exists in the dataset, adjust the num_samples accordingly. 
+ if (num_samples_ == 0 || num_samples_ > num_rows_) { + num_samples_ = num_rows_; + } + CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0 && num_samples_, "num_samples & num_rows need to be positive"); CHECK_FAIL_RETURN_UNEXPECTED(samples_per_buffer_ > 0, "samples_per_buffer<=0\n"); - num_samples_ = user_num_samples_; // Initialize random generator with seed from config manager rand_gen_.seed(GetSeed()); - samples_per_buffer_ = (samples_per_buffer_ > user_num_samples_) ? user_num_samples_ : samples_per_buffer_; + samples_per_buffer_ = (samples_per_buffer_ > num_samples_) ? num_samples_ : samples_per_buffer_; if (!replacement_) { exp_dist_ = std::make_unique>(1); @@ -67,14 +70,14 @@ void WeightedRandomSampler::InitOnePassSampling() { } // Partial sort the first `numSamples` elements. - std::partial_sort(val_idx.begin(), val_idx.begin() + user_num_samples_, val_idx.end()); - for (int64_t i = 0; i < user_num_samples_; i++) { + std::partial_sort(val_idx.begin(), val_idx.begin() + num_samples_, val_idx.end()); + for (int64_t i = 0; i < num_samples_; i++) { onepass_ids_.push_back(val_idx[i].second); } } // Reset the internal variable to the initial state and reshuffle the indices. -Status WeightedRandomSampler::Reset() { +Status WeightedRandomSampler::ResetSampler() { sample_id_ = 0; buffer_id_ = 0; rand_gen_.seed(GetSeed()); @@ -85,28 +88,28 @@ Status WeightedRandomSampler::Reset() { } if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->Reset()); + RETURN_IF_NOT_OK(child_[0]->ResetSampler()); } return Status::OK(); } // Get the sample ids. -Status WeightedRandomSampler::GetNextBuffer(std::unique_ptr *out_buffer) { +Status WeightedRandomSampler::GetNextSample(std::unique_ptr *out_buffer) { if (weights_.size() > static_cast(num_rows_)) { return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "number of samples weights is more than num of rows. 
Might generate id out of bound OR other errors"); } - if (!replacement_ && (weights_.size() < static_cast(user_num_samples_))) { + if (!replacement_ && (weights_.size() < static_cast(num_samples_))) { RETURN_STATUS_UNEXPECTED("Without replacement, sample weights less than numSamples"); } - if (sample_id_ == user_num_samples_) { + if (sample_id_ == num_samples_) { (*out_buffer) = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagEOE); } else { if (HasChildSampler()) { - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&child_ids_)); + RETURN_IF_NOT_OK(child_[0]->GetNextSample(&child_ids_)); } (*out_buffer) = std::make_unique(buffer_id_++, DataBuffer::kDeBFlagNone); @@ -114,15 +117,15 @@ Status WeightedRandomSampler::GetNextBuffer(std::unique_ptr *out_buf int64_t last_id = sample_id_ + samples_per_buffer_; // Handling the return all samples at once, and when last draw is not a full batch. - if (last_id > user_num_samples_) { - last_id = user_num_samples_; + if (last_id > num_samples_) { + last_id = num_samples_; } // Allocate tensor. RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_)); // Initialize tensor. - int64_t *id_ptr = reinterpret_cast(outputIds->GetMutableBuffer()); + auto id_ptr = outputIds->begin(); // Assign the data to tensor element. while (sample_id_ < last_id) { int64_t genId; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h index 5381bb64b0..257501250d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h @@ -29,12 +29,12 @@ namespace dataset { class WeightedRandomSampler : public Sampler { public: // Constructor. - // @param weights A lift of sample weights. // @param num_samples Number of samples to be drawn. + // @param weights A lift of sample weights. 
// @param replacement Determine if samples are drawn with/without replacement. // @param samples_per_buffer The number of ids we draw on each call to GetNextBuffer(). // When samplesPerBuffer=0, GetNextBuffer() will draw all the sample ids and return them at once. - WeightedRandomSampler(const std::vector &weights, int64_t num_samples, bool replacement = true, + WeightedRandomSampler(int64_t num_samples, const std::vector &weights, bool replacement, int64_t samples_per_buffer = std::numeric_limits::max()); // Destructor. @@ -46,12 +46,12 @@ class WeightedRandomSampler : public Sampler { Status InitSampler() override; // Reset the internal variable to the initial state and reshuffle the indices. - Status Reset() override; + Status ResetSampler() override; // Get the sample ids. // @param[out] out_buffer The address of a unique_ptr to DataBuffer where the sample ids will be placed. // @note the sample ids (int64_t) will be placed in one Tensor and be placed into pBuffer. - Status GetNextBuffer(std::unique_ptr *out_buffer) override; + Status GetNextSample(std::unique_ptr *out_buffer) override; private: // A list of weights for each sample. @@ -69,9 +69,6 @@ class WeightedRandomSampler : public Sampler { // Random engine and device std::mt19937 rand_gen_; - // num_samples from user - int64_t user_num_samples_; - // Discrete distribution for generating weighted random numbers with replacement. std::unique_ptr> discrete_dist_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc deleted file mode 100644 index 7f081af2b7..0000000000 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc +++ /dev/null @@ -1,190 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#define MAX_INTEGER_INT32 2147483647 - -#include -#include -#include -#include -#include "dataset/core/constants.h" -#include "dataset/engine/datasetops/source/storage_client.h" -#include "dataset/engine/datasetops/source/storage_op.h" -#include "dataset/engine/datasetops/source/tf_client.h" -#include "dataset/util/status.h" - -namespace mindspore { -namespace dataset { -// Name: Constructor -// Description: -StorageClient::StorageClient(std::unique_ptr schema, // In: The schema for this storage client. - StorageOp *store_op) // In: The StorageOp that's using this client - : data_schema_(std::move(schema)), num_rows_in_dataset_(0), storage_op_(store_op), num_classes_(0) {} - -// Name: Print() -// Description: A function that prints info about the StorageClient -// In: The output stream to print to -void StorageClient::Print(std::ostream &out) const { - // not much to show here folks! 
- // out << "Storage client:\n"; -} - -// This is a local-only static function to drive the switch statement for creating -// the storage client (not a static member function) -static Status CreateStorageClientSwitch( - std::unique_ptr schema, // In: The schema to set into the client - StorageOp *store_op, // In: The StorageOp we are operating on - std::shared_ptr *out_client) { // Out: the created storage client - switch (schema->dataset_type()) { - case DatasetType::kArrow: { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "Storage client not implemented yet for arrow dataset type."); - } - case DatasetType::kTf: { - // Construct the derived class TFClient, stored as base class StorageClient - store_op->set_rows_per_buffer(32); - *out_client = std::make_unique(std::move(schema), store_op); - break; - } - case DatasetType::kUnknown: - default: { - RETURN_STATUS_UNEXPECTED("Invalid dataset type."); - } - } - if (*out_client) { - RETURN_IF_NOT_OK((*out_client)->Init()); - } - return Status::OK(); -} - -// Name: CreateStorageClient() -// Description: A factory method to create the derived storage client. -// Every dataset has a required field for the dataset type in a config -// file. This type will determine the child class to return for the -// type of storage client. It also creates the schema and sticks it -// into the cache. -Status StorageClient::CreateStorageClient( - StorageOp *store_op, // In: A backpointer to the owning cache for this client. - std::string dataset_schema_path, // In: The path to the schema - std::shared_ptr *out_client) { // Out: the created storage client - // Make a new schema first. This only assigns the dataset type. It does not - // create the columns yet. 
- auto new_schema = std::make_unique(); - RETURN_IF_NOT_OK(new_schema->LoadDatasetType(dataset_schema_path)); - RETURN_IF_NOT_OK(CreateStorageClientSwitch(std::move(new_schema), store_op, out_client)); - return Status::OK(); -} - -// Name: CreateStorageClient() -// Description: A factory method to create the derived storage client. -// This creator is a user-override for the schema properties where -// the user has input the layout of the data (typically used in testcases) -Status StorageClient::CreateStorageClient( - StorageOp *store_op, // In: A backpointer to the owning cache for this client. - DatasetType in_type, // In: The type of dataset - std::shared_ptr *out_client) { // Out: the created storage client - // The dataset type is passed in by the user. Create an empty schema with only - // only the dataset type filled in and then create the client with it. - auto new_schema = std::make_unique(); - new_schema->set_dataset_type(in_type); - RETURN_IF_NOT_OK(CreateStorageClientSwitch(std::move(new_schema), store_op, out_client)); - return Status::OK(); -} - -// Name: LoadDatasetLayout() -// Description: There are 2 ways to define the properties of the data in the storage -// layer: LoadDatasetLayout() and AssignDatasetLayout(). -// LoadDatasetLayout() will parse the json config file that comes with -// the dataset. -Status StorageClient::LoadDatasetLayout() { - // Access the json file to populate our schema, assume the json file is accessible - // locally. - RETURN_IF_NOT_OK(data_schema_->LoadSchemaFile(storage_op_->schema_file(), storage_op_->columns_to_load())); - - // The number of rows in the schema file is an optional config. For example, - // maybe the derived storage client will know how to determine the total number - // of rows a different way rather than having it in the schema config json file. - // Thus, mNumRowsInDataset can still be zero and force the derived class override - // to determine it another way. 
- uint32_t num_rows = 0; - RETURN_IF_NOT_OK(this->numRowsFromFile(num_rows)); - CHECK_FAIL_RETURN_UNEXPECTED(num_rows <= MAX_INTEGER_INT32, "numRows exceeds the boundary numRows>2147483647"); - if (num_rows_in_dataset_ == 0 || num_rows < num_rows_in_dataset_) { - num_rows_in_dataset_ = num_rows; - } - - return Status::OK(); -} - -// Name: AssignDatasetLayout() -// Description: There are 2 ways to define the properties of the data in the storage -// layer: LoadDatasetLayout() and AssignDatasetLayout(). -// AssignDatasetLayout() will take input from the caller and assign that -// info into the storage client. -Status StorageClient::AssignDatasetLayout(uint32_t num_rows, // In: The number of rows in the dataset - const DataSchema &schema) { // In: The schema for the dataset - // Since this is just an assignment into the storage client, you probably won't need - // to override this one in a derived class. First some sanity checks - CHECK_FAIL_RETURN_UNEXPECTED(data_schema_->dataset_type() == schema.dataset_type(), - "Assigning a schema into StorageClient with mismatched dataset types!"); - CHECK_FAIL_RETURN_UNEXPECTED(data_schema_->NumColumns() == 0, - "Assigning a schema into StorageClient that already has non-empty schema!"); - - // The current schema was just an empty one with only the dataset field populated. - // Let's copy construct a new one that will be a copy of the input schema (releasing the old - // one) and then set the number of rows that the user requested. - data_schema_ = std::make_unique(schema); - CHECK_FAIL_RETURN_UNEXPECTED(num_rows <= MAX_INTEGER_INT32, "numRows exceeds the boundary numRows>2147483647"); - num_rows_in_dataset_ = num_rows; - - return Status::OK(); -} - -// Name: numRowsFromFile() -// Description: Reads the schema json file to see if the optional numRows field has -// been set and returns it. 
-Status StorageClient::numRowsFromFile(uint32_t &num_rows) const { - std::string schemaFile = storage_op_->schema_file(); - try { - std::ifstream in(schemaFile); - nlohmann::json js; - in >> js; - if (js.find("numRows") == js.end()) { - num_rows = MAX_INTEGER_INT32; - } else { - num_rows = js.value("numRows", 0); - } - if (num_rows == 0) { - std::string err_msg = - "Storage client has not properly done dataset " - "handshake to initialize schema and number of rows."; - RETURN_STATUS_UNEXPECTED(err_msg); - } - } - // Catch any exception and rethrow it as our own - catch (const std::exception &err) { - std::ostringstream ss; - ss << "Schema file failed to load:\n" << err.what(); - std::string err_msg = ss.str(); - RETURN_STATUS_UNEXPECTED(err_msg); - } - return Status::OK(); -} - -// Get'r function -DataSchema *StorageClient::schema() const { return data_schema_.get(); } -} // namespace dataset -} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.h b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.h deleted file mode 100644 index 6198f4233f..0000000000 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.h +++ /dev/null @@ -1,128 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef DATASET_ENGINE_DATASETOPS_SOURCE_STORAGE_CLIENT_H_ -#define DATASET_ENGINE_DATASETOPS_SOURCE_STORAGE_CLIENT_H_ - -#include -#include -#include -#include -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/source/storage_op.h" -#include "dataset/util/status.h" - -namespace mindspore { -namespace dataset { -// The Storage Client is the interface and base class that the StorageOp -// will use to perform any interactions with the storage layer. -// The different types of datasets will have different derived classes -// under that storage client super class. -class StorageClient { - public: - // Name: Constructor - // Description: - StorageClient(std::unique_ptr schema, // In: The schema for this storage client. - StorageOp *store_op); // In: The StorageOp that's using this client - - // Destructor - virtual ~StorageClient() { storage_op_ = nullptr; } - - virtual Status Init() { return Status::OK(); } - - // Name: CreateStorageClient() - // Description: A factory method to create the derived storage client. - // Every dataset has a required field for the dataset type in a config - // file. This type will determine the child class to return for the - // type of storage client. - static Status CreateStorageClient(StorageOp *store_op, // In: A backpointer to the owning storage op for this client. - std::string dataset_schema_path, // In: The path to the dataset - std::shared_ptr *out_client); // Out: the created storage client - - // Name: CreateStorageClient() - // Description: A factory method to create the derived storage client. - // This creator is a user-override for the schema properties where - // the user has input the layout of the data (typically used in testcases) - static Status CreateStorageClient(StorageOp *store_op, // In: A backpointer to the owning cache for this client. 
- DatasetType in_type, // In: The type of dataset - std::shared_ptr *out_client); // Out: the created storage client - - // Name: Print() - // Description: A function that prints info about the StorageClient - virtual void Print(std::ostream &out) const; // In: The output stream to print to - - // Provide stream operator for displaying - friend std::ostream &operator<<(std::ostream &out, const StorageClient &storage_client) { - storage_client.Print(out); - return out; - } - - // Name: LoadDatasetLayout() - // Description: There are 2 ways to define the properties of the data in the storage - // layer: LoadDatasetLayout() and AssignDatasetLayout(). - // LoadDatasetLayout() will parse the json config file that comes with - // the dataset and internally populate row counts and schema. - virtual Status LoadDatasetLayout(); - - // Name: AssignDatasetLayout() - // Description: There are 2 ways to define the properties of the data in the storage - // layer: LoadDatasetLayout() and AssignDatasetLayout(). - // AssignDatasetLayout() will take input from the caller and assign that - virtual Status AssignDatasetLayout(uint32_t num_rows, // In: The number of rows in the dataset - const DataSchema &schema); // In: The schema for the dataset - - // Name: Reset() - // Description: Resets any state info inside the client back to it's initialized - // state. - virtual Status Reset() = 0; - - // Name: IsMoreData - // Description: General routine to ask if more data exists in the storage side for - // a given buffer id. - virtual bool IsMoreData(uint32_t id) { return true; } - - // Name: numRowsFromFile() - // Description: Reads the schema json file to see if the optional numRows field has - // been set and returns it. 
- Status numRowsFromFile(uint32_t &num_rows) const; - - // Get'r functions - DataSchema *schema() const; - - uint32_t num_rows() const { return num_rows_in_dataset_; } - - // Name: rows_per_buffer() - // Description: This default version simply gives you the count of the requested - // rows per buffer that the user defined in the storage op. - // However, if some condition down in the storage client layers - // could result in a buffer that has a different number of rows, - // then the derived class can override this method to provide their - // own implementation. - virtual uint32_t rows_per_buffer() { return storage_op_->rows_per_buffer(); } - - // Description: Get the label classes num. Only manifest and Imagenet dataset support this parameter - virtual uint32_t num_classes() const { return 0; } - - protected: - std::unique_ptr data_schema_; // The schema for the data - uint32_t num_rows_in_dataset_; // The number of rows in the dataset - StorageOp *storage_op_; // Back pointer to the owning storage operator. - std::vector col_names_; - uint32_t num_classes_; -}; -} // namespace dataset -} // namespace mindspore - -#endif // DATASET_ENGINE_DATASETOPS_SOURCE_STORAGE_CLIENT_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc deleted file mode 100644 index 052e474b6e..0000000000 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc +++ /dev/null @@ -1,607 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#define MAX_INTEGER_UINT32 4294967295 -#define MAX_INTEGER_INT32 2147483647 - -#include "dataset/engine/datasetops/source/storage_client.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/constants.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/util/queue.h" -#include "dataset/engine/datasetops/source/storage_op.h" -#include "dataset/util/task_manager.h" -#include "utils/log_adapter.h" - -namespace mindspore { -namespace dataset { -// Builder constructor. Creates the builder object. -StorageOp::Builder::Builder() - : build_dataset_files_dir_(""), - build_schema_file_(""), - build_num_rows_(0), - build_data_distribution_file_(""), - build_batch_size_(1), - build_drop_remainder_(false) { - // Some arguments to the StorageOp constructor have a default argument that is taken - // from the client config. - // The user may choose to change these values for the construction of the StorageOp by - // using the various builder set methods. 
- - std::shared_ptr cfg = GlobalContext::config_manager(); - build_rows_per_buffer_ = cfg->rows_per_buffer(); - build_worker_connector_size_ = cfg->worker_connector_size(); - build_num_workers_ = cfg->num_parallel_workers(); - build_op_connector_size_ = cfg->op_connector_size(); -} - -// The builder "build" method creates the final object. -Status StorageOp::Builder::Build(std::shared_ptr *ptr) { - // There are 2 "flavours" of construction for a StorageOp: - // - // 1) Does a handshake with the dataset to identify row ranges and to identify - // the schema (internally the handshake does lookup against a json file in the dataset) - // - // 2) The user manually creates a schema and defines the row ranges, so there is no real - // dataset handshake. - // - // The decision about which style is called will depend on if the user supplied the - // schema and row range fields. - - const std::string dataset_schema_file("datasetSchema.json"); - if (build_schema_ != nullptr && build_num_rows_ == 0) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "Building a StorageOp with a given schema, but the number of rows not specified!"); - } - if (build_schema_ == nullptr && build_num_rows_ != 0) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "Building a StorageOp with a given number of rows but schema not specified!"); - } - if (build_dataset_files_dir_.empty() && build_dataset_file_list_.empty()) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "Building a StorageOp that has not provided the location of the data files."); - } - if (!build_dataset_files_dir_.empty() && !build_dataset_file_list_.empty()) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "Building a StorageOp that has provided conflicting location of the data files."); - } - - std::shared_ptr new_storage_op = std::make_shared( - build_num_workers_, build_worker_connector_size_, build_rows_per_buffer_, build_op_connector_size_, - 
build_columns_to_load_, build_data_distribution_file_, build_batch_size_, build_drop_remainder_); - - // If there is no schema or number of rows given, then we go with construction method 1 - // where we need to handshake with storage client to find out what the schema (and - // number of rows) are based on schema file. - if (build_schema_ == nullptr && build_num_rows_ == 0) { - if (!build_dataset_files_dir_.empty()) { - // We have a dataset files dir, but do not have a schema file. - // Set the default schema file to be inside the same path as the dataset files dir. - if (build_schema_file_.empty()) { - build_schema_file_ = build_dataset_files_dir_ + "/" + dataset_schema_file; - } - RETURN_IF_NOT_OK(new_storage_op->InitOp(build_dataset_files_dir_, build_schema_file_, build_labels_file_name_, - build_dataset_usage_)); - } else { - // dataset is provided by list of files not dir_path - RETURN_IF_NOT_OK(new_storage_op->InitOp(build_dataset_file_list_, build_schema_file_)); - } - } else { - // else, the user gave us a schema and a row range, go with construction method 2, where we use - // the user-provided schema, but we still need to identify our data files. 
- RETURN_IF_NOT_OK(new_storage_op->InitOp(build_num_rows_, build_dataset_files_dir_, std::move(build_schema_), - build_labels_file_name_, build_dataset_usage_)); - } - - // Call the actual workhorse of the constructor - RETURN_IF_NOT_OK(new_storage_op->init()); - *ptr = std::move(new_storage_op); - return Status::OK(); -} - -StorageOp::StorageOp(int32_t num_workers, int32_t worker_connector_size, int32_t rows_per_buffer, - int32_t op_connector_size, std::vector columns_to_load, - std::string data_distribution_file, int32_t batch_size, bool drop_remainder) - : ParallelOp(num_workers, op_connector_size), - worker_conn_size_(worker_connector_size), - rows_per_buffer_(rows_per_buffer), - num_rows_(0), - buffers_fetched_(0), - columns_to_load_(columns_to_load), - data_distribution_file_(data_distribution_file), - device_num_(1), - device_id_(0), - shard_config_("ALL"), - seed_(0), - shuffle_config_(false), - num_classes_(0), - batch_size_(batch_size), - drop_remainder_(drop_remainder) {} - -// Init of the StorageOp. This is 1 of 3 init. -// This version of the init does not take the schema in it's arguments. It must perform an -// internal handshake with the dataset to produce the schema. -Status StorageOp::InitOp(const std::string &dataset_files_dir, const std::string &schema_file, - const std::string &labels_file_name, const std::string &dataset_usage) { - dataset_files_dir_ = dataset_files_dir; - schema_file_ = schema_file; - labels_file_name_ = labels_file_name; - dataset_usage_ = dataset_usage; - - // Storage ops require the internal master/worker connector. create it here - RETURN_IF_NOT_OK(ParallelOp::CreateWorkerConnector(worker_conn_size_)); - - // Get parameter for distribution. - RETURN_IF_NOT_OK(LoadParallelConfig()); - - // Create the storage client. This will read the json file to determine what - // type of client we're creating. 
- RETURN_IF_NOT_OK(StorageClient::CreateStorageClient(this, schema_file_, &store_client_)); - - // Perform the initial handshake with the storage client to further read the - // dataset info to populate schema info and the number of rows in the client. - RETURN_IF_NOT_OK(store_client_->LoadDatasetLayout()); - - // Pull out the number of rows from the client and save into the op. - num_rows_ = store_client_->num_rows(); - num_classes_ = store_client_->num_classes(); - - return Status::OK(); -} - -// Init of the StorageOp. This is 2 of 3 init. -// This version of the init allows the user to input the schema and other dataset properties rather -// than get it from the dataset itself. -Status StorageOp::InitOp(int32_t num_rows, const std::string &dataset_files_dir, - std::unique_ptr data_schema, const std::string &labels_file_name, - const std::string &dataset_usage) { - num_rows_ = num_rows; - dataset_files_dir_ = dataset_files_dir; - labels_file_name_ = labels_file_name; - dataset_usage_ = dataset_usage; - - // Storage ops require the internal master/worker connector. create it here - RETURN_IF_NOT_OK(ParallelOp::CreateWorkerConnector(worker_conn_size_)); - - // Get parameter for distribution. - RETURN_IF_NOT_OK(LoadParallelConfig()); - - // Create the storage client based on the dataset type given from the input schema. - RETURN_IF_NOT_OK(StorageClient::CreateStorageClient(this, data_schema->dataset_type(), &store_client_)); - - // Perform the initial handshake with the storage client to initialize the schema - // and the number of rows in the set. In this case, since the schema and the number - // of rows is input by the user directly, it's not much of a "handshake", it's more - // like an assign. - RETURN_IF_NOT_OK(store_client_->AssignDatasetLayout(num_rows_, *data_schema)); - num_classes_ = store_client_->num_classes(); - - return Status::OK(); -} - -// Init of the StorageOp. This is 3 of 3 init. 
-// This version of the init does not take the schema in it's arguments. It must perform an -// internal handshake with the dataset to produce the schema. Unlike constructor 1, it takes a -// list of files rather than a directory. -Status StorageOp::InitOp(const std::vector &files_list, const std::string &schema_file) { - dataset_file_list_ = files_list; - schema_file_ = schema_file; - - // Storage ops require the internal master/worker connector. create it here - RETURN_IF_NOT_OK(ParallelOp::CreateWorkerConnector(worker_conn_size_)); - - // Get parameter for distribution. - RETURN_IF_NOT_OK(LoadParallelConfig()); - - // Create the storage client. This will read the json file to determine what - // type of client we're creating. - RETURN_IF_NOT_OK(StorageClient::CreateStorageClient(this, schema_file_, &store_client_)); - - // Perform the initial handshake with the storage client to further read the - // dataset info to populate schema info and the number of rows in the client. - RETURN_IF_NOT_OK(store_client_->LoadDatasetLayout()); - - // Pull out the number of rows from the client and save into the op. - num_rows_ = store_client_->num_rows(); - - return Status::OK(); -} - -// Private helper method. This one encapsulates some common construction/reset tasks and is -// designed to be re-entrant so that you can re-init a previously used StorageOp without needing -// to redo the storage client handshake. -Status StorageOp::init() { - // First a sanity check to make sure the StorageClient initialization has done the proper - // handshake and initialized both the schema and the number of rows for the dataset. 
- const DataSchema *the_schema = store_client_->schema(); - if (the_schema->NumColumns() == 0 || num_rows_ == 0) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "Storage client did not run handshake to init schema and number of rows."); - } - - // Now that we have schema, generate the column name map (base class field) - for (int32_t i = 0; i < the_schema->NumColumns(); ++i) { - column_name_id_map_[the_schema->column(i).name()] = i; - } - - // If the data buffer vector is not empty, then we may be redoing a scan again after a repeat. - // In such a case, we have vector of nullptrs that used to hold the buffers. get rid of this - // so we can reuse the vector. - if (!data_buffers_.empty()) { - data_buffers_.clear(); - } - int32_t buffers_needed; - - // We have our range of row id's, but we must carve this up into buffers now so that - // each buffer holds a subset of the overall range. - // Instantiate the buffers now, but this does not actually drive a load of actual - // data at this point. - - // First, compute how many buffers we would need to accomplish rowsPerBuffer - buffers_needed = this->num_rows() / rows_per_buffer_; - - // If an extra partial buffer is needed, adjust for that. - if (this->num_rows() % rows_per_buffer_ != 0) { - buffers_needed++; - } - MS_LOG(DEBUG) << "Master: Initializing StorageOp. Dataset files dir: " << dataset_files_dir_ << " Dataset type: " - << static_cast::type>(store_client_->schema()->dataset_type()) - << " Dataset schema file: " << schema_file_ << " Number of rows: " << num_rows_ - << " Rows per buffer: " << rows_per_buffer_ << " Num buffers (computed): " << buffers_needed - << " Number of workers: " << num_workers_ << "."; - - // Next, create each buffer in a loop. 
- int32_t buff_id = 0; - for (buff_id = 0; buff_id < buffers_needed; buff_id++) { - // Create a new data buffer as a base class pointer, using the factory method from - // DataBuffer class - std::unique_ptr new_data_buffer; - RETURN_IF_NOT_OK(DataBuffer::CreateDataBuffer(buff_id, store_client_, &new_data_buffer)); - - // Insert the buffer into our vector - data_buffers_.push_back(std::move(new_data_buffer)); - } - - // Instantiate the action queues. If this was a re-entrant call then these already exist. - // We cannot drop and recreate them because there are threads waiting on them currently. - // They should be empty anyway in a reset codepath - if (action_queue_.empty()) { - // The max size of these queues should ensure they will never get full and they support - // precisely the amount of data that we know they will hold (the total number of buffers). - // There needs to be one queue for each worker, to support the Connector design for how - // data will be fetched and pushed into a Connector in parallel. - // - // Say the total buffers is 5, and we have 2 workers. - // To support this, we'd need 1 queue of size 2 and the other of size 3. - // For simplicity, we'll make both of them 3 so they are the same size. - int32_t action_queue_size = (buffers_needed / num_workers_) + 1; - for (int32_t i = 0; i < num_workers_; ++i) { - auto new_queue = std::make_unique>(action_queue_size); - action_queue_.push_back(std::move(new_queue)); - } - } - - // Extract the list of buffer id's from the vector and use this as our starting action - // queue of buffers. 
- RETURN_IF_NOT_OK(this->FillActionQueue(false)); - return Status::OK(); -} - -// Destructor -StorageOp::~StorageOp() {} - -// A print method typically used for debugging -void StorageOp::Print(std::ostream &out, bool show_all) const { - // Always show the id and name as first line regardless if this summary or detailed print - out << "(" << std::setw(2) << operator_id_ << ") :"; - if (!show_all) { - // Call the super class for displaying any common 1-liner info - ParallelOp::Print(out, show_all); - // Then show any custom derived-internal 1-liner info for this op - out << "\n"; - } else { - // Call the super class for displaying any common detailed info - ParallelOp::Print(out, show_all); - // Then show any custom derived-internal stuff - out << "\nDetailed operator printing has not been implemented for this op.\n\n"; - } -} - -// Private helper method. This one posts a control indicator for each worker thread to consume -// from the action queue. When the worker pops this msg, it will shut itself down gracefully. -Status StorageOp::PostEndOfData() { - MS_LOG(DEBUG) << "Master: Processed all of the buffers. Send end-of-data message to workers."; - - // For each worker we add the message so that they can all get the memo - for (int32_t i = 0; i < num_workers_; ++i) { - RETURN_IF_NOT_OK(action_queue_[i]->Add(kEndOfActions)); - } - return Status::OK(); -} - -// Private helper method. This one populates the action queue with the list of buffer ids. -Status StorageOp::FillActionQueue(bool randomize) { - // We only support adding the new list of id's to the queue if we are sure the old list - // of actions is already done. This might change in the future though - for (int32_t i = 0; i < num_workers_; ++i) { - if (!(action_queue_[i]->empty())) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "Attempt to get buffer id's into a queue, but the queue not empty!"); - } - } - if (!data_buffers_.empty()) { - // Add buffer id's to the queue. 
Buffer id's in our vector are just numbers from 0 up, so - // basically just a list of consecutive numbers starting from 0 (incremented by 1). - // If randomize is requested, the list of id's will be jumbled up (so not consecutive - // order) - if (!randomize) { - // Round robin of filling each worker with the buffer id's - int32_t curr_worker = 0; - for (int32_t i = 0; i < data_buffers_.size(); ++i) { - RETURN_IF_NOT_OK(action_queue_[curr_worker]->Add(i)); - curr_worker++; - if (curr_worker == num_workers_) { - curr_worker = 0; - } - } - } else { - std::vector random_ids; - int32_t i; - for (i = 0; i < data_buffers_.size(); ++i) { - random_ids.push_back(i); - } - uint32_t seed = std::chrono::system_clock::now().time_since_epoch().count(); - std::shuffle(random_ids.begin(), random_ids.end(), std::default_random_engine(seed)); - - // Round robin of filling each worker with the buffer id's from randomized list - int32_t curr_worker = 0; - for (i = 0; i < random_ids.size(); ++i) { - RETURN_IF_NOT_OK(action_queue_[curr_worker]->Add(random_ids[i])); - curr_worker++; - if (curr_worker == num_workers_) { - curr_worker = 0; - } - } - } - } - return Status::OK(); -} - -// The entry point code for when workers are launched. -// Given the input bufferId, it returns a shared_ptr to that buffer back to you by driving a -// load operation. This function is intended to be run by worker threads, when they are -// populating the memory with the actual data of the buffer. -Status StorageOp::GetBuffer(int32_t buffer_id, std::unique_ptr *ptr) { - if (!data_buffers_.empty()) { - if (static_cast(buffer_id) >= data_buffers_.size()) { - std::ostringstream ss; - ss << "Error. 
Buffer id " << buffer_id << " is out of range."; - std::string err_msg = ss.str(); - RETURN_STATUS_UNEXPECTED(err_msg); - } - - // execute a load operation to fill this buffer (may result in call to storage layers) - RETURN_IF_NOT_OK(data_buffers_[buffer_id]->Load()); - - // Return the buffer - // Important: The share pointer remains counted for the caller as well as locally in the - // mDataBuffers array. Later when the buffer is sent on it's way up the pipeline, the - // shared_ptr in the array will be reset so that the StorageOp will not hang on to old - // buffers that it has already passed up the pipeline. - *ptr = std::move(data_buffers_[buffer_id]); - } else { - RETURN_STATUS_UNEXPECTED("Requested to get a buffer from an empty cache."); - } - return Status::OK(); -} - -// Class functor operator () override. -// All dataset ops operate by launching a thread (see ExecutionTree). This class functor will -// provide the master loop that drives the logic for performing the work -Status StorageOp::operator()() { - // Before we enter our master loop, kick off our workers and assign them to - // use the StorageOp worker entry code. - RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&StorageOp::WorkerEntry, this, std::placeholders::_1))); - // Handshake with TaskManager to synchronize thread creation - TaskManager::FindMe()->Post(); - int32_t num_buffers_to_fetch = data_buffers_.size(); - - // The storage op is the bottom node in the tree, so it does not listen to an input - // queue from an operator below us. Instead, we'll will read from the internal queue - // that our workers produce into, and then push that into output queue. - bool done = false; - std::unique_ptr fetched_buffer; - while (!done) { - // Get the next buffer. We are single thread master so thread id hard coded to 0 - // on the connector pop. Count this buffer towards our count, and then push - // it up to the output connector. 
- RETURN_IF_NOT_OK(worker_connector_->PopWithRetry(0, &fetched_buffer)); - buffers_fetched_++; - int32_t buffer_id = fetched_buffer->id(); - - if (buffers_fetched_ == 1) { - num_buffers_to_fetch = static_cast(data_buffers_.size()); - } - - // There should be 2 holders of this buffer currently. We have one in the mDataBuffers - // table, and then ourselves right now with fetchedBuffer. - // Reduce the shared_ptr ref count of this buffer by removing it from the mDataBuffers - // table first before we push the buffer to output connector. - data_buffers_[buffer_id].reset(); - MS_LOG(DEBUG) << "StorageOp master: Consumed buffer " << buffer_id << " from internal worker connector."; - RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(fetched_buffer))); - MS_LOG(DEBUG) << "StorageOp master: pushed buffer " << buffer_id << " to output connector."; - - // Now, check our loop exit conditions and perform appropriate end of data handling if - // we've reached the end of our scan. - if (buffers_fetched_ == num_buffers_to_fetch) { - MS_LOG(DEBUG) << "StorageOp master: Reached end of data."; - - // If we are not inside of a Repeat path in the tree, or we are in a repeat path but - // this was our last repeat, then we do a full quit here with eof control message. - if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { - // Post the control message to tell the workers to stop waiting on action queue - // because we are done! 
- RETURN_IF_NOT_OK(this->PostEndOfData()); - std::unique_ptr eoeBuffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); - RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoeBuffer))); - MS_LOG(DEBUG) << "StorageOp master: Flow end-of-data eof message."; - std::unique_ptr eofBuffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); - RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eofBuffer))); - MS_LOG(DEBUG) << "StorageOp master: Main execution loop complete."; - done = true; // while loop exit - } else { - // We are in a repeat path and it's not the last repeat. - // Flow an end-of-epoch control message up the pipeline. - // RepeatOp above us somewhere in the tree will re-init us with the data to fetch again - // once it gets the end-of-epoch message. - MS_LOG(DEBUG) << "StorageOp master: Flow end-of-epoch eoe message."; - std::unique_ptr eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); - RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); - - // reset our buffer count and go to loop again. - buffers_fetched_ = 0; - - // This is a bit of a cheat. Only the repeat op should perform resetting actions - // against us (currently). However, if we go to block/wait on the worker_connector_ - // right now before the reset is done (driven from the repeat op), then we end - // up using stale connector index info and blocking on the wrong thing, causing - // invalid order during the next epoch. - // For now then, do a quick reset of just the connector queue so that we block - // at a safe starting point in the connector. - worker_connector_->Reset(); - } - } - } - return Status::OK(); -} - -// The entry point code for when workers are launched. 
-Status StorageOp::WorkerEntry(int32_t worker_id) { - int32_t next_action_id = 0; - MS_LOG(DEBUG) << "Worker: StorageOp worker entry point."; - - // Handshake with TaskManager to synchronize the creation - TaskManager::FindMe()->Post(); - - // While there is still some actions to perform - RETURN_IF_NOT_OK(action_queue_[worker_id]->PopFront(&next_action_id)); - while (next_action_id != kEndOfActions) { - // Drive a load of this buffer and get a pointer to the buffer after it's loaded in - std::unique_ptr dB; - RETURN_IF_NOT_OK(this->GetBuffer(next_action_id, &dB)); - MS_LOG(DEBUG) << "Worker: Loaded buffer " << next_action_id << "."; - - // Add the buffer to the internal queue for master to consume from later. - // This could end up blocking if the queue is full in which case it waits here - // until the master can drain a buffer off the queue. - RETURN_IF_NOT_OK(worker_connector_->Add(worker_id, std::move(dB))); - MS_LOG(DEBUG) << "Worker: Pushed buffer " << next_action_id << " to internal worker connector."; - - // Get the next action id and loop - RETURN_IF_NOT_OK(action_queue_[worker_id]->PopFront(&next_action_id)); - } - MS_LOG(DEBUG) << "Worker: Received end-of-data message. Worker complete."; - return Status::OK(); -} - -const DataSchema *StorageOp::schema() const { return store_client_->schema(); } - -// Overrides base class reset method. When an operator does a reset, it cleans up any state -// info from it's previous execution and then initializes itself so that it can be executed -// again. -Status StorageOp::Reset() { - RETURN_IF_NOT_OK(ParallelOp::Reset()); // Call our super class reset first. - - // We do not need to redo the handshake with the storage client, since that - // info should be the same as the last time. However there may be stale - // state info in the client from the last execution. The client provides - // a reset method as well to re-initialize. 
- RETURN_IF_NOT_OK(store_client_->Reset()); - - // init method is re-entrant and will refresh everything. - RETURN_IF_NOT_OK(this->init()); - return Status::OK(); -} - -// Name: LoadParallelConfig -// Description: Load parallel config info from a specific config file. In multi-P cases (or single-P cases), we -// need to know deviceID, rank, device number, shard mode -// , shuffle (or not) and seed to prepare to scatter files. -Status StorageOp::LoadParallelConfig() { - if (data_distribution_file_ == "") { - return Status::OK(); - } - try { - std::ifstream in(data_distribution_file_); - nlohmann::json js; - in >> js; - device_num_ = js.value("deviceNum", 0); - device_id_ = js.value("deviceId", 0); - if (device_num_ == 0 || device_num_ > MAX_INTEGER_INT32) { - RETURN_STATUS_UNEXPECTED("Invalid deviceNum"); - } - if (device_id_ > MAX_INTEGER_INT32 || device_id_ >= device_num_) { - MS_LOG(DEBUG) << "In parallel config file " << data_distribution_file_ << ", wrong deviceID provided."; - RETURN_STATUS_UNEXPECTED("Invalid deviceId"); - } - shard_config_ = js.value("shardConfig", ""); - if (shard_config_ != "ALL" && shard_config_ != "UNIQUE" && shard_config_ != "RANDOM") { - MS_LOG(DEBUG) << "In parallel config file " << data_distribution_file_ << " wrong mShardConfig provided."; - RETURN_STATUS_UNEXPECTED("Invalid shardConfig"); - } - std::string shuffle_str = js.value("shuffle", ""); - if (shuffle_str == "ON") { - shuffle_config_ = true; - } else if (shuffle_str == "OFF") { - shuffle_config_ = false; - } else { - MS_LOG(DEBUG) << "In parallel config file " << data_distribution_file_ - << ", shuffle config is wrong: it's not ON or OFF"; - RETURN_STATUS_UNEXPECTED("Invalid shuffle option"); - } - seed_ = js.value("seed", 0); - if (seed_ > MAX_INTEGER_UINT32) { - RETURN_STATUS_UNEXPECTED("Invalid seed"); - } - } catch (const std::exception &e) { - RETURN_STATUS_UNEXPECTED("Load parallel config failed"); - } - return Status::OK(); -} -} // namespace dataset -} // namespace 
mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.h deleted file mode 100644 index 9334addc34..0000000000 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.h +++ /dev/null @@ -1,389 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef DATASET_ENGINE_DATASETOPS_SOURCE_STORAGE_OP_H_ -#define DATASET_ENGINE_DATASETOPS_SOURCE_STORAGE_OP_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/util/status.h" - -namespace mindspore { -namespace dataset { -// Forward declares -template -class Queue; - -// A type for a container of DataBuffer shared_ptr's -using DataBuffers = std::vector>; - -// A type for the queue of buffer id's for workers to fetch. -using ActionQueue = std::vector>>; - -// Forward declare -class DataBuffer; - -class StorageClient; - -class StorageOp : public ParallelOp { - public: - // The nested builder class inside of the StorageOp is used to help manage all of the arguments - // for constructing it. Use the builder by setting each argument with the provided set methods, - // and then finally call the build method to execute the actual construction. - class Builder { - public: - // Builder constructor. 
Creates the builder object. - // @note No default args - // @return This is a constructor. - Builder(); - - // Default destructor - ~Builder() = default; - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetNumRows(int num_rows) { - build_num_rows_ = num_rows; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetRowsPerBuffer(int rows_per_buffer) { - build_rows_per_buffer_ = rows_per_buffer; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetSchema(std::unique_ptr schema) { - build_schema_ = std::move(schema); - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetNumWorkers(int32_t num_workers) { - build_num_workers_ = num_workers; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetWorkerConnectorSize(int32_t connector_size) { - build_worker_connector_size_ = connector_size; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetOpConnectorSize(int32_t connector_size) { - build_op_connector_size_ = connector_size; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetSchemaDir(const std::string &schema_dir) { - build_schema_file_ = schema_dir + "/datasetSchema.json"; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetSchemaFile(const std::string &schema_file) { - build_schema_file_ = schema_file; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. 
- Builder &SetDatasetFilesDir(const std::string &files_dir) { - build_dataset_files_dir_ = files_dir; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetDatasetFileList(const std::vector &file_list) { - build_dataset_file_list_ = file_list; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetColumnsToLoad(const std::vector &columns) { - build_columns_to_load_ = columns; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetDataDistributionFile(const std::string &data_distribution_file) { - build_data_distribution_file_ = data_distribution_file; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &setLabelsFileName(const std::string &labels_file_name) { - build_labels_file_name_ = labels_file_name; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetDatasetUsage(const std::string &dataset_usage) { - build_dataset_usage_ = dataset_usage; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetBatchSize(int32_t batch_size) { - build_batch_size_ = batch_size; - return *this; - } - - // Setter method. - // @return Builder setter method returns reference to the builder. - Builder &SetDropRemainder(bool drop_remainder) { - build_drop_remainder_ = drop_remainder; - return *this; - } - - // The builder "build" method creates the final object. - // @param shared_ptr to the new StorageOp object - // @return Status - The error code return - Status Build(std::shared_ptr *); - - private: - // The builder saves all StorageOp construction arguments internally. - // The following are the arguments. 
- std::string build_dataset_files_dir_; - std::string build_schema_file_; - int32_t build_num_rows_; - std::string build_data_distribution_file_; - int32_t build_rows_per_buffer_; - int32_t build_worker_connector_size_; - int32_t build_num_workers_; - int32_t build_op_connector_size_; - std::unique_ptr build_schema_; - std::vector build_dataset_file_list_; - std::vector build_columns_to_load_; - std::string build_labels_file_name_; - std::string build_dataset_usage_; - int32_t build_batch_size_; - bool build_drop_remainder_; - }; - - // Constructor of the StorageOp. - // @note The builder class should be used to call it - // @param num_workers - The number of workers for the op - // @param worker_connector_size - The internal connector size between workers and master - // @param rows_per_buffer - The requested number of rows per buffer - // @param op_connector_size - The output connector queue size - // @param columns_to_load - The list of columns to use (column name) - StorageOp(int32_t num_workers, int32_t worker_connector_size, int32_t rows_per_buffer, int32_t op_connector_size, - std::vector columns_to_load, std::string data_distribution_file, int32_t batch_size, - bool drop_remainder); - - // Init the StorageOp. This is 1 of 3 init. - // This version of the init does not take the schema in it's arguments. It must perform an - // internal handshake with the dataset to produce the schema. - // @note The builder class should be used to call it - // @param dataset_files_dir - The directory that has the dataset files - // @param schema_file - The schema file for providing column info - Status InitOp(const std::string &dataset_files_dir, const std::string &schema_file, - const std::string &labels_file_name, const std::string &dataset_usage); - - // Init the StorageOp. This is 2 of 3 init. - // This version of the init allows the user to input the schema and other dataset properties rather - // than get it from the dataset itself. 
- // @note The builder class should be used to call it - // @param num_rows - The number of rows in the dataset - // @param dataset_files_dir - The directory that has the dataset files - // @param data_schema - The schema to use - Status InitOp(int32_t num_rows, const std::string &dataset_files_dir, std::unique_ptr data_schema, - const std::string &labels_file_name, const std::string &dataset_usage); - - // Init the StorageOp. This is 3 of 3 init. - // This version of the init does not take the schema in it's arguments. It must perform an - // internal handshake with the dataset to produce the schema. Unlike constructor 1, it takes a - // list of files rather than a directory. - // @note The builder class should be used to call it - // @param files_list - The list of files to use for the dataset - // @param schema_file - The schema file for providing column info - Status InitOp(const std::vector &files_list, const std::string &schema_file); - - // Destructor - ~StorageOp(); - - // A print method typically used for debugging - // @param out - The output stream to write output to - // @param show_all - A bool to control if you want to show all info or just a summary - void Print(std::ostream &out, bool show_all) const override; - - // << Stream output operator overload - // @notes This allows you to write the debug print info using stream operators - // @param out - reference to the output stream being overloaded - // @param storage_op - reference to the StorageOp to display - // @return - the output stream must be returned - friend std::ostream &operator<<(std::ostream &out, const StorageOp &storage_op) { - storage_op.Print(out, false); - return out; - } - - // Class functor operator () override. - // All DatasetOps operate by launching a thread (see ExecutionTree). This class functor will - // provide the master loop that drives the logic for performing the work. 
- // @return Status - The error code return - Status operator()() override; - - // The entry point code for when workers are launched. - // @param worker_id - The worker id - // @return Status - The error code return - Status WorkerEntry(int32_t worker_id) override; - - // The entry point code for when workers are launched. - // Given the input bufferId, it returns a shared_ptr to that buffer back to you by driving a - // load operation. This function is intended to be run by worker threads, when they are - // populating the memory with the actual data of the buffer. - // @param buffer_id - The buffer id to get. - // @param ptr - Pointer to shared_ptr to the buffer that was loaded in. - // @return Status - The error code return - Status GetBuffer(int32_t buffer_id, std::unique_ptr *ptr); - - // Overrides base class reset method. When an operator does a reset, it cleans up any state - // info from it's previous execution and then initializes itself so that it can be executed - // again. - // @return Status - The error code return - Status Reset() override; - - // Getter method - int32_t num_rows() const { return num_rows_; } - - // Setter method - void set_num_rows(int32_t num_rows) { num_rows_ = num_rows; } - - // Getter method - int32_t rows_per_buffer() const { return rows_per_buffer_; } - - // Setter method - void set_rows_per_buffer(int32_t rows_per_buffer) { rows_per_buffer_ = rows_per_buffer; } - - // Getter method - std::string dataset_files_dir() const { return dataset_files_dir_; } - - // Getter method - std::vector dataset_file_list() const { return dataset_file_list_; } - - // Getter method - std::string schema_file() const { return schema_file_; } - - // Getter method - const DataSchema *schema() const; - - // Getter method - const std::vector columns_to_load() const { return columns_to_load_; } - - // Getter method - std::string data_distribution_file() const { return data_distribution_file_; } - - // Getter method - int32_t device_num() const { return 
device_num_; } - - // Getter method - int32_t device_id() const { return device_id_; } - - // Getter method - std::string shard_config() const { return shard_config_; } - - // Getter method - uint32_t seed() const { return seed_; } - - // Getter method - bool shuffle_config() const { return shuffle_config_; } - - // Getter method - int32_t num_classes() const { return num_classes_; } - - // Getter method - std::string labels_file_name() const { return labels_file_name_; } - - // Getter method - std::string dataset_usage() const { return dataset_usage_; } - - // Getter method - int32_t batch_size() const { return batch_size_; } - - // Getter method - bool drop_remainder() const { return drop_remainder_; } - - private: - // Private helper method. This one populates the action queue with the list of buffer ids. - // @param randomize - T/F if the id's in the action queue should be randomized or sequential. - Status FillActionQueue(bool randomize); - - // Private helper method. This one encapsulates some common construction/reset tasks and is - // designed to be re-entrant so that you can re-init a previously used StorageOp without needing - // to redo the storage client handshake. - // @return Status - The error code return - Status init(); - - // Private helper method. This one posts a control indicator for each worker thread to consume - // from the action queue. When the worker pops this msg, it will shut itself down gracefully. - // @return Status - The error code return - Status PostEndOfData(); - - Status LoadParallelConfig(); - - DataBuffers data_buffers_; // A vector of pointers to buffers - std::shared_ptr store_client_; // The client for interacting with storage - ActionQueue action_queue_; // The queues of buffer id's for workers to fetch. - int32_t worker_conn_size_; // connector size for internal worker queue - int32_t rows_per_buffer_; // The number of requested rows per buffer. 
- int32_t num_rows_; // One more than the last row id in the range for this cache - std::string dataset_files_dir_; // The path for the dataset files - std::vector dataset_file_list_; // List of paths to files for the dataset - int32_t buffers_fetched_; // Counter for the buffers that were fetched - std::string schema_file_; // Path to the schema json file - std::vector columns_to_load_; // Columns to load from dataset - std::string data_distribution_file_; // Distribution configuration file - int32_t device_num_; // All device number - int32_t device_id_; // Device id - std::string shard_config_; // ALL UNIQUE RANDOM - uint32_t seed_; // Used for shuffle - bool shuffle_config_; // True or false - std::string labels_file_name_; // File name of labels - int32_t num_classes_; // Label class number - std::string dataset_usage_; // train/eval/inference - int32_t batch_size_; - bool drop_remainder_; -}; -} // namespace dataset -} // namespace mindspore - -#endif // DATASET_ENGINE_DATASETOPS_SOURCE_STORAGE_OP_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc index e51eb4e00d..26058cc8b8 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc @@ -33,7 +33,11 @@ namespace mindspore { namespace dataset { TextFileOp::Builder::Builder() - : builder_device_id_(0), builder_num_devices_(1), builder_num_samples_(0), builder_shuffle_files_(false) { + : builder_device_id_(0), + builder_num_devices_(1), + builder_total_rows_(0), + builder_shuffle_files_(false), + builder_shuffle_global_(false) { std::shared_ptr config_manager = GlobalContext::config_manager(); builder_num_workers_ = config_manager->num_parallel_workers(); builder_op_connector_size_ = config_manager->op_connector_size(); @@ -43,7 +47,7 @@ TextFileOp::Builder::Builder() Status TextFileOp::Builder::ValidateInputs() const { 
std::string err_msg; - err_msg += builder_num_workers_ <= 0 ? "Number of parallel workers should be greate than 0\n" : ""; + err_msg += builder_num_workers_ <= 0 ? "Number of parallel workers should be greater than 0\n" : ""; err_msg += builder_device_id_ >= builder_num_devices_ || builder_num_devices_ < 1 ? "Wrong sharding configs\n" : ""; return err_msg.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } @@ -62,25 +66,27 @@ Status TextFileOp::Builder::Build(std::shared_ptr *op) { builder_schema_->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); std::shared_ptr text_file_op = std::make_shared( - builder_num_workers_, builder_rows_per_buffer_, builder_num_samples_, builder_worker_connector_size_, + builder_num_workers_, builder_rows_per_buffer_, builder_total_rows_, builder_worker_connector_size_, std::move(builder_schema_), builder_text_files_list_, builder_op_connector_size_, builder_shuffle_files_, - builder_num_devices_, builder_device_id_); + builder_shuffle_global_, builder_num_devices_, builder_device_id_); RETURN_IF_NOT_OK(text_file_op->Init()); *op = std::move(text_file_op); return Status::OK(); } -TextFileOp::TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size, +TextFileOp::TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t total_rows, int32_t worker_connector_size, std::unique_ptr schema, std::vector text_files_list, - int32_t op_connector_size, bool shuffle_files, int32_t num_device, int32_t device_id) + int32_t op_connector_size, bool shuffle_files, bool shuffle_global, int32_t num_device, + int32_t device_id) : ParallelOp(num_workers, op_connector_size), device_id_(device_id), num_devices_(num_device), rows_per_buffer_(rows_per_buffer), - num_samples_(num_samples), + total_rows_(total_rows), text_files_list_(std::move(text_files_list)), shuffle_files_(shuffle_files), + 
shuffle_global_(shuffle_global), data_schema_(std::move(schema)), all_num_rows_(0), num_rows_per_shard_(0), @@ -104,9 +110,9 @@ void TextFileOp::Print(std::ostream &out, bool show_all) const { // Call the super class for displaying any common detailed info ParallelOp::Print(out, show_all); // Then show any custom derived-internal stuff - out << "\nRows per buffer: " << rows_per_buffer_ << "\nSample count: " << num_samples_ - << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ - << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nText files list:\n"; + out << "\nRows per buffer: " << rows_per_buffer_ << "\nRow count: " << total_rows_ << "\nDevice id: " << device_id_ + << "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") + << "\nText files list:\n"; for (int i = 0; i < text_files_list_.size(); ++i) { out << " " << text_files_list_[i]; } @@ -314,8 +320,7 @@ Status TextFileOp::FillIOBlockQueue(const std::vector &i_keys) { break; } } - auto file_it = filename_index_->Search(*it); - file_index.emplace_back(std::pair(file_it.value(), *it)); + file_index.emplace_back(std::pair((*filename_index_)[*it], *it)); } } else { for (auto it = filename_index_->begin(); it != filename_index_->end(); ++it) { @@ -404,9 +409,9 @@ Status TextFileOp::operator()() { RETURN_IF_NOT_OK(jagged_buffer_connector_->Pop(0, &buffer)); if (buffer->eoe()) { workers_done++; - } else if (num_samples_ == 0 || rows_read < num_samples_) { - if ((num_samples_ > 0) && (rows_read + buffer->NumRows() > num_samples_)) { - int64_t rowsToRemove = buffer->NumRows() - (num_samples_ - rows_read); + } else if (total_rows_ == 0 || rows_read < total_rows_) { + if ((total_rows_ > 0) && (rows_read + buffer->NumRows() > total_rows_)) { + int64_t rowsToRemove = buffer->NumRows() - (total_rows_ - rows_read); RETURN_IF_NOT_OK(buffer->SliceOff(rowsToRemove)); } rows_read += buffer->NumRows(); diff --git 
a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h index 8b8eda00fe..dd258d914e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h @@ -107,8 +107,15 @@ class TextFileOp : public ParallelOp { // Setter method. // @return Builder - setter method returns reference to the builder. - Builder &SetNumSamples(int64_t num_samples) { - builder_num_samples_ = num_samples; + Builder &SetShuffleGlobal(bool shuffle_global) { + builder_shuffle_global_ = shuffle_global; + return *this; + } + + // Setter method. + // @return Builder - setter method returns reference to the builder. + Builder &SetTotalRows(int64_t total_rows) { + builder_total_rows_ = total_rows; return *this; } @@ -118,10 +125,11 @@ class TextFileOp : public ParallelOp { int32_t builder_num_workers_; int32_t builder_op_connector_size_; int64_t builder_rows_per_buffer_; - int64_t builder_num_samples_; + int64_t builder_total_rows_; int32_t builder_worker_connector_size_; std::vector builder_text_files_list_; bool builder_shuffle_files_; + bool builder_shuffle_global_; std::unique_ptr builder_schema_; }; @@ -135,10 +143,11 @@ class TextFileOp : public ParallelOp { // @param op_connector_size - size of each queue in the connector that the child operator pulls from. // @param columns_to_load - the names of the columns to load data from. // @param shuffle_files - whether or not to shuffle the files before reading data. + // @param shuffle_global - whether or not to shuffle the entire dataset. // @param equal_rows_per_shard - whether or not to get equal rows for each process. 
- TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size, + TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t total_rows, int32_t worker_connector_size, std::unique_ptr, std::vector text_files_list, int32_t op_connector_size, - bool shuffle_files, int32_t num_devices, int32_t device_id); + bool shuffle_files, bool shuffle_global, int32_t num_devices, int32_t device_id); // Default destructor ~TextFileOp() = default; @@ -169,6 +178,18 @@ class TextFileOp : public ParallelOp { // @return Status - the error coed returned. static Status CountAllFileRows(const std::vector &files, int64_t *count); + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "TextFileOp"; } + + // File names getter + // @return Vector of the input file names + std::vector FileNames() { return text_files_list_; } + + // Global shuffle flag getter + // @return Bool - whether this Op requires global shuffle + bool RequireGlobalShuffle() { return shuffle_global_; } + private: // The entry point for when workers are launched. // @param worker_id - the id of the worker that is executing this function. 
@@ -246,9 +267,10 @@ class TextFileOp : public ParallelOp { int32_t device_id_; int32_t num_devices_; int64_t rows_per_buffer_; - int64_t num_samples_; + int64_t total_rows_; std::vector text_files_list_; bool shuffle_files_; + bool shuffle_global_; std::unique_ptr data_schema_; int64_t all_num_rows_; int64_t num_rows_per_shard_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.cc deleted file mode 100644 index 8803c3f040..0000000000 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.cc +++ /dev/null @@ -1,326 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "dataset/engine/datasetops/source/tf_buffer.h" -#include -#include -#include -#include -#include - -#include "common/utils.h" -#include "utils/log_adapter.h" - -#include "dataset/engine/datasetops/source/tf_client.h" -#include "dataset/core/data_type.h" -#include "dataset/engine/datasetops/source/storage_client.h" -#include "dataset/engine/data_schema.h" - -namespace mindspore { -namespace dataset { -// constructor -TFBuffer::TFBuffer( - uint32_t id, // In: The id for this buffer - BufferFlags flags, // In: The flags for this buffer - const std::shared_ptr &storage_client) // In: Storage client that is related to this buffer type - : DataBuffer(id, flags), storage_client_(storage_client) {} - -// destructor -TFBuffer::~TFBuffer() {} - -// Name: print() -// Description: A function that prints info -void TFBuffer::Print(std::ostream &out, // In: The output stream to print to - bool show_all) const { // In: T/F if it should print everything - out << "TFBuffer print\n"; - - // Call base class printer - DataBuffer::Print(out, show_all); -} - -// Name: load() -// Description: populates the DataBuffer with data -// Overrides base-class method. -Status TFBuffer::Load() { - const DataSchema *the_schema = storage_client_->schema(); - uint32_t num_columns = the_schema->NumColumns(); - uint32_t num_rows_requested = storage_client_->rows_per_buffer(); - uint32_t remaining_rows = storage_client_->num_rows() > buffer_id_ * storage_client_->rows_per_buffer() - ? storage_client_->num_rows() - buffer_id_ * storage_client_->rows_per_buffer() - : 0; - if (remaining_rows < num_rows_requested) { - num_rows_requested = remaining_rows; - } - - // Construct the Tensor table for this buffer. - tensor_table_ = std::make_unique(); - - // At each position in the tensor table, instantiate the shared pointer to it's Tensor. 
- uint32_t row = 0; - while (row < num_rows_requested && (cur_reader_.peek() != EOF || storage_client_->IsMoreData(buffer_id_))) { - TensorRow new_row; - - // Read the data from storage into a tf_file format - dataengine::Example tf_file; - RETURN_IF_NOT_OK(ParseSingleExample(&tf_file)); - for (uint32_t col = 0; col < num_columns; ++col) { - std::shared_ptr new_t; - const ColDescriptor current_col = the_schema->column(col); - const dataengine::Features &example_features = tf_file.features(); - const google::protobuf::Map &feature_map = example_features.feature(); - const dataengine::Feature &column_values_list = feature_map.at(current_col.name()); - const dataengine::Feature::KindCase column_list_type = column_values_list.kind_case(); - RETURN_IF_NOT_OK(LoadFeature(column_list_type, column_values_list, current_col, &new_t)); - - // Add the column to the current tensor row - new_row.push_back(std::move(new_t)); - } - - // Add the new row of tensors to the end of our tensor table - tensor_table_->push_back(new_row); - row++; - } - cur_reader_.close(); - return Status::OK(); -} - -// Name: ParseSingleExample() -// Description: Drives the calls to TFClient for fetching the tf_file info from -// the tf_file files. Returns a single row of data from the tf_file -// files. 
-Status TFBuffer::ParseSingleExample(dataengine::Example *ptr) { - if (cur_reader_.peek() == EOF) { - auto client = std::dynamic_pointer_cast(storage_client_); - if (client == nullptr) { - std::string errMsg = "Unexpected storage client type for TFBuffer"; - RETURN_STATUS_UNEXPECTED(errMsg); - } - RETURN_IF_NOT_OK(client->NextFileInfo(buffer_id_, &cur_f_info_)); - cur_reader_.close(); - cur_reader_.open(cur_f_info_.fileName); - // Seek to the offset - (void)cur_reader_.seekg(static_cast(cur_f_info_.startOffset)); - MS_LOG(DEBUG) << "got new file " << cur_f_info_.fileName << "."; - } - - // one record in tf_file looks like: - // Format of a single record: - // uint64 length - // uint32 masked crc of length - // byte data[length] - // uint32 masked crc of data - // read length - if (cur_reader_.peek() == EOF) { - MS_LOG(ERROR) << "ParseSingleExample failed"; - } - - dataengine::Example tf_file; - try { - uint64_t record_length = 0; - (void)cur_reader_.read(reinterpret_cast(&record_length), static_cast(sizeof(uint64_t))); - - // ignore crc header - (void)cur_reader_.ignore(static_cast(sizeof(uint32_t))); - - // read serialized Example - std::string serialized_example; - serialized_example.resize(record_length); - (void)cur_reader_.read(&serialized_example[0], static_cast(record_length)); - - // ignore crc footer - (void)cur_reader_.ignore(static_cast(sizeof(uint32_t))); - - if (!tf_file.ParseFromString(serialized_example)) { - std::string err_msg = "parse tf_file failed"; - RETURN_STATUS_UNEXPECTED(err_msg); - } - } catch (const std::exception &err) { - std::string err_msg = "Please check if the data file is complete!"; - RETURN_STATUS_UNEXPECTED(err_msg); - } - *ptr = tf_file; - return Status::OK(); -} - -// Name: LoadFeature() -// Description: Given the column type of the tf record and the values list, -// constructs the tensor and returns it. 
-Status TFBuffer::LoadFeature(const dataengine::Feature::KindCase &column_list_type, - const dataengine::Feature &column_values_list, const ColDescriptor ¤t_col, - std::shared_ptr *out_tensor) { - std::string element_str; // For staging data from protobuf deserialization - std::unique_ptr int_array; // For staging data from protobuf deserialization - std::unique_ptr float_array; // For staging data from protobuf deserialization - const unsigned char *data_ptr = nullptr; // Generic pointer used for populating the Tensor - // This variable will point into the above staging - // variables. - uint32_t num_elements = 0; // Generic counter used for setting shape attributes - - // Depending on the type of data from the tf_file, we want to extract 2 things: - // 1) A pointer to the data as a const unsigned char * - // 2) The number of elements of the data - // After those are determined, we can then build the tensor to represent this data. - - switch (column_list_type) { - // CASE : TF record type: kBytesList - case dataengine::Feature::KindCase::kBytesList: { - RETURN_IF_NOT_OK(LoadBytesList(current_col, column_values_list, &element_str)); - - // Get the const pointer representation of this data, and the number of elements - // (number of bytes) for this tensor. 
- data_ptr = reinterpret_cast(common::SafeCStr(element_str)); - num_elements = element_str.length(); - break; - } - - // CASE : TF record type: kFloatList - case dataengine::Feature::KindCase::kFloatList: { - RETURN_IF_NOT_OK(LoadFloatList(current_col, column_values_list, &num_elements, &float_array)); - - data_ptr = reinterpret_cast(float_array.get()); - break; - } - - // CASE : TF record type: kInt64List - case dataengine::Feature::KindCase::kInt64List: { - RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, &num_elements, &int_array)); - - data_ptr = reinterpret_cast(int_array.get()); - break; - } - case dataengine::Feature::KindCase::KIND_NOT_SET: { - std::string errMsg = "tf_file column list type enum is KIND_NOT_SET"; - RETURN_STATUS_UNEXPECTED(errMsg); - } - default: { - std::string errMsg = "tf_file column list type enum does not match any known DE type"; - RETURN_STATUS_UNEXPECTED(errMsg); - } - } - - // At this point we have a raw pointer to the data, and we have the number of elements. - // Along with the tensor implementation type and the data type from the schema, we - // enough info to construct the Tensor for it. - TensorShape current_shape = TensorShape::CreateUnknownRankShape(); - RETURN_IF_NOT_OK(CreateTensorShapeForColumn(current_col, num_elements, ¤t_shape)); - - // Now, create this tensor directly into the appropriate slot in our tensor - // table. - RETURN_IF_NOT_OK( - Tensor::CreateTensor(out_tensor, current_col.tensorImpl(), current_shape, current_col.type(), data_ptr)); - - return Status::OK(); -} - -Status TFBuffer::LoadBytesList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, - std::string *element_str) { - // kBytesList can map to the following DE types ONLY! - // DE_UINT8, DE_INT8 - // Must be single byte type for each element! 
- if (current_col.type() != DataType::DE_UINT8 && current_col.type() != DataType::DE_INT8) { - std::string err_msg = "Invalid datatype for Tensor at column: " + current_col.name(); - RETURN_STATUS_UNEXPECTED(err_msg); - } - const dataengine::BytesList &bytes_list = column_values_list.bytes_list(); - - // A bytesList is a special case where the entire list of data can be - // deserialized into a single string. For example, it is not a list - // of bytes, it is a list of strings, where each string represents - // a list of bytes (this is different from the other cases like IntList etc) - // As such, if there is more than one string in this list, that is invalid. - if (bytes_list.value_size() > 1) { - std::string err_msg = "Bytes list contains more than one element for column: " + current_col.name(); - RETURN_STATUS_UNEXPECTED(err_msg); - } - - // Extract the string that contains the bytes we need. Position 0 is the only - // valid string here. - *element_str = bytes_list.value(0); - - return Status::OK(); -} - -Status TFBuffer::LoadFloatList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, - uint32_t *num_elements, std::unique_ptr *float_array) { - // KFloatList can only map to DE types: - // DE_FLOAT32 - if (current_col.type() != DataType::DE_FLOAT32) { - std::string err_msg = "Invalid datatype for Tensor at column: " + current_col.name(); - RETURN_STATUS_UNEXPECTED(err_msg); - } - const dataengine::FloatList &float_list = column_values_list.float_list(); - - // Identify how many values we have and then create a local array of these - // to deserialize into - *num_elements = float_list.value_size(); - *float_array = std::make_unique(*num_elements); - for (int i = 0; i < float_list.value_size(); i++) { - (*float_array)[i] = float_list.value(i); - } - - return Status::OK(); -} - -Status TFBuffer::LoadIntList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, - uint32_t *num_elements, std::unique_ptr *int_array) { - // 
KInt64List can only map to DE types: - // DE_UINT64, DE_INT64, DE_UINT32, DE_INT32, DE_UINT16, DE_INT16, DE_UINT8, DE_INT8 - if (!(current_col.type().IsInt())) { - std::string err_msg = "Invalid datatype/rank for column label in TFBuffer."; - RETURN_STATUS_UNEXPECTED(err_msg); - } - - const dataengine::Int64List &int64_list = column_values_list.int64_list(); - - // Identify how many values we have and then create a local array of these - // to deserialize into - *num_elements = int64_list.value_size(); - *int_array = std::make_unique(*num_elements); - for (int i = 0; i < int64_list.value_size(); i++) { - (*int_array)[i] = int64_list.value(i); - } - - return Status::OK(); -} - -Status TFBuffer::CreateTensorShapeForColumn(const ColDescriptor ¤t_col, uint32_t num_elements, - TensorShape *current_shape) { - // If the shape is assigned by user, we have an assumption that the data is - // already in the appropriate format that we can copy into the Tensor as-is. - if (current_col.hasShape()) { - *current_shape = current_col.shape(); - } else if (current_col.rank() == 1) { - // If shape was not given, then we support 2 possible shapes. - // 1) It's a scalar (rank 0), in which case the shape is empty but we need to flag - // it as a scalar value (empty shape but has a single value) - // 2) It's a rank 1 shape, and the dimension value for that single dimension will - // be comprised of the entire bytes-size of the input data. - *current_shape = TensorShape({num_elements}); - } else if (current_col.rank() == 0) { - // Make this shape into a single value scalar. - *current_shape = TensorShape::CreateScalar(); - } else if (current_col.rank() > 1) { - // All other ranks, except for 0, are invalid because we cannot guess - // what the shape will be. For example, if we have rank 3 and 12 bytes - // of data, is it shape {2,2,3} or is it {2,6,1}. We can't guess at - // the shape dimensions. - const std::string kErrMsg = "Invalid rank (rank>1) for dynamic shape construction. 
Specify shape in schema."; - RETURN_STATUS_UNEXPECTED(kErrMsg); - } - - return Status::OK(); -} -} // namespace dataset -} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.h b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.h deleted file mode 100644 index 389f4a76d9..0000000000 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.h +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef DATASET_ENGINE_DATASETOPS_SOURCE_TF_BUFFER_H_ -#define DATASET_ENGINE_DATASETOPS_SOURCE_TF_BUFFER_H_ - -#include -#include -#include -#include -#include "dataset/engine/data_buffer.h" -#include "proto/example.pb.h" -#include "dataset/engine/datasetops/source/tf_client.h" - -namespace mindspore { -namespace dataset { -// This TFBuffer is the buffer type for dealing with tf record data. 
-class TFBuffer : public DataBuffer { - public: - // constructor - TFBuffer(uint32_t id, // In: The id for this buffer - DataBuffer::BufferFlags flags, // In: The flags for this buffer - const std::shared_ptr - &storage_client); // In: The storage client that is related to this buffer type - - // destructor - ~TFBuffer() override; - - // Name: print() - // Description: A function that prints info - void Print(std::ostream &out, // In: The output stream to print to - bool show_all) const override; // In: T/F if it should print everything - - // Provide stream operator for displaying it - friend std::ostream &operator<<(std::ostream &out, const TFBuffer &tf_buffer) { - tf_buffer.Print(out, false); // Show meta info only - return out; - } - - // Name: load() - // Description: populates the DataBuffer with data. - // Overrides base-class method. - Status Load() override; - - private: - std::ifstream cur_reader_; - FileInfo cur_f_info_; - - std::shared_ptr storage_client_; // The storage client for populating the buffer initially. - - // Name: ParseSingleExample() - // Description: Drives the calls to TFClient for fetching the tf_file info from - // the tf_file files. Returns a single row of data from the tf_file - // files. - Status ParseSingleExample(dataengine::Example *ptr); - - // Name: LoadFeature() - // Description: Given the column type of the tf record and the values list, - // constructs the tensor and returns it. 
- Status LoadFeature(const dataengine::Feature::KindCase &column_list_type, - const dataengine::Feature &column_values_list, const ColDescriptor ¤t_col, - std::shared_ptr *out_tensor); - - Status LoadBytesList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, - std::string *element_str); - - Status LoadFloatList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, - uint32_t *num_elements, std::unique_ptr *float_array); - - Status LoadIntList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, - uint32_t *num_elements, std::unique_ptr *int_array); - - Status CreateTensorShapeForColumn(const ColDescriptor ¤t_col, uint32_t num_elements, - TensorShape *current_shape); -}; -} // namespace dataset -} // namespace mindspore - -#endif // DATASET_ENGINE_DATASETOPS_SOURCE_TF_BUFFER_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.cc deleted file mode 100644 index 9e8cd67ae6..0000000000 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.cc +++ /dev/null @@ -1,376 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "dataset/engine/datasetops/source/tf_client.h" - -#include -#include -#include -#include -#include -#include - -#include "common/utils.h" -#include "proto/example.pb.h" -#include "dataset/engine/datasetops/source/storage_client.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" -#include "dataset/engine/datasetops/source/storage_op.h" -#include "utils/log_adapter.h" - -namespace mindspore { -namespace dataset { -// Name: Constructor -// Description: Creates the TFClient. -TFClient::TFClient(std::unique_ptr schema, // In: The schema for this storage client. - StorageOp *so) // In: The StorageOp that's using this client - : StorageClient(std::move(schema), so), - rows_per_buffer_(so->rows_per_buffer()), - random_seed_generator_(so->seed()), - random_seed_distribution_(0, std::numeric_limits::max()), - rows_per_shard_(0) {} - -Status TFClient::Init() { - // Initialize queue to hold the tf file names - const std::string kExtensionData = ".data"; - const std::string kExtensionTF = ".tfrecord"; - bool schema_init = false; - if (!storage_op_->dataset_files_dir().empty()) { - MS_LOG(DEBUG) << "Reading dataset using datasetPath."; - Path data_set_directory(storage_op_->dataset_files_dir()); - auto dirIt = Path::DirIterator::OpenDirectory(&data_set_directory); - if (dirIt) { - while (dirIt->hasNext()) { - Path file = dirIt->next(); - std::string filename = file.toString(); - if ((file.Extension() == kExtensionData) || (file.Extension() == kExtensionTF)) { - const std::vector recs_lengths = ParseTfFileLines(filename); - v_total_file_rows_.emplace_back( - std::pair>(filename, std::move(recs_lengths))); - - // schema - if (!schema_init) { - RETURN_IF_NOT_OK(ParseTfFileSchema(filename)); - schema_init = true; - } - MS_LOG(INFO) << "found tf file: " << filename << ", num rows " << recs_lengths.size() << "."; - } - } - } else { - RETURN_STATUS_UNEXPECTED("Unable to open directory " + data_set_directory.toString()); - } - } else { - MS_LOG(DEBUG) 
<< "Reading dataset using dataset files list."; - for (auto filename : storage_op_->dataset_file_list()) { - const std::vector recs_lengths = ParseTfFileLines(filename); - v_total_file_rows_.emplace_back(std::pair>(filename, std::move(recs_lengths))); - - // schema - if (!schema_init) { - RETURN_IF_NOT_OK(ParseTfFileSchema(filename)); - schema_init = true; - } - MS_LOG(INFO) << "Processed tf file: " << filename << ", num rows " << recs_lengths.size() << "."; - } - } - - RETURN_IF_NOT_OK(CalculateRowsPerDevice()); - std::sort(v_total_file_rows_.begin(), v_total_file_rows_.end()); - RETURN_IF_NOT_OK(ScatterFileRows(static_cast(storage_op_->device_id()), storage_op_->shard_config(), - storage_op_->seed(), storage_op_->shuffle_config())); - - CalculateNumRows(); - InitStateInfo(); - return Status::OK(); -} - -// Sharding will reduce the number of rows. Doing this in constructor as we only want to do this once. -void TFClient::CalculateNumRows() { - num_rows_in_dataset_ = 0; - for (auto rows : file_start_end_offset_) { - num_rows_in_dataset_ += (rows.second - rows.first); - } -} - -Status TFClient::CalculateRowsPerDevice() { - uint64_t num = std::accumulate( - v_total_file_rows_.begin(), v_total_file_rows_.end(), 0, - [](uint64_t value, const std::pair> &a) { return value + a.second.size(); }); - if (static_cast(std::floor(num * 1.0 / storage_op_->device_num())) == 0) { - RETURN_STATUS_UNEXPECTED("Num rows of dataset is less than device number"); - } - rows_per_shard_ = static_cast(std::ceil(num * 1.0 / storage_op_->device_num())); - return Status::OK(); -} - -bool TFClient::ValidFileForShard(const uint64_t file_rows, uint64_t *start_offset, uint64_t *end_offset, - const uint64_t &pre_count, uint32_t device_id) const { - *start_offset = 0; - *end_offset = 0; - bool valid = false; - uint64_t start_index = device_id * rows_per_shard_; - uint64_t end_index = (device_id + 1) * rows_per_shard_; - - // First valid file - if (pre_count <= start_index && pre_count + file_rows > 
start_index) { - *start_offset = start_index - pre_count; - valid = true; - if (pre_count < end_index && pre_count + file_rows >= end_index) { - *end_offset = end_index - pre_count; - } else { - *end_offset = file_rows; - } - } - - // Second and subsequent files - if (pre_count > start_index && pre_count < end_index) { - *start_offset = 0; - valid = true; - if (pre_count + file_rows >= end_index) { - *end_offset = end_index - pre_count; - } else { - *end_offset = file_rows; - } - } - - return valid; -} - -void TFClient::GetValidFileForShard(const std::vector>> &v_files, - uint32_t device_id) { - uint64_t start_offset = 0; - uint64_t end_offset = 0; - uint64_t pre_count = 0; - bool finish = false; - while (!finish) { - for (const auto &file : v_files) { - if (ValidFileForShard(file.second.size(), &start_offset, &end_offset, pre_count, device_id)) { - std::pair offset(start_offset, end_offset); - file_start_end_offset_.emplace_back(offset); - v_file_rows_.emplace_back(file); - } - pre_count += file.second.size(); - } - if (pre_count < (device_id + 1) * rows_per_shard_) { - finish = false; - } else { - finish = true; - } - } -} - -// Description: Scatter file rows to local single-P according to config info. -// There are 3 modes: ALL, UNIQUE, RANDOM. For UNIQUE and RANDOM mode, shuffleConfig controls -// whether file row vector would be shuffled or not before a new mEopch. -// For ALL mode, temporarily, we deal with epoch in python part. 
-Status TFClient::ScatterFileRows(uint32_t device_id, const std::string &shard_config, uint32_t seed, - bool shuffle_config) { - if (shard_config == "UNIQUE" || shard_config == "RANDOM") { - std::vector>> v_shuffled_total_file_rows = - ShuffleVector(v_total_file_rows_, seed); - GetValidFileForShard(v_shuffled_total_file_rows, device_id); - if (shuffle_config) { - v_total_file_rows_ = v_shuffled_total_file_rows; - } - } else if (shard_config == "ALL") { - v_file_rows_.insert(v_file_rows_.end(), v_total_file_rows_.begin(), v_total_file_rows_.end()); - if (shuffle_config) { - v_total_file_rows_ = ShuffleVector(v_total_file_rows_, seed); - } - - for (const auto &file : v_file_rows_) { - std::pair offset(0, file.second.size()); - file_start_end_offset_.emplace_back(offset); - } - } else { - RETURN_STATUS_UNEXPECTED("In parallel config file, wrong shuffleConfig or shardConfig provided."); - } - - return Status::OK(); -} - -std::vector>> TFClient::ShuffleVector( - std::vector>> v, uint32_t seed = 1) { - std::default_random_engine randomEngine(seed); - std::shuffle(std::begin(v), std::end(v), randomEngine); - return v; -} - -void TFClient::CalculateStartOffset(const uint64_t start_index, const uint64_t end_index, - const std::vector &vec_length, uint64_t *start_offset) const { - for (size_t i = start_index; i < end_index; i++) { - // Format of a single record: - // uint64 length - // uint32 masked crc of length - // byte data[length] - // uint32 masked crc of data - *start_offset += sizeof(uint64_t) + 2 * sizeof(uint32_t) + vec_length[i]; - } -} - -void TFClient::InitStateInfo() { - uint32_t start_idx = 0, record_num = 0, buffer_id = 0; - uint64_t start_offset = 0; - bool first_buffer = true; - f_info_queue_.emplace_back(QFile()); - std::vector>>::iterator itr = v_file_rows_.begin(); - uint32_t index = 0; - while (itr != v_file_rows_.end()) { - uint32_t file_start_index = file_start_end_offset_[index].first; - uint32_t file_end_index = file_start_end_offset_[index].second; 
- FileInfo f_info; - f_info.fileName = itr->first; - f_info.startRecordIdx = start_idx > file_start_index ? start_idx : file_start_index; - if (first_buffer && f_info.startRecordIdx != 0) { - CalculateStartOffset(0, f_info.startRecordIdx, itr->second, &start_offset); - start_idx = static_cast(f_info.startRecordIdx); - } - first_buffer = false; - f_info.startOffset = start_offset; - if (start_idx + rows_per_buffer_ - record_num < itr->second.size()) { - uint64_t end_idx = start_idx + rows_per_buffer_ - record_num - 1; - f_info.endRecordIdx = end_idx > (file_end_index - 1) ? (file_end_index - 1) : end_idx; - f_info_queue_[buffer_id].push(f_info); - CalculateStartOffset(start_idx, f_info.endRecordIdx + 1, itr->second, &start_offset); - start_idx = start_idx + rows_per_buffer_ - record_num; - record_num = 0; - buffer_id++; - f_info_queue_.emplace_back(QFile()); - if (end_idx >= file_end_index - 1) { - start_idx = start_offset = 0; - ++itr; - ++index; - } - } else { - f_info.endRecordIdx = itr->second.size() - 1 > file_end_index - 1 ? 
file_end_index - 1 : itr->second.size() - 1; - f_info_queue_[buffer_id].push(f_info); - if (start_idx + rows_per_buffer_ - record_num == itr->second.size()) { - record_num = start_idx = start_offset = 0; - buffer_id++; - if (itr + 1 != v_file_rows_.end()) { - f_info_queue_.emplace_back(QFile()); - } - } else { - record_num += static_cast(itr->second.size()) - start_idx; - start_idx = start_offset = 0; - } - ++itr; - ++index; - } - } -} - -// Name: Print() -// Description: A function that prints info about the TFClient -void TFClient::Print(std::ostream &out) const { // In: The output stream to print to - out << "TF client."; -} - -std::vector TFClient::ParseTfFileLines(const std::string &filename) { - std::vector recs_lengths; - std::ifstream reader; - reader.open(filename); - while (true) { - if (reader.peek() == EOF) { - reader.close(); - break; - } - - // read length - uint64_t record_length = 0; - (void)reader.read(reinterpret_cast(&record_length), static_cast(sizeof(uint64_t))); - recs_lengths.push_back(record_length); - - // ignore crc header - (void)reader.ignore(static_cast(sizeof(uint32_t))); - - // ignore data length - (void)reader.ignore(static_cast(record_length)); - - // ignore crc footer - (void)reader.ignore(static_cast(sizeof(uint32_t))); - } - return recs_lengths; -} - -Status TFClient::ParseTfFileSchema(const std::string &filename) { - std::ifstream reader; - reader.open(filename); - std::string serialized_example; - // read length - uint64_t record_length = 0; - (void)reader.read(reinterpret_cast(&record_length), static_cast(sizeof(uint64_t))); - - // ignore crc header - (void)reader.ignore(static_cast(sizeof(uint32_t))); - - // read serialized Example - serialized_example.resize(record_length); - (void)reader.read(&serialized_example[0], static_cast(record_length)); - - // ignore crc footer - (void)reader.ignore(static_cast(sizeof(uint32_t))); - - reader.close(); - dataengine::Example tf_file; - if (!tf_file.ParseFromString(serialized_example)) 
{ - std::string err_msg = "parse tf_file failed, file name is " + filename; - RETURN_STATUS_UNEXPECTED(err_msg); - } - const dataengine::Features &example_features = tf_file.features(); - const google::protobuf::Map &feature_map = example_features.feature(); - for (auto it = feature_map.begin(); it != feature_map.end(); ++it) { - col_names_.push_back(it->first); - } - return Status::OK(); -} - -// Name: Reset() -// Description: Resets any state info inside the client back to it's initialized -// state. -Status TFClient::Reset() { - v_file_rows_.clear(); - file_start_end_offset_.clear(); - - uint32_t next_seed = random_seed_distribution_(random_seed_generator_); - RETURN_IF_NOT_OK(ScatterFileRows(static_cast(storage_op_->device_id()), storage_op_->shard_config(), - next_seed, storage_op_->shuffle_config())); - - CalculateNumRows(); - uint32_t num_rows_in_file = 0; - RETURN_IF_NOT_OK(this->numRowsFromFile(num_rows_in_file)); - if (num_rows_in_file < num_rows_in_dataset_) { - num_rows_in_dataset_ = num_rows_in_file; - } - - storage_op_->set_num_rows(static_cast(num_rows_in_dataset_)); - InitStateInfo(); - - return Status::OK(); -} - -Status TFClient::NextFileInfo(uint32_t id, FileInfo *ptr) { - if (f_info_queue_.empty() || id >= f_info_queue_.size() || f_info_queue_[id].empty()) { - RETURN_STATUS_UNEXPECTED("cannot find next FileInfo in mFInfoQueue"); - } - *ptr = f_info_queue_[id].front(); - f_info_queue_[id].pop(); - return Status::OK(); -} - -bool TFClient::IsMoreData(uint32_t id) { return (!f_info_queue_[id].empty()); } -} // namespace dataset -} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.h b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.h deleted file mode 100644 index 3602f93351..0000000000 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.h +++ /dev/null @@ -1,111 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the 
"License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef DATASET_ENGINE_DATASETOPS_SOURCE_TF_CLIENT_H_ -#define DATASET_ENGINE_DATASETOPS_SOURCE_TF_CLIENT_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "proto/example.pb.h" -#include "dataset/engine/datasetops/source/storage_client.h" -#include "dataset/util/status.h" - -struct FileInfo { - std::string fileName; - uint64_t startRecordIdx; - uint64_t endRecordIdx; - uint64_t startOffset; -}; - -using QFile = std::queue; - -namespace mindspore { -namespace dataset { -// forward declares -class DataSchema; -class ParallelOp; - -class TFClient : public StorageClient { - public: - // Name: Constructor - // Description: Creates the TFClient. - TFClient(std::unique_ptr schema, // In: The schema for this storage client. - StorageOp *so); // In: The ParallelOp that's using this client - - ~TFClient() {} - - Status Init() override; - - // Name: Print() - // Description: A function that prints info about the TFClient - void Print(std::ostream &out) const override; // In: The output stream to print to - - std::vector ParseTfFileLines(const std::string &filename); - - Status ParseTfFileSchema(const std::string &filename); - - Status NextFileInfo(uint32_t id, FileInfo *); - - bool IsMoreData(uint32_t id) override; - - // Name: Reset() - // Description: Resets any state info inside the client back to it's initialized - // state. 
- Status Reset() override; - - Status ScatterFileRows(uint32_t device_id, const std::string &shard_config, uint32_t seed, bool shuffle_config); - - private: - // hardcoded, put this in json schema - // const static int32_t BERT_DATASET_TOTAL_ROWS = 43900; - uint32_t rows_per_buffer_; - std::default_random_engine random_seed_generator_; - std::uniform_int_distribution random_seed_distribution_; - - std::vector>> v_file_rows_; - std::vector>> v_total_file_rows_; - std::vector f_info_queue_; - uint64_t rows_per_shard_; - std::vector> file_start_end_offset_; - - void InitStateInfo(); - - std::vector>> ShuffleVector( - std::vector>> v, uint32_t seed); - - Status CalculateRowsPerDevice(); - - bool ValidFileForShard(const uint64_t file_rows, uint64_t *start_offset, uint64_t *end_offset, - const uint64_t &pre_count, uint32_t device_id) const; - - void CalculateNumRows(); - - void GetValidFileForShard(const std::vector>> &v_files, - uint32_t device_id); - - void CalculateStartOffset(const uint64_t start_index, const uint64_t end_index, - const std::vector &vec_length, uint64_t *start_offset) const; -}; -} // namespace dataset -} // namespace mindspore - -#endif // DATASET_ENGINE_DATASETOPS_SOURCE_TF_CLIENT_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc index 60adddb4a8..23dce8dc10 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc @@ -15,14 +15,15 @@ */ #include "dataset/engine/datasetops/source/tf_reader_op.h" -#include -#include +#include +#include #include #include #include #include +#include #include -#include +#include #include "proto/example.pb.h" #include "./securec.h" @@ -32,8 +33,6 @@ #include "dataset/engine/connector.h" #include "dataset/engine/data_schema.h" #include "dataset/engine/datasetops/source/io_block.h" -#include 
"dataset/engine/datasetops/source/storage_client.h" -#include "dataset/engine/datasetops/source/tf_client.h" #include "dataset/engine/db_connector.h" #include "dataset/engine/execution_tree.h" #include "dataset/engine/jagged_connector.h" @@ -56,6 +55,7 @@ TFReaderOp::Builder::Builder() builder_op_connector_size_ = config_manager->op_connector_size(); builder_rows_per_buffer_ = config_manager->rows_per_buffer(); builder_shuffle_files_ = false; + builder_shuffle_global_ = false; builder_data_schema_ = std::make_unique(); } @@ -126,7 +126,8 @@ Status TFReaderOp::Builder::Build(std::shared_ptr *out_tf_reader_op) std::shared_ptr new_tf_reader_op = std::make_shared( builder_num_workers_, builder_worker_connector_size_, builder_rows_per_buffer_, builder_total_rows_, builder_dataset_files_list_, std::move(builder_data_schema_), builder_op_connector_size_, builder_columns_to_load_, - builder_shuffle_files_, builder_num_devices_, builder_device_id_, builder_equal_rows_per_shard_); + builder_shuffle_files_, builder_shuffle_global_, builder_num_devices_, builder_device_id_, + builder_equal_rows_per_shard_); RETURN_IF_NOT_OK(new_tf_reader_op->Init()); *out_tf_reader_op = std::move(new_tf_reader_op); @@ -136,8 +137,8 @@ Status TFReaderOp::Builder::Build(std::shared_ptr *out_tf_reader_op) TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, int64_t total_num_rows, std::vector dataset_files_list, std::unique_ptr data_schema, int32_t op_connector_size, - std::vector columns_to_load, bool shuffle_files, int32_t num_device, - int32_t device_id, bool equal_rows_per_shard) + std::vector columns_to_load, bool shuffle_files, bool shuffle_global, + int32_t num_device, int32_t device_id, bool equal_rows_per_shard) : ParallelOp(num_workers, op_connector_size), device_id_(device_id), num_devices_(num_device), @@ -147,6 +148,7 @@ TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64 
columns_to_load_(std::move(columns_to_load)), finished_reading_dataset_(false), shuffle_files_(shuffle_files), + shuffle_global_(shuffle_global), data_schema_(std::move(data_schema)), filename_index_(std::make_unique()), load_io_block_queue_(true), @@ -172,7 +174,8 @@ void TFReaderOp::Print(std::ostream &out, bool show_all) const { // Then show any custom derived-internal stuff out << "\nRows per buffer: " << rows_per_buffer_ << "\nTotal rows: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") - << "\nDataset files list:\n"; + << "\nShuffle global: " << ((shuffle_global_) ? "yes" : "no") + << "\nDataset files list: Size: " << dataset_files_list_.size() << "\n"; for (int i = 0; i < dataset_files_list_.size(); ++i) { out << " " << dataset_files_list_[i]; } @@ -217,7 +220,6 @@ Status TFReaderOp::Init() { // temporary: make size large enough to hold all files + EOE to avoid hangs int32_t safe_queue_size = static_cast(std::ceil(dataset_files_list_.size() / num_workers_)) + 1; io_block_queues_.Init(num_workers_, safe_queue_size); - dataset_files_list_.clear(); // no longer need the original list of files return Status::OK(); } @@ -451,8 +453,7 @@ Status TFReaderOp::FillIOBlockShuffle(const std::vector &i_keys) { } } else { // Do an index lookup using that key to get the filename. 
- auto file_it = filename_index_->Search(*it); - std::string file_name = file_it.value(); + std::string file_name = (*filename_index_)[*it]; if (NeedPushFileToblockQueue(file_name, &start_offset, &end_offset, pre_count)) { auto ioBlock = std::make_unique(*it, start_offset, end_offset, IOBlock::kDeIoBlockNone); RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); @@ -481,7 +482,7 @@ Status TFReaderOp::FillIOBlockNoShuffle() { int64_t start_offset = 0; int64_t end_offset = 0; bool finish = false; - bool end_of_epoch = true; + bool end_of_epoch = false; while (!finish) { // Iterate over all the keys and add one key to each block. for (auto it = filename_index_->begin(); it != filename_index_->end(); ++it) { @@ -771,53 +772,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng // know how many elements there are and the total bytes, create tensor here: TensorShape current_shape = TensorShape::CreateScalar(); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, ¤t_shape)); - RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type())); - - // Tensors are lazily allocated, this eagerly allocates memory for the tensor. 
- unsigned char *current_tensor_addr = (*tensor)->GetMutableBuffer(); - int64_t tensor_bytes_remaining = (*num_elements) * pad_size; - - if (current_tensor_addr == nullptr) { - std::string err_msg = "tensor memory allocation failed"; - RETURN_STATUS_UNEXPECTED(err_msg); - } - - RETURN_IF_NOT_OK(LoadAndPadBytes(current_tensor_addr, bytes_list, tensor_bytes_remaining, pad_size)); - - return Status::OK(); -} - -Status TFReaderOp::LoadAndPadBytes(unsigned char *current_tensor_addr, const dataengine::BytesList &bytes_list, - int64_t tensor_bytes_remaining, int64_t pad_size) { - if (current_tensor_addr == nullptr) { - std::string err_msg = "current_tensor_addr is null"; - RETURN_STATUS_UNEXPECTED(err_msg); - } - - for (int i = 0; i < bytes_list.value_size(); i++) { - // read string data into tensor - const std::string ¤t_element = bytes_list.value(i); - int return_code = - memcpy_s(current_tensor_addr, tensor_bytes_remaining, common::SafeCStr(current_element), current_element.size()); - if (return_code != 0) { - std::string err_msg = "memcpy_s failed when reading bytesList element into Tensor"; - RETURN_STATUS_UNEXPECTED(err_msg); - } - - current_tensor_addr += current_element.size(); - tensor_bytes_remaining -= current_element.size(); - - // pad - int64_t chars_to_pad = pad_size - current_element.size(); - return_code = memset_s(current_tensor_addr, tensor_bytes_remaining, static_cast(' '), chars_to_pad); - if (return_code != 0) { - std::string err_msg = "memset_s failed when padding bytesList in Tensor"; - RETURN_STATUS_UNEXPECTED(err_msg); - } - - current_tensor_addr += chars_to_pad; - tensor_bytes_remaining -= chars_to_pad; - } + RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, bytes_list, current_shape, current_col.type(), pad_size)); return Status::OK(); } @@ -905,7 +860,7 @@ Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengin return Status::OK(); } -Status TFReaderOp::CreateSchema(const std::string tf_file, const std::vector &columns_to_load) { 
+Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector columns_to_load) { std::ifstream reader; reader.open(tf_file); @@ -926,12 +881,14 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, const std::vector &feature_map = example_features.feature(); - std::vector columns = columns_to_load; - if (columns_to_load.empty()) - (void)std::transform(feature_map.begin(), feature_map.end(), std::back_inserter(columns), + if (columns_to_load.empty()) { + (void)std::transform(feature_map.begin(), feature_map.end(), std::back_inserter(columns_to_load), [](const auto &it) -> std::string { return it.first; }); - for (const auto &curr_col_name : columns) { + std::sort(columns_to_load.begin(), columns_to_load.end()); + } + + for (const auto &curr_col_name : columns_to_load) { auto it = feature_map.find(curr_col_name); if (it == feature_map.end()) { RETURN_STATUS_UNEXPECTED("Failed to find column " + curr_col_name); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h index 3dc5ee932e..9c92d6d4be 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h @@ -146,6 +146,13 @@ class TFReaderOp : public ParallelOp { return *this; } + // Setter method. + // @return Builder - setter method returns reference to the builder. + Builder &SetShuffleGlobal(bool shuffle_global) { + builder_shuffle_global_ = shuffle_global; + return *this; + } + // Setter method. // @return Builder - setter method returns reference to the builder. 
Builder &SetShardEqualRows(bool shard_equal_rows) { @@ -165,6 +172,7 @@ class TFReaderOp : public ParallelOp { std::vector builder_dataset_files_list_; std::vector builder_columns_to_load_; bool builder_shuffle_files_; + bool builder_shuffle_global_; bool builder_equal_rows_per_shard_; }; @@ -179,11 +187,12 @@ class TFReaderOp : public ParallelOp { // @param op_connector_size - size of each queue in the connector that the child operator pulls from. // @param columns_to_load - the names of the columns to load data from. // @param shuffle_files - whether or not to shuffle the files before reading data. + // @param shuffle_global - whether or not to shuffle the entire dataset. // @param equal_rows_per_shard - whether or not to get equal rows for each process. TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, int64_t total_num_rows, std::vector dataset_files_list, std::unique_ptr data_schema, int32_t op_connector_size, std::vector columns_to_load, bool shuffle_files, - int32_t num_devices, int32_t device_id, bool equal_rows_per_shard); + bool shuffle_global, int32_t num_devices, int32_t device_id, bool equal_rows_per_shard); // Default destructor ~TFReaderOp() = default; @@ -228,6 +237,18 @@ class TFReaderOp : public ParallelOp { // @return - Status of the node visit. Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "TFReaderOp"; } + + // File names getter + // @return Vector of the input file names + std::vector FileNames() { return dataset_files_list_; } + + // Global shuffle flag getter + // @return Bool - whether this Op requires global shuffle + bool RequireGlobalShuffle() { return shuffle_global_; } + private: // The entry point for when workers are launched. // @param worker_id - the id of the worker that is executing this function. 
@@ -292,17 +313,8 @@ class TFReaderOp : public ParallelOp { // @param column_values_list - the cell that contains the bytes list to read from. // @param elementStr - the string we read the value into. // @return Status - the error code returned. - Status LoadBytesList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, - int32_t *num_elements, std::shared_ptr *tensor); - - // Loads all the strings in bytes_list into the memory at current_tensor_addr. - // @param current_tensor_addr - the memory address to load the strings to. - // @param bytes_list - the list of strings to load. - // @param tensor_bytes_remaining - the number of bytes available for this function to use. - // @param pad_size - number of bytes to pad to. - // @return Status - the error code returned. - Status LoadAndPadBytes(unsigned char *current_tensor_addr, const dataengine::BytesList &bytes_list, - int64_t tensor_bytes_remaining, int64_t pad_size); + static Status LoadBytesList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, + int32_t *num_elements, std::shared_ptr *tensor); // Reads values from a float list // @param current_col - the column descriptor containing the expected shape and type of the data. @@ -335,7 +347,7 @@ class TFReaderOp : public ParallelOp { // Reads one row of data from a tf file and creates a schema based on that row // @return Status - the error code returned. - Status CreateSchema(const std::string tf_file, const std::vector &columns_to_load); + Status CreateSchema(const std::string tf_file, std::vector columns_to_load); // Meant to be called async. Will read files in the range [begin, end) and return the total rows // @param filenames - a list of tf data filenames. 
@@ -377,6 +389,7 @@ class TFReaderOp : public ParallelOp { std::vector columns_to_load_; bool finished_reading_dataset_; bool shuffle_files_; + bool shuffle_global_; std::unique_ptr data_schema_; std::unique_ptr filename_index_; bool load_io_block_queue_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc index d96b3a8872..d3c7ff397f 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc @@ -44,7 +44,7 @@ const char kSegmentationExtension[] = ".png"; const char kAnnotationExtension[] = ".xml"; const char kImageSetsExtension[] = ".txt"; -VOCOp::Builder::Builder() : builder_decode_(false), builder_num_samples_(0), builder_sampler_(nullptr) { +VOCOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { std::shared_ptr cfg = GlobalContext::config_manager(); builder_num_workers_ = cfg->num_parallel_workers(); builder_rows_per_buffer_ = cfg->rows_per_buffer(); @@ -55,7 +55,9 @@ VOCOp::Builder::Builder() : builder_decode_(false), builder_num_samples_(0), bui Status VOCOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); if (builder_sampler_ == nullptr) { - builder_sampler_ = std::make_shared(); + const int64_t num_samples = 0; + const int64_t start_index = 0; + builder_sampler_ = std::make_shared(start_index, num_samples); } builder_schema_ = std::make_unique(); if (builder_task_type_ == TaskType::Segmentation) { @@ -71,8 +73,7 @@ Status VOCOp::Builder::Build(std::shared_ptr *ptr) { } *ptr = std::make_shared(builder_task_type_, builder_task_mode_, builder_dir_, builder_labels_to_read_, builder_num_workers_, builder_rows_per_buffer_, builder_op_connector_size_, - builder_num_samples_, builder_decode_, std::move(builder_schema_), - std::move(builder_sampler_)); + builder_decode_, std::move(builder_schema_), std::move(builder_sampler_)); return Status::OK(); } @@ -81,20 
+82,16 @@ Status VOCOp::Builder::SanityCheck() { std::string err_msg; err_msg += dir.IsDirectory() == false ? "VOC path is invalid or not set\n" : ""; err_msg += builder_num_workers_ <= 0 ? "Num of parallel workers is set to 0 or negative\n" : ""; - err_msg += builder_num_samples_ < 0 ? "num_samples is negative\n" : ""; return err_msg.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path, const std::map &class_index, int32_t num_workers, int32_t rows_per_buffer, - int32_t queue_size, int64_t num_samples, bool decode, std::unique_ptr data_schema, - std::shared_ptr sampler) + int32_t queue_size, bool decode, std::unique_ptr data_schema, std::shared_ptr sampler) : ParallelOp(num_workers, queue_size), decode_(decode), row_cnt_(0), buf_cnt_(0), - num_rows_(0), - num_samples_(num_samples), task_type_(task_type), task_mode_(task_mode), folder_path_(folder_path), @@ -112,7 +109,6 @@ VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std: Status VOCOp::TraverseSampleIds(const std::shared_ptr &sample_ids, std::vector *keys) { for (auto itr = sample_ids->begin(); itr != sample_ids->end(); ++itr) { if ((*itr) > num_rows_) continue; - if (row_cnt_ == num_samples_) break; keys->push_back(*itr); row_cnt_++; if (row_cnt_ % rows_per_buffer_ == 0) { @@ -127,7 +123,7 @@ Status VOCOp::TraverseSampleIds(const std::shared_ptr &sample_ids, std:: Status VOCOp::operator()() { RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); std::unique_ptr sampler_buffer; - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); while (true) { std::vector keys; keys.reserve(rows_per_buffer_); @@ -138,7 +134,7 @@ Status VOCOp::operator()() { RETURN_STATUS_UNEXPECTED("Sampler Tensor isn't int64"); } RETURN_IF_NOT_OK(TraverseSampleIds(sample_ids, &keys)); - 
RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); } if (keys.empty() == false) { RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( @@ -159,7 +155,7 @@ Status VOCOp::operator()() { io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); RETURN_IF_NOT_OK(wp_.Wait()); wp_.Clear(); - RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); } } } @@ -181,23 +177,13 @@ void VOCOp::Print(std::ostream &out, bool show_all) const { } Status VOCOp::Reset() { - RETURN_IF_NOT_OK(sampler_->Reset()); + RETURN_IF_NOT_OK(sampler_->ResetSampler()); row_cnt_ = 0; wp_.Set(); return Status::OK(); } -Status VOCOp::GetNumSamples(int64_t *num) const { - if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API VOCDataset.Please check file path or dataset API " - "validation first."); - } - (*num) = num_samples_; - return Status::OK(); -} - -Status VOCOp::LoadTensorRow(const std::string &image_id, TensorRow *trow) { +Status VOCOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, TensorRow *trow) { if (task_type_ == TaskType::Segmentation) { std::shared_ptr image, target; const std::string kImageFile = @@ -206,7 +192,7 @@ Status VOCOp::LoadTensorRow(const std::string &image_id, TensorRow *trow) { folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension); RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->column(1), &target)); - (*trow) = {std::move(image), std::move(target)}; + (*trow) = TensorRow(row_id, {std::move(image), std::move(target)}); } else if (task_type_ == TaskType::Detection) { std::shared_ptr image, annotation; const std::string kImageFile = @@ -215,7 +201,7 @@ Status 
VOCOp::LoadTensorRow(const std::string &image_id, TensorRow *trow) { folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension); RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, data_schema_->column(1), &annotation)); - (*trow) = {std::move(image), std::move(annotation)}; + (*trow) = TensorRow(row_id, {std::move(image), std::move(annotation)}); } return Status::OK(); } @@ -224,7 +210,7 @@ Status VOCOp::LoadBuffer(const std::vector &keys, std::unique_ptr deq = std::make_unique(); TensorRow trow; for (const uint64_t &key : keys) { - RETURN_IF_NOT_OK(this->LoadTensorRow(image_ids_[key], &trow)); + RETURN_IF_NOT_OK(this->LoadTensorRow(key, image_ids_[key], &trow)); deq->push_back(std::move(trow)); } (*db)->set_tensor_table(std::move(deq)); @@ -280,7 +266,6 @@ Status VOCOp::ParseImageIds() { in_file.close(); image_ids_.shrink_to_fit(); num_rows_ = image_ids_.size(); - num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_) ? num_rows_ : num_samples_; return Status::OK(); } @@ -305,7 +290,6 @@ Status VOCOp::ParseAnnotationIds() { } num_rows_ = image_ids_.size(); - num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_) ? 
num_rows_ : num_samples_; return Status::OK(); } @@ -384,17 +368,7 @@ Status VOCOp::LaunchThreadsAndInitOp() { } Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr *tensor) { - std::ifstream fs; - fs.open(path, std::ios::binary | std::ios::in); - if (fs.fail()) { - RETURN_STATUS_UNEXPECTED("Fail to open file: " + path); - } - int64_t num_elements = fs.seekg(0, std::ios::end).tellg(); - (void)fs.seekg(0, std::ios::beg); - RETURN_IF_NOT_OK( - Tensor::CreateTensor(tensor, col.tensorImpl(), TensorShape(std::vector(1, num_elements)), col.type())); - (void)fs.read(reinterpret_cast((*tensor)->GetMutableBuffer()), num_elements); - fs.close(); + RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, path)); if (decode_ == true) { Status rc = Decode(*tensor, tensor); if (rc.IsError()) { @@ -432,19 +406,8 @@ Status VOCOp::ReadAnnotationToTensor(const std::string &path, const ColDescripto return Status::OK(); } -// Derived from RandomAccessOp -Status VOCOp::GetNumRowsInDataset(int64_t *num) const { - if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API VOCDataset.Please check file path or dataset API " - "validation first."); - } - (*num) = num_rows_; - return Status::OK(); -} - Status VOCOp::CountTotalRows(const std::string &dir, const std::string &task_type, const std::string &task_mode, - const py::dict &dict, int64_t numSamples, int64_t *count) { + const py::dict &dict, int64_t *count) { if (task_type == "Detection") { std::map input_class_indexing; for (auto p : dict) { @@ -464,14 +427,12 @@ Status VOCOp::CountTotalRows(const std::string &dir, const std::string &task_typ RETURN_IF_NOT_OK(op->ParseImageIds()); *count = static_cast(op->image_ids_.size()); } - *count = (numSamples == 0 || *count < numSamples) ? 
*count : numSamples; return Status::OK(); } Status VOCOp::GetClassIndexing(const std::string &dir, const std::string &task_type, const std::string &task_mode, - const py::dict &dict, int64_t numSamples, - std::map *output_class_indexing) { + const py::dict &dict, std::map *output_class_indexing) { std::map input_class_indexing; for (auto p : dict) { (void)input_class_indexing.insert(std::pair(py::reinterpret_borrow(p.first), diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.h index 203ec05fab..bce82a43c9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.h @@ -116,14 +116,6 @@ class VOCOp : public ParallelOp, public RandomAccessOp { return *this; } - // Setter method. - // @param int64_t num_samples - // @return Builder setter method returns reference to the builder. - Builder &SetNumSamples(int64_t num_samples) { - builder_num_samples_ = num_samples; - return *this; - } - // Setter method. // @param std::shared_ptr sampler // @return Builder setter method returns reference to the builder. 
@@ -157,7 +149,6 @@ class VOCOp : public ParallelOp, public RandomAccessOp { int32_t builder_num_workers_; int32_t builder_op_connector_size_; int32_t builder_rows_per_buffer_; - int64_t builder_num_samples_; std::shared_ptr builder_sampler_; std::unique_ptr builder_schema_; std::map builder_labels_to_read_; @@ -171,14 +162,12 @@ class VOCOp : public ParallelOp, public RandomAccessOp { // @param int32_t num_workers - number of workers reading images in parallel // @param int32_t rows_per_buffer - number of images (rows) in each buffer // @param int32_t queue_size - connector queue size - // @param int64_t num_samples - number of samples to read // @param bool decode - whether to decode images // @param std::unique_ptr data_schema - the schema of the VOC dataset // @param std::shared_ptr sampler - sampler tells VOCOp what to read VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path, const std::map &class_index, int32_t num_workers, int32_t rows_per_buffer, - int32_t queue_size, int64_t num_samples, bool decode, std::unique_ptr data_schema, - std::shared_ptr sampler); + int32_t queue_size, bool decode, std::unique_ptr data_schema, std::shared_ptr sampler); // Destructor ~VOCOp() = default; @@ -194,15 +183,6 @@ class VOCOp : public ParallelOp, public RandomAccessOp { // @return Status - The error code return Status operator()() override; - // Method derived from RandomAccessOp, enable Sampler to get numRows - // @param uint64_t num - to return numRows - // return Status - The error code return - Status GetNumSamples(int64_t *num) const override; - - // Method derived from RandomAccessOp, enable Sampler to get total number of rows in dataset - // @param uint64_t num - to return numRows - Status GetNumRowsInDataset(int64_t *num) const override; - // A print method typically used for debugging // @param out // @param show_all @@ -212,10 +192,9 @@ class VOCOp : public ParallelOp, public RandomAccessOp { // @param const std::string 
&task_type - task type of reading voc job // @param const std::string &task_mode - task mode of reading voc job // @param const py::dict &dict - input dict of class index - // @param int64_t numSamples - samples number of VOCDataset // @param int64_t *count - output rows number of VOCDataset static Status CountTotalRows(const std::string &dir, const std::string &task_type, const std::string &task_mode, - const py::dict &dict, int64_t numSamples, int64_t *count); + const py::dict &dict, int64_t *count); // @param const std::string &dir - VOC dir path // @param const std::string &task_type - task type of reading voc job @@ -224,8 +203,11 @@ class VOCOp : public ParallelOp, public RandomAccessOp { // @param int64_t numSamples - samples number of VOCDataset // @param std::map *output_class_indexing - output class index of VOCDataset static Status GetClassIndexing(const std::string &dir, const std::string &task_type, const std::string &task_mode, - const py::dict &dict, int64_t numSamples, - std::map *output_class_indexing); + const py::dict &dict, std::map *output_class_indexing); + + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "VOCOp"; } private: // Initialize Sampler, calls sampler->Init() within @@ -233,10 +215,11 @@ class VOCOp : public ParallelOp, public RandomAccessOp { Status InitSampler(); // Load a tensor row according to image id + // @param row_id_type row_id - id for this tensor row // @param std::string image_id - image id // @param TensorRow row - image & target read into this tensor row // @return Status - The error code return - Status LoadTensorRow(const std::string &image_id, TensorRow *row); + Status LoadTensorRow(row_id_type row_id, const std::string &image_id, TensorRow *row); // @param const std::string &path - path to the image file // @param const ColDescriptor &col - contains tensor implementation and datatype @@ -283,8 +266,6 @@ class VOCOp : public ParallelOp, public RandomAccessOp { bool 
decode_; int64_t row_cnt_; int64_t buf_cnt_; - int64_t num_rows_; - int64_t num_samples_; std::string folder_path_; TaskType task_type_; std::string task_mode_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/take_op.h b/mindspore/ccsrc/dataset/engine/datasetops/take_op.h index 64ba8e69e0..9619a4409d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/take_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/take_op.h @@ -40,7 +40,7 @@ class TakeOp : public PipelineOp { ~Builder() = default; // The builder "build" method creates the final object. - // @return shared_ptr to the new StorageOp object + // @return shared_ptr to the new TakeOp object Status Build(std::shared_ptr *); private: @@ -90,6 +90,10 @@ class TakeOp : public PipelineOp { // @return - Status of the node visit. Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "TakeOp"; } + private: int32_t max_takes_; // The number of takes that the user requested int32_t take_count_; // A counter for the current number of executed takes diff --git a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.h b/mindspore/ccsrc/dataset/engine/datasetops/zip_op.h index 1140a98dd7..08b93c18b5 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/zip_op.h @@ -65,7 +65,7 @@ class ZipOp : public PipelineOp { } // The builder "build" method creates the ZipOp dataset Operator. - // @return shared_ptr to the new StorageOp object + // @return shared_ptr to the new ZipOp object Status Build(std::shared_ptr *); private: @@ -110,6 +110,10 @@ class ZipOp : public PipelineOp { // @return - Status of the node visit. 
Status Accept(NodePass *p, bool *modified) override; + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "ZipOp"; } + private: // Handles preprocessing of the main loop, used when starting new epoch Status prepare(TensorQTable *const table); diff --git a/mindspore/ccsrc/dataset/engine/execution_tree.cc b/mindspore/ccsrc/dataset/engine/execution_tree.cc index bcb387082b..5c921bba84 100644 --- a/mindspore/ccsrc/dataset/engine/execution_tree.cc +++ b/mindspore/ccsrc/dataset/engine/execution_tree.cc @@ -19,8 +19,10 @@ #include "dataset/engine/datasetops/dataset_op.h" #include "dataset/engine/datasetops/shuffle_op.h" #include "dataset/util/task_manager.h" - -#include "dataset/engine/opt/util/printer_pass.h" +#include "dataset/engine/opt/pre/map_column_reorder.h" +#include "dataset/engine/opt/pre/global_shuffle.h" +#include "dataset/engine/perf/profiling.h" +#include "dataset/engine/perf/monitor.h" namespace mindspore { namespace dataset { @@ -29,6 +31,8 @@ ExecutionTree::ExecutionTree() : id_count_(0) { tg_ = std::make_unique(); tree_state_ = kDeTStateInit; prepare_flags_ = kDePrepNone; + perf_monitor_ = std::make_unique(this); + profiling_manager_ = std::make_unique(this); } // Destructor @@ -77,8 +81,6 @@ Status ExecutionTree::AssignRoot(const std::shared_ptr &op) { // Then add it as the root. root_ = op; - // The tree has an assigned root now and it's ready to be prepared. 
- tree_state_ = kDeTStatePrepare; return Status::OK(); } @@ -120,6 +122,15 @@ Status ExecutionTree::Launch() { } std::ostringstream ss; ss << *this; + + // Profiling infrastructures need to be initialized before Op launching + if (profiling_manager_->IsProfilingEnable()) { + // Setup profiling manager + RETURN_IF_NOT_OK(profiling_manager_->Initialize()); + // Launch Monitor Thread + RETURN_IF_NOT_OK(tg_->CreateAsyncTask("Monitor Thread launched", std::ref(*perf_monitor_))); + } + MS_LOG(DEBUG) << "Printing the tree before launch tasks:\n" << ss.str(); for (auto itr = this->begin(); itr != this->end(); ++itr) { // An inlined operator is one that has an output connector size of 0, and it does not @@ -132,7 +143,9 @@ Status ExecutionTree::Launch() { // Set the state of the Operator as running. This only matters in Leaf ops, CacheOp and TakeOp } } + tree_state_ = kDeTStateExecuting; + return Status::OK(); } @@ -194,9 +207,24 @@ Status ExecutionTree::Prepare() { return Status::OK(); } -Status ExecutionTree::PrepareTreePreAction() { return Status::OK(); } +Status ExecutionTree::PrepareTreePreAction() { + bool modified = false; + std::vector pre_actions; + // Construct pre actions + pre_actions.push_back(new MapColumnReorder()); + pre_actions.push_back(new GlobalShufflePass()); + // Apply pre action passes + for (auto &pass : pre_actions) { + RETURN_IF_NOT_OK(pass->Run(this, &modified)); + } + return Status::OK(); +} -Status ExecutionTree::PrepareTreePostAction() { return Status::OK(); } +Status ExecutionTree::PrepareTreePostAction() { + // The tree is ready to be prepared. 
+ tree_state_ = kDeTStatePrepare; + return Status::OK(); +} Status ExecutionTree::Optimize() { // auto pp = new PrinterPass(); diff --git a/mindspore/ccsrc/dataset/engine/execution_tree.h b/mindspore/ccsrc/dataset/engine/execution_tree.h index f0c894f05b..e1c5e8ff54 100644 --- a/mindspore/ccsrc/dataset/engine/execution_tree.h +++ b/mindspore/ccsrc/dataset/engine/execution_tree.h @@ -23,12 +23,14 @@ #include #include "dataset/engine/datasetops/dataset_op.h" #include "dataset/util/status.h" +#include "mindspore/ccsrc/dataset/engine/perf/profiling.h" namespace mindspore { namespace dataset { // Forward declares class TaskGroup; class DatasetOp; +class Monitor; class ExecutionTree { public: @@ -40,11 +42,12 @@ class ExecutionTree { // State flags for the lifecycle of the tree enum TreeState { - kDeTStateInit = 0, // The freshly initialized state after construction - kDeTStateBuilding, // The tree is being built, nodes are being added - kDeTStatePrepare, // The tree has been assigned a root node and is pending prepare - kDeTStateReady, // The tree has been prepared and is ready to be launched - kDeTStateExecuting // The tree has been launched and is executing + kDeTStateInit = 0, // The freshly initialized state after construction + kDeTStateBuilding, // The tree is being built, nodes are being added + kDeTStatePrepare, // The tree has been assigned a root node and is pending prepare + kDeTStateReady, // The tree has been prepared and is ready to be launched + kDeTStateExecuting, // The tree has been launched and is executing + kDeTStateFinished // The tree has been drained, dataset iterator received EOF }; class Iterator { @@ -120,7 +123,7 @@ class ExecutionTree { // Returns an iterator positioned at the start // @return Iterator - The iterator ExecutionTree::Iterator begin(const std::shared_ptr &root = nullptr) const { - return Iterator((root == nullptr) ? root_ : root); + return Iterator(root == nullptr ? 
root_ : root); } // Returns an iterator positioned at the end @@ -207,6 +210,16 @@ class ExecutionTree { // @return raw pointer to the TaskGroup TaskGroup *AllTasks() const { return tg_.get(); } + // Return if the ExecutionTree is finished (iterator receives EOF). + // @return Bool - true is ExecutionTree is finished + bool isFinished() const { return tree_state_ == TreeState::kDeTStateFinished; } + + // Set the ExecutionTree to Finished state. + void SetFinished() { tree_state_ = TreeState::kDeTStateFinished; } + + // Getter for profiling manager, no ownership + ProfilingManager *GetProfilingManager() { return profiling_manager_.get(); } + private: // A helper functions for doing the recursive printing // @param dataset_op - The dataset op to print @@ -222,6 +235,8 @@ class ExecutionTree { uint32_t prepare_flags_; // Flags used during tree prepare TreeState tree_state_; // Tracking the current tree state std::stack> repeat_stack_; // A stack used during prepare phase + std::unique_ptr perf_monitor_; // Performance Monitor + std::unique_ptr profiling_manager_; // Profiling manager }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/gnn/graph.cc b/mindspore/ccsrc/dataset/engine/gnn/graph.cc index 74e7b85153..1017657397 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph.cc +++ b/mindspore/ccsrc/dataset/engine/gnn/graph.cc @@ -17,29 +17,30 @@ #include #include +#include #include #include #include "dataset/core/tensor_shape.h" +#include "dataset/util/random.h" namespace mindspore { namespace dataset { namespace gnn { -Graph::Graph(std::string dataset_file, int32_t num_workers) : dataset_file_(dataset_file), num_workers_(num_workers) { +Graph::Graph(std::string dataset_file, int32_t num_workers) + : dataset_file_(dataset_file), num_workers_(num_workers), rnd_(GetRandomDevice()), random_walk_(this) { + rnd_.seed(GetSeed()); MS_LOG(INFO) << "num_workers:" << num_workers; } -Status Graph::GetNodes(NodeType node_type, NodeIdType 
node_num, std::shared_ptr *out) { +Status Graph::GetAllNodes(NodeType node_type, std::shared_ptr *out) { auto itr = node_type_map_.find(node_type); if (itr == node_type_map_.end()) { std::string err_msg = "Invalid node type:" + std::to_string(node_type); RETURN_STATUS_UNEXPECTED(err_msg); } else { - if (node_num == -1) { - RETURN_IF_NOT_OK(CreateTensorByVector({itr->second}, DataType(DataType::DE_INT32), out)); - } else { - } + RETURN_IF_NOT_OK(CreateTensorByVector({itr->second}, DataType(DataType::DE_INT32), out)); } return Status::OK(); } @@ -58,10 +59,10 @@ Status Graph::CreateTensorByVector(const std::vector> &data, Data size_t n = data[0].size(); RETURN_IF_NOT_OK(Tensor::CreateTensor( &tensor, TensorImpl::kFlexible, TensorShape({static_cast(m), static_cast(n)}), type, nullptr)); - T *ptr = reinterpret_cast(tensor->GetMutableBuffer()); - for (auto id_m : data) { + auto ptr = tensor->begin(); + for (const auto &id_m : data) { CHECK_FAIL_RETURN_UNEXPECTED(id_m.size() == n, "Each member of the vector has a different size"); - for (auto id_n : id_m) { + for (const auto &id_n : id_m) { *ptr = id_n; ptr++; } @@ -89,7 +90,38 @@ Status Graph::ComplementVector(std::vector> *data, size_t max_siz return Status::OK(); } -Status Graph::GetEdges(EdgeType edge_type, EdgeIdType edge_num, std::shared_ptr *out) { return Status::OK(); } +Status Graph::GetAllEdges(EdgeType edge_type, std::shared_ptr *out) { + auto itr = edge_type_map_.find(edge_type); + if (itr == edge_type_map_.end()) { + std::string err_msg = "Invalid edge type:" + std::to_string(edge_type); + RETURN_STATUS_UNEXPECTED(err_msg); + } else { + RETURN_IF_NOT_OK(CreateTensorByVector({itr->second}, DataType(DataType::DE_INT32), out)); + } + return Status::OK(); +} + +Status Graph::GetNodesFromEdges(const std::vector &edge_list, std::shared_ptr *out) { + if (edge_list.empty()) { + RETURN_STATUS_UNEXPECTED("Input edge_list is empty"); + } + + std::vector> node_list; + node_list.reserve(edge_list.size()); + for (const 
auto &edge_id : edge_list) { + auto itr = edge_id_map_.find(edge_id); + if (itr == edge_id_map_.end()) { + std::string err_msg = "Invalid edge id:" + std::to_string(edge_id); + RETURN_STATUS_UNEXPECTED(err_msg); + } else { + std::pair, std::shared_ptr> nodes; + RETURN_IF_NOT_OK(itr->second->GetNode(&nodes)); + node_list.push_back({nodes.first->id(), nodes.second->id()}); + } + } + RETURN_IF_NOT_OK(CreateTensorByVector(node_list, DataType(DataType::DE_INT32), out)); + return Status::OK(); +} Status Graph::GetAllNeighbors(const std::vector &node_list, NodeType neighbor_type, std::shared_ptr *out) { @@ -105,14 +137,10 @@ Status Graph::GetAllNeighbors(const std::vector &node_list, NodeType size_t max_neighbor_num = 0; neighbors.resize(node_list.size()); for (size_t i = 0; i < node_list.size(); ++i) { - auto itr = node_id_map_.find(node_list[i]); - if (itr != node_id_map_.end()) { - RETURN_IF_NOT_OK(itr->second->GetNeighbors(neighbor_type, -1, &neighbors[i])); - max_neighbor_num = max_neighbor_num > neighbors[i].size() ? max_neighbor_num : neighbors[i].size(); - } else { - std::string err_msg = "Invalid node id:" + std::to_string(node_list[i]); - RETURN_STATUS_UNEXPECTED(err_msg); - } + std::shared_ptr node; + RETURN_IF_NOT_OK(GetNodeByNodeId(node_list[i], &node)); + RETURN_IF_NOT_OK(node->GetAllNeighbors(neighbor_type, &neighbors[i])); + max_neighbor_num = max_neighbor_num > neighbors[i].size() ? 
max_neighbor_num : neighbors[i].size(); } RETURN_IF_NOT_OK(ComplementVector(&neighbors, max_neighbor_num, kDefaultNodeId)); @@ -121,18 +149,104 @@ Status Graph::GetAllNeighbors(const std::vector &node_list, NodeType return Status::OK(); } -Status Graph::GetSampledNeighbor(const std::vector &node_list, const std::vector &neighbor_nums, - const std::vector &neighbor_types, std::shared_ptr *out) { +Status Graph::GetSampledNeighbors(const std::vector &node_list, + const std::vector &neighbor_nums, + const std::vector &neighbor_types, std::shared_ptr *out) { + CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); + CHECK_FAIL_RETURN_UNEXPECTED(neighbor_nums.size() == neighbor_types.size(), + "The sizes of neighbor_nums and neighbor_types are inconsistent."); + std::vector> neighbors_vec(node_list.size()); + for (size_t node_idx = 0; node_idx < node_list.size(); ++node_idx) { + neighbors_vec[node_idx].emplace_back(node_list[node_idx]); + std::vector input_list = {node_list[node_idx]}; + for (size_t i = 0; i < neighbor_nums.size(); ++i) { + std::vector neighbors; + neighbors.reserve(input_list.size() * neighbor_nums[i]); + for (const auto &node_id : input_list) { + if (node_id == kDefaultNodeId) { + for (int32_t j = 0; j < neighbor_nums[i]; ++j) { + neighbors.emplace_back(kDefaultNodeId); + } + } else { + std::shared_ptr node; + RETURN_IF_NOT_OK(GetNodeByNodeId(node_id, &node)); + std::vector out; + RETURN_IF_NOT_OK(node->GetSampledNeighbors(neighbor_types[i], neighbor_nums[i], &out)); + neighbors.insert(neighbors.end(), out.begin(), out.end()); + } + } + neighbors_vec[node_idx].insert(neighbors_vec[node_idx].end(), neighbors.begin(), neighbors.end()); + input_list = std::move(neighbors); + } + } + RETURN_IF_NOT_OK(CreateTensorByVector(neighbors_vec, DataType(DataType::DE_INT32), out)); + return Status::OK(); +} + +Status Graph::NegativeSample(const std::vector &data, const std::unordered_set &exclude_data, + int32_t samples_num, std::vector 
*out_samples) { + CHECK_FAIL_RETURN_UNEXPECTED(!data.empty(), "Input data is empty."); + std::vector shuffled_id(data.size()); + std::iota(shuffled_id.begin(), shuffled_id.end(), 0); + std::shuffle(shuffled_id.begin(), shuffled_id.end(), rnd_); + for (const auto &index : shuffled_id) { + if (exclude_data.find(data[index]) != exclude_data.end()) { + continue; + } + out_samples->emplace_back(data[index]); + if (out_samples->size() >= samples_num) { + break; + } + } return Status::OK(); } -Status Graph::GetNegSampledNeighbor(const std::vector &node_list, NodeIdType samples_num, - NodeType neg_neighbor_type, std::shared_ptr *out) { +Status Graph::GetNegSampledNeighbors(const std::vector &node_list, NodeIdType samples_num, + NodeType neg_neighbor_type, std::shared_ptr *out) { + CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); + std::vector> neighbors_vec; + neighbors_vec.resize(node_list.size()); + for (size_t node_idx = 0; node_idx < node_list.size(); ++node_idx) { + std::shared_ptr node; + RETURN_IF_NOT_OK(GetNodeByNodeId(node_list[node_idx], &node)); + std::vector neighbors; + RETURN_IF_NOT_OK(node->GetAllNeighbors(neg_neighbor_type, &neighbors)); + std::unordered_set exclude_node; + std::transform(neighbors.begin(), neighbors.end(), + std::insert_iterator>(exclude_node, exclude_node.begin()), + [](const NodeIdType node) { return node; }); + auto itr = node_type_map_.find(neg_neighbor_type); + if (itr == node_type_map_.end()) { + std::string err_msg = "Invalid node type:" + std::to_string(neg_neighbor_type); + RETURN_STATUS_UNEXPECTED(err_msg); + } else { + neighbors_vec[node_idx].emplace_back(node->id()); + if (itr->second.size() > exclude_node.size()) { + while (neighbors_vec[node_idx].size() < samples_num + 1) { + RETURN_IF_NOT_OK(NegativeSample(itr->second, exclude_node, samples_num - neighbors_vec[node_idx].size(), + &neighbors_vec[node_idx])); + } + } else { + MS_LOG(DEBUG) << "There are no negative neighbors. 
node_id:" << node->id() + << " neg_neighbor_type:" << neg_neighbor_type; + // If there are no negative neighbors, they are filled with kDefaultNodeId + for (int32_t i = 0; i < samples_num; ++i) { + neighbors_vec[node_idx].emplace_back(kDefaultNodeId); + } + } + } + } + RETURN_IF_NOT_OK(CreateTensorByVector(neighbors_vec, DataType(DataType::DE_INT32), out)); return Status::OK(); } -Status Graph::RandomWalk(const std::vector &node_list, const std::vector &meta_path, float p, - float q, NodeIdType default_node, std::shared_ptr *out) { +Status Graph::RandomWalk(const std::vector &node_list, const std::vector &meta_path, + float step_home_param, float step_away_param, NodeIdType default_node, + std::shared_ptr *out) { + RETURN_IF_NOT_OK(random_walk_.Build(node_list, meta_path, step_home_param, step_away_param, default_node)); + std::vector> walks; + RETURN_IF_NOT_OK(random_walk_.SimulateWalk(&walks)); + RETURN_IF_NOT_OK(CreateTensorByVector({walks}, DataType(DataType::DE_INT32), out)); return Status::OK(); } @@ -154,7 +268,7 @@ Status Graph::GetNodeFeature(const std::shared_ptr &nodes, const std::ve } CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Inpude feature_types is empty"); TensorRow tensors; - for (auto f_type : feature_types) { + for (const auto &f_type : feature_types) { std::shared_ptr default_feature; // If no feature can be obtained, fill in the default value RETURN_IF_NOT_OK(GetNodeDefaultFeature(f_type, &default_feature)); @@ -169,18 +283,14 @@ Status Graph::GetNodeFeature(const std::shared_ptr &nodes, const std::ve dsize_t index = 0; for (auto node_itr = nodes->begin(); node_itr != nodes->end(); ++node_itr) { - auto itr = node_id_map_.find(*node_itr); std::shared_ptr feature; - if (itr != node_id_map_.end()) { - if (!itr->second->GetFeatures(f_type, &feature).IsOk()) { - feature = default_feature; - } + if (*node_itr == kDefaultNodeId) { + feature = default_feature; } else { - if (*node_itr == kDefaultNodeId) { + std::shared_ptr node; + 
RETURN_IF_NOT_OK(GetNodeByNodeId(*node_itr, &node)); + if (!node->GetFeatures(f_type, &feature).IsOk()) { feature = default_feature; - } else { - std::string err_msg = "Invalid node id:" + std::to_string(*node_itr); - RETURN_STATUS_UNEXPECTED(err_msg); } } RETURN_IF_NOT_OK(fea_tensor->InsertTensor({index}, feature->Value())); @@ -209,35 +319,54 @@ Status Graph::Init() { return Status::OK(); } -Status Graph::GetMetaInfo(std::vector *node_info, std::vector *edge_info) { - node_info->reserve(node_type_map_.size()); - for (auto node : node_type_map_) { - NodeMetaInfo n_info; - n_info.type = node.first; - n_info.num = node.second.size(); - auto itr = node_feature_map_.find(node.first); - if (itr != node_feature_map_.end()) { - for (auto f_type : itr->second) { - n_info.feature_type.push_back(f_type); - } - std::sort(n_info.feature_type.begin(), n_info.feature_type.end()); +Status Graph::GetMetaInfo(MetaInfo *meta_info) { + meta_info->node_type.resize(node_type_map_.size()); + std::transform(node_type_map_.begin(), node_type_map_.end(), meta_info->node_type.begin(), + [](auto itr) { return itr.first; }); + std::sort(meta_info->node_type.begin(), meta_info->node_type.end()); + + meta_info->edge_type.resize(edge_type_map_.size()); + std::transform(edge_type_map_.begin(), edge_type_map_.end(), meta_info->edge_type.begin(), + [](auto itr) { return itr.first; }); + std::sort(meta_info->edge_type.begin(), meta_info->edge_type.end()); + + for (const auto &node : node_type_map_) { + meta_info->node_num[node.first] = node.second.size(); + } + + for (const auto &edge : edge_type_map_) { + meta_info->edge_num[edge.first] = edge.second.size(); + } + + for (const auto &node_feature : node_feature_map_) { + for (auto type : node_feature.second) { + meta_info->node_feature_type.emplace_back(type); } - node_info->push_back(n_info); } + std::sort(meta_info->node_feature_type.begin(), meta_info->node_feature_type.end()); + auto unique_node = 
std::unique(meta_info->node_feature_type.begin(), meta_info->node_feature_type.end()); + meta_info->node_feature_type.erase(unique_node, meta_info->node_feature_type.end()); - edge_info->reserve(edge_type_map_.size()); - for (auto edge : edge_type_map_) { - EdgeMetaInfo e_info; - e_info.type = edge.first; - e_info.num = edge.second.size(); - auto itr = edge_feature_map_.find(edge.first); - if (itr != edge_feature_map_.end()) { - for (auto f_type : itr->second) { - e_info.feature_type.push_back(f_type); - } + for (const auto &edge_feature : edge_feature_map_) { + for (const auto &type : edge_feature.second) { + meta_info->edge_feature_type.emplace_back(type); } - edge_info->push_back(e_info); } + std::sort(meta_info->edge_feature_type.begin(), meta_info->edge_feature_type.end()); + auto unique_edge = std::unique(meta_info->edge_feature_type.begin(), meta_info->edge_feature_type.end()); + meta_info->edge_feature_type.erase(unique_edge, meta_info->edge_feature_type.end()); + return Status::OK(); +} + +Status Graph::GraphInfo(py::dict *out) { + MetaInfo meta_info; + RETURN_IF_NOT_OK(GetMetaInfo(&meta_info)); + (*out)["node_type"] = py::cast(meta_info.node_type); + (*out)["edge_type"] = py::cast(meta_info.edge_type); + (*out)["node_num"] = py::cast(meta_info.node_num); + (*out)["edge_num"] = py::cast(meta_info.edge_num); + (*out)["node_feature_type"] = py::cast(meta_info.node_feature_type); + (*out)["edge_feature_type"] = py::cast(meta_info.edge_feature_type); return Status::OK(); } @@ -250,6 +379,207 @@ Status Graph::LoadNodeAndEdge() { &node_feature_map_, &edge_feature_map_, &default_feature_map_)); return Status::OK(); } + +Status Graph::GetNodeByNodeId(NodeIdType id, std::shared_ptr *node) { + auto itr = node_id_map_.find(id); + if (itr == node_id_map_.end()) { + std::string err_msg = "Invalid node id:" + std::to_string(id); + RETURN_STATUS_UNEXPECTED(err_msg); + } else { + *node = itr->second; + } + return Status::OK(); +} + 
+Graph::RandomWalkBase::RandomWalkBase(Graph *graph) + : graph_(graph), step_home_param_(1.0), step_away_param_(1.0), default_node_(-1), num_walks_(1), num_workers_(1) {} + +Status Graph::RandomWalkBase::Build(const std::vector &node_list, const std::vector &meta_path, + float step_home_param, float step_away_param, const NodeIdType default_node, + int32_t num_walks, int32_t num_workers) { + node_list_ = node_list; + if (meta_path.empty() || meta_path.size() > kMaxNumWalks) { + std::string err_msg = "Failed, meta path required between 1 and " + std::to_string(kMaxNumWalks) + + ". The size of input path is " + std::to_string(meta_path.size()); + RETURN_STATUS_UNEXPECTED(err_msg); + } + meta_path_ = meta_path; + if (step_home_param < kGnnEpsilon || step_away_param < kGnnEpsilon) { + std::string err_msg = "Failed, step_home_param and step_away_param required greater than " + + std::to_string(kGnnEpsilon) + ". step_home_param: " + std::to_string(step_home_param) + + ", step_away_param: " + std::to_string(step_away_param); + RETURN_STATUS_UNEXPECTED(err_msg); + } + step_home_param_ = step_home_param; + step_away_param_ = step_away_param; + default_node_ = default_node; + num_walks_ = num_walks; + num_workers_ = num_workers; + return Status::OK(); +} + +Status Graph::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node, std::vector *walk_path) { + // Simulate a random walk starting from start node. 
+ auto walk = std::vector(1, start_node); // walk is an vector + // walk simulate + while (walk.size() - 1 < meta_path_.size()) { + // current nodE + auto cur_node_id = walk.back(); + std::shared_ptr cur_node; + RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(cur_node_id, &cur_node)); + + // current neighbors + std::vector cur_neighbors; + RETURN_IF_NOT_OK(cur_node->GetAllNeighbors(meta_path_[walk.size() - 1], &cur_neighbors, true)); + std::sort(cur_neighbors.begin(), cur_neighbors.end()); + + // break if no neighbors + if (cur_neighbors.empty()) { + break; + } + + // walk by the fist node, then by the previous 2 nodes + std::shared_ptr stochastic_index; + if (walk.size() == 1) { + RETURN_IF_NOT_OK(GetNodeProbability(cur_node_id, meta_path_[0], &stochastic_index)); + } else { + NodeIdType prev_node_id = walk[walk.size() - 2]; + RETURN_IF_NOT_OK(GetEdgeProbability(prev_node_id, cur_node_id, walk.size() - 2, &stochastic_index)); + } + NodeIdType next_node_id = cur_neighbors[WalkToNextNode(*stochastic_index)]; + walk.push_back(next_node_id); + } + + while (walk.size() - 1 < meta_path_.size()) { + walk.push_back(default_node_); + } + + *walk_path = std::move(walk); + return Status::OK(); +} + +Status Graph::RandomWalkBase::SimulateWalk(std::vector> *walks) { + // Repeatedly simulate random walks from each node + std::vector permutation(node_list_.size()); + std::iota(permutation.begin(), permutation.end(), 0); + for (int32_t i = 0; i < num_walks_; i++) { + unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); + std::shuffle(permutation.begin(), permutation.end(), std::default_random_engine(seed)); + for (const auto &i_perm : permutation) { + std::vector walk; + RETURN_IF_NOT_OK(Node2vecWalk(node_list_[i_perm], &walk)); + walks->push_back(walk); + } + } + return Status::OK(); +} + +Status Graph::RandomWalkBase::GetNodeProbability(const NodeIdType &node_id, const NodeType &node_type, + std::shared_ptr *node_probability) { + // Generate alias nodes + 
std::shared_ptr node; + graph_->GetNodeByNodeId(node_id, &node); + std::vector neighbors; + RETURN_IF_NOT_OK(node->GetAllNeighbors(node_type, &neighbors, true)); + std::sort(neighbors.begin(), neighbors.end()); + auto non_normalized_probability = std::vector(neighbors.size(), 1.0); + *node_probability = + std::make_shared(GenerateProbability(Normalize(non_normalized_probability))); + return Status::OK(); +} + +Status Graph::RandomWalkBase::GetEdgeProbability(const NodeIdType &src, const NodeIdType &dst, uint32_t meta_path_index, + std::shared_ptr *edge_probability) { + // Get the alias edge setup lists for a given edge. + std::shared_ptr src_node; + graph_->GetNodeByNodeId(src, &src_node); + std::vector src_neighbors; + RETURN_IF_NOT_OK(src_node->GetAllNeighbors(meta_path_[meta_path_index], &src_neighbors, true)); + + std::shared_ptr dst_node; + graph_->GetNodeByNodeId(dst, &dst_node); + std::vector dst_neighbors; + RETURN_IF_NOT_OK(dst_node->GetAllNeighbors(meta_path_[meta_path_index + 1], &dst_neighbors, true)); + + std::sort(dst_neighbors.begin(), dst_neighbors.end()); + std::vector non_normalized_probability; + for (const auto &dst_nbr : dst_neighbors) { + if (dst_nbr == src) { + non_normalized_probability.push_back(1.0 / step_home_param_); // replace 1.0 with G[dst][dst_nbr]['weight'] + continue; + } + auto it = std::find(src_neighbors.begin(), src_neighbors.end(), dst_nbr); + if (it != src_neighbors.end()) { + // stay close, this node connect both src and dst + non_normalized_probability.push_back(1.0); // replace 1.0 with G[dst][dst_nbr]['weight'] + } else { + // step far away + non_normalized_probability.push_back(1.0 / step_away_param_); // replace 1.0 with G[dst][dst_nbr]['weight'] + } + } + + *edge_probability = + std::make_shared(GenerateProbability(Normalize(non_normalized_probability))); + return Status::OK(); +} + +StochasticIndex Graph::RandomWalkBase::GenerateProbability(const std::vector &probability) { + uint32_t K = probability.size(); + 
std::vector switch_to_large_index(K, 0); + std::vector weight(K, .0); + std::vector smaller; + std::vector larger; + auto random_device = GetRandomDevice(); + std::uniform_real_distribution<> distribution(-kGnnEpsilon, kGnnEpsilon); + float accumulate_threshold = 0.0; + for (uint32_t i = 0; i < K; i++) { + float threshold_one = distribution(random_device); + accumulate_threshold += threshold_one; + weight[i] = i < K - 1 ? probability[i] * K + threshold_one : probability[i] * K - accumulate_threshold; + weight[i] < 1.0 ? smaller.push_back(i) : larger.push_back(i); + } + + while ((!smaller.empty()) && (!larger.empty())) { + uint32_t small = smaller.back(); + smaller.pop_back(); + uint32_t large = larger.back(); + larger.pop_back(); + switch_to_large_index[small] = large; + weight[large] = weight[large] + weight[small] - 1.0; + weight[large] < 1.0 ? smaller.push_back(large) : larger.push_back(large); + } + return StochasticIndex(switch_to_large_index, weight); +} + +uint32_t Graph::RandomWalkBase::WalkToNextNode(const StochasticIndex &stochastic_index) { + auto switch_to_large_index = stochastic_index.first; + auto weight = stochastic_index.second; + const uint32_t size_of_index = switch_to_large_index.size(); + + auto random_device = GetRandomDevice(); + std::uniform_real_distribution<> distribution(0.0, 1.0); + + // Generate random integer between [0, K) + uint32_t random_idx = std::floor(distribution(random_device) * size_of_index); + + if (distribution(random_device) < weight[random_idx]) { + return random_idx; + } + return switch_to_large_index[random_idx]; +} + +template +std::vector Graph::RandomWalkBase::Normalize(const std::vector &non_normalized_probability) { + float sum_probability = + 1.0 * std::accumulate(non_normalized_probability.begin(), non_normalized_probability.end(), 0); + if (sum_probability < kGnnEpsilon) { + sum_probability = 1.0; + } + std::vector normalized_probability; + std::transform(non_normalized_probability.begin(), 
non_normalized_probability.end(), + std::back_inserter(normalized_probability), [&](T value) -> float { return value / sum_probability; }); + return normalized_probability; +} } // namespace gnn } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/gnn/graph.h b/mindspore/ccsrc/dataset/engine/gnn/graph.h index 3dd6444807..ea10363053 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph.h +++ b/mindspore/ccsrc/dataset/engine/gnn/graph.h @@ -16,13 +16,17 @@ #ifndef DATASET_ENGINE_GNN_GRAPH_H_ #define DATASET_ENGINE_GNN_GRAPH_H_ +#include #include #include +#include #include #include #include +#include #include "dataset/core/tensor.h" +#include "dataset/core/tensor_row.h" #include "dataset/engine/gnn/graph_loader.h" #include "dataset/engine/gnn/feature.h" #include "dataset/engine/gnn/node.h" @@ -33,24 +37,17 @@ namespace mindspore { namespace dataset { namespace gnn { -struct NodeMetaInfo { - NodeType type; - NodeIdType num; - std::vector feature_type; - NodeMetaInfo() { - type = 0; - num = 0; - } -}; - -struct EdgeMetaInfo { - EdgeType type; - EdgeIdType num; - std::vector feature_type; - EdgeMetaInfo() { - type = 0; - num = 0; - } +const float kGnnEpsilon = 0.0001; +const uint32_t kMaxNumWalks = 80; +using StochasticIndex = std::pair, std::vector>; + +struct MetaInfo { + std::vector node_type; + std::vector edge_type; + std::map node_num; + std::map edge_num; + std::vector node_feature_type; + std::vector edge_feature_type; }; class Graph { @@ -62,19 +59,23 @@ class Graph { ~Graph() = default; - // Get the nodes from the graph. + // Get all nodes from the graph. 
// @param NodeType node_type - type of node - // @param NodeIdType node_num - Number of nodes to be acquired, if -1 means all nodes are acquired // @param std::shared_ptr *out - Returned nodes id // @return Status - The error code return - Status GetNodes(NodeType node_type, NodeIdType node_num, std::shared_ptr *out); + Status GetAllNodes(NodeType node_type, std::shared_ptr *out); - // Get the edges from the graph. + // Get all edges from the graph. // @param NodeType edge_type - type of edge - // @param NodeIdType edge_num - Number of edges to be acquired, if -1 means all edges are acquired // @param std::shared_ptr *out - Returned edge ids // @return Status - The error code return - Status GetEdges(EdgeType edge_type, EdgeIdType edge_num, std::shared_ptr *out); + Status GetAllEdges(EdgeType edge_type, std::shared_ptr *out); + + // Get the node id from the edge. + // @param std::vector edge_list - List of edges + // @param std::shared_ptr *out - Returned node ids + // @return Status - The error code return + Status GetNodesFromEdges(const std::vector &edge_list, std::shared_ptr *out); // All neighbors of the acquisition node. // @param std::vector node_list - List of nodes @@ -86,12 +87,35 @@ class Graph { Status GetAllNeighbors(const std::vector &node_list, NodeType neighbor_type, std::shared_ptr *out); - Status GetSampledNeighbor(const std::vector &node_list, const std::vector &neighbor_nums, - const std::vector &neighbor_types, std::shared_ptr *out); - Status GetNegSampledNeighbor(const std::vector &node_list, NodeIdType samples_num, - NodeType neg_neighbor_type, std::shared_ptr *out); - Status RandomWalk(const std::vector &node_list, const std::vector &meta_path, float p, float q, - NodeIdType default_node, std::shared_ptr *out); + // Get sampled neighbors. 
+ // @param std::vector node_list - List of nodes + // @param std::vector neighbor_nums - Number of neighbors sampled per hop + // @param std::vector neighbor_types - Neighbor type sampled per hop + // @param std::shared_ptr *out - Returned neighbor's id. + // @return Status - The error code return + Status GetSampledNeighbors(const std::vector &node_list, const std::vector &neighbor_nums, + const std::vector &neighbor_types, std::shared_ptr *out); + + // Get negative sampled neighbors. + // @param std::vector node_list - List of nodes + // @param NodeIdType samples_num - Number of neighbors sampled + // @param NodeType neg_neighbor_type - The type of negative neighbor. + // @param std::shared_ptr *out - Returned negative neighbor's id. + // @return Status - The error code return + Status GetNegSampledNeighbors(const std::vector &node_list, NodeIdType samples_num, + NodeType neg_neighbor_type, std::shared_ptr *out); + + // Node2vec random walk. + // @param std::vector node_list - List of nodes + // @param std::vector meta_path - node type of each step + // @param float step_home_param - return hyper parameter in node2vec algorithm + // @param float step_away_param - inout hyper parameter in node2vec algorithm + // @param NodeIdType default_node - default node id + // @param std::shared_ptr *out - Returned nodes id in walk path + // @return Status - The error code return + Status RandomWalk(const std::vector &node_list, const std::vector &meta_path, + float step_home_param, float step_away_param, NodeIdType default_node, + std::shared_ptr *out); // Get the feature of a node // @param std::shared_ptr nodes - List of nodes @@ -112,14 +136,55 @@ class Graph { TensorRow *out); // Get meta information of graph - // @param std::vector *node_info - Returned meta information of node - // @param std::vector *node_info - Returned meta information of edge + // @param MetaInfo *meta_info - Returned meta information // @return Status - The error code return - Status 
GetMetaInfo(std::vector *node_info, std::vector *edge_info); + Status GetMetaInfo(MetaInfo *meta_info); + + // Return meta information to python layer + Status GraphInfo(py::dict *out); Status Init(); private: + class RandomWalkBase { + public: + explicit RandomWalkBase(Graph *graph); + + Status Build(const std::vector &node_list, const std::vector &meta_path, + float step_home_param = 1.0, float step_away_param = 1.0, NodeIdType default_node = -1, + int32_t num_walks = 1, int32_t num_workers = 1); + + ~RandomWalkBase() = default; + + Status SimulateWalk(std::vector> *walks); + + private: + Status Node2vecWalk(const NodeIdType &start_node, std::vector *walk_path); + + Status GetNodeProbability(const NodeIdType &node_id, const NodeType &node_type, + std::shared_ptr *node_probability); + + Status GetEdgeProbability(const NodeIdType &src, const NodeIdType &dst, uint32_t meta_path_index, + std::shared_ptr *edge_probability); + + static StochasticIndex GenerateProbability(const std::vector &probability); + + static uint32_t WalkToNextNode(const StochasticIndex &stochastic_index); + + template + std::vector Normalize(const std::vector &non_normalized_probability); + + Graph *graph_; + std::vector node_list_; + std::vector meta_path_; + float step_home_param_; // Return hyper parameter. Default is 1.0 + float step_away_param_; // Inout hyper parameter. Default is 1.0 + NodeIdType default_node_; + + int32_t num_walks_; // Number of walks per source. Default is 10 + int32_t num_workers_; // The number of worker threads. 
Default is 1 + }; + // Load graph data from mindrecord file // @return Status - The error code return Status LoadNodeAndEdge(); @@ -146,8 +211,25 @@ class Graph { // @return Status - The error code return Status GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr *out_feature); + // Find node object using node id + // @param NodeIdType id - + // @param std::shared_ptr *node - Returned node object + // @return Status - The error code return + Status GetNodeByNodeId(NodeIdType id, std::shared_ptr *node); + + // Negative sampling + // @param std::vector &input_data - The data set to be sampled + // @param std::unordered_set &exclude_data - Data to be excluded + // @param int32_t samples_num - + // @param std::vector *out_samples - Sampling results returned + // @return Status - The error code return + Status NegativeSample(const std::vector &input_data, const std::unordered_set &exclude_data, + int32_t samples_num, std::vector *out_samples); + std::string dataset_file_; int32_t num_workers_; // The number of worker threads + std::mt19937 rnd_; + RandomWalkBase random_walk_; std::unordered_map> node_type_map_; std::unordered_map> node_id_map_; diff --git a/mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc b/mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc index c517fda969..6504d088bf 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc +++ b/mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc @@ -36,6 +36,7 @@ GraphLoader::GraphLoader(std::string mr_filepath, int32_t num_workers) : mr_path_(mr_filepath), num_workers_(num_workers), row_id_(0), + shard_reader_(nullptr), keys_({"first_id", "second_id", "third_id", "attribute", "type", "node_feature_index", "edge_feature_index"}) {} Status GraphLoader::GetNodesAndEdges(NodeIdMap *n_id_map, EdgeIdMap *e_id_map, NodeTypeMap *n_type_map, @@ -203,7 +204,8 @@ Status GraphLoader::LoadFeatureIndex(const std::string &key, const std::vectorPost(); - ShardTuple rows = shard_reader_->GetNextById(row_id_++, 
worker_id); + auto ret = shard_reader_->GetNextById(row_id_++, worker_id); + ShardTuple rows = ret.second; while (rows.empty() == false) { RETURN_IF_INTERRUPTED(); for (const auto &tupled_row : rows) { @@ -224,7 +226,8 @@ Status GraphLoader::WorkerEntry(int32_t worker_id) { MS_LOG(WARNING) << "attribute:" << attr << " is neither edge nor node."; } } - rows = shard_reader_->GetNextById(row_id_++, worker_id); + auto rc = shard_reader_->GetNextById(row_id_++, worker_id); + rows = rc.second; } return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/gnn/local_node.cc b/mindspore/ccsrc/dataset/engine/gnn/local_node.cc index 24e865dff7..c829f8e8ca 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/local_node.cc +++ b/mindspore/ccsrc/dataset/engine/gnn/local_node.cc @@ -20,12 +20,13 @@ #include #include "dataset/engine/gnn/edge.h" +#include "dataset/util/random.h" namespace mindspore { namespace dataset { namespace gnn { -LocalNode::LocalNode(NodeIdType id, NodeType type) : Node(id, type) {} +LocalNode::LocalNode(NodeIdType id, NodeType type) : Node(id, type), rnd_(GetRandomDevice()) { rnd_.seed(GetSeed()); } Status LocalNode::GetFeatures(FeatureType feature_type, std::shared_ptr *out_feature) { auto itr = features_.find(feature_type); @@ -38,21 +39,57 @@ Status LocalNode::GetFeatures(FeatureType feature_type, std::shared_ptr } } -Status LocalNode::GetNeighbors(NodeType neighbor_type, int32_t samples_num, std::vector *out_neighbors) { +Status LocalNode::GetAllNeighbors(NodeType neighbor_type, std::vector *out_neighbors, bool exclude_itself) { std::vector neighbors; auto itr = neighbor_nodes_.find(neighbor_type); if (itr != neighbor_nodes_.end()) { - if (samples_num == -1) { - // Return all neighbors + if (exclude_itself) { + neighbors.resize(itr->second.size()); + std::transform(itr->second.begin(), itr->second.end(), neighbors.begin(), + [](const std::shared_ptr node) { return node->id(); }); + } else { neighbors.resize(itr->second.size() + 1); neighbors[0] = id_; 
std::transform(itr->second.begin(), itr->second.end(), neighbors.begin() + 1, [](const std::shared_ptr node) { return node->id(); }); - } else { } } else { - neighbors.push_back(id_); MS_LOG(DEBUG) << "No neighbors. node_id:" << id_ << " neighbor_type:" << neighbor_type; + if (!exclude_itself) { + neighbors.emplace_back(id_); + } + } + *out_neighbors = std::move(neighbors); + return Status::OK(); +} + +Status LocalNode::GetSampledNeighbors(const std::vector> &neighbors, int32_t samples_num, + std::vector *out) { + std::vector shuffled_id(neighbors.size()); + std::iota(shuffled_id.begin(), shuffled_id.end(), 0); + std::shuffle(shuffled_id.begin(), shuffled_id.end(), rnd_); + int32_t num = std::min(samples_num, static_cast(neighbors.size())); + for (int32_t i = 0; i < num; ++i) { + out->emplace_back(neighbors[shuffled_id[i]]->id()); + } + return Status::OK(); +} + +Status LocalNode::GetSampledNeighbors(NodeType neighbor_type, int32_t samples_num, + std::vector *out_neighbors) { + std::vector neighbors; + neighbors.reserve(samples_num); + auto itr = neighbor_nodes_.find(neighbor_type); + if (itr != neighbor_nodes_.end()) { + while (neighbors.size() < samples_num) { + RETURN_IF_NOT_OK(GetSampledNeighbors(itr->second, samples_num - neighbors.size(), &neighbors)); + } + } else { + MS_LOG(DEBUG) << "There are no neighbors. 
node_id:" << id_ << " neighbor_type:" << neighbor_type; + // If there are no neighbors, they are filled with kDefaultNodeId + for (int32_t i = 0; i < samples_num; ++i) { + neighbors.emplace_back(kDefaultNodeId); + } } *out_neighbors = std::move(neighbors); return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/gnn/local_node.h b/mindspore/ccsrc/dataset/engine/gnn/local_node.h index 25f24818e1..bc069d073f 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/local_node.h +++ b/mindspore/ccsrc/dataset/engine/gnn/local_node.h @@ -43,12 +43,20 @@ class LocalNode : public Node { // @return Status - The error code return Status GetFeatures(FeatureType feature_type, std::shared_ptr *out_feature) override; - // Get the neighbors of a node + // Get the all neighbors of a node // @param NodeType neighbor_type - type of neighbor - // @param int32_t samples_num - Number of neighbors to be acquired, if -1 means all neighbors are acquired // @param std::vector *out_neighbors - Returned neighbors id // @return Status - The error code return - Status GetNeighbors(NodeType neighbor_type, int32_t samples_num, std::vector *out_neighbors) override; + Status GetAllNeighbors(NodeType neighbor_type, std::vector *out_neighbors, + bool exclude_itself = false) override; + + // Get the sampled neighbors of a node + // @param NodeType neighbor_type - type of neighbor + // @param int32_t samples_num - Number of neighbors to be acquired + // @param std::vector *out_neighbors - Returned neighbors id + // @return Status - The error code return + Status GetSampledNeighbors(NodeType neighbor_type, int32_t samples_num, + std::vector *out_neighbors) override; // Add neighbor of node // @param std::shared_ptr node - @@ -61,6 +69,10 @@ class LocalNode : public Node { Status UpdateFeature(const std::shared_ptr &feature) override; private: + Status GetSampledNeighbors(const std::vector> &neighbors, int32_t samples_num, + std::vector *out); + + std::mt19937 rnd_; std::unordered_map> features_; 
std::unordered_map>> neighbor_nodes_; }; diff --git a/mindspore/ccsrc/dataset/engine/gnn/node.h b/mindspore/ccsrc/dataset/engine/gnn/node.h index 8e3db51d65..282f856797 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/node.h +++ b/mindspore/ccsrc/dataset/engine/gnn/node.h @@ -52,12 +52,20 @@ class Node { // @return Status - The error code return virtual Status GetFeatures(FeatureType feature_type, std::shared_ptr *out_feature) = 0; - // Get the neighbors of a node + // Get the all neighbors of a node // @param NodeType neighbor_type - type of neighbor - // @param int32_t samples_num - Number of neighbors to be acquired, if -1 means all neighbors are acquired // @param std::vector *out_neighbors - Returned neighbors id // @return Status - The error code return - virtual Status GetNeighbors(NodeType neighbor_type, int32_t samples_num, std::vector *out_neighbors) = 0; + virtual Status GetAllNeighbors(NodeType neighbor_type, std::vector *out_neighbors, + bool exclude_itself = false) = 0; + + // Get the sampled neighbors of a node + // @param NodeType neighbor_type - type of neighbor + // @param int32_t samples_num - Number of neighbors to be acquired + // @param std::vector *out_neighbors - Returned neighbors id + // @return Status - The error code return + virtual Status GetSampledNeighbors(NodeType neighbor_type, int32_t samples_num, + std::vector *out_neighbors) = 0; // Add neighbor of node // @param std::shared_ptr node - diff --git a/mindspore/ccsrc/dataset/engine/opt/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/opt/CMakeLists.txt index 9804b85d3a..170cbb55e5 100644 --- a/mindspore/ccsrc/dataset/engine/opt/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/engine/opt/CMakeLists.txt @@ -2,5 +2,7 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc" set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) add_library(engine-opt OBJECT pass.cc + pre/map_column_reorder.cc + 
pre/global_shuffle.cc util/printer_pass.cc ) \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/engine/opt/pass.cc b/mindspore/ccsrc/dataset/engine/opt/pass.cc index e6bd9fe247..a032d46cba 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pass.cc +++ b/mindspore/ccsrc/dataset/engine/opt/pass.cc @@ -27,7 +27,6 @@ #include "dataset/engine/datasetops/shuffle_op.h" #include "dataset/engine/datasetops/source/generator_op.h" #include "dataset/engine/datasetops/source/mindrecord_op.h" -#include "dataset/engine/datasetops/source/storage_op.h" #include "dataset/engine/datasetops/source/tf_reader_op.h" #include "dataset/engine/datasetops/source/image_folder_op.h" #include "dataset/engine/datasetops/take_op.h" @@ -37,10 +36,18 @@ namespace mindspore { namespace dataset { // Driver method for TreePass -Status TreePass::Run(ExecutionTree *tree, bool *modified) { return this->RunOnTree(tree, modified); } +Status TreePass::Run(ExecutionTree *tree, bool *modified) { + if (tree == nullptr || modified == nullptr) { + return Status(StatusCode::kUnexpectedError, "Null pointer passed to TreePass"); + } + return this->RunOnTree(tree, modified); +} // Driver method for NodePass Status NodePass::Run(ExecutionTree *tree, bool *modified) { + if (tree == nullptr || modified == nullptr) { + return Status(StatusCode::kUnexpectedError, "Null pointer passed to NodePass"); + } std::shared_ptr root = tree->root(); if (traversalOrder_ == Order::DFS) { // DFS diff --git a/mindspore/ccsrc/dataset/engine/opt/pass.h b/mindspore/ccsrc/dataset/engine/opt/pass.h index bac464f401..39682b22f7 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pass.h +++ b/mindspore/ccsrc/dataset/engine/opt/pass.h @@ -57,10 +57,10 @@ class ImageFolderOp; // The actual implementation of the passes will be derived from here. class Pass : public std::enable_shared_from_this { public: - // Run the transformation pass again the execution tree. + // Run the transformation pass against the execution tree. 
// @param tree - Pointer to the execution tree to be transformed. // @param modified - Pointer to the modified flag, - virtual Status Run(ExecutionTree *tree, bool *modified) { return Status::OK(); } + virtual Status Run(ExecutionTree *tree, bool *modified) = 0; }; // TreePass is a basic Pass class which performs transformation on ExecutionTree directly. diff --git a/mindspore/ccsrc/dataset/engine/opt/pre/global_shuffle.cc b/mindspore/ccsrc/dataset/engine/opt/pre/global_shuffle.cc new file mode 100644 index 0000000000..2adf734a6c --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/opt/pre/global_shuffle.cc @@ -0,0 +1,98 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include "dataset/engine/opt/pre/global_shuffle.h" +#include "dataset/engine/execution_tree.h" +#include "dataset/engine/datasetops/shuffle_op.h" +#include "dataset/engine/datasetops/source/tf_reader_op.h" +#include "dataset/engine/datasetops/source/text_file_op.h" +#include "dataset/engine/datasetops/source/clue_op.h" + +namespace mindspore { +namespace dataset { + +Status GlobalShufflePass::RunOnTree(ExecutionTree *tree, bool *modified) { + std::vector> tf_readers; + std::vector> text_files; + std::vector> clues; + + // Pass 1, search for all sources which requires global shuffle + for (auto &op : *tree) { + if (auto ptr = std::dynamic_pointer_cast(op.shared_from_this())) { + if (ptr->RequireGlobalShuffle()) { + tf_readers.push_back(ptr); + continue; + } + } + if (auto ptr = std::dynamic_pointer_cast(op.shared_from_this())) { + if (ptr->RequireGlobalShuffle()) { + text_files.push_back(ptr); + continue; + } + } + if (auto ptr = std::dynamic_pointer_cast(op.shared_from_this())) { + if (ptr->RequireGlobalShuffle()) { + clues.push_back(ptr); + continue; + } + } + } + + // Pass 2, insert shuffle nodes + // The following blocks can be implemented with template if we unify the CountTotalRows across all source nodes . 
+ for (auto node : tf_readers) { + std::shared_ptr builder = std::make_shared(); + int64_t total_rows = 0; + TFReaderOp::CountTotalRows(&total_rows, node->FileNames(), 8, true); + int32_t avg_file_size = total_rows / (node->FileNames().size()); + builder->SetShuffleSize(std::max(avg_file_size * 4, 10000)); + std::shared_ptr op; + RETURN_IF_NOT_OK(builder->Build(&op)); + RETURN_IF_NOT_OK(tree->AssociateNode(op)); + RETURN_IF_NOT_OK(node->InsertAsParent(op)); + } + + for (auto node : text_files) { + std::shared_ptr builder = std::make_shared(); + int64_t total_rows = 0; + TextFileOp::CountAllFileRows(node->FileNames(), &total_rows); + int32_t avg_file_size = total_rows / (node->FileNames().size()); + builder->SetShuffleSize(std::max(avg_file_size * 4, 10000)); + std::shared_ptr op; + RETURN_IF_NOT_OK(builder->Build(&op)); + RETURN_IF_NOT_OK(tree->AssociateNode(op)); + RETURN_IF_NOT_OK(node->InsertAsParent(op)); + } + + for (auto node : clues) { + std::shared_ptr builder = std::make_shared(); + int64_t total_rows = 0; + ClueOp::CountAllFileRows(node->FileNames(), &total_rows); + int32_t avg_file_size = total_rows / (node->FileNames().size()); + builder->SetShuffleSize(std::max(avg_file_size * 4, 10000)); + std::shared_ptr op; + RETURN_IF_NOT_OK(builder->Build(&op)); + RETURN_IF_NOT_OK(tree->AssociateNode(op)); + RETURN_IF_NOT_OK(node->InsertAsParent(op)); + } + + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/opt/pre/global_shuffle.h b/mindspore/ccsrc/dataset/engine/opt/pre/global_shuffle.h new file mode 100644 index 0000000000..6865ac9391 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/opt/pre/global_shuffle.h @@ -0,0 +1,35 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_ENGINE_OPT_PASS_PRE_GLOBALSHUFFLE_H +#define DATASET_ENGINE_OPT_PASS_PRE_GLOBALSHUFFLE_H + +#include +#include "dataset/engine/opt/pass.h" + +namespace mindspore { +namespace dataset { +// Global Shuffle Pass will insert ShuffleOp when the leaf nodes requires global shuffle. +// Example: +// Input Tree: TFReader(GLOBAL_SHUFFLE) -> Batch +// Output Tree: TFReader -> Shuffle -> Batch +class GlobalShufflePass : public TreePass { + Status RunOnTree(ExecutionTree *tree, bool *modified) override; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_OPT_PASS_PRE_GLOBALSHUFFLE_H diff --git a/mindspore/ccsrc/dataset/engine/opt/pre/map_column_reorder.cc b/mindspore/ccsrc/dataset/engine/opt/pre/map_column_reorder.cc new file mode 100644 index 0000000000..a3dbbfcc54 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/opt/pre/map_column_reorder.cc @@ -0,0 +1,51 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include "dataset/engine/opt/pre/map_column_reorder.h" +#include "dataset/engine/execution_tree.h" +#include "dataset/engine/datasetops/map_op.h" +#include "dataset/engine/datasetops/project_op.h" + +namespace mindspore { +namespace dataset { + +Status MapColumnReorder::RunOnTree(ExecutionTree *tree, bool *modified) { + std::vector> to_process; + + // Pass 1, search for all MapOp with column orders + for (auto &op : *tree) { + if (auto mapOp = std::dynamic_pointer_cast(op.shared_from_this())) { + if (mapOp->ColumnsOrder().size() != 0) { + to_process.push_back(mapOp); + } + } + } + + // Pass 2, insert nodes for all MapOp + for (auto node : to_process) { + std::shared_ptr builder = std::make_shared(node->ColumnsOrder()); + std::shared_ptr op; + RETURN_IF_NOT_OK(builder->Build(&op)); + RETURN_IF_NOT_OK(tree->AssociateNode(op)); + RETURN_IF_NOT_OK(node->InsertAsParent(op)); + } + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/opt/pre/map_column_reorder.h b/mindspore/ccsrc/dataset/engine/opt/pre/map_column_reorder.h new file mode 100644 index 0000000000..84274db3d5 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/opt/pre/map_column_reorder.h @@ -0,0 +1,35 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef DATASET_ENGINE_OPT_PASS_PRE_MAPCOLREORDER_H +#define DATASET_ENGINE_OPT_PASS_PRE_MAPCOLREORDER_H + +#include +#include "dataset/engine/opt/pass.h" + +namespace mindspore { +namespace dataset { +// Map Column Recorder Pass will insert ProjectOp when MapOp requires a full output columns reorder. +// Example: +// Input Tree: TFReader -> MapOp(with col_order) -> Batch +// Output Tree: TFReader -> MapOp -> ProjectOp(col_order) -> Batch +class MapColumnReorder : public TreePass { + Status RunOnTree(ExecutionTree *tree, bool *modified) override; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_OPT_PASS_PRE_MAPCOLREORDER_H diff --git a/mindspore/ccsrc/dataset/engine/perf/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/perf/CMakeLists.txt new file mode 100644 index 0000000000..0b67469d2d --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/perf/CMakeLists.txt @@ -0,0 +1,6 @@ +add_library(engine-perf OBJECT + profiling.cc + monitor.cc + device_queue_tracing.cc + connector_size.cc + dataset_iterator_tracing.cc) diff --git a/mindspore/ccsrc/dataset/engine/perf/connector_size.cc b/mindspore/ccsrc/dataset/engine/perf/connector_size.cc new file mode 100644 index 0000000000..862ec51c49 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/perf/connector_size.cc @@ -0,0 +1,89 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "dataset/engine/perf/connector_size.h" + +#include +#include +#include +#include +#include "dataset/core/config_manager.h" +#include "dataset/engine/execution_tree.h" +#include "dataset/util/path.h" + +using json = nlohmann::json; +namespace mindspore { +namespace dataset { +using Qrow = std::vector; + +// Sample action +Status ConnectorSize::Sample() { + Qrow cur_row; + std::transform(tree_->begin(), tree_->end(), std::back_inserter(cur_row), + [](DatasetOp &op) { return op.ConnectorSize(); }); + // Push new row of sample + sample_table_.push_back(cur_row); + return Status::OK(); +} + +// JSON serializer helper function +json ConnectorSize::ParseOpInfo(const DatasetOp &node, const std::vector &size) { + auto children = node.Children(); + std::vector children_id; + std::transform(children.begin(), children.end(), std::back_inserter(children_id), + [](std::shared_ptr op) -> int32_t { return op->id(); }); + json json_node; + json_node["op_id"] = node.id(); + json_node["op_type"] = node.Name(); + json_node["num_workers"] = node.num_workers(); + json metrics; + // DeviceQueueOp is a special op,it is not inlined but its output queue is invalid. + // So we should not output its queue size. 
+ if (!node.inlined() && node.Name() != "DeviceQueueOp") { + metrics["output_queue"] = {{"size", size}, {"length", node.ConnectorCapacity()}}; + } + json_node["metrics"] = metrics; + if (!children_id.empty()) { + json_node["children"] = children_id; + } + + return json_node; +} + +// Save profiling data to file +Status ConnectorSize::SaveToFile() { + std::ofstream os(file_path_, std::ios::trunc); + uint32_t idx = 0; + json output; + std::shared_ptr cfg = GlobalContext::config_manager(); + output["sampling_interval"] = cfg->monitor_sampling_interval(); + // Traverse the ExecutionTree for JSON node generation + for (auto &node : *tree_) { + std::vector cur_queue_size; + std::transform(sample_table_.begin(), sample_table_.end(), std::back_inserter(cur_queue_size), + [&](const ConnectorSizeSample &sample) { return sample[idx]; }); + json json_node = ParseOpInfo(node, cur_queue_size); + output["op_info"].push_back(json_node); + idx++; + } + os << output; + return Status::OK(); +} +Status ConnectorSize::Init(const std::string &dir_path, const std::string &device_id) { + file_path_ = (Path(dir_path) / Path("pipeline_profiling_" + device_id + ".json")).toString(); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/perf/connector_size.h b/mindspore/ccsrc/dataset/engine/perf/connector_size.h new file mode 100644 index 0000000000..6840ffe244 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/perf/connector_size.h @@ -0,0 +1,70 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_QUEUE_DEPTH_H +#define MINDSPORE_QUEUE_DEPTH_H + +#include +#include +#include +#include "dataset/engine/perf/profiling.h" +#include "dataset/engine/datasetops/dataset_op.h" + +using json = nlohmann::json; + +namespace mindspore { +namespace dataset { +class ExecutionTree; + +// Connector size sampling samples the output connector size of each op in the pipeline. +// It support JSON serialization for external usage. +class ConnectorSize : public Sampling { + // Connecto size sampling data is stored as a 2D vector + // op_0 ... op_m + // sample_0 size_0_0 ... size_m_0 + // ... ... ... ... + // sample_n size_0_m ... size_m_n + // + // A circular buffer will be implemented in the future to make this table more flexible. + using ConnectorSizeSample = std::vector; + using ConnectorSizeSampleTable = std::vector; + + public: + explicit ConnectorSize(ExecutionTree *tree) : tree_(tree) {} + + ~ConnectorSize() override = default; + + // Driver function for connector size sampling. 
+ // This function samples the connector size of every nodes within the ExecutionTree + Status Sample() override; + + std::string Name() const override { return kDeviceQueueTracingName; }; + + // Save sampling data to file + // @return Status - The error code return + Status SaveToFile() override; + + Status Init(const std::string &dir_path, const std::string &device_id) override; + + // Parse op infomation and transform to json format + json ParseOpInfo(const DatasetOp &node, const std::vector &size); + + private: + ExecutionTree *tree_ = nullptr; // ExecutionTree pointer + ConnectorSizeSampleTable sample_table_; // Dataset structure to store all samples of connector size sampling +}; +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_QUEUE_DEPTH_H diff --git a/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.cc b/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.cc new file mode 100644 index 0000000000..99b0c2d7e0 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.cc @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include "dataset/engine/perf/dataset_iterator_tracing.h" +#include "dataset/util/path.h" + +namespace mindspore { +namespace dataset { + +Status DatasetIteratorTracing::Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, + const int32_t value) { + // Format: "type extra-info batch-num value" + // type: 0: time, 1: connector size + // extra-info: if type is 0 - 0: pipeline time, 1: push tdt time, 2: batch time + // if type is 1 - connector capacity + // batch-num: batch number + // value: if type is 0 - value is time(ms) + // if type is 1 - value is connector size + // Examples: + // 0 0 20 10 - The 20th batch took 10ms to get data from pipeline. + // 1 64 20 5 - Connector size is 5 when get the 20th batch.Connector capacity is 64. + std::string data = std::to_string(type) + " " + std::to_string(extra_info) + " " + std::to_string(batch_num) + " " + + std::to_string(value); + value_.emplace_back(data); + return Status::OK(); +} + +Status DatasetIteratorTracing::SaveToFile() { + if (value_.empty()) { + return Status::OK(); + } + + std::ofstream handle(file_path_, std::ios::trunc); + if (!handle.is_open()) { + RETURN_STATUS_UNEXPECTED("Profiling file can not be opened."); + } + for (auto value : value_) { + handle << value << "\n"; + } + handle.close(); + + return Status::OK(); +} + +Status DatasetIteratorTracing::Init(const std::string &dir_path, const std::string &device_id) { + file_path_ = (Path(dir_path) / Path("dataset_iterator_profiling_" + device_id + ".txt")).toString(); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.h b/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.h new file mode 100644 index 0000000000..00264939fc --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.h @@ -0,0 +1,51 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under 
the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_DATASET_ITERATOR_TRACING_H +#define MINDSPORE_DATASET_ITERATOR_TRACING_H + +#include +#include +#include "dataset/engine/perf/profiling.h" + +namespace mindspore { +namespace dataset { +class DatasetIteratorTracing : public Tracing { + public: + // Constructor + DatasetIteratorTracing() = default; + + // Destructor + ~DatasetIteratorTracing() override = default; + + // Record tracing data + // @return Status - The error code return + Status Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, const int32_t value); + + std::string Name() const override { return kDatasetIteratorTracingName; }; + + // Save tracing data to file + // @return Status - The error code return + Status SaveToFile() override; + + Status Init(const std::string &dir_path, const std::string &device_id) override; + + private: + std::vector value_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // MINDSPORE_DATASET_ITERATOR_TRACING_H diff --git a/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.cc b/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.cc new file mode 100644 index 0000000000..204a83e3fb --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.cc @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "dataset/engine/perf/device_queue_tracing.h" +#include "dataset/util/path.h" +namespace mindspore { +namespace dataset { + +Status DeviceQueueTracing::Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, + const int32_t value) { + // Format: "type extra-info batch-num value" + // type: 0: time, 1: connector size + // extra-info: if type is 0 - 0: pipeline time, 1: push tdt time, 2: batch time + // if type is 1 - connector capacity + // batch-num: batch number + // value: if type is 0 - value is time(ms) + // if type is 1 - value is connector size + // Examples: + // 0 0 20 10 - The 20th batch took 10ms to get data from pipeline. + // 1 64 20 5 - Connector size is 5 when get the 20th batch.Connector capacity is 64. 
+ std::string data = std::to_string(type) + " " + std::to_string(extra_info) + " " + std::to_string(batch_num) + " " + + std::to_string(value); + value_.emplace_back(data); + return Status::OK(); +} + +Status DeviceQueueTracing::SaveToFile() { + if (value_.empty()) { + return Status::OK(); + } + + std::ofstream handle(file_path_, std::ios::trunc); + if (!handle.is_open()) { + RETURN_STATUS_UNEXPECTED("Profiling file can not be opened."); + } + for (auto value : value_) { + handle << value << "\n"; + } + handle.close(); + + return Status::OK(); +} + +Status DeviceQueueTracing::Init(const std::string &dir_path, const std::string &device_id) { + file_path_ = (Path(dir_path) / Path("device_queue_profiling_" + device_id + ".txt")).toString(); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.h b/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.h new file mode 100644 index 0000000000..f7c6da3a04 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.h @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_DEVICE_QUEUE_TRACING_H +#define MINDSPORE_DEVICE_QUEUE_TRACING_H + +#include +#include +#include "dataset/engine/perf/profiling.h" + +namespace mindspore { +namespace dataset { +class DeviceQueueTracing : public Tracing { + public: + // Constructor + DeviceQueueTracing() = default; + + // Destructor + ~DeviceQueueTracing() override = default; + + // Record tracing data + // @return Status - The error code return + Status Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, const int32_t value); + + std::string Name() const override { return "Device Queue Tracing"; }; + + // Save tracing data to file + // @return Status - The error code return + Status SaveToFile() override; + + Status Init(const std::string &dir_path, const std::string &device_id) override; + + private: + std::vector value_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // MINDSPORE_DEVICE_QUEUE_TRACING_H diff --git a/mindspore/ccsrc/dataset/engine/perf/monitor.cc b/mindspore/ccsrc/dataset/engine/perf/monitor.cc new file mode 100644 index 0000000000..c9dce004b5 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/perf/monitor.cc @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "dataset/core/config_manager.h" +#include "dataset/engine/perf/monitor.h" +#include "dataset/engine/execution_tree.h" + +namespace mindspore { +namespace dataset { + +Monitor::Monitor(ExecutionTree *tree) : tree_(tree) { + std::shared_ptr cfg = GlobalContext::config_manager(); + sampling_interval_ = cfg->monitor_sampling_interval(); + max_samples_ = 0; + cur_row_ = 0; +} + +Status Monitor::operator()() { + // Register this thread with TaskManager to receive proper interrupt signal. + TaskManager::FindMe()->Post(); + + // Keep sampling if + // 1) Monitor Task is not interrupted by TaskManager AND + // 2) Iterator has not received EOF + while (!this_thread::is_interrupted() && !(tree_->isFinished())) { + for (auto &node : tree_->GetProfilingManager()->GetSamplingNodes()) { + RETURN_IF_NOT_OK(node.second->Sample()); + std::this_thread::sleep_for(std::chrono::milliseconds(sampling_interval_)); + } + } + + // Output all profiling data upon request. + tree_->GetProfilingManager()->SaveProfilingData(); + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/perf/monitor.h b/mindspore/ccsrc/dataset/engine/perf/monitor.h new file mode 100644 index 0000000000..2a482a6ad7 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/perf/monitor.h @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MONITOR_H +#define MINDSPORE_MONITOR_H + +#include +#include +#include +#include "dataset/util/status.h" +#include "dataset/engine/perf/profiling.h" + +namespace mindspore { +namespace dataset { +class ExecutionTree; +class Monitor { + public: + // Monitor object constructor + explicit Monitor(ExecutionTree *tree); + + Monitor() = default; + + ~Monitor() = default; + + // Functor for Perf Monitor main loop. + // This function will be the entry point of mindspore::Dataset::Task + Status operator()(); + + int64_t GetSamplingInterval() { return sampling_interval_; } + + private: + int64_t cur_row_; + int64_t max_samples_; + int64_t sampling_interval_; + ExecutionTree *tree_; + std::vector> sampling_list_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // MINDSPORE_MONITOR_H diff --git a/mindspore/ccsrc/dataset/engine/perf/profiling.cc b/mindspore/ccsrc/dataset/engine/perf/profiling.cc new file mode 100644 index 0000000000..4786b8dd69 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/perf/profiling.cc @@ -0,0 +1,153 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "dataset/engine/perf/profiling.h" + +#include +#include +#include +#include "common/utils.h" +#include "dataset/util/path.h" +#include "dataset/engine/perf/monitor.h" +#include "dataset/engine/perf/device_queue_tracing.h" +#include "dataset/engine/perf/connector_size.h" +#include "dataset/engine/perf/dataset_iterator_tracing.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace dataset { + +bool ProfilingManager::IsProfilingEnable() const { + auto profiling = common::GetEnv("PROFILING_MODE"); + if (profiling.empty() || profiling != "true") { + return false; + } + return true; +} + +Status ProfilingManager::Initialize() { + // Register nodes based on config + std::string dir = common::GetEnv("MINDDATA_PROFILING_DIR"); + if (dir.empty()) { + RETURN_STATUS_UNEXPECTED("Profiling dir is not set."); + } + char real_path[PATH_MAX] = {0}; + if (dir.size() >= PATH_MAX) { + RETURN_STATUS_UNEXPECTED("Profiling dir is invalid."); + } +#if defined(_WIN32) || defined(_WIN64) + if (_fullpath(real_path, common::SafeCStr(dir), PATH_MAX) == nullptr) { + RETURN_STATUS_UNEXPECTED("Profiling dir is invalid."); + } +#else + if (realpath(common::SafeCStr(dir), real_path) == nullptr) { + RETURN_STATUS_UNEXPECTED("Profiling dir is invalid."); + } +#endif + dir_path_ = real_path; + + // If DEVICE_ID is not set,defult value is 0 + device_id_ = common::GetEnv("DEVICE_ID"); + if (device_id_.empty()) { + device_id_ = "0"; + } + + // Register all profiling node. 
+ // device_queue node is used for graph mode + std::shared_ptr device_queue_tracing = std::make_shared(); + RETURN_IF_NOT_OK(RegisterTracingNode(device_queue_tracing)); + // dataset_iterator node is used for graph mode + std::shared_ptr dataset_iterator_tracing = std::make_shared(); + RETURN_IF_NOT_OK(RegisterTracingNode(dataset_iterator_tracing)); + + std::shared_ptr monitor_sampling = std::make_shared(tree_); + RETURN_IF_NOT_OK(RegisterSamplingNode(monitor_sampling)); + + return Status::OK(); +} + +// Profiling node registration +Status ProfilingManager::RegisterTracingNode(std::shared_ptr node) { + // Check if node with the same name has already been registered. + auto exist = tracing_nodes_.find(node->Name()); + if (exist != tracing_nodes_.end()) { + return Status(StatusCode::kProfilingError, "Profiling node already exist: " + node->Name()); + } + // Register the node with its name as key. + RETURN_IF_NOT_OK(node->Init(dir_path_, device_id_)); + tracing_nodes_[node->Name()] = node; + return Status::OK(); +} + +// Profiling node getter +Status ProfilingManager::GetTracingNode(const std::string &name, std::shared_ptr *node) { + // Check if node with the same name has already been registered. + auto exist = tracing_nodes_.find(name); + if (exist == tracing_nodes_.end()) { + return Status(StatusCode::kProfilingError, "Profiling node does not exist: " + name); + } + // Fetch node. + *node = tracing_nodes_[name]; + return Status::OK(); +} + +// Profiling node registration +Status ProfilingManager::RegisterSamplingNode(std::shared_ptr node) { + // Check if node with the same name has already been registered. + auto exist = sampling_nodes_.find(node->Name()); + if (exist != sampling_nodes_.end()) { + return Status(StatusCode::kProfilingError, "Profiling node already exist: " + node->Name()); + } + // Register the node with its name as key. 
+ RETURN_IF_NOT_OK(node->Init(dir_path_, device_id_)); + sampling_nodes_[node->Name()] = node; + return Status::OK(); +} + +// Profiling node getter +Status ProfilingManager::GetSamplingNode(const std::string &name, std::shared_ptr *node) { + // Check if node with the same name has already been registered. + auto exist = sampling_nodes_.find(name); + if (exist == sampling_nodes_.end()) { + return Status(StatusCode::kProfilingError, "Profiling node does not exist: " + name); + } + // Fetch node. + *node = sampling_nodes_[name]; + return Status::OK(); +} + +Status ProfilingManager::SaveProfilingData() { + if (!IsProfilingEnable()) { + return Status::OK(); + } + MS_LOG(INFO) << "Start to save profiling data."; + for (auto node : tracing_nodes_) { + RETURN_IF_NOT_OK(node.second->SaveToFile()); + } + for (auto node : sampling_nodes_) { + RETURN_IF_NOT_OK(node.second->SaveToFile()); + } + MS_LOG(INFO) << "Save profiling data end."; + + return Status::OK(); +} + +double ProfilingTime::GetCurMilliSecond() { + struct timeval tv = {0, 0}; + (void)gettimeofday(&tv, nullptr); + return tv.tv_sec * 1000 + tv.tv_usec / 1000; +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/perf/profiling.h b/mindspore/ccsrc/dataset/engine/perf/profiling.h new file mode 100644 index 0000000000..d0ea91d566 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/perf/profiling.h @@ -0,0 +1,140 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_UTIL_PROFILE_H_ +#define DATASET_UTIL_PROFILE_H_ + +#include +#include +#include +#include +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +class Monitor; +class ExecutionTree; + +const char kDeviceQueueTracingName[] = "Device Queue Tracing"; +const char kDatasetIteratorTracingName[] = "Dataset Iterator Tracing"; +const char kConnectorSizeSamplingName[] = "Connector Size Sampling"; + +// Profiling is a class of basic unit of profiling action +// This base class encapsulate the serialization output logic +class Profiling : std::enable_shared_from_this { + public: + // Constructor + Profiling() = default; + + // Destructor + virtual ~Profiling() = default; + + virtual Status Init(const std::string &dir_path, const std::string &device_id) = 0; + + // Default serialization file generator + virtual Status SaveToFile() = 0; + + // Profiling name + virtual std::string Name() const = 0; + + protected: + std::string file_path_; +}; + +// Sampling is a class of profiling which generate samples periodically. +class Sampling : public Profiling { + public: + // Sampling action function. This function will be invoked by performance monitor thread. + virtual Status Sample() = 0; +}; + +// Tracing is class of profiling which record samples upon request. +class Tracing : public Profiling { + // Tracing does not define a fixed interface to provide flexible on data recording. 
+}; + +// ProfilingManager is a class manages all profiling infrastructure +// It serves the following purposes: +// 1) Fetch profiling configs from global contexts +// 2) Setup all profiling node based on config +// 3) Provide access of profiling nodes for profiling actions +// 4) Manage profiling data serialization process +class ProfilingManager { + public: + explicit ProfilingManager(ExecutionTree *tree) : tree_(tree) {} + + ~ProfilingManager() = default; + + Status Initialize(); + + // Save profile data to file + // @return Status - The error code return + Status SaveProfilingData(); + + // Sampling node getter + // @param name - The name of the requested node + // @param node - Pointer to the shared pointer for the Sampling node + // @return Status - The error code return + Status GetSamplingNode(const std::string &name, std::shared_ptr *node); + + // Tracing node getter + // @param name - The name of the requested node + // @param node - Pointer to the shared pointer for the Tracing node + // @return Status - The error code return + Status GetTracingNode(const std::string &name, std::shared_ptr *node); + + // If profiling is enabled. 
+ bool IsProfilingEnable() const; + + const std::unordered_map> &GetSamplingNodes() { return sampling_nodes_; } + + private: + std::unordered_map> tracing_nodes_; + + std::unordered_map> sampling_nodes_; + + // Register profile node to tree + // @param node - Profiling node + // @return Status - The error code return + Status RegisterTracingNode(std::shared_ptr node); + + // Register profile node to tree + // @param node - Profiling node + // @return Status - The error code return + Status RegisterSamplingNode(std::shared_ptr node); + + ExecutionTree *tree_ = nullptr; // ExecutionTree pointer + std::string dir_path_; // where to create profiling file + std::string device_id_; // used when create profiling file,filename_deviceid.suffix +}; + +enum ProfilingType { TIME, CONNECTOR_DEPTH }; + +enum ProfilingTimeSubType { + PIPELINE_TIME, + TDT_PUSH_TIME, + BATCH_TIME, + INVALID_TIME, +}; + +class ProfilingTime { + public: + static double GetCurMilliSecond(); +}; + +} // namespace dataset +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc b/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc index e457de52ae..ca9f2176f5 100644 --- a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc +++ b/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc @@ -16,6 +16,7 @@ #include "dataset/engine/tdt/tdt_plugin.h" #include "common/utils.h" #include "utils/log_adapter.h" +#include "dataset/engine/perf/profiling.h" namespace mindspore { namespace dataset { @@ -28,18 +29,26 @@ std::shared_ptr TdtPlugin::GetInstance() { return instance_ptr_; } -TdtStatus TdtPlugin::hostPush(TensorRow ts_row, bool is_wait, std::string channel_name) { - MS_LOG(INFO) << "TDT channel name is " << channel_name << "."; +TdtStatus TdtPlugin::hostPush(TensorRow ts_row, bool is_wait, std::string channel_name, bool profiling, int32_t &time) { + MS_LOG(DEBUG) << "TDT channel name is " << channel_name << "."; std::vector items; + double start_time; auto ret = translate(ts_row, 
items); if (ret != SUCCESS) { MS_LOG(ERROR) << "TDT converting tensor failed!"; return FAILED; } + if (profiling) { + start_time = ProfilingTime::GetCurMilliSecond(); + } if (tdt::TdtHostPushData(channel_name, items) != 0) { MS_LOG(ERROR) << "TDT pushing data failed!"; return FAILED; } + if (profiling) { + double end_time = ProfilingTime::GetCurMilliSecond(); + time = (int32_t)(end_time - start_time); + } return SUCCESS; } @@ -110,10 +119,11 @@ TdtStatus TdtPlugin::translate(const TensorRow &ts_row, std::vector &i data_item.tensorShape_ = dataShapes; data_item.tensorType_ = datatype; data_item.dataLen_ = ts->SizeInBytes(); - data_item.dataPtr_ = std::shared_ptr(reinterpret_cast(ts->GetMutableBuffer()), [](void *elem) {}); + data_item.dataPtr_ = + std::shared_ptr(reinterpret_cast(&(*ts->begin())), [](const void *elem) {}); items.emplace_back(data_item); - MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is " - << ts->Size() << "."; + MS_LOG(DEBUG) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is " + << ts->Size() << "."; } return SUCCESS; } diff --git a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.h b/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.h index a25deb4aab..304b205b81 100644 --- a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.h +++ b/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.h @@ -26,6 +26,7 @@ #include "dataset/core/data_type.h" #include "dataset/core/tensor.h" +#include "dataset/core/tensor_row.h" namespace mindspore { namespace dataset { @@ -37,7 +38,7 @@ class TdtPlugin { public: static std::shared_ptr GetInstance(); - TdtStatus hostPush(TensorRow ts_row, bool is_wait, std::string channel_name); + TdtStatus hostPush(TensorRow ts_row, bool is_wait, std::string channel_name, bool profilig, int32_t &time); private: TdtPlugin() {} diff --git a/mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt b/mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt index 
8472ab5192..9131c9c667 100644 --- a/mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt @@ -1,8 +1,14 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) add_library(kernels-data OBJECT - data_utils.cc - one_hot_op.cc - type_cast_op.cc - to_float16_op.cc - ) + data_utils.cc + one_hot_op.cc + pad_end_op.cc + type_cast_op.cc + to_float16_op.cc + fill_op.cc + slice_op.cc + mask_op.cc + concatenate_op.cc + duplicate_op.cc + ) diff --git a/mindspore/ccsrc/dataset/kernels/data/concatenate_op.cc b/mindspore/ccsrc/dataset/kernels/data/concatenate_op.cc new file mode 100644 index 0000000000..87115fd3ce --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/data/concatenate_op.cc @@ -0,0 +1,55 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "dataset/kernels/data/concatenate_op.h" + +#include "dataset/core/tensor.h" +#include "dataset/kernels/data/data_utils.h" +#include "dataset/kernels/tensor_op.h" + +namespace mindspore { +namespace dataset { + +Status ConcatenateOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + RETURN_IF_NOT_OK(Concatenate(input, output, axis_, prepend_, append_)); + return Status::OK(); +} + +Status ConcatenateOp::OutputShape(const std::vector &inputs, std::vector &outputs) { + RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs)); + + std::vector inputs_copy; + inputs_copy.push_back(inputs[0].Squeeze()); + + CHECK_FAIL_RETURN_UNEXPECTED(inputs.at(0).Rank() == 1, "Only 1D input tensors supported"); + + outputs.clear(); + dsize_t output_shape = 0; + output_shape = output_shape + inputs.at(0).NumOfElements(); + if (prepend_ != nullptr) { + CHECK_FAIL_RETURN_UNEXPECTED(prepend_->shape().Rank() == 1, "Only 1D prepend tensors supported"); + output_shape = output_shape + prepend_->shape().NumOfElements(); + } + if (append_ != nullptr) { + CHECK_FAIL_RETURN_UNEXPECTED(append_->shape().Rank() == 1, "Only 1D append tensors supported"); + output_shape = output_shape + append_->shape().NumOfElements(); + } + + outputs.emplace_back(std::vector{output_shape}); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/data/concatenate_op.h b/mindspore/ccsrc/dataset/kernels/data/concatenate_op.h new file mode 100644 index 0000000000..4e4c7ad4e0 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/data/concatenate_op.h @@ -0,0 +1,66 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_KERNELS_DATA_CONCATENATE_OP_H_ +#define DATASET_KERNELS_DATA_CONCATENATE_OP_H_ + +#include +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" + +namespace mindspore { +namespace dataset { + +class ConcatenateOp : public TensorOp { + public: + /// Constructor to ConcatenateOp. + /// @param int8_t axis - axis to concatenate tensors along. + /// @param std::shared_ptr prepend - prepend tensor. + /// @param std::shared_ptr append -append tensor. + explicit ConcatenateOp(int8_t axis, std::shared_ptr prepend, std::shared_ptr append) + : axis_(axis), prepend_(prepend), append_(append) {} + + ~ConcatenateOp() override = default; + + /// Print method to see which tensor Op this is. + /// @param std::ostream &out - output stream object. 
+ void Print(std::ostream &out) const override { out << "ConcatenateOp"; } + + /// Compute method allowing multiple tensors as inputs + /// @param TensorRow &input - input tensor rows + /// @param TensorRow *output - output tensor rows + Status Compute(const TensorRow &input, TensorRow *output) override; + + /// Compute tensor output shape + /// @param std::vector &inputs - vector of input tensor shapes + /// @param std::vector &inputs, std::vector &outputs) override; + + /// Number of inputs the tensor operation accepts + uint32_t NumInput() override { return 0; } + + private: + int8_t axis_; + std::shared_ptr prepend_; + std::shared_ptr append_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // MINDSPORE_CONCATENATE_OP_H diff --git a/mindspore/ccsrc/dataset/kernels/data/data_utils.cc b/mindspore/ccsrc/dataset/kernels/data/data_utils.cc index f2635c1fe3..40eba1edf6 100644 --- a/mindspore/ccsrc/dataset/kernels/data/data_utils.cc +++ b/mindspore/ccsrc/dataset/kernels/data/data_utils.cc @@ -15,12 +15,19 @@ */ #include "dataset/kernels/data/data_utils.h" + +#include +#include +#include #include + #include "dataset/core/constants.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" #include "dataset/core/data_type.h" #include "dataset/core/pybind_support.h" +#include "dataset/core/tensor.h" +#include "dataset/core/tensor_shape.h" +#include "dataset/kernels/data/type_cast_op.h" +#include "dataset/util/status.h" namespace mindspore { namespace dataset { @@ -76,6 +83,7 @@ Status OneHotEncodingSigned(const std::shared_ptr &input, std::shared_pt Status OneHotEncoding(std::shared_ptr input, std::shared_ptr *output, dsize_t num_classes) { input->Squeeze(); + if (input->Rank() > 1) { // We expect the input to be int he first dimension RETURN_STATUS_UNEXPECTED("One hot only supports scalars or 1D shape Tensors."); } @@ -104,11 +112,121 @@ Status OneHotEncoding(std::shared_ptr input, std::shared_ptr *ou } } +Status Fill(const 
std::shared_ptr input, std::shared_ptr *output, std::shared_ptr fill_value) { + CHECK_FAIL_RETURN_UNEXPECTED(!((fill_value->type() == DataType::DE_STRING) && (input->type() != DataType::DE_STRING)), + "Types do not match"); + + CHECK_FAIL_RETURN_UNEXPECTED(fill_value->shape() == TensorShape({}), "fill_value is not a scalar"); + + std::shared_ptr out; + + const DataType &to = input->type(); + std::unique_ptr op(new TypeCastOp(to)); + + std::shared_ptr fill_output; + RETURN_IF_NOT_OK(op->Compute(fill_value, &fill_output)); + + RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, TensorImpl::kFlexible, input->shape(), input->type())); + + switch (input->type().value()) { + case DataType::DE_BOOL: { + bool value = 0; + RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); + out->Fill(value); + break; + } + case DataType::DE_INT8: { + int8_t value = 0; + RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); + out->Fill(value); + break; + } + case DataType::DE_UINT8: { + uint8_t value = 0; + RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); + out->Fill(value); + break; + } + case DataType::DE_UINT16: { + uint16_t value = 0; + RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); + out->Fill(value); + break; + } + case DataType::DE_INT16: { + int16_t value = 0; + RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); + out->Fill(value); + break; + } + case DataType::DE_UINT32: { + uint32_t value = 0; + RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); + out->Fill(value); + break; + } + case DataType::DE_INT32: { + int32_t value = 0; + RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); + out->Fill(value); + break; + } + case DataType::DE_UINT64: { + uint64_t value = 0; + RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); + out->Fill(value); + break; + } + case DataType::DE_INT64: { + int64_t value = 0; + RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); + out->Fill(value); + break; + } + case DataType::DE_FLOAT16: { + int64_t value = 0; + 
RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); + out->Fill(value); + break; + } + case DataType::DE_FLOAT32: { + float value = 0; + RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); + out->Fill(value); + break; + } + case DataType::DE_FLOAT64: { + double value = 0; + RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); + out->Fill(value); + break; + } + case DataType::DE_STRING: { + std::vector strings; + std::string_view fill_string_view; + RETURN_IF_NOT_OK(fill_value->GetItemAt(&fill_string_view, {})); + std::string fill_string = std::string(fill_string_view); + for (int i = 0; i < input->shape().NumOfElements(); i++) { + strings.emplace_back(fill_string); + } + RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, strings, input->shape())); + break; + } + case DataType::DE_UNKNOWN: { + RETURN_STATUS_UNEXPECTED("FillOp does not support input of this type."); + break; + } + } + + *output = out; + return Status::OK(); +} template void Cast(const std::shared_ptr &input, std::shared_ptr *output) { auto in_itr = input->begin(); auto out_itr = (*output)->begin(); auto out_end = (*output)->end(); + for (; out_itr != out_end; static_cast(in_itr++), static_cast(out_itr++)) *out_itr = static_cast(*in_itr); } @@ -216,7 +334,314 @@ Status ToFloat16(const std::shared_ptr &input, std::shared_ptr * auto in_itr = input->begin(); auto out_itr = (*output)->begin(); auto out_end = (*output)->end(); - for (; out_itr != out_end; in_itr++, out_itr++) *out_itr = Eigen::half(*in_itr); + + for (; out_itr != out_end; in_itr++, out_itr++) { + float element = *in_itr; + float float16_max = static_cast(std::numeric_limits::max()); + float float16_min = static_cast(std::numeric_limits::lowest()); + if (element > float16_max || element < float16_min) { + RETURN_STATUS_UNEXPECTED("Value " + std::to_string(element) + " is outside of valid float16 range [" + + std::to_string(float16_max) + ", " + std::to_string(float16_min) + "]."); + } + + *out_itr = Eigen::half(*in_itr); + } + + return 
Status::OK(); +} + +Status PadEnd(const std::shared_ptr &src, std::shared_ptr *dst, const std::vector &pad_shape, + const std::shared_ptr &pad_val) { + if (pad_val == nullptr) { + if (src->type().IsNumeric()) { + return PadEndNumeric(src, dst, pad_shape, 0); + } else { + return PadEndString(src, dst, pad_shape, ""); + } + } + CHECK_FAIL_RETURN_UNEXPECTED(src->type().IsNumeric() == pad_val->type().IsNumeric(), + "Source and pad_value tensors are not of the same type."); + if (pad_val->type().IsNumeric()) { + std::shared_ptr float_pad_value; + RETURN_IF_NOT_OK(TypeCast(pad_val, &float_pad_value, DataType(DataType::DE_FLOAT32))); + float val = 0; + RETURN_IF_NOT_OK(float_pad_value->GetItemAt(&val, {})); + return PadEndNumeric(src, dst, pad_shape, val); + } + std::string_view val; + RETURN_IF_NOT_OK(pad_val->GetItemAt(&val, {})); + return PadEndString(src, dst, pad_shape, std::string(val)); +} + +Status PadEndNumeric(const std::shared_ptr &src, std::shared_ptr *dst, + const std::vector &pad_shape, float pad_val) { + CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr && dst != nullptr, "tensor can't be nullptr"); + if (src->Rank() == 0 || src->shape().AsVector() == pad_shape) { + (*dst) = src; // if no padding, copy the pointer + } else { + CHECK_FAIL_RETURN_UNEXPECTED(src->Rank() == pad_shape.size(), "Pad to diff rank not allowed"); + RETURN_IF_NOT_OK(Tensor::CreateTensor(dst, TensorImpl::kFlexible, TensorShape(pad_shape), src->type())); + auto tensor_type = src->type().value(); + if (pad_val == 0) { // if pad with zero, don't care what type it is + RETURN_IF_NOT_OK((*dst)->Zero()); + } else if (tensor_type == DataType::DE_INT8) { + RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); + } else if (tensor_type == DataType::DE_BOOL) { + RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); + } else if (tensor_type == DataType::DE_UINT8) { + RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); + } else if (tensor_type == DataType::DE_INT16) { + RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); + } else if (tensor_type == 
DataType::DE_FLOAT16) { + RETURN_IF_NOT_OK((*dst)->Fill(static_cast(pad_val))); + } else if (tensor_type == DataType::DE_UINT16) { + RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); + } else if (tensor_type == DataType::DE_INT32) { + RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); + } else if (tensor_type == DataType::DE_UINT32) { + RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); + } else if (tensor_type == DataType::DE_INT64) { + RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); + } else if (tensor_type == DataType::DE_UINT64) { + RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); + } else if (tensor_type == DataType::DE_FLOAT32) { + RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); + } else if (tensor_type == DataType::DE_FLOAT64) { + RETURN_IF_NOT_OK((*dst)->Fill(pad_val)); + } else { + RETURN_STATUS_UNEXPECTED("Incorrect/Unknown tensor type"); + } + std::vector cur_ind(src->Rank(), 0); + RETURN_IF_NOT_OK(PadEndNumericHelper(src, *dst, cur_ind, 0)); + } + return Status::OK(); +} +Status PadEndNumericHelper(const std::shared_ptr &src, std::shared_ptr dst, + std::vector cur_ind, size_t cur_dim) { + if (cur_dim == src->Rank() - 1) { // if this is the last dimension, copy the data + dst->CopyLastDimAt(src, cur_ind); + } else { // not the last dimension, keep doing recursion + dsize_t min_ind = std::min(dst->shape()[cur_dim], src->shape()[cur_dim]); + for (dsize_t i = 0; i < min_ind; i++) { + cur_ind[cur_dim] = i; + RETURN_IF_NOT_OK(PadEndNumericHelper(src, dst, cur_ind, cur_dim + 1)); + } + } + return Status::OK(); +} + +Status PadEndString(const std::shared_ptr &src, std::shared_ptr *dst, + const std::vector &pad_shape, const std::string &pad_val) { + CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr && dst != nullptr, "tensor can't be nullptr"); + if (src->Rank() == 0 || src->shape().AsVector() == pad_shape) { + (*dst) = src; // if no padding, copy the pointer + } else { + CHECK_FAIL_RETURN_UNEXPECTED(src->Rank() == pad_shape.size(), "Pad to diff rank not allowed"); + std::vector cur_ind(src->Rank(), 0); + std::vector 
strings; + RETURN_IF_NOT_OK(PadEndStringHelper(src, &strings, TensorShape(pad_shape), cur_ind, 0, pad_val)); + RETURN_IF_NOT_OK(Tensor::CreateTensor(dst, strings, TensorShape(pad_shape))); + } + return Status::OK(); +} + +Status PadEndStringHelper(const std::shared_ptr &src, std::vector *dst, + const TensorShape &dst_shape, std::vector cur_ind, size_t cur_dim, + const std::string &pad_value) { + if (cur_dim == src->Rank() - 1) { // if this is the last dimension, copy the data + dsize_t min_ind = std::min(dst_shape[cur_dim], src->shape()[cur_dim]); + for (dsize_t i = 0; i < min_ind; i++) { + cur_ind[cur_dim] = i; + std::string_view item; + RETURN_IF_NOT_OK(src->GetItemAt(&item, cur_ind)); + dst->emplace_back(item); + } + for (dsize_t i = min_ind; i < dst_shape[cur_dim]; i++) { + dst->emplace_back(pad_value); + } + + } else { // not the last dimension, keep doing recursion + dsize_t min_ind = std::min(dst_shape[cur_dim], src->shape()[cur_dim]); + for (dsize_t i = 0; i < min_ind; i++) { + cur_ind[cur_dim] = i; + RETURN_IF_NOT_OK(PadEndStringHelper(src, dst, dst_shape, cur_ind, cur_dim + 1, pad_value)); + } + dsize_t count = (dst_shape[cur_dim] - min_ind) * dst_shape.Strides()[cur_dim]; + for (dsize_t i = 0; i < count; i++) { + dst->emplace_back(pad_value); + } + } + return Status::OK(); +} + +template +Status MaskHelper(const std::shared_ptr &input, const std::shared_ptr &output, + const std::shared_ptr &value_tensor, RelationalOp op) { + T value; + RETURN_IF_NOT_OK(value_tensor->GetItemAt(&value, {})); + auto in_itr = input->begin(); + auto out_itr = output->begin(); + for (; in_itr != input->end(); in_itr++, out_itr++) { + switch (op) { + case RelationalOp::kEqual: + *out_itr = (*in_itr == value); + break; + case RelationalOp::kNotEqual: + *out_itr = (*in_itr != value); + break; + case RelationalOp::kGreater: + *out_itr = (*in_itr > value); + break; + case RelationalOp::kGreaterEqual: + *out_itr = (*in_itr >= value); + break; + case RelationalOp::kLess: + *out_itr = 
(*in_itr < value); + break; + case RelationalOp::kLessEqual: + *out_itr = (*in_itr <= value); + break; + default: + RETURN_STATUS_UNEXPECTED("Unknown relational operator."); + } + } + return Status::OK(); +} + +Status Mask(const std::shared_ptr &input, std::shared_ptr *output, const std::shared_ptr &value, + RelationalOp op) { + CHECK_FAIL_RETURN_UNEXPECTED(input->type().IsNumeric() == value->type().IsNumeric(), + "Cannot convert constant value to the type of the input tensor."); + CHECK_FAIL_RETURN_UNEXPECTED(value->shape() == TensorShape::CreateScalar(), "Value is not a scalar"); + + RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), DataType(DataType::DE_BOOL))); + + std::unique_ptr value_cast_op(new TypeCastOp(input->type())); + std::shared_ptr casted_value; + if (input->type().IsNumeric()) { + RETURN_IF_NOT_OK(value_cast_op->Compute(value, &casted_value)); + } else { + casted_value = value; + } + + switch (input->type().value()) { + case DataType::DE_BOOL: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + break; + case DataType::DE_INT8: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + break; + case DataType::DE_UINT8: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + break; + case DataType::DE_UINT16: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + break; + case DataType::DE_INT16: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + break; + case DataType::DE_UINT32: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + break; + case DataType::DE_INT32: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + break; + case DataType::DE_UINT64: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + break; + case DataType::DE_INT64: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + break; + case DataType::DE_FLOAT16: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + 
break; + case DataType::DE_FLOAT32: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + break; + case DataType::DE_FLOAT64: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + break; + case DataType::DE_STRING: + RETURN_IF_NOT_OK(MaskHelper(input, *output, casted_value, op)); + break; + case DataType::DE_UNKNOWN: + RETURN_STATUS_UNEXPECTED("Unsupported input type."); + break; + } + return Status::OK(); +} + +Status Concatenate(const TensorRow &input, TensorRow *output, int8_t axis, std::shared_ptr prepend, + std::shared_ptr append) { + CHECK_FAIL_RETURN_UNEXPECTED(input[0]->shape().Rank() == 1, "Only 1D tensors supported"); + CHECK_FAIL_RETURN_UNEXPECTED(axis == 0 || axis == -1, "Only concatenation along the last dimension supported"); + + axis = Tensor::HandleNeg(axis, input[0]->shape().Rank()); + CHECK_FAIL_RETURN_UNEXPECTED(axis == 0, "Only axis=0 is supported"); + + std::shared_ptr out; + if (prepend != nullptr) { + CHECK_FAIL_RETURN_UNEXPECTED(prepend->shape().Rank() == 1, "Only 1D tensors supported"); + RETURN_IF_NOT_OK(ConcatenateHelper(prepend, &out, axis, input[0])); + } else { + out = input[0]; + } + for (dsize_t i = 1; i < input.size(); i++) { + std::shared_ptr out_t; + CHECK_FAIL_RETURN_UNEXPECTED(input[i]->shape().Rank() == 1, "Only 1D tensors supported"); + RETURN_IF_NOT_OK(ConcatenateHelper(out, &out_t, axis, input[i])); + out = out_t; + } + std::shared_ptr out_t; + if (append != nullptr) { + CHECK_FAIL_RETURN_UNEXPECTED(append->shape().Rank() == 1, "Only 1D tensors supported"); + RETURN_IF_NOT_OK(ConcatenateHelper(out, &out_t, axis, append)); + } else { + out_t = out; + } + output->push_back(out_t); + + return Status::OK(); +} + +Status ConcatenateHelper(const std::shared_ptr &input, std::shared_ptr *output, int8_t axis, + std::shared_ptr append) { + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == append->type(), "Tensor types do not match"); + + TensorShape t({}); + + for (dsize_t i = 0; i < input->shape().Rank(); i++) 
{ + if (i != axis) { + t = t.AppendDim(input->shape()[i]); + } else { + dsize_t new_shape = input->shape()[i] + append->shape()[i]; + + t = t.AppendDim(new_shape); + } + } + std::shared_ptr out; + + if (input->type().IsNumeric()) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, TensorImpl::kFlexible, t, input->type())); + + RETURN_IF_NOT_OK(out->Concatenate({0}, input)); + RETURN_IF_NOT_OK(out->Concatenate({input->shape()[0]}, append)); + *output = out; + } else { + std::vector strings; + + auto itr = input->begin(); + for (; itr != input->end(); itr++) { + strings.emplace_back(*itr); + } + itr = append->begin(); + for (; itr != append->end(); itr++) { + strings.emplace_back(*itr); + } + RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, strings, t)); + + *output = out; + } return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/kernels/data/data_utils.h b/mindspore/ccsrc/dataset/kernels/data/data_utils.h index 2bd168a1fe..6034e2a0eb 100644 --- a/mindspore/ccsrc/dataset/kernels/data/data_utils.h +++ b/mindspore/ccsrc/dataset/kernels/data/data_utils.h @@ -17,11 +17,13 @@ #define DATASET_KERNELS_DATA_DATA_UTILS_H_ #include +#include #include #include "dataset/core/constants.h" #include "dataset/core/cv_tensor.h" #include "dataset/core/data_type.h" #include "dataset/core/tensor.h" +#include "dataset/core/tensor_row.h" namespace mindspore { namespace dataset { @@ -42,6 +44,13 @@ Status OneHotEncodingUnsigned(const std::shared_ptr &input, std::shared_ Status OneHotEncodingSigned(const std::shared_ptr &input, std::shared_ptr *output, dsize_t num_classes, int64_t index); +// Returns a tensor of shape input filled with the passed fill_value +// @param input Tensor +// @param output Tensor. The shape and type of the output tensor is same as input +// @param fill_value Tensor. A scalar tensor used to fill the output tensor + +Status Fill(const std::shared_ptr input, std::shared_ptr *output, std::shared_ptr fill_value); + // Returns a type changed input tensor. 
// Example: if input tensor is float64, the output will the specified dataType. See DataTypes.cpp // @param input Tensor @@ -58,6 +67,96 @@ void Cast(const std::shared_ptr &input, std::shared_ptr *output) Status ToFloat16(const std::shared_ptr &input, std::shared_ptr *output); Status TypeCast(const std::shared_ptr &input, std::shared_ptr *output, const DataType &data_type); + +// Pad input tensor according pad_shape, need to have same rank. +// Based on the type of the input tensor, PadEndNumeric/String will be called. +// @param std::shared_ptr src - tensor to pad from +// @param std::shared_ptr *dst - return tensor padded +// @param std::vector pad_shape - shape to pad to +// @param std::shared_ptr pad_val - value to pad with in Tensor format, +// @return - The error code return +Status PadEnd(const std::shared_ptr &src, std::shared_ptr *dst, const std::vector &pad_shape, + const std::shared_ptr &pad_val); + +// Pad input numeric tensor according pad_shape, need to have same rank. +// @param std::shared_ptr src - tensor to pad from +// @param std::shared_ptr *dst - return tensor padded +// @param std::vector pad_shape - shape to pad to +// @param float pad_val - value to pad with +// @return - The error code return +Status PadEndNumeric(const std::shared_ptr &src, std::shared_ptr *dst, + const std::vector &pad_shape, float pad_val); + +// recursive helper function for padding numric tensors. This function could be very expensive if called on a +// multi-dimensional tensor it is only meant to be called by PadEndNumeric. 
+// @tparam T - type of tensor and fill value +// @param std::shared_ptr src - Tensor to pad from +// @param std::shared_ptr* dst - Tensor to pad to, return value +// @param std::vector cur_ind - recursion helper +// @param T pad_val - value to pad tensor with +// @param size_t cur_dim - recursion helper +// @return Status - The error code return +Status PadEndNumericHelper(const std::shared_ptr &src, std::shared_ptr dst, + std::vector cur_ind, size_t cur_dim = 0); + +// Pad input string tensor according pad_shape, need to have same rank. +// @param std::shared_ptr src - tensor to pad from +// @param std::shared_ptr *dst - return tensor padded +// @param std::vector pad_shape - shape to pad to +// @param std::string pad_val - value to pad with +// @return - The error code return +Status PadEndString(const std::shared_ptr &src, std::shared_ptr *dst, + const std::vector &pad_shape, const std::string &pad_val); + +// recursive helper function for padding string tensors. This function could be very expensive if called on a +// multi-dimensional tensor it is only meant to be called by PadEndString. 
+// @tparam T - type of tensor and fill value +// @param std::shared_ptr src - Tensor to pad from +// @param std::shared_ptr* dst - Tensor to pad to, return value +// @param std::vector cur_ind - recursion helperas text +// @param std::string pad_val - value to pad tensor with +// @param size_t cur_dim - recursion helper +// @return Status - The error code return +Status PadEndStringHelper(const std::shared_ptr &src, std::vector *dst, + const TensorShape &dst_shape, std::vector cur_ind, size_t cur_dim, + const std::string &pad_value); + +enum class RelationalOp { + kEqual = 0, // == + kNotEqual, // != + kLess, // < + kLessEqual, // <= + kGreater, // > + kGreaterEqual, // >= +}; + +/// Helper method that masks the input tensor +/// @tparam T type of the tensor +/// @param input[in] input tensor +/// @param output[out] output tensor +/// @param value_tensor[in] scalar tensor value to compared with +/// @param op[in] RelationalOp enum +/// @return Status ok/error +template +Status MaskHelper(const std::shared_ptr &input, const std::shared_ptr &output, + const std::shared_ptr &value_tensor, RelationalOp op); + +/// Mask the input tensor +/// @param input[in] input tensor +/// @param output[out] output tensor +/// @param value[in] scalar tensor value to compared with +/// @param op[in] RelationalOp enum +/// @return Status ok/error +Status Mask(const std::shared_ptr &input, std::shared_ptr *output, const std::shared_ptr &value, + RelationalOp op); + +Status Concatenate(const TensorRow &input, TensorRow *output, int8_t axis, std::shared_ptr prepend, + std::shared_ptr append); + +// helper for concat, always append to the input, and pass that to the output +Status ConcatenateHelper(const std::shared_ptr &input, std::shared_ptr *output, int8_t axis, + std::shared_ptr append); + } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc b/mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc new file mode 100644 index 
0000000000..959516a4aa --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc @@ -0,0 +1,35 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dataset/kernels/data/duplicate_op.h" + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" + +namespace mindspore { +namespace dataset { + +Status DuplicateOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + std::shared_ptr out; + RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, input[0])); + output->push_back(input[0]); + output->push_back(out); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/data/duplicate_op.h b/mindspore/ccsrc/dataset/kernels/data/duplicate_op.h new file mode 100644 index 0000000000..4c9d6d36c9 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/data/duplicate_op.h @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_KERNELS_DATA_DUPLICATE_OP_H_ +#define DATASET_KERNELS_DATA_DUPLICATE_OP_H_ + +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" + +namespace mindspore { +namespace dataset { + +class DuplicateOp : public TensorOp { + public: + DuplicateOp() = default; + + ~DuplicateOp() override = default; + + void Print(std::ostream &out) const override { out << "DuplicateOp"; } + + Status Compute(const TensorRow &input, TensorRow *output) override; + + uint32_t NumOutput() override { return 2; } +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_KERNELS_DUPLICATE_OP_H_ diff --git a/mindspore/ccsrc/dataset/kernels/data/fill_op.cc b/mindspore/ccsrc/dataset/kernels/data/fill_op.cc new file mode 100644 index 0000000000..63895d3a95 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/data/fill_op.cc @@ -0,0 +1,30 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "dataset/kernels/data/fill_op.h" + +#include "dataset/core/tensor.h" +#include "dataset/kernels/data/data_utils.h" +#include "dataset/kernels/tensor_op.h" + +namespace mindspore { +namespace dataset { +Status FillOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + Status s = Fill(input, output, fill_value_); + return s; +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/data/fill_op.h b/mindspore/ccsrc/dataset/kernels/data/fill_op.h new file mode 100644 index 0000000000..03f59f3e67 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/data/fill_op.h @@ -0,0 +1,44 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef DATASET_KERNELS_DATA_FILL_OP_H_ +#define DATASET_KERNELS_DATA_FILL_OP_H_ + +#include +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" + +namespace mindspore { +namespace dataset { +class FillOp : public TensorOp { + public: + explicit FillOp(std::shared_ptr value) : fill_value_(value) {} + + ~FillOp() override = default; + void Print(std::ostream &out) const override { out << "FillOp"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + private: + std::shared_ptr fill_value_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // MINDSPORE_FILL_OP_H diff --git a/mindspore/ccsrc/dataset/kernels/data/mask_op.cc b/mindspore/ccsrc/dataset/kernels/data/mask_op.cc new file mode 100644 index 0000000000..2cfeb7e36f --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/data/mask_op.cc @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dataset/kernels/data/mask_op.h" + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" + +namespace mindspore { +namespace dataset { + +Status MaskOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + std::shared_ptr temp_output; + CHECK_FAIL_RETURN_UNEXPECTED(type_.IsNumeric(), "Cannot generate a string mask. 
Type should be numeric."); + + RETURN_IF_NOT_OK(Mask(input, &temp_output, value_, op_)); + + // cast the output to the the required type. Skip casting if type_ is bool. + if (type_ != DataType::DE_BOOL) { + RETURN_IF_NOT_OK(cast_->Compute(temp_output, output)); + } else { + *output = std::move(temp_output); + } + + return Status::OK(); +} + +Status MaskOp::OutputType(const std::vector &inputs, std::vector &outputs) { + RETURN_IF_NOT_OK(TensorOp::OutputType(inputs, outputs)); + outputs[0] = type_; + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/data/mask_op.h b/mindspore/ccsrc/dataset/kernels/data/mask_op.h new file mode 100644 index 0000000000..0affe543bb --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/data/mask_op.h @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_KERNELS_DATA_MASK_OP_H_ +#define DATASET_KERNELS_DATA_MASK_OP_H_ + +#include +#include +#include +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/kernels/data/type_cast_op.h" +#include "dataset/kernels/data/data_utils.h" + +namespace mindspore { +namespace dataset { + +class MaskOp : public TensorOp { + public: + MaskOp(RelationalOp op, std::shared_ptr value, DataType type = DataType(DataType::DE_BOOL)) + : op_(op), value_(std::move(value)), type_(type), cast_(new TypeCastOp(type)) {} + + ~MaskOp() override = default; + + void Print(std::ostream &out) const override { out << "MaskOp"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + Status OutputType(const std::vector &inputs, std::vector &outputs) override; + + private: + RelationalOp op_; + std::shared_ptr value_; + DataType type_; + std::unique_ptr cast_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_KERNELS_DATA_MASK_OP_H_ diff --git a/mindspore/ccsrc/dataset/kernels/data/pad_end_op.cc b/mindspore/ccsrc/dataset/kernels/data/pad_end_op.cc new file mode 100644 index 0000000000..5b3b4cbe16 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/data/pad_end_op.cc @@ -0,0 +1,40 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "dataset/kernels/data/pad_end_op.h" + +#include "dataset/core/tensor.h" +#include "dataset/kernels/data/data_utils.h" +#include "dataset/kernels/tensor_op.h" + +namespace mindspore { +namespace dataset { +Status PadEndOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + Status s = PadEnd(input, output, output_shape_.AsVector(), pad_val_); + return s; +} + +Status PadEndOp::OutputShape(const std::vector &inputs, std::vector &outputs) { + RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs)); + outputs.clear(); + for (auto s : inputs) { + outputs.emplace_back(TensorShape(output_shape_.AsVector())); + } + CHECK_FAIL_RETURN_UNEXPECTED(!outputs.empty(), "Input has a wrong shape"); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/data/pad_end_op.h b/mindspore/ccsrc/dataset/kernels/data/pad_end_op.h new file mode 100644 index 0000000000..c6bc0c430e --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/data/pad_end_op.h @@ -0,0 +1,47 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_KERNELS_DATA_PAD_END_OP_H_ +#define DATASET_KERNELS_DATA_PAD_END_OP_H_ + +#include +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" + +namespace mindspore { +namespace dataset { +class PadEndOp : public TensorOp { + public: + explicit PadEndOp(const TensorShape &pad_shape, const std::shared_ptr &pad_value) + : output_shape_(pad_shape), pad_val_(pad_value) {} + + ~PadEndOp() override = default; + + void Print(std::ostream &out) const override { out << "PadEndOp"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + + private: + TensorShape output_shape_; + std::shared_ptr pad_val_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_KERNELS_DATA_PAD_END_OP_H_ diff --git a/mindspore/ccsrc/dataset/kernels/data/slice_op.cc b/mindspore/ccsrc/dataset/kernels/data/slice_op.cc new file mode 100644 index 0000000000..2eebf26e84 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/data/slice_op.cc @@ -0,0 +1,47 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "dataset/kernels/data/slice_op.h" + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" + +namespace mindspore { +namespace dataset { +Status SliceOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Rank() == 1, "SliceOp supports 1D Tensors only for now."); + + // if `all` flag is true, output is just the input. + if (all_) { + *output = input; + return Status::OK(); + } + + // if slice object was provided, indices should be empty. Generate indices from the slice object. + if (slice_.valid() && indices_.empty()) { + dsize_t len = input->shape()[0]; + std::vector indices = slice_.Indices(len); + return input->Slice(output, indices); + } + + // if indices are not empty, slices should be invalid, use indices_ to slice + if (!indices_.empty() && !slice_.valid()) { + return input->Slice(output, indices_); + } + RETURN_STATUS_UNEXPECTED("The indexing parameters are invalid"); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/data/slice_op.h b/mindspore/ccsrc/dataset/kernels/data/slice_op.h new file mode 100644 index 0000000000..0a24ae171e --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/data/slice_op.h @@ -0,0 +1,85 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_KERNELS_DATA_SLICE_OP_H_ +#define DATASET_KERNELS_DATA_SLICE_OP_H_ + +#include +#include +#include +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" + +namespace mindspore { +namespace dataset { +class Slice { + public: + Slice() : start_(0), stop_(0), step_(0) {} + Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {} + Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {} + explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {} + + ~Slice() = default; + + std::vector Indices(dsize_t length) { + std::vector indices; + dsize_t index = std::min(Tensor::HandleNeg(start_, length), length); + dsize_t end_index = std::min(Tensor::HandleNeg(stop_, length), length); + if (step_ > 0) { + for (; index < end_index; index += step_) { + indices.push_back(index); + } + } else { + for (; index > end_index; index += step_) { + indices.push_back(index); + } + } + return indices; + } + + bool valid() { return !(start_ == 0 && stop_ == 0 && step_ == 0); } + + dsize_t start_; + dsize_t stop_; + dsize_t step_; +}; + +class SliceOp : public TensorOp { + public: + explicit SliceOp(std::vector indices) : indices_(std::move(indices)) {} + explicit SliceOp(Slice slice) : slice_(slice) {} + explicit SliceOp(bool all) : all_(all) {} + + ~SliceOp() override = default; + + void Print(std::ostream &out) const override { out << "SliceOp"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + private: + // only on of the following will be valid + // given indices to slice the Tensor. Empty vector if invalid. + std::vector indices_; + // Slice object. All start, stop and step are 0 if invalid. + Slice slice_; + // Flag to read all indcies in the dim. 
+ bool all_ = false; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_KERNELS_DATA_SLICE_OP_H_ diff --git a/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt b/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt index 583a732f7d..3d88d9989c 100644 --- a/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt @@ -10,12 +10,17 @@ add_library(kernels-image OBJECT pad_op.cc random_color_adjust_op.cc random_crop_decode_resize_op.cc + random_crop_and_resize_with_bbox_op.cc random_crop_and_resize_op.cc random_crop_op.cc + random_crop_with_bbox_op.cc random_horizontal_flip_op.cc + random_horizontal_flip_bbox_op.cc + bounding_box_augment_op.cc random_resize_op.cc random_rotation_op.cc random_vertical_flip_op.cc + random_vertical_flip_with_bbox_op.cc rescale_op.cc resize_bilinear_op.cc resize_op.cc diff --git a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc new file mode 100644 index 0000000000..04e00d878d --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc @@ -0,0 +1,78 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include "dataset/kernels/image/bounding_box_augment_op.h" +#include "dataset/kernels/image/resize_op.h" +#include "dataset/kernels/image/image_utils.h" +#include "dataset/core/cv_tensor.h" + +namespace mindspore { +namespace dataset { +const float BoundingBoxAugmentOp::kDefRatio = 0.3; + +BoundingBoxAugmentOp::BoundingBoxAugmentOp(std::shared_ptr transform, float ratio) + : ratio_(ratio), transform_(std::move(transform)) { + rnd_.seed(GetSeed()); +} + +Status BoundingBoxAugmentOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + BOUNDING_BOX_CHECK(input); // check if bounding boxes are valid + uint32_t num_of_boxes = input[1]->shape()[0]; + uint32_t num_to_aug = num_of_boxes * ratio_; // cast to int + std::vector boxes(num_of_boxes); + std::vector selected_boxes; + for (uint32_t i = 0; i < num_of_boxes; i++) boxes[i] = i; + // sample bboxes according to ratio picked by user + std::sample(boxes.begin(), boxes.end(), std::back_inserter(selected_boxes), num_to_aug, rnd_); + std::shared_ptr crop_out; + std::shared_ptr res_out; + std::shared_ptr input_restore = CVTensor::AsCVTensor(input[0]); + + for (uint32_t i = 0; i < num_to_aug; i++) { + uint32_t min_x = 0; + uint32_t min_y = 0; + uint32_t b_w = 0; + uint32_t b_h = 0; + // get the required items + input[1]->GetItemAt(&min_x, {selected_boxes[i], 0}); + input[1]->GetItemAt(&min_y, {selected_boxes[i], 1}); + input[1]->GetItemAt(&b_w, {selected_boxes[i], 2}); + input[1]->GetItemAt(&b_h, {selected_boxes[i], 3}); + Crop(input_restore, &crop_out, min_x, min_y, b_w, b_h); + // transform the cropped bbox region + transform_->Compute(crop_out, &res_out); + // place the transformed region back in the restored input + std::shared_ptr res_img = CVTensor::AsCVTensor(res_out); + // check if transformed crop is out of bounds of the box + if (res_img->mat().cols > b_w || res_img->mat().rows > b_h || res_img->mat().cols < b_w || + res_img->mat().rows < b_h) { + // 
if so, resize to fit in the box + std::shared_ptr resize_op = std::make_shared(b_h, b_w); + resize_op->Compute(std::static_pointer_cast(res_img), &res_out); + res_img = CVTensor::AsCVTensor(res_out); + } + res_img->mat().copyTo(input_restore->mat()(cv::Rect(min_x, min_y, res_img->mat().cols, res_img->mat().rows))); + } + (*output).push_back(std::move(std::static_pointer_cast(input_restore))); + (*output).push_back(input[1]); + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h new file mode 100644 index 0000000000..6c106f75dc --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h @@ -0,0 +1,61 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef DATASET_KERNELS_IMAGE_BOUNDING_BOX_AUGMENT_OP_H_ +#define DATASET_KERNELS_IMAGE_BOUNDING_BOX_AUGMENT_OP_H_ + +#include +#include +#include +#include +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/util/status.h" +#include "dataset/util/random.h" + +namespace mindspore { +namespace dataset { +class BoundingBoxAugmentOp : public TensorOp { + public: + // Default values, also used by python_bindings.cc + static const float kDefRatio; + + // Constructor for BoundingBoxAugmentOp + // @param std::shared_ptr transform transform: C++ opration to apply on select bounding boxes + // @param float ratio: ratio of bounding boxes to have the transform applied on + BoundingBoxAugmentOp(std::shared_ptr transform, float ratio); + + ~BoundingBoxAugmentOp() override = default; + + // Provide stream operator for displaying it + friend std::ostream &operator<<(std::ostream &out, const BoundingBoxAugmentOp &so) { + so.Print(out); + return out; + } + + void Print(std::ostream &out) const override { out << "BoundingBoxAugmentOp"; } + + Status Compute(const TensorRow &input, TensorRow *output) override; + + private: + float ratio_; + std::mt19937 rnd_; + std::shared_ptr transform_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_KERNELS_IMAGE_BOUNDING_BOX_AUGMENT_OP_H_ diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc index bf470173d9..bb88f991a4 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc +++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc @@ -16,6 +16,7 @@ #include "dataset/kernels/image/image_utils.h" #include #include +#include #include #include #include @@ -119,17 +120,14 @@ Status Resize(const std::shared_ptr &input, std::shared_ptr *out } } -bool HasJpegMagic(const unsigned char *data, size_t data_size) { +bool HasJpegMagic(const std::shared_ptr &input) { const unsigned char *kJpegMagic = (unsigned 
char *)"\xFF\xD8\xFF"; constexpr size_t kJpegMagicLen = 3; - return data_size >= kJpegMagicLen && memcmp(data, kJpegMagic, kJpegMagicLen) == 0; + return input->SizeInBytes() >= kJpegMagicLen && memcmp(input->GetBuffer(), kJpegMagic, kJpegMagicLen) == 0; } Status Decode(const std::shared_ptr &input, std::shared_ptr *output) { - if (input->GetMutableBuffer() == nullptr) { - RETURN_STATUS_UNEXPECTED("Tensor is nullptr"); - } - if (HasJpegMagic(input->GetMutableBuffer(), input->SizeInBytes())) { + if (HasJpegMagic(input)) { return JpegCropAndDecode(input, output); } else { return DecodeCv(input, output); @@ -283,7 +281,7 @@ Status JpegCropAndDecode(const std::shared_ptr &input, std::shared_ptrGetMutableBuffer(), input->SizeInBytes()); + JpegSetSource(&cinfo, input->GetBuffer(), input->SizeInBytes()); (void)jpeg_read_header(&cinfo, TRUE); RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo)); jpeg_calc_output_dimensions(&cinfo); @@ -312,7 +310,7 @@ Status JpegCropAndDecode(const std::shared_ptr &input, std::shared_ptr(ts, DataType(DataType::DE_UINT8)); const int buffer_size = output_tensor->SizeInBytes(); - JSAMPLE *buffer = static_cast(output_tensor->GetMutableBuffer()); + JSAMPLE *buffer = static_cast(reinterpret_cast(&(*output_tensor->begin()))); const int max_scanlines_to_read = skipped_scanlines + crop_h; // stride refers to output tensor, which has 3 components at most const int stride = crop_w * kOutNumComponents; @@ -376,8 +374,9 @@ Status HwcToChw(std::shared_ptr input, std::shared_ptr *output) *output = input; return Status::OK(); } - if (input_cv->shape().Size() != 3 && input_cv->shape()[2] != 3) { - RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels is not equal 3"); + if (input_cv->shape().Size() < 2 || input_cv->shape().Size() > 3 || + (input_cv->shape().Size() == 3 && input_cv->shape()[2] != 3 && input_cv->shape()[2] != 1)) { + RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3 nor 1"); } cv::Mat output_img; @@ 
-401,8 +400,8 @@ Status HwcToChw(std::shared_ptr input, std::shared_ptr *output) Status SwapRedAndBlue(std::shared_ptr input, std::shared_ptr *output) { try { std::shared_ptr input_cv = CVTensor::AsCVTensor(std::move(input)); - if (input_cv->shape().Size() != 3 && input_cv->shape()[2] != 3) { - RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels is not equal 3"); + if (input_cv->shape().Size() != 3 || input_cv->shape()[2] != 3) { + RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3"); } auto output_cv = std::make_shared(input_cv->shape(), input_cv->type()); RETURN_UNEXPECTED_IF_NULL(output_cv); @@ -422,7 +421,7 @@ Status CropAndResize(const std::shared_ptr &input, std::shared_ptrRank() != 3 && input_cv->Rank() != 2) { - RETURN_STATUS_UNEXPECTED("Ishape not or "); + RETURN_STATUS_UNEXPECTED("Shape not or "); } // image too large or too small if (crop_height == 0 || crop_width == 0 || target_height == 0 || target_height > crop_height * 1000 || @@ -541,8 +540,8 @@ Status AdjustBrightness(const std::shared_ptr &input, std::shared_ptrmat().data) { RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); } - if (input_cv->Rank() != 3 && input_cv->shape()[2] != 3) { - RETURN_STATUS_UNEXPECTED("Shape not or "); + if (input_cv->Rank() != 3 || input_cv->shape()[2] != 3) { + RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3"); } auto output_cv = std::make_shared(input_cv->shape(), input_cv->type()); RETURN_UNEXPECTED_IF_NULL(output_cv); @@ -561,8 +560,8 @@ Status AdjustContrast(const std::shared_ptr &input, std::shared_ptrmat().data) { RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); } - if (input_cv->Rank() != 3 && input_cv->shape()[2] != 3) { - RETURN_STATUS_UNEXPECTED("Shape not or "); + if (input_cv->Rank() != 3 || input_cv->shape()[2] != 3) { + RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3"); } cv::Mat gray, output_img; 
cv::cvtColor(input_img, gray, CV_RGB2GRAY); @@ -587,8 +586,8 @@ Status AdjustSaturation(const std::shared_ptr &input, std::shared_ptrmat().data) { RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); } - if (input_cv->Rank() != 3 && input_cv->shape()[2] != 3) { - RETURN_STATUS_UNEXPECTED("Shape not or "); + if (input_cv->Rank() != 3 || input_cv->shape()[2] != 3) { + RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3"); } auto output_cv = std::make_shared(input_cv->shape(), input_cv->type()); RETURN_UNEXPECTED_IF_NULL(output_cv); @@ -615,8 +614,8 @@ Status AdjustHue(const std::shared_ptr &input, std::shared_ptr * if (!input_cv->mat().data) { RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); } - if (input_cv->Rank() != 3 && input_cv->shape()[2] != 3) { - RETURN_STATUS_UNEXPECTED("Shape not or "); + if (input_cv->Rank() != 3 || input_cv->shape()[2] != 3) { + RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3"); } auto output_cv = std::make_shared(input_cv->shape(), input_cv->type()); RETURN_UNEXPECTED_IF_NULL(output_cv); @@ -644,7 +643,7 @@ Status Erase(const std::shared_ptr &input, std::shared_ptr *outp uint8_t fill_g, uint8_t fill_b) { try { std::shared_ptr input_cv = CVTensor::AsCVTensor(input); - if (input_cv->mat().data == nullptr || (input_cv->Rank() != 3 && input_cv->shape()[2] != 3)) { + if (input_cv->mat().data == nullptr || input_cv->Rank() != 3 || input_cv->shape()[2] != 3) { RETURN_STATUS_UNEXPECTED("bad CV Tensor input for erase"); } cv::Mat input_img = input_cv->mat(); @@ -726,5 +725,101 @@ Status Pad(const std::shared_ptr &input, std::shared_ptr *output RETURN_STATUS_UNEXPECTED("Unexpected error in pad"); } } +// -------- BBOX OPERATIONS -------- // +Status UpdateBBoxesForCrop(std::shared_ptr *bboxList, size_t *bboxCount, int CB_Xmin, int CB_Ymin, int CB_Xmax, + int CB_Ymax) { + // PASS LIST, COUNT OF BOUNDING BOXES + // Also PAss X/Y Min/Max of image cropped region - 
normally obtained from 'GetCropBox' functions + uint32_t bb_Xmin_t, bb_Ymin_t, bb_Xmax_t, bb_Ymax_t; + + std::vector correct_ind; + std::vector copyVals; + dsize_t bboxDim = (*bboxList)->shape()[1]; + bool retFlag = false; // true unless overlap found + for (int i = 0; i < *bboxCount; i++) { + int bb_Xmin, bb_Xmax, bb_Ymin, bb_Ymax; + RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&bb_Xmin_t, {i, 0})); + RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&bb_Ymin_t, {i, 1})); + RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&bb_Xmax_t, {i, 2})); + RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&bb_Ymax_t, {i, 3})); + bb_Xmin = bb_Xmin_t; + bb_Ymin = bb_Ymin_t; + bb_Xmax = bb_Xmax_t; + bb_Ymax = bb_Ymax_t; + bb_Xmax = bb_Xmin + bb_Xmax; + bb_Ymax = bb_Ymin + bb_Ymax; + // check for image / BB overlap + if (((bb_Xmin > CB_Xmax) || (bb_Ymin > CB_Ymax)) || ((bb_Xmax < CB_Xmin) || (bb_Ymax < CB_Ymin))) { + continue; // no overlap found + } + // Update this bbox and select it to move to the final output tensor + correct_ind.push_back(i); + // adjust BBox corners by bringing into new CropBox if beyond + // Also reseting/adjusting for boxes to lie within CropBox instead of Image - subtract CropBox Xmin/YMin + bb_Xmin = bb_Xmin - (std::min(0, (bb_Xmin - CB_Xmin)) + CB_Xmin); + bb_Xmax = bb_Xmax - (std::max(0, (bb_Xmax - CB_Xmax)) + CB_Xmin); + bb_Ymin = bb_Ymin - (std::min(0, (bb_Ymin - CB_Ymin)) + CB_Ymin); + bb_Ymax = bb_Ymax - (std::max(0, (bb_Ymax - CB_Ymax)) + CB_Ymin); + // reset min values and calculate width/height from Box corners + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 0}, static_cast(bb_Xmin))); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 1}, static_cast(bb_Ymin))); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 2}, static_cast(bb_Xmax - bb_Xmin))); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 3}, static_cast(bb_Ymax - bb_Ymin))); + } + // create new tensor and copy over bboxes still valid to the image + // bboxes outside of new cropped region are ignored - 
empty tensor returned in case of none + *bboxCount = correct_ind.size(); + uint32_t temp; + for (auto slice : correct_ind) { // for every index in the loop + for (int ix = 0; ix < bboxDim; ix++) { + RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&temp, {slice, ix})); + copyVals.push_back(temp); + } + } + std::shared_ptr retV; + RETURN_IF_NOT_OK(Tensor::CreateTensor(&retV, copyVals, TensorShape({static_cast(*bboxCount), bboxDim}))); + (*bboxList) = retV; // reset pointer + return Status::OK(); +} + +Status PadBBoxes(std::shared_ptr *bboxList, const size_t &bboxCount, int32_t pad_top, int32_t pad_left) { + for (int i = 0; i < bboxCount; i++) { + uint32_t xMin, yMin; + RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&xMin, {i, 0})); + RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&yMin, {i, 1})); + xMin += static_cast(pad_left); // should not be negative + yMin += static_cast(pad_top); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 0}, xMin)); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 1}, yMin)); + } + return Status::OK(); +} + +Status UpdateBBoxesForResize(const std::shared_ptr &bboxList, const size_t &bboxCount, int32_t target_width_, + int32_t target_height_, int orig_width, int orig_height) { + uint32_t bb_Xmin, bb_Ymin, bb_Xwidth, bb_Ywidth; + // cast to float to preseve fractional + double W_aspRatio = (target_width_ * 1.0) / (orig_width * 1.0); + double H_aspRatio = (target_height_ * 1.0) / (orig_height * 1.0); + for (int i = 0; i < bboxCount; i++) { + // for each bounding box + RETURN_IF_NOT_OK(bboxList->GetUnsignedIntAt(&bb_Xmin, {i, 0})); + RETURN_IF_NOT_OK(bboxList->GetUnsignedIntAt(&bb_Ymin, {i, 1})); + RETURN_IF_NOT_OK(bboxList->GetUnsignedIntAt(&bb_Xwidth, {i, 2})); + RETURN_IF_NOT_OK(bboxList->GetUnsignedIntAt(&bb_Ywidth, {i, 3})); + // update positions and widths + bb_Xmin = bb_Xmin * W_aspRatio; + bb_Ymin = bb_Ymin * H_aspRatio; + bb_Xwidth = bb_Xwidth * W_aspRatio; + bb_Ywidth = bb_Ywidth * H_aspRatio; + // reset bounding box values + 
RETURN_IF_NOT_OK(bboxList->SetItemAt({i, 0}, bb_Xmin)); + RETURN_IF_NOT_OK(bboxList->SetItemAt({i, 1}, bb_Ymin)); + RETURN_IF_NOT_OK(bboxList->SetItemAt({i, 2}, bb_Xwidth)); + RETURN_IF_NOT_OK(bboxList->SetItemAt({i, 3}, bb_Ywidth)); + } + return Status::OK(); +} + } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.h b/mindspore/ccsrc/dataset/kernels/image/image_utils.h index 394323974a..231ee77de0 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.h +++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.h @@ -96,7 +96,7 @@ Status Decode(const std::shared_ptr &input, std::shared_ptr *out Status DecodeCv(const std::shared_ptr &input, std::shared_ptr *output); -bool HasJpegMagic(const unsigned char *data, size_t data_size); +bool HasJpegMagic(const std::shared_ptr &input); void JpegSetSource(j_decompress_ptr c_info, const void *data, int64_t data_size); @@ -225,7 +225,39 @@ Status Erase(const std::shared_ptr &input, std::shared_ptr *outp Status Pad(const std::shared_ptr &input, std::shared_ptr *output, const int32_t &pad_top, const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types, uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0); + +// -------- BBOX OPERATIONS -------- // +// Updates and checks bounding boxes for new cropped region of image +// @param bboxList: A tensor contaning bounding box tensors +// @param bboxCount: total Number of bounding boxes - required within caller function to run update loop +// @param CB_Xmin: Image's CropBox Xmin coordinate +// @param CB_Xmin: Image's CropBox Ymin coordinate +// @param CB_Xmax: Image's CropBox Xmax coordinate - (Xmin + width) +// @param CB_Xmax: Image's CropBox Ymax coordinate - (Ymin + height) +Status UpdateBBoxesForCrop(std::shared_ptr *bboxList, size_t *bboxCount, int CB_Xmin, int CB_Ymin, int CB_Xmax, + int CB_Ymax); + +// Updates bounding boxes with required Top and Left padding 
+// Top and Left padding amounts required to adjust bboxs min X,Y values according to padding 'push' +// Top/Left since images 0,0 coordinate is taken from top left +// @param bboxList: A tensor contaning bounding box tensors +// @param bboxCount: total Number of bounding boxes - required within caller function to run update loop +// @param pad_top: Total amount of padding applied to image top +// @param pad_left: Total amount of padding applied to image left side +Status PadBBoxes(std::shared_ptr *bboxList, const size_t &bboxCount, int32_t pad_top, int32_t pad_left); + +// Updates bounding boxes for an Image Resize Operation - Takes in set of valid BBoxes +// For e.g those that remain after a crop +// @param bboxList: A tensor contaning bounding box tensors +// @param bboxCount: total Number of bounding boxes - required within caller function to run update loop +// @param bboxList: A tensor contaning bounding box tensors +// @param target_width_: required width of image post resize +// @param target_width_: required height of image post resize +// @param orig_width: current width of image pre resize +// @param orig_height: current height of image pre resize +Status UpdateBBoxesForResize(const std::shared_ptr &bboxList, const size_t &bboxCount, int32_t target_width_, + int32_t target_height_, int orig_width, int orig_height); + } // namespace dataset } // namespace mindspore - #endif // DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_ diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc index a3cf8cefb5..c5b5f20c63 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc @@ -35,8 +35,10 @@ RandomCropAndResizeOp::RandomCropAndResizeOp(int32_t target_height, int32_t targ : target_height_(target_height), target_width_(target_width), rnd_scale_(scale_lb, scale_ub), - rnd_aspect_(aspect_lb, 
aspect_ub), + rnd_aspect_(log(aspect_lb), log(aspect_ub)), interpolation_(interpolation), + aspect_lb_(aspect_lb), + aspect_ub_(aspect_ub), max_iter_(max_iter) { rnd_.seed(GetSeed()); } @@ -64,33 +66,42 @@ Status RandomCropAndResizeOp::OutputShape(const std::vector &inputs return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); } Status RandomCropAndResizeOp::GetCropBox(int h_in, int w_in, int *x, int *y, int *crop_height, int *crop_width) { - double scale, aspect; *crop_width = w_in; *crop_height = h_in; - bool crop_success = false; + CHECK_FAIL_RETURN_UNEXPECTED(w_in != 0, "Width is 0"); + CHECK_FAIL_RETURN_UNEXPECTED(h_in != 0, "Height is 0"); + CHECK_FAIL_RETURN_UNEXPECTED(aspect_lb_ > 0, "Aspect lower bound must be greater than zero"); for (int32_t i = 0; i < max_iter_; i++) { - scale = rnd_scale_(rnd_); - aspect = rnd_aspect_(rnd_); - *crop_width = static_cast(std::round(std::sqrt(h_in * w_in * scale / aspect))); - *crop_height = static_cast(std::round(*crop_width * aspect)); + double const sample_scale = rnd_scale_(rnd_); + // In case of non-symmetrical aspect ratios, use uniform distribution on a logarithmic sample_scale. + // Note rnd_aspect_ is already a random distribution of the input aspect ratio in logarithmic sample_scale. 
+ double const sample_aspect = exp(rnd_aspect_(rnd_)); + + *crop_width = static_cast(std::round(std::sqrt(h_in * w_in * sample_scale * sample_aspect))); + *crop_height = static_cast(std::round(*crop_width / sample_aspect)); if (*crop_width <= w_in && *crop_height <= h_in) { - crop_success = true; - break; + std::uniform_int_distribution<> rd_x(0, w_in - *crop_width); + std::uniform_int_distribution<> rd_y(0, h_in - *crop_height); + *x = rd_x(rnd_); + *y = rd_y(rnd_); + return Status::OK(); } } - if (!crop_success) { - CHECK_FAIL_RETURN_UNEXPECTED(w_in != 0, "Width is 0"); - aspect = static_cast(h_in) / w_in; - scale = rnd_scale_(rnd_); - *crop_width = static_cast(std::round(std::sqrt(h_in * w_in * scale / aspect))); - *crop_height = static_cast(std::round(*crop_width * aspect)); - *crop_height = (*crop_height > h_in) ? h_in : *crop_height; - *crop_width = (*crop_width > w_in) ? w_in : *crop_width; + double const img_aspect = static_cast(w_in) / h_in; + if (img_aspect < aspect_lb_) { + *crop_width = w_in; + *crop_height = static_cast(std::round(*crop_width / static_cast(aspect_lb_))); + } else { + if (img_aspect > aspect_ub_) { + *crop_height = h_in; + *crop_width = static_cast(std::round(*crop_height * static_cast(aspect_ub_))); + } else { + *crop_width = w_in; + *crop_height = h_in; + } } - std::uniform_int_distribution<> rd_x(0, w_in - *crop_width); - std::uniform_int_distribution<> rd_y(0, h_in - *crop_height); - *x = rd_x(rnd_); - *y = rd_y(rnd_); + *x = static_cast(std::round((w_in - *crop_width) / 2.0)); + *y = static_cast(std::round((h_in - *crop_height) / 2.0)); return Status::OK(); } } // namespace dataset diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h index 97ee9f6092..db805a9374 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h @@ -60,6 +60,8 @@ class 
RandomCropAndResizeOp : public TensorOp { std::mt19937 rnd_; InterpolationMode interpolation_; int32_t max_iter_; + double aspect_lb_; + double aspect_ub_; }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc new file mode 100644 index 0000000000..b820779ed1 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc @@ -0,0 +1,59 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "dataset/util/random.h" +#include "dataset/util/status.h" +#include "dataset/kernels/image/image_utils.h" +#include "dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" + +namespace mindspore { +namespace dataset { + +Status RandomCropAndResizeWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + BOUNDING_BOX_CHECK(input); + CHECK_FAIL_RETURN_UNEXPECTED(input[0]->shape().Size() >= 2, "The shape of input is abnormal"); + + (*output).push_back(nullptr); // init memory for return vector + (*output).push_back(nullptr); + (*output)[1] = std::move(input[1]); // move boxes over to output + + size_t bboxCount = input[1]->shape()[0]; // number of rows in bbox tensor + int h_in = input[0]->shape()[0]; + int w_in = input[0]->shape()[1]; + int x = 0; + int y = 0; + int crop_height = 0; + int crop_width = 0; + + RETURN_IF_NOT_OK(RandomCropAndResizeOp::GetCropBox(h_in, w_in, &x, &y, &crop_height, &crop_width)); + + int maxX = x + crop_width; // max dims of selected CropBox on image + int maxY = y + crop_height; + + RETURN_IF_NOT_OK(UpdateBBoxesForCrop(&(*output)[1], &bboxCount, x, y, maxX, maxY)); // IMAGE_UTIL + RETURN_IF_NOT_OK(CropAndResize(input[0], &(*output)[0], x, y, crop_height, crop_width, target_height_, target_width_, + interpolation_)); + + RETURN_IF_NOT_OK( + UpdateBBoxesForResize((*output)[1], bboxCount, target_width_, target_height_, crop_width, crop_height)); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h new file mode 100644 index 0000000000..9675d43933 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ +#define DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ + +#include "dataset/kernels/image/random_crop_and_resize_op.h" + +namespace mindspore { +namespace dataset { + +class RandomCropAndResizeWithBBoxOp : public RandomCropAndResizeOp { + public: + // Constructor for RandomCropAndResizeWithBBoxOp, with default value and passing to base class constructor + RandomCropAndResizeWithBBoxOp(int32_t target_height, int32_t target_width, float scale_lb = kDefScaleLb, + float scale_ub = kDefScaleUb, float aspect_lb = kDefAspectLb, + float aspect_ub = kDefAspectUb, InterpolationMode interpolation = kDefInterpolation, + int32_t max_iter = kDefMaxIter) + : RandomCropAndResizeOp(target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub, interpolation, + max_iter) {} + + ~RandomCropAndResizeWithBBoxOp() override = default; + + void Print(std::ostream &out) const override { + out << "RandomCropAndResizeWithBBox: " << RandomCropAndResizeOp::target_height_ << " " + << RandomCropAndResizeOp::target_width_; + } + + Status Compute(const TensorRow &input, TensorRow *output) override; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc index c11b5b5968..74aa91ea7e 
100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc @@ -31,7 +31,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr &input, s if (input == nullptr) { RETURN_STATUS_UNEXPECTED("input tensor is null"); } - if (!HasJpegMagic(input->GetMutableBuffer(), input->SizeInBytes())) { + if (!HasJpegMagic(input)) { DecodeOp op(true); std::shared_ptr decoded; RETURN_IF_NOT_OK(op.Compute(input, &decoded)); @@ -43,7 +43,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr &input, s jerr.pub.error_exit = JpegErrorExitCustom; try { jpeg_create_decompress(&cinfo); - JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes()); + JpegSetSource(&cinfo, input->GetBuffer(), input->SizeInBytes()); (void)jpeg_read_header(&cinfo, TRUE); jpeg_calc_output_dimensions(&cinfo); } catch (std::runtime_error &e) { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc index 7662c64cc4..110d769f26 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc @@ -48,44 +48,81 @@ RandomCropOp::RandomCropOp(int32_t crop_height, int32_t crop_width, int32_t pad_ rnd_.seed(GetSeed()); } -Status RandomCropOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - - // Apply padding first then crop - std::shared_ptr pad_image; +Status RandomCropOp::ImagePadding(const std::shared_ptr &input, std::shared_ptr *pad_image, + int32_t *t_pad_top, int32_t *t_pad_bottom, int32_t *t_pad_left, int32_t *t_pad_right, + int32_t *padded_image_w, int32_t *padded_image_h, bool *crop_further) { + *t_pad_top = pad_top_; + *t_pad_bottom = pad_bottom_; + *t_pad_left = pad_left_; + *t_pad_right = pad_right_; RETURN_IF_NOT_OK( - Pad(input, &pad_image, pad_top_, pad_bottom_, pad_left_, pad_right_, 
border_type_, fill_r_, fill_g_, fill_b_)); - CHECK_FAIL_RETURN_UNEXPECTED(pad_image->shape().Size() >= 2, "Abnormal shape"); - int32_t padded_image_h = pad_image->shape()[0]; - int32_t padded_image_w = pad_image->shape()[1]; - // no need to crop if same size - if (padded_image_h == crop_height_ && padded_image_w == crop_width_) { - *output = pad_image; + Pad(input, pad_image, pad_top_, pad_bottom_, pad_left_, pad_right_, border_type_, fill_r_, fill_g_, fill_b_)); + CHECK_FAIL_RETURN_UNEXPECTED((*pad_image)->shape().Size() >= 2, "Abnormal shape"); + + *padded_image_h = (*pad_image)->shape()[0]; + *padded_image_w = (*pad_image)->shape()[1]; + + if (*padded_image_h == crop_height_ && *padded_image_w == crop_width_) { + *crop_further = false; // no need for further crop return Status::OK(); - } - if (pad_if_needed_) { + } else if (pad_if_needed_) { // check the dimensions of the image for padding, if we do need padding, then we change the pad values - if (padded_image_h < crop_height_) { - RETURN_IF_NOT_OK(Pad(pad_image, &pad_image, crop_height_ - padded_image_h, crop_height_ - padded_image_h, 0, 0, + if (*padded_image_h < crop_height_) { + RETURN_IF_NOT_OK(Pad(*pad_image, pad_image, crop_height_ - *padded_image_h, crop_height_ - *padded_image_h, 0, 0, border_type_, fill_r_, fill_g_, fill_b_)); + + // update pad total above/below + t_pad_top += (crop_height_ - *padded_image_h); + t_pad_bottom += (crop_height_ - *padded_image_h); } - if (padded_image_w < crop_width_) { - RETURN_IF_NOT_OK(Pad(pad_image, &pad_image, 0, 0, crop_width_ - padded_image_w, crop_width_ - padded_image_w, + if (*padded_image_w < crop_width_) { + RETURN_IF_NOT_OK(Pad(*pad_image, pad_image, 0, 0, crop_width_ - *padded_image_w, crop_width_ - *padded_image_w, border_type_, fill_r_, fill_g_, fill_b_)); + // update pad total left/right + t_pad_left += (crop_width_ - *padded_image_w); + t_pad_right += (crop_width_ - *padded_image_w); } - padded_image_h = pad_image->shape()[0]; - padded_image_w = 
pad_image->shape()[1]; + *padded_image_h = (*pad_image)->shape()[0]; + *padded_image_w = (*pad_image)->shape()[1]; } - if (padded_image_h < crop_height_ || padded_image_w < crop_width_ || crop_height_ == 0 || crop_width_ == 0) { + + if (*padded_image_h < crop_height_ || *padded_image_w < crop_width_ || crop_height_ == 0 || crop_width_ == 0) { return Status(StatusCode::kShapeMisMatch, __LINE__, __FILE__, "Crop size is greater than the image dimensions or is zero."); } - // random top corner - int x = std::uniform_int_distribution(0, padded_image_w - crop_width_)(rnd_); - int y = std::uniform_int_distribution(0, padded_image_h - crop_height_)(rnd_); + return Status::OK(); +} + +void RandomCropOp::GenRandomXY(int *x, int *y, const int32_t &padded_image_w, const int32_t &padded_image_h) { + // GenCropPoints for cropping + *x = std::uniform_int_distribution(0, padded_image_w - crop_width_)(rnd_); + *y = std::uniform_int_distribution(0, padded_image_h - crop_height_)(rnd_); +} + +Status RandomCropOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + + // Apply padding first then crop + std::shared_ptr pad_image; + int32_t t_pad_top, t_pad_bottom, t_pad_left, t_pad_right; + int32_t padded_image_w; + int32_t padded_image_h; + bool crop_further = true; // whether image needs further cropping based on new size & requirements + + RETURN_IF_NOT_OK( // error code sent back directly + ImagePadding(input, &pad_image, &t_pad_top, &t_pad_bottom, &t_pad_left, &t_pad_right, &padded_image_w, + &padded_image_h, &crop_further)); + if (!crop_further) { + *output = pad_image; + return Status::OK(); + } + + int x, y; + GenRandomXY(&x, &y, padded_image_w, padded_image_h); return Crop(pad_image, output, x, y, crop_width_, crop_height_); } + Status RandomCropOp::OutputShape(const std::vector &inputs, std::vector &outputs) { RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs)); outputs.clear(); diff --git 
a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.h b/mindspore/ccsrc/dataset/kernels/image/random_crop_op.h index d4ec49cd7b..cd43ec1efb 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.h +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_op.h @@ -50,11 +50,33 @@ class RandomCropOp : public TensorOp { void Print(std::ostream &out) const override { out << "RandomCropOp: " << crop_height_ << " " << crop_width_; } Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + // Function breaks out the compute function's image padding functionality and makes available to other Ops + // Using this class as a base - restructrued to allow for RandomCropWithBBox Augmentation Op + // @param input: Input is the original Image + // @param pad_image: Pointer to new Padded image + // @param t_pad_top: Total Top Padding - Based on input and value calculated in function if required + // @param t_pad_bottom: Total bottom Padding - Based on input and value calculated in function if required + // @param t_pad_left: Total left Padding - Based on input and value calculated in function if required + // @param t_pad_right: Total right Padding - Based on input and value calculated in function if required + // @param padded_image_w: Final Width of the 'pad_image' + // @param padded_image_h: Final Height of the 'pad_image' + // @param crop_further: Whether image required cropping after padding - False if new padded image matches required + // dimensions + Status ImagePadding(const std::shared_ptr &input, std::shared_ptr *pad_image, int32_t *t_pad_top, + int32_t *t_pad_bottom, int32_t *t_pad_left, int32_t *t_pad_right, int32_t *padded_image_w, + int32_t *padded_image_h, bool *crop_further); + + // Function breaks X,Y generation functionality out of original compute function and makes available to other Ops + void GenRandomXY(int *x, int *y, const int32_t &padded_image_w, const int32_t &padded_image_h); + Status OutputShape(const std::vector 
&inputs, std::vector &outputs) override; - private: + protected: int32_t crop_height_ = 0; int32_t crop_width_ = 0; + + private: int32_t pad_top_ = 0; int32_t pad_bottom_ = 0; int32_t pad_left_ = 0; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.cc new file mode 100644 index 0000000000..2be37f1da3 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.cc @@ -0,0 +1,67 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include "dataset/kernels/image/random_crop_with_bbox_op.h" +#include "dataset/kernels/image/image_utils.h" +#include "dataset/util/random.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { +Status RandomCropWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + BOUNDING_BOX_CHECK(input); + + std::shared_ptr pad_image; + int32_t t_pad_top, t_pad_bottom, t_pad_left, t_pad_right; + size_t boxCount = input[1]->shape()[0]; // number of rows + + int32_t padded_image_h; + int32_t padded_image_w; + + (*output).push_back(nullptr); + (*output).push_back(nullptr); + (*output)[1] = std::move(input[1]); // since some boxes may be removed + + bool crop_further = true; // Whether further cropping will be required or not, true unless required size matches + RETURN_IF_NOT_OK( // Error passed back to caller + RandomCropOp::ImagePadding(input[0], &pad_image, &t_pad_top, &t_pad_bottom, &t_pad_left, &t_pad_right, + &padded_image_w, &padded_image_h, &crop_further)); + + // update bounding boxes with new values based on relevant image padding + if (t_pad_left || t_pad_bottom) { + RETURN_IF_NOT_OK(PadBBoxes(&(*output)[1], boxCount, t_pad_left, t_pad_top)); + } + if (!crop_further) { + // no further cropping required + (*output)[0] = pad_image; + (*output)[1] = std::move(input[1]); + return Status::OK(); + } + + int x, y; + RandomCropOp::GenRandomXY(&x, &y, padded_image_w, padded_image_h); + int maxX = x + RandomCropOp::crop_width_; // max dims of selected CropBox on image + int maxY = y + RandomCropOp::crop_height_; + RETURN_IF_NOT_OK(UpdateBBoxesForCrop(&(*output)[1], &boxCount, x, y, maxX, maxY)); + return Crop(pad_image, &(*output)[0], x, y, RandomCropOp::crop_width_, RandomCropOp::crop_height_); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.h 
b/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.h new file mode 100644 index 0000000000..88a58d3557 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_KERNELS_IMAGE_RANDOM_CROP_WITH_BBOX_OP_H_ +#define DATASET_KERNELS_IMAGE_RANDOM_CROP_WITH_BBOX_OP_H_ + +#include +#include + +#include "dataset/kernels/image/random_crop_op.h" + +namespace mindspore { +namespace dataset { +class RandomCropWithBBoxOp : public RandomCropOp { + public: + // Constructor for RandomCropWithBBoxOp, with default value and passing to base class constructor + RandomCropWithBBoxOp(int32_t crop_height, int32_t crop_width, int32_t pad_top = kDefPadTop, + int32_t pad_bottom = kDefPadBottom, int32_t pad_left = kDefPadLeft, + int32_t pad_right = kDefPadRight, BorderType border_types = kDefBorderType, + bool pad_if_needed = kDefPadIfNeeded, uint8_t fill_r = kDefFillR, uint8_t fill_g = kDefFillG, + uint8_t fill_b = kDefFillB) + : RandomCropOp(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right, border_types, pad_if_needed, + fill_r, fill_g, fill_b) {} + + ~RandomCropWithBBoxOp() override = default; + + void Print(std::ostream &out) const override { + out << "RandomCropWithBBoxOp: " << RandomCropOp::crop_height_ << " " << RandomCropOp::crop_width_; + } + + Status Compute(const TensorRow 
&input, TensorRow *output) override; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_KERNELS_IMAGE_RANDOM_CROP_WITH_BBOX_OP_H_ diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.cc new file mode 100644 index 0000000000..5a5c632e81 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.cc @@ -0,0 +1,60 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "dataset/kernels/image/random_horizontal_flip_bbox_op.h" +#include "dataset/kernels/image/image_utils.h" +#include "dataset/util/status.h" +#include "dataset/core/cv_tensor.h" +#include "dataset/core/pybind_support.h" + +namespace mindspore { +namespace dataset { +const float RandomHorizontalFlipWithBBoxOp::kDefProbability = 0.5; + +Status RandomHorizontalFlipWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + BOUNDING_BOX_CHECK(input); + if (distribution_(rnd_)) { + // To test bounding boxes algorithm, create random bboxes from image dims + size_t num_of_boxes = input[1]->shape()[0]; // set to give number of bboxes + float img_center = (input[0]->shape()[1] / 2.); // get the center of the image + + for (int i = 0; i < num_of_boxes; i++) { + uint32_t b_w = 0; // bounding box width + uint32_t min_x = 0; + // get the required items + input[1]->GetItemAt(&min_x, {i, 0}); + input[1]->GetItemAt(&b_w, {i, 2}); + // do the flip + float diff = img_center - min_x; // get distance from min_x to center + uint32_t refl_min_x = diff + img_center; // get reflection of min_x + uint32_t new_min_x = refl_min_x - b_w; // subtract from the reflected min_x to get the new one + input[1]->SetItemAt({i, 0}, new_min_x); + } + (*output).push_back(nullptr); + (*output).push_back(nullptr); + // move input to output pointer of bounding boxes + (*output)[1] = std::move(input[1]); + // perform HorizontalFlip on the image + std::shared_ptr input_cv = CVTensor::AsCVTensor(std::move(input[0])); + return HorizontalFlip(std::static_pointer_cast(input_cv), &(*output)[0]); + } + *output = input; + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.h b/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.h new file mode 100644 index 0000000000..06c96e11ae --- /dev/null +++ 
b/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.h @@ -0,0 +1,62 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ +#define DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ + +#include +#include +#include +#include +#include +#include +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/util/random.h" +#include "dataset/util/status.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl_bind.h" + +namespace mindspore { +namespace dataset { +class RandomHorizontalFlipWithBBoxOp : public TensorOp { + public: + // Default values, also used by python_bindings.cc + static const float kDefProbability; + + explicit RandomHorizontalFlipWithBBoxOp(float probability = kDefProbability) : distribution_(probability) { + rnd_.seed(GetSeed()); + } + + ~RandomHorizontalFlipWithBBoxOp() override = default; + + // Provide stream operator for displaying it + friend std::ostream &operator<<(std::ostream &out, const RandomHorizontalFlipWithBBoxOp &so) { + so.Print(out); + return out; + } + + void Print(std::ostream &out) const override { out << "RandomHorizontalFlipWithBBoxOp"; } + + Status Compute(const TensorRow &input, TensorRow *output) override; + + private: + std::mt19937 rnd_; + std::bernoulli_distribution distribution_; +}; +} // namespace dataset +} // namespace mindspore + 
+#endif // DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ diff --git a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc new file mode 100644 index 0000000000..d88c009559 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc @@ -0,0 +1,58 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "dataset/util/status.h" +#include "dataset/kernels/image/image_utils.h" +#include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h" + +namespace mindspore { +namespace dataset { +const float RandomVerticalFlipWithBBoxOp::kDefProbability = 0.5; +Status RandomVerticalFlipWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + BOUNDING_BOX_CHECK(input); + + if (distribution_(rnd_)) { + dsize_t imHeight = input[0]->shape()[0]; + size_t boxCount = input[1]->shape()[0]; // number of rows in tensor + + // one time allocation -> updated in the loop + // type defined based on VOC test dataset + for (int i = 0; i < boxCount; i++) { + uint32_t boxCorner_y = 0; + uint32_t boxHeight = 0; + uint32_t newBoxCorner_y = 0; + RETURN_IF_NOT_OK(input[1]->GetUnsignedIntAt(&boxCorner_y, {i, 1})); // get min y of bbox + RETURN_IF_NOT_OK(input[1]->GetUnsignedIntAt(&boxHeight, {i, 3})); // get height of bbox + + // subtract (curCorner + height) from (max) for new Corner position + newBoxCorner_y = (imHeight - 1) - (boxCorner_y + boxHeight); + RETURN_IF_NOT_OK(input[1]->SetItemAt({i, 1}, newBoxCorner_y)); + } + + (*output).push_back(nullptr); + (*output).push_back(nullptr); + (*output)[1] = std::move(input[1]); + + return VerticalFlip(input[0], &(*output)[0]); + } + *output = input; + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.h b/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.h new file mode 100644 index 0000000000..4764cc2b75 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.h @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_KERNELS_IMAGE_RANDOM_VERTICAL_FLIP_WITH_BBOX_OP_H_ +#define DATASET_KERNELS_IMAGE_RANDOM_VERTICAL_FLIP_WITH_BBOX_OP_H_ + +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/util/status.h" +#include "dataset/util/random.h" + +namespace mindspore { +namespace dataset { +class RandomVerticalFlipWithBBoxOp : public TensorOp { + public: + // Default values, also used by python_bindings.cc + static const float kDefProbability; + // Constructor for RandomVerticalFlipWithBBoxOp + // @param probability: Probablity of Image flipping, 0.5 by default + explicit RandomVerticalFlipWithBBoxOp(float probability = kDefProbability) : distribution_(probability) { + rnd_.seed(GetSeed()); + } + + ~RandomVerticalFlipWithBBoxOp() override = default; + + void Print(std::ostream &out) const override { out << "RandomVerticalFlipWithBBoxOp"; } + + Status Compute(const TensorRow &input, TensorRow *output) override; + + private: + std::mt19937 rnd_; + std::bernoulli_distribution distribution_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_KERNELS_IMAGE_RANDOM_VERTICAL_FLIP_WITH_BBOX_OP_H_ diff --git a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc index 147955ebac..7889b3b157 100644 --- a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc +++ b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc @@ -27,8 +27,7 @@ UniformAugOp::UniformAugOp(std::vector> op_list, int32 } // compute method to 
apply uniformly random selected augmentations from a list -Status UniformAugOp::Compute(const std::vector> &input, - std::vector> *output) { +Status UniformAugOp::Compute(const TensorRow &input, TensorRow *output) { IO_CHECK_VECTOR(input, output); // randomly select ops to be applied diff --git a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h index 605f510746..824898ba2d 100644 --- a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h +++ b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h @@ -44,8 +44,7 @@ class UniformAugOp : public TensorOp { // Overrides the base class compute function // @return Status - The error code return - Status Compute(const std::vector> &input, - std::vector> *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; private: int32_t num_ops_; diff --git a/mindspore/ccsrc/dataset/kernels/py_func_op.cc b/mindspore/ccsrc/dataset/kernels/py_func_op.cc index c9e5d5b169..0a6a1452b5 100644 --- a/mindspore/ccsrc/dataset/kernels/py_func_op.cc +++ b/mindspore/ccsrc/dataset/kernels/py_func_op.cc @@ -24,8 +24,7 @@ namespace mindspore { namespace dataset { -Status PyFuncOp::Compute(const std::vector> &input, - std::vector> *output) { +Status PyFuncOp::Compute(const TensorRow &input, TensorRow *output) { IO_CHECK_VECTOR(input, output); Status ret = Status(StatusCode::kOK, "PyFunc Call Succeed"); { diff --git a/mindspore/ccsrc/dataset/kernels/py_func_op.h b/mindspore/ccsrc/dataset/kernels/py_func_op.h index af61f6ac55..a50aceafbb 100644 --- a/mindspore/ccsrc/dataset/kernels/py_func_op.h +++ b/mindspore/ccsrc/dataset/kernels/py_func_op.h @@ -36,8 +36,7 @@ class __attribute__((visibility("hidden"))) PyFuncOp : public TensorOp { uint32_t NumOutput() override { return 0; } // Compute function for n-n mapping. 
- Status Compute(const std::vector> &input, - std::vector> *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; private: py::function py_func_ptr_; diff --git a/mindspore/ccsrc/dataset/kernels/tensor_op.cc b/mindspore/ccsrc/dataset/kernels/tensor_op.cc index 390dd42a71..92aef8dc9e 100644 --- a/mindspore/ccsrc/dataset/kernels/tensor_op.cc +++ b/mindspore/ccsrc/dataset/kernels/tensor_op.cc @@ -37,8 +37,7 @@ Status TensorOp::Compute(const std::shared_ptr &input, std::shared_ptr> &input, - std::vector> *output) { +Status TensorOp::Compute(const TensorRow &input, TensorRow *output) { IO_CHECK_VECTOR(input, output); if (OneToOne()) { output->resize(1); diff --git a/mindspore/ccsrc/dataset/kernels/tensor_op.h b/mindspore/ccsrc/dataset/kernels/tensor_op.h index 73fba4e28d..293d4a4f99 100644 --- a/mindspore/ccsrc/dataset/kernels/tensor_op.h +++ b/mindspore/ccsrc/dataset/kernels/tensor_op.h @@ -21,6 +21,7 @@ #include #include "dataset/core/tensor.h" +#include "dataset/core/tensor_row.h" #include "dataset/util/status.h" #define IO_CHECK(input, output) \ @@ -42,6 +43,40 @@ } \ } while (false) +#define BOUNDING_BOX_CHECK(input) \ + do { \ + if (input[1]->shape().Size() < 2) { \ + return Status(StatusCode::kBoundingBoxInvalidShape, __LINE__, __FILE__, \ + "Bounding boxes shape should have at least two dims"); \ + } \ + uint32_t num_of_features = input[1]->shape()[1]; \ + if (num_of_features < 4) { \ + return Status(StatusCode::kBoundingBoxInvalidShape, __LINE__, __FILE__, \ + "Bounding boxes should be have at least 4 features"); \ + } \ + uint32_t num_of_boxes = input[1]->shape()[0]; \ + uint32_t img_h = input[0]->shape()[0]; \ + uint32_t img_w = input[0]->shape()[1]; \ + for (uint32_t i = 0; i < num_of_boxes; i++) { \ + uint32_t min_x = 0; \ + uint32_t min_y = 0; \ + uint32_t b_w = 0; \ + uint32_t b_h = 0; \ + input[1]->GetItemAt(&min_x, {i, 0}); \ + input[1]->GetItemAt(&min_y, {i, 1}); \ + input[1]->GetItemAt(&b_w, {i, 2}); \ + 
input[1]->GetItemAt(&b_h, {i, 3}); \ + if ((min_x + b_w > img_w) || (min_y + b_h > img_h)) { \ + return Status(StatusCode::kBoundingBoxOutOfBounds, __LINE__, __FILE__, \ + "At least one of the bounding boxes is out of bounds of the image."); \ + } \ + if (static_cast(min_x) < 0 || static_cast(min_y) < 0) { \ + return Status(StatusCode::kBoundingBoxOutOfBounds, __LINE__, __FILE__, \ + "At least one of the bounding boxes has negative min_x or min_y."); \ + } \ + } \ + } while (false) + namespace mindspore { namespace dataset { // A class that does a computation on a Tensor @@ -75,8 +110,7 @@ class TensorOp { // @param input is a vector of shared_ptr to Tensor (pass by const reference). // @param output is the address to an empty vector of shared_ptr to Tensor. // @return Status - virtual Status Compute(const std::vector> &input, - std::vector> *output); + virtual Status Compute(const TensorRow &input, TensorRow *output); // Returns true oif the TensorOp takes one input and returns one output. 
// @return true/false diff --git a/mindspore/ccsrc/dataset/text/kernels/CMakeLists.txt b/mindspore/ccsrc/dataset/text/kernels/CMakeLists.txt index 87d3dbad34..449bb93d8b 100644 --- a/mindspore/ccsrc/dataset/text/kernels/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/text/kernels/CMakeLists.txt @@ -1,7 +1,23 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) +if (NOT (CMAKE_SYSTEM_NAME MATCHES "Windows")) + set(ICU_DEPEND_FILES + basic_tokenizer_op.cc + bert_tokenizer_op.cc + case_fold_op.cc + normalize_utf8_op.cc + regex_replace_op.cc + regex_tokenizer_op.cc + unicode_script_tokenizer_op.cc + whitespace_tokenizer_op.cc) +endif() add_library(text-kernels OBJECT lookup_op.cc jieba_tokenizer_op.cc unicode_char_tokenizer_op.cc + ngram_op.cc + wordpiece_tokenizer_op.cc + truncate_sequence_pair_op.cc + to_number_op.cc + ${ICU_DEPEND_FILES} ) diff --git a/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.cc b/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.cc new file mode 100644 index 0000000000..1128990b44 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.cc @@ -0,0 +1,94 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "dataset/text/kernels/basic_tokenizer_op.h" +#include +#include +#include +#include +#include + +namespace mindspore { +namespace dataset { +const bool BasicTokenizerOp::kDefLowerCase = false; +const bool BasicTokenizerOp::kDefKeepWhitespace = false; +const NormalizeForm BasicTokenizerOp::kDefNormalizationForm = NormalizeForm::kNone; +const bool BasicTokenizerOp::kDefPreserveUnusedToken = true; +const char BasicTokenizerOp::kCommonPattern[] = + "[!-/]" + "|[:-@]" + "|[\\[-`]" + "|[{-~]" + "|[\\p{P}]" + "|[\\x{4E00}-\\x{9FFF}]" + "|[\\x{3400}-\\x{4DBF}]" + "|[\\x{20000}-\\x{2A6DF}]" + "|[\\x{2A700}-\\x{2B73F}]" + "|[\\x{2B740}-\\x{2B81F}]" + "|[\\x{2B820}-\\x{2CEAF}]" + "|[\\x{F900}-\\x{FAFF}]" + "|[\\x{2F800}-\\x{2FA1F}]"; +const char BasicTokenizerOp::kUnusedPattern[] = "\\[CLS\\]|\\[SEP\\]|\\[UNK\\]|\\[PAD\\]|\\[MASK\\]|"; + +BasicTokenizerOp::BasicTokenizerOp(bool lower_case, bool keep_whitespace, NormalizeForm normalization_form, + bool preserve_unused_token) + : lower_case_(lower_case), + keep_whitespace_(keep_whitespace), + preserve_unused_token_(preserve_unused_token), + case_fold_(std::make_unique()), + nfd_normalize_(std::make_unique(NormalizeForm::kNfd)), + normalization_form_(normalization_form), + common_normalize_(std::make_unique(normalization_form)), + replace_accent_chars_(std::make_unique("\\p{Mn}", "")), + replace_control_chars_(std::make_unique("\\p{Cc}|\\p{Cf}", " ")) { + std::string delim_pattern = std::string("\\s+|") + kCommonPattern; + std::string keep_delim_pattern; + if (keep_whitespace_) { + keep_delim_pattern = delim_pattern; + } else { + keep_delim_pattern = kCommonPattern; + } + if (preserve_unused_token_) { + keep_delim_pattern = kUnusedPattern + keep_delim_pattern; + delim_pattern = kUnusedPattern + delim_pattern; + } + regex_tokenizer_ = std::make_unique(delim_pattern, keep_delim_pattern); +} + +Status BasicTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + if 
(input->Rank() != 0 || input->type() != DataType::DE_STRING) { + RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); + } + std::shared_ptr cur_input; + std::shared_ptr processed_tensor; + if (lower_case_) { + // to lower case + RETURN_IF_NOT_OK(case_fold_->Compute(input, &processed_tensor)); + cur_input = processed_tensor; + // strip accent characters + RETURN_IF_NOT_OK(nfd_normalize_->Compute(cur_input, &processed_tensor)); + cur_input = processed_tensor; + RETURN_IF_NOT_OK(replace_accent_chars_->Compute(cur_input, &processed_tensor)); + } else { + RETURN_IF_NOT_OK(common_normalize_->Compute(input, &processed_tensor)); + } + // strip control characters + cur_input = processed_tensor; + RETURN_IF_NOT_OK(replace_control_chars_->Compute(cur_input, &processed_tensor)); + return regex_tokenizer_->Compute(processed_tensor, output); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.h b/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.h new file mode 100644 index 0000000000..a37e841573 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.h @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_TEXT_KERNELS_BASIC_TOKENIZER_OP_H_ +#define DATASET_TEXT_KERNELS_BASIC_TOKENIZER_OP_H_ +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/text/kernels/case_fold_op.h" +#include "dataset/text/kernels/normalize_utf8_op.h" +#include "dataset/text/kernels/regex_replace_op.h" +#include "dataset/text/kernels/regex_tokenizer_op.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +class BasicTokenizerOp : public TensorOp { + public: + static const bool kDefLowerCase; + static const bool kDefKeepWhitespace; + static const NormalizeForm kDefNormalizationForm; + static const bool kDefPreserveUnusedToken; + explicit BasicTokenizerOp(bool lower_case = kDefLowerCase, bool keep_whitespace = kDefKeepWhitespace, + NormalizeForm normalization_form = kDefNormalizationForm, + bool preserve_unused_token = kDefPreserveUnusedToken); + + ~BasicTokenizerOp() override = default; + + void Print(std::ostream &out) const override { out << "BasicTokenizerOp"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + private: + static const char kCommonPattern[]; + static const char kUnusedPattern[]; + bool lower_case_; + bool keep_whitespace_; + NormalizeForm normalization_form_; + bool preserve_unused_token_; + std::unique_ptr case_fold_; + std::unique_ptr nfd_normalize_; + std::unique_ptr common_normalize_; + std::unique_ptr replace_accent_chars_; + std::unique_ptr replace_control_chars_; + std::unique_ptr regex_tokenizer_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_TEXT_KERNELS_BASIC_TOKENIZER_OP_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/relu_gpu_kernel.cc b/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.cc similarity index 55% rename from mindspore/ccsrc/kernel/gpu/nn/relu_gpu_kernel.cc rename to mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.cc index d4cefc73ca..2b68a5accb 100644 --- 
a/mindspore/ccsrc/kernel/gpu/nn/relu_gpu_kernel.cc +++ b/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,14 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#include "kernel/gpu/nn/relu_gpu_kernel.h" - +#include "dataset/text/kernels/bert_tokenizer_op.h" namespace mindspore { -namespace kernel { -MS_REG_GPU_KERNEL_ONE(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - ReLUGpuFwdKernel, float) -MS_REG_GPU_KERNEL_ONE(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), - ReLUGpuFwdKernel, half) -} // namespace kernel +namespace dataset { +Status BertTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + std::shared_ptr basic_tensor; + RETURN_IF_NOT_OK(basic_tokenizer_.Compute(input, &basic_tensor)); + RETURN_IF_NOT_OK(wordpiece_tokenizer_.Compute(basic_tensor, output)); + return Status::OK(); +} +} // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.h b/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.h new file mode 100644 index 0000000000..660fdc7ba5 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.h @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_TEXT_KERNELS_BERT_TOKENIZER_OP_H_ +#define DATASET_TEXT_KERNELS_BERT_TOKENIZER_OP_H_ +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/text/kernels/basic_tokenizer_op.h" +#include "dataset/text/kernels/wordpiece_tokenizer_op.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { +class BertTokenizerOp : public TensorOp { + public: + explicit BertTokenizerOp(const std::shared_ptr &vocab, + const std::string &suffix_indicator = WordpieceTokenizerOp::kDefSuffixIndicator, + const int &max_bytes_per_token = WordpieceTokenizerOp::kDefMaxBytesPerToken, + const std::string &unknown_token = WordpieceTokenizerOp::kDefUnknownToken, + bool lower_case = BasicTokenizerOp::kDefLowerCase, + bool keep_whitespace = BasicTokenizerOp::kDefKeepWhitespace, + NormalizeForm normalization_form = BasicTokenizerOp::kDefNormalizationForm, + bool preserve_unused_token = BasicTokenizerOp::kDefPreserveUnusedToken) + : wordpiece_tokenizer_(vocab, suffix_indicator, max_bytes_per_token, unknown_token), + basic_tokenizer_(lower_case, keep_whitespace, normalization_form, preserve_unused_token) {} + + ~BertTokenizerOp() override = default; + + void Print(std::ostream &out) const override { out << "BertTokenizerOp"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + private: + WordpieceTokenizerOp wordpiece_tokenizer_; + BasicTokenizerOp basic_tokenizer_; +}; +} // namespace dataset +} // namespace mindspore +#endif // 
DATASET_TEXT_KERNELS_BERT_TOKENIZER_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/kernels/case_fold_op.cc b/mindspore/ccsrc/dataset/text/kernels/case_fold_op.cc new file mode 100644 index 0000000000..d935608efd --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/case_fold_op.cc @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "dataset/text/kernels/case_fold_op.h" +#include +#include +#include +#include +#include + +#include "unicode/errorcode.h" +#include "unicode/normalizer2.h" +#include "unicode/utypes.h" + +namespace mindspore { +namespace dataset { + +Status CaseFoldOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + icu::ErrorCode error; + const icu::Normalizer2 *nfkc_case_fold = icu::Normalizer2::getNFKCCasefoldInstance(error); + CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCCasefoldInstance failed."); + std::vector strs(input->Size()); + int i = 0; + for (auto iter = input->begin(); iter != input->end(); iter++) { + icu::StringByteSink sink(&strs[i++]); + nfkc_case_fold->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error); + CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "normalizeUTF8 failed."); + } + *output = std::make_shared(std::move(strs), input->shape()); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git 
a/mindspore/ccsrc/dataset/text/kernels/case_fold_op.h b/mindspore/ccsrc/dataset/text/kernels/case_fold_op.h new file mode 100644 index 0000000000..d1b5ba53f1 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/case_fold_op.h @@ -0,0 +1,39 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_TEXT_KERNELS_CASE_FOLD_OP_H_ +#define DATASET_TEXT_KERNELS_CASE_FOLD_OP_H_ +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +class CaseFoldOp : public TensorOp { + public: + CaseFoldOp() {} + + ~CaseFoldOp() override = default; + + void Print(std::ostream &out) const override { out << "CaseFoldOp"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_TEXT_KERNELS_CASE_FOLD_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.cc b/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.cc index 16f9409645..de1d915fbb 100644 --- a/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.cc +++ b/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.cc @@ -29,6 +29,7 @@ JiebaTokenizerOp::JiebaTokenizerOp(const std::string &hmm_path, const std::strin } Status JiebaTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + 
IO_CHECK(input, output); RETURN_UNEXPECTED_IF_NULL(jieba_parser_); if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { diff --git a/mindspore/ccsrc/dataset/text/kernels/lookup_op.cc b/mindspore/ccsrc/dataset/text/kernels/lookup_op.cc index d4661ea16b..07cf7aef5c 100644 --- a/mindspore/ccsrc/dataset/text/kernels/lookup_op.cc +++ b/mindspore/ccsrc/dataset/text/kernels/lookup_op.cc @@ -24,6 +24,7 @@ LookupOp::LookupOp(std::shared_ptr vocab, WordIdType default_id) : vocab_(vocab), default_id_(default_id), type_(DataType("int32")) {} Status LookupOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); RETURN_UNEXPECTED_IF_NULL(vocab_); CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "None String Tensor"); std::vector word_ids; diff --git a/mindspore/ccsrc/dataset/text/kernels/lookup_op.h b/mindspore/ccsrc/dataset/text/kernels/lookup_op.h index 58dea21d37..dad99c3241 100644 --- a/mindspore/ccsrc/dataset/text/kernels/lookup_op.h +++ b/mindspore/ccsrc/dataset/text/kernels/lookup_op.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef DATASET_NLP_KERNELS_LOOKUP_OP_H_ -#define DATASET_NLP_KERNELS_LOOKUP_OP_H_ +#ifndef DATASET_TEXT_KERNELS_LOOKUP_OP_H_ +#define DATASET_TEXT_KERNELS_LOOKUP_OP_H_ #include #include @@ -33,7 +33,7 @@ class LookupOp : public TensorOp { // constructor for lookup, takes in a vocab object // @param std::shared_ptr vocab - // @param WordIdType default_id, id to lookup if a word is not in vocab - explicit LookupOp(std::shared_ptr vocab, WordIdType default_id = Vocab::kSpecialTokens::unk); + explicit LookupOp(std::shared_ptr vocab, WordIdType default_id = 1); ~LookupOp() = default; @@ -61,4 +61,4 @@ class LookupOp : public TensorOp { } // namespace dataset } // namespace mindspore -#endif // DATASET_NLP_KERNELS_LOOKUP_OP_H_ +#endif // DATASET_TEXT_KERNELS_LOOKUP_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/kernels/ngram_op.cc b/mindspore/ccsrc/dataset/text/kernels/ngram_op.cc new file mode 100644 index 0000000000..bbe449a89a --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/ngram_op.cc @@ -0,0 +1,96 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "dataset/text/kernels/ngram_op.h" + +#include +#include +#include +#include + +namespace mindspore { +namespace dataset { + +NgramOp::NgramOp(const std::vector &ngrams, int32_t l_len, int32_t r_len, const std::string &l_pad, + const std::string &r_pad, const std::string &separator) + : ngrams_(ngrams), + l_len_(l_len), + r_len_(r_len), + l_pad_with_sp_(l_pad + separator), + r_pad_with_sp_(r_pad + separator), + separator_(separator) {} + +Status NgramOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING && input->Rank() == 1, "Not a 1-D str Tensor"); + std::vector offsets; // offsets for each str + std::vector res; // holds the result of ngrams + std::string str_buffer; // concat all pad tokens with string interleaved with separators + res.reserve(input->shape().NumOfElements()); // this should be more than enough + offsets.reserve(1 + l_len_ + r_len_ + input->shape().NumOfElements()); + str_buffer.reserve(l_pad_with_sp_.size() * l_len_ + r_pad_with_sp_.size() * r_len_ + input->SizeInBytes()); + offsets.push_back(str_buffer.size()); // insert 0 as the starting pos + for (int i = 0; i < l_len_; i++) offsets.push_back((str_buffer += l_pad_with_sp_).size()); + + for (auto itr = input->begin(); itr != input->end(); itr++) { + str_buffer += (*itr); + str_buffer += separator_; + offsets.push_back(str_buffer.size()); + } + + for (int i = 0; i < r_len_; i++) offsets.push_back((str_buffer += r_pad_with_sp_).size()); + + for (auto n : ngrams_) { + CHECK_FAIL_RETURN_UNEXPECTED(n > 0, "n gram needs to be a positive number.\n"); + int32_t start_ind = l_len_ - std::min(l_len_, n - 1); + int32_t end_ind = offsets.size() - r_len_ + std::min(r_len_, n - 1); + if (end_ind - start_ind <= n) { + res.emplace_back(std::string()); // push back empty string + } else { + CHECK_FAIL_RETURN_UNEXPECTED(end_ind - n >= 0, "Incorrect loop condition"); + + for (int i = 
start_ind; i < end_ind - n; i++) { + res.emplace_back(str_buffer.substr(offsets[i], offsets[i + n] - offsets[i] - separator_.size())); + } + } + } + RETURN_IF_NOT_OK(Tensor::CreateTensor(output, res, TensorShape({static_cast(res.size())}))); + return Status::OK(); +} + +void NgramOp::Print(std::ostream &out) const { + out << "NgramOp: " + << "left pad width: " << l_len_ << " left pad token with separator: " << l_pad_with_sp_ << "\n" + << "right pad width: " << r_len_ << " right pad token with separator: " << r_pad_with_sp_ << "\n" + << "separator: " << separator_ << "\n"; +} + +Status NgramOp::OutputShape(const std::vector &inputs, std::vector &outputs) { + CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() == NumInput(), "incorrect num of inputs\n"); + CHECK_FAIL_RETURN_UNEXPECTED(inputs[0].Rank() == 1, "ngram only works with 1-dim data\n"); + dsize_t num_elements = ngrams_.size(); + for (int32_t n : ngrams_) { + // here since rank == 1, NumOfElements == shape[0]. add padding length to string + int32_t len_with_padding = inputs[0].NumOfElements() + std::min(n - 1, l_len_) + std::min(n - 1, r_len_); + // if len_with_padding - n < 0, this would return an empty string + num_elements += std::max(len_with_padding - n, 0); + } + outputs.emplace_back(TensorShape({num_elements})); + CHECK_FAIL_RETURN_UNEXPECTED(outputs.size() == NumOutput(), "incorrect num of outputs\n"); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/ngram_op.h b/mindspore/ccsrc/dataset/text/kernels/ngram_op.h new file mode 100644 index 0000000000..3d2c547f79 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/ngram_op.h @@ -0,0 +1,74 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_TEXT_KERNELS_NGRAM_OP_H_ +#define DATASET_TEXT_KERNELS_NGRAM_OP_H_ + +#include +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { +namespace py = pybind11; + +class NgramOp : public TensorOp { + public: + // Constructor of Ngram model + // @param const std::vector &ngrams + // @param int32_tl_len - padding length on the left + // @param int32_t r_len - padding length on the right + // @param const std::string &l_pad - padding token on the left + // @param const std::string &r_pad - padding token on the right + // @param const std::string &separator - use to join strings + NgramOp(const std::vector &ngrams, int32_t l_len, int32_t r_len, const std::string &l_pad, + const std::string &r_pad, const std::string &separator); + + // perform ngram model on each tensor + // @param const std::shared_ptr &input + // @param std::shared_ptr *output + // @return error code + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + // destructor + ~NgramOp() override = default; + + // @param std::vector &inputs - shape of input tensors + // @param std::vector &outputs - shape of output tensors + // @return error code + Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + + // print arg for debugging + // @param std::ostream &out + void Print(std::ostream &out) const override; + + private: + std::vector ngrams_; // list of n grams + int32_t l_len_; // left padding 
length + int32_t r_len_; // right padding length + std::string l_pad_with_sp_; // left padding appended with separator + std::string r_pad_with_sp_; // right padding appended with separator + std::string separator_; // separator +}; + +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_TEXT_KERNELS_NGRAM_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.cc b/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.cc new file mode 100644 index 0000000000..b902286576 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.cc @@ -0,0 +1,75 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "dataset/text/kernels/normalize_utf8_op.h" +#include +#include +#include +#include +#include + +#include "unicode/errorcode.h" +#include "unicode/normalizer2.h" +#include "unicode/utypes.h" + +namespace mindspore { +namespace dataset { +const NormalizeForm NormalizeUTF8Op::kDefNormalizeForm = NormalizeForm::kNfkc; +Status NormalizeUTF8Op::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + icu::ErrorCode error; + const icu::Normalizer2 *normalize = nullptr; + switch (normalize_form_) { + case NormalizeForm::kNone: { + *output = input; + return Status::OK(); + } + case NormalizeForm::kNfc: { + normalize = icu::Normalizer2::getNFCInstance(error); + CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFCInstance failed"); + break; + } + case NormalizeForm::kNfkc: { + normalize = icu::Normalizer2::getNFKCInstance(error); + CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCInstance failed"); + break; + } + case NormalizeForm::kNfd: { + normalize = icu::Normalizer2::getNFDInstance(error); + CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFDInstance failed"); + break; + } + case NormalizeForm::kNfkd: { + normalize = icu::Normalizer2::getNFKDInstance(error); + CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKDInstance failed"); + break; + } + default: { + RETURN_STATUS_UNEXPECTED("unexpected normalize form"); + break; + } + } + std::vector strs(input->Size()); + int i = 0; + for (auto iter = input->begin(); iter != input->end(); iter++) { + icu::StringByteSink sink(&strs[i++]); + normalize->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error); + CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "normalizeUTF8 failed."); + } + *output = std::make_shared(std::move(strs), input->shape()); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.h 
b/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.h new file mode 100644 index 0000000000..5033f2355f --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.h @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_TEXT_KERNELS_NORMALIZE_UTF8_OP_H_ +#define DATASET_TEXT_KERNELS_NORMALIZE_UTF8_OP_H_ +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { +enum class NormalizeForm { + kNone = 0, + kNfc, + kNfkc, + kNfd, + kNfkd, +}; + +class NormalizeUTF8Op : public TensorOp { + public: + static const NormalizeForm kDefNormalizeForm; + explicit NormalizeUTF8Op(NormalizeForm normalize_form = kDefNormalizeForm) : normalize_form_(normalize_form) {} + + ~NormalizeUTF8Op() override = default; + + void Print(std::ostream &out) const override { out << "NormalizeUTF8Op"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + private: + NormalizeForm normalize_form_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_TEXT_KERNELS_NORMALIZE_UTF8_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.cc b/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.cc new file mode 100644 index 0000000000..1ce2c5ea61 --- /dev/null +++ 
b/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.cc @@ -0,0 +1,57 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "dataset/text/kernels/regex_replace_op.h" +#include +#include +#include +#include +#include + +namespace mindspore { +namespace dataset { + +Status RegexReplaceOp::RegexReplace(icu::RegexMatcher *const matcher, const std::string_view &text, + std::string *out) const { + CHECK_FAIL_RETURN_UNEXPECTED((matcher != nullptr && out != nullptr), "Input is null"); + UErrorCode icu_error = U_ZERO_ERROR; + icu::UnicodeString unicode_text = icu::UnicodeString::fromUTF8(text); + matcher->reset(unicode_text); + icu::UnicodeString unicode_out; + if (replace_all_) { + unicode_out = matcher->replaceAll(replace_, icu_error); + } else { + unicode_out = matcher->replaceFirst(replace_, icu_error); + } + CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "RegexReplace failed"); + unicode_out.toUTF8String(*out); + return Status::OK(); +} + +Status RegexReplaceOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + UErrorCode icu_error = U_ZERO_ERROR; + icu::RegexMatcher matcher(pattern_, 0, icu_error); + CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "Create icu RegexMatcher failed, you may input one error pattern"); + std::vector strs(input->Size()); + int i = 0; + for (auto iter = input->begin(); iter != input->end(); iter++) { + 
RETURN_IF_NOT_OK(RegexReplace(&matcher, *iter, &strs[i])); + } + *output = std::make_shared(std::move(strs), input->shape()); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.h b/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.h new file mode 100644 index 0000000000..30fae13241 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.h @@ -0,0 +1,55 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_TEXT_KERNELS_REGEX_REPLACE_OP_H_ +#define DATASET_TEXT_KERNELS_REGEX_REPLACE_OP_H_ +#include +#include + +#include "unicode/regex.h" +#include "unicode/errorcode.h" +#include "unicode/utypes.h" + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +class RegexReplaceOp : public TensorOp { + public: + RegexReplaceOp(const std::string &pattern, const std::string &replace, bool replace_all = true) + : pattern_(icu::UnicodeString::fromUTF8(pattern)), + replace_(icu::UnicodeString::fromUTF8(replace)), + replace_all_(replace_all) {} + + ~RegexReplaceOp() override = default; + + void Print(std::ostream &out) const override { out << "RegexReplaceOp"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + protected: + Status RegexReplace(icu::RegexMatcher *const matcher, const std::string_view &text, std::string *out) const; + + private: + const icu::UnicodeString pattern_; + const icu::UnicodeString replace_; + const bool replace_all_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_TEXT_KERNELS_REGEX_REPLACE_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.cc b/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.cc new file mode 100644 index 0000000000..34c06f28ea --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.cc @@ -0,0 +1,103 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "dataset/text/kernels/regex_tokenizer_op.h" +#include +#include +#include +#include +#include + +namespace mindspore { +namespace dataset { +Status RegexTokenizerOp::GetUnicodeSubstr(const icu::UnicodeString &input, int start, int len, std::string *out_utf8, + icu::UnicodeString *out_unicode) const { + CHECK_FAIL_RETURN_UNEXPECTED((out_utf8 != nullptr || out_unicode != nullptr), "Wrong input"); + int total_len = input.length(); + int end = start + len; + CHECK_FAIL_RETURN_UNEXPECTED((start >= 0 && len > 0 && end <= total_len), "Out of range"); + icu::UnicodeString temp; + input.extract(start, len, temp); + if (out_utf8 != nullptr) { + temp.toUTF8String(*out_utf8); + } + if (out_unicode != nullptr) { + *out_unicode = temp; + } + return Status::OK(); +} + +Status RegexTokenizerOp::GetRegexTokens(const std::string &text, std::vector *out_tokens) const { + UErrorCode status = U_ZERO_ERROR; + out_tokens->clear(); + icu::RegexMatcher token_matcher(delim_pattern_, 0, status); + CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(status), "Create icu RegexMatcher failed, you may input one error pattern"); + icu::RegexMatcher delim_matcher(keep_delim_pattern_, 0, status); + CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(status), "Create icu RegexMatcher failed, you may input one error pattern"); + + icu::UnicodeString utext(icu::UnicodeString::fromUTF8(text)); + token_matcher.reset(utext); + + int token_start_index = 0; + status = U_ZERO_ERROR; + while (token_matcher.find(status) && U_SUCCESS(status)) { + int deli_start_index = token_matcher.start(status); + 
CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(status), "Get RegexMatcher matched start index failed"); + int deli_end_index = token_matcher.end(status); + CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(status), "Get RegexMatcher matched start index failed"); + + // Add non-empty token + int token_len = deli_start_index - token_start_index; + if (token_len > 0) { + std::string token; + RETURN_IF_NOT_OK(GetUnicodeSubstr(utext, token_start_index, token_len, &token)); + out_tokens->emplace_back(std::move(token)); + } + + int delim_len = deli_end_index - deli_start_index; + if (keep_delim_ && delim_len > 0) { + icu::UnicodeString delim_str; + std::string delim_utf8_str; + RETURN_IF_NOT_OK(GetUnicodeSubstr(utext, deli_start_index, delim_len, &delim_utf8_str, &delim_str)); + delim_matcher.reset(delim_str); + if (delim_matcher.matches(status) && U_SUCCESS(status)) { + out_tokens->emplace_back(std::move(delim_utf8_str)); + } + } + token_start_index = deli_end_index; + } + + if (token_start_index < utext.length()) { + std::string temp; + RETURN_IF_NOT_OK(GetUnicodeSubstr(utext, token_start_index, utext.length() - token_start_index, &temp)); + out_tokens->emplace_back(std::move(temp)); + } + return Status::OK(); +} + +Status RegexTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { + RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); + } + std::string_view text; + RETURN_IF_NOT_OK(input->GetItemAt(&text, {})); + std::vector tokens; + RETURN_IF_NOT_OK(GetRegexTokens(std::string(text.data(), text.size()), &tokens)); + *output = std::make_shared(std::move(tokens), TensorShape({(dsize_t)tokens.size()})); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.h b/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.h new file mode 100644 index 
0000000000..bcf02a4a11 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.h @@ -0,0 +1,58 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_TEXT_REGEX_TOKENIZER_OP_H_ +#define DATASET_TEXT_REGEX_TOKENIZER_OP_H_ +#include +#include +#include + +#include "unicode/regex.h" +#include "unicode/errorcode.h" +#include "unicode/utypes.h" + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +class RegexTokenizerOp : public TensorOp { + public: + RegexTokenizerOp(const std::string &delim_pattern, const std::string &keep_delim_pattern) + : delim_pattern_(icu::UnicodeString::fromUTF8(delim_pattern)), + keep_delim_pattern_(icu::UnicodeString::fromUTF8(keep_delim_pattern)), + keep_delim_(!keep_delim_pattern.empty()) {} + + ~RegexTokenizerOp() override = default; + + void Print(std::ostream &out) const override { out << "RegexTokenizerOp"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + protected: + Status GetUnicodeSubstr(const icu::UnicodeString &input, int start, int len, std::string *out_utf8, + icu::UnicodeString *out_unicode = nullptr) const; + Status GetRegexTokens(const std::string &text, std::vector *out_tokens) const; + + private: + const icu::UnicodeString delim_pattern_; + const icu::UnicodeString 
keep_delim_pattern_; + const bool keep_delim_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_TEXT_REGEX_TOKENIZER_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/kernels/to_number_op.cc b/mindspore/ccsrc/dataset/text/kernels/to_number_op.cc new file mode 100644 index 0000000000..1368684daf --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/to_number_op.cc @@ -0,0 +1,241 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "dataset/text/kernels/to_number_op.h" + +#include +#include +#include +#include +#include +#include + +#include "dataset/core/data_type.h" +#include "dataset/core/tensor.h" +#include "dataset/core/tensor_shape.h" +#include "dataset/kernels/data/data_utils.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +ToNumberOp::ToNumberOp(const DataType &cast_to_type) : cast_to_type_(cast_to_type) {} + +ToNumberOp::ToNumberOp(const std::string &cast_to_type) : cast_to_type_(DataType(cast_to_type)) {} + +Status ToNumberOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tenosrs should have type string."); + + switch (cast_to_type_.value()) { + case DataType::DE_INT8: + RETURN_IF_NOT_OK(ToSignedIntegral(input, output)); + break; + case DataType::DE_INT16: + RETURN_IF_NOT_OK(ToSignedIntegral(input, output)); + break; + case DataType::DE_INT32: + RETURN_IF_NOT_OK(ToSignedIntegral(input, output)); + break; + case DataType::DE_INT64: + RETURN_IF_NOT_OK(ToSignedIntegral(input, output)); + break; + case DataType::DE_UINT8: + RETURN_IF_NOT_OK(ToUnsignedIntegral(input, output)); + break; + case DataType::DE_UINT16: + RETURN_IF_NOT_OK(ToUnsignedIntegral(input, output)); + break; + case DataType::DE_UINT32: + RETURN_IF_NOT_OK(ToUnsignedIntegral(input, output)); + break; + case DataType::DE_UINT64: + RETURN_IF_NOT_OK(ToUnsignedIntegral(input, output)); + break; + case DataType::DE_FLOAT16: + RETURN_IF_NOT_OK(this->ToFloat16(input, output)); + break; + case DataType::DE_FLOAT32: + RETURN_IF_NOT_OK(ToFloat(input, output)); + break; + case DataType::DE_FLOAT64: + RETURN_IF_NOT_OK(ToDouble(input, output)); + break; + } + + return Status::OK(); +} + +void ToNumberOp::Print(std::ostream &out) const { out << "ToNumberOp: casting to " << '\n'; } + +Status ToNumberOp::OutputShape(const std::vector &input_shapes, std::vector &output_shapes) { + 
(void)std::copy(input_shapes.begin(), input_shapes.end(), std::back_inserter(output_shapes)); + return Status::OK(); +} + +template +Status ToNumberOp::ToSignedIntegral(const std::shared_ptr &input, std::shared_ptr *output) { + std::vector casted; + + for (auto it = input->begin(); it != input->end(); ++it) { + bool is_cast_out_of_range = false; + int64_t result = 0; + + try { + result = std::stoll(std::string(*it)); + } catch (const std::out_of_range &) { + is_cast_out_of_range = true; + } catch (const std::invalid_argument &) { + RETURN_STATUS_UNEXPECTED("It is invalid to convert " + std::string(*it) + " to a number."); + } + + if (result > std::numeric_limits::max() || result < std::numeric_limits::min() || is_cast_out_of_range) { + std::string error_message = "String input " + std::string(*it) + " will be out of bounds if casted to " + + cast_to_type_.ToString() + ". The valid range is: [" + + std::to_string(std::numeric_limits::min()) + ", " + + std::to_string(std::numeric_limits::max()) + "]."; + + RETURN_STATUS_UNEXPECTED(error_message); + } + + T casted_result = static_cast(result); + casted.push_back(casted_result); + } + + RETURN_IF_NOT_OK(Tensor::CreateTensor(output, casted, input->shape())); + return Status::OK(); +} + +template +Status ToNumberOp::ToUnsignedIntegral(const std::shared_ptr &input, std::shared_ptr *output) { + std::vector casted; + + for (auto it = input->begin(); it != input->end(); ++it) { + bool is_cast_out_of_range = false; + uint64_t result = 0; + + // If there is a - at the start of the string, it is considered by us to + // be out of bounds. 
If the - is somewhere else in the string, it is + // deemed invalid by std::stoull and will throw std::invalid_argument + for (int i = 0; i < (*it).size(); i++) { + if ((*it)[i] == '-') { + is_cast_out_of_range = true; + break; + } + } + + try { + result = std::stoull(std::string(*it)); + } catch (const std::out_of_range &) { + is_cast_out_of_range = true; + } catch (const std::invalid_argument &) { + RETURN_STATUS_UNEXPECTED("It is invalid to convert " + std::string(*it) + " to an unsigned integer."); + } + + if (result > std::numeric_limits::max() || result < std::numeric_limits::min() || is_cast_out_of_range) { + std::string error_message = "String input " + std::string(*it) + " will be out of bounds if casted to " + + cast_to_type_.ToString() + ". The valid range is: [" + + std::to_string(std::numeric_limits::min()) + ", " + + std::to_string(std::numeric_limits::max()) + "]."; + + RETURN_STATUS_UNEXPECTED(error_message); + } + + T casted_result = static_cast(result); + casted.push_back(casted_result); + } + + RETURN_IF_NOT_OK(Tensor::CreateTensor(output, casted, input->shape())); + return Status::OK(); +} + +Status ToNumberOp::ToFloat16(const std::shared_ptr &input, std::shared_ptr *output) { + // special case, float16 does not exist in c++, no native support for + // casting, so cast to float first then use this method, which use Eigen. 
+ std::shared_ptr temp; + RETURN_IF_NOT_OK(Tensor::CreateTensor(&temp, TensorImpl::kFlexible, input->shape(), DataType("float32"))); + RETURN_IF_NOT_OK(ToFloat(input, &temp)); + RETURN_IF_NOT_OK(mindspore::dataset::ToFloat16(temp, output)); + return Status::OK(); +} + +Status ToNumberOp::ToFloat(const std::shared_ptr &input, std::shared_ptr *output) { + std::vector casted; + + for (auto it = input->begin(); it != input->end(); ++it) { + bool is_cast_out_of_range = false; + float result = 0; + + try { + result = std::stof(std::string(*it)); + } catch (const std::out_of_range &) { + is_cast_out_of_range = true; + } catch (const std::invalid_argument &) { + RETURN_STATUS_UNEXPECTED("It is invalid to convert " + std::string(*it) + " to an unsigned integer."); + } + + if (result > std::numeric_limits::max() || result < std::numeric_limits::lowest() || + is_cast_out_of_range) { + std::string error_message = "String input " + std::string(*it) + " will be out of bounds if casted to " + + cast_to_type_.ToString() + ". 
The valid range is: [" + + std::to_string(std::numeric_limits::lowest()) + ", " + + std::to_string(std::numeric_limits::max()) + "]."; + + RETURN_STATUS_UNEXPECTED(error_message); + } + + float casted_result = static_cast(result); + casted.push_back(casted_result); + } + + RETURN_IF_NOT_OK(Tensor::CreateTensor(output, casted, input->shape())); + return Status::OK(); +} + +Status ToNumberOp::ToDouble(const std::shared_ptr &input, std::shared_ptr *output) { + std::vector casted; + + for (auto it = input->begin(); it != input->end(); ++it) { + bool is_cast_out_of_range = false; + double result = 0; + + try { + result = std::stod(std::string(*it)); + } catch (const std::out_of_range &) { + is_cast_out_of_range = true; + } catch (const std::invalid_argument &) { + RETURN_STATUS_UNEXPECTED("It is invalid to convert " + std::string(*it) + " to an unsigned integer."); + } + + if (result > std::numeric_limits::max() || result < std::numeric_limits::lowest() || + is_cast_out_of_range) { + std::string error_message = "String input " + std::string(*it) + " will be out of bounds if casted to " + + cast_to_type_.ToString() + ". 
The valid range is: [" + + std::to_string(std::numeric_limits::lowest()) + ", " + + std::to_string(std::numeric_limits::max()) + "]."; + + RETURN_STATUS_UNEXPECTED(error_message); + } + + double casted_result = static_cast(result); + casted.push_back(casted_result); + } + + RETURN_IF_NOT_OK(Tensor::CreateTensor(output, casted, input->shape())); + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/to_number_op.h b/mindspore/ccsrc/dataset/text/kernels/to_number_op.h new file mode 100644 index 0000000000..1346ce2f47 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/to_number_op.h @@ -0,0 +1,79 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_TEXT_KERNELS_TO_NUMBER_OP_H_ +#define DATASET_TEXT_KERNELS_TO_NUMBER_OP_H_ + +#include +#include +#include + +#include "dataset/core/data_type.h" +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +class ToNumberOp : public TensorOp { + public: + // Constructor of ToNumberOp + // @param const DataType &cast_to_type - the type to convert string inputs to. + explicit ToNumberOp(const DataType &cast_to_type); + + // Constructor of ToNumberOp + // @param const std::string &cast_to_type - the type in string form to convert string inputs to. 
+ explicit ToNumberOp(const std::string &cast_to_type); + + ~ToNumberOp() override = default; + + // Perform numeric conversion on each string in each tensor. + // @param const std::shared_ptr &input + // @param std::shared_ptr *output + // @return error code + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + // For each input shape, find the output shape + // @param std::vector &inputs - shape of input tensors + // @param std::vector &outputs - shape of output tensors + // @return error code + Status OutputShape(const std::vector &input_shapes, std::vector &output_shapes) override; + + // print arg for debugging + // @param std::ostream &out + void Print(std::ostream &out) const override; + + private: + template + Status ToSignedIntegral(const std::shared_ptr &input, std::shared_ptr *output); + + template + Status ToUnsignedIntegral(const std::shared_ptr &input, std::shared_ptr *output); + + Status ToFloat16(const std::shared_ptr &input, std::shared_ptr *output); + + Status ToFloat(const std::shared_ptr &input, std::shared_ptr *output); + + Status ToDouble(const std::shared_ptr &input, std::shared_ptr *output); + + DataType cast_to_type_; +}; + +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_TEXT_KERNELS_TO_NUMBER_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.cc b/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.cc new file mode 100644 index 0000000000..136d5006df --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.cc @@ -0,0 +1,66 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dataset/text/kernels/truncate_sequence_pair_op.h" + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/kernels/data/slice_op.h" + +namespace mindspore { +namespace dataset { + +Status TruncateSequencePairOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 2, "Number of inputs should be two."); + std::shared_ptr seq1 = input[0]; + std::shared_ptr seq2 = input[1]; + CHECK_FAIL_RETURN_UNEXPECTED(seq1->shape().Rank() == 1 && seq2->shape().Rank() == 1, + "Both sequences should be of rank 1"); + dsize_t length1 = seq1->shape()[0]; + dsize_t length2 = seq2->shape()[0]; + dsize_t outLength1 = length1; + dsize_t outLength2 = length2; + + dsize_t total = length1 + length2; + while (total > max_length_) { + if (outLength1 > outLength2) + outLength1--; + else + outLength2--; + total--; + } + std::shared_ptr outSeq1; + if (length1 != outLength1) { + std::unique_ptr slice1(new SliceOp(Slice(outLength1 - length1))); + RETURN_IF_NOT_OK(slice1->Compute(seq1, &outSeq1)); + } else { + outSeq1 = std::move(seq1); + } + + std::shared_ptr outSeq2; + if (length2 != outLength2) { + std::unique_ptr slice2(new SliceOp(Slice(outLength2 - length2))); + RETURN_IF_NOT_OK(slice2->Compute(seq2, &outSeq2)); + } else { + outSeq2 = std::move(seq2); + } + output->push_back(outSeq1); + output->push_back(outSeq2); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git 
a/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h b/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h new file mode 100644 index 0000000000..e8be6802a8 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_KERNELS_DATA_TRUNCATE_SEQUENCE_PAIR_OP_H_ +#define DATASET_KERNELS_DATA_TRUNCATE_SEQUENCE_PAIR_OP_H_ + +#include +#include +#include +#include +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/kernels/data/type_cast_op.h" +#include "dataset/kernels/data/data_utils.h" + +namespace mindspore { +namespace dataset { + +class TruncateSequencePairOp : public TensorOp { + public: + explicit TruncateSequencePairOp(dsize_t length) : max_length_(length) {} + + ~TruncateSequencePairOp() override = default; + + void Print(std::ostream &out) const override { out << "TruncateSequencePairOp"; } + + Status Compute(const TensorRow &input, TensorRow *output) override; + + private: + dsize_t max_length_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_KERNELS_DATA_TRUNCATE_SEQUENCE_PAIR_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.cc b/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.cc index 343e079153..063bf21630 100644 --- 
a/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.cc +++ b/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.cc @@ -28,6 +28,7 @@ namespace mindspore { namespace dataset { Status UnicodeCharTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); } diff --git a/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.h b/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.h index 53c42d599e..01a84eca8b 100644 --- a/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.h +++ b/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.h @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef DATASET_KERNELS_TEXT_UNICODE_CHAR_TOKENIZER_OP_H_ -#define DATASET_KERNELS_TEXT_UNICODE_CHAR_TOKENIZER_OP_H_ +#ifndef DATASET_TEXT_KERNELS_UNICODE_CHAR_TOKENIZER_OP_H_ +#define DATASET_TEXT_KERNELS_UNICODE_CHAR_TOKENIZER_OP_H_ #include #include "dataset/core/tensor.h" @@ -37,4 +37,4 @@ class UnicodeCharTokenizerOp : public TensorOp { } // namespace dataset } // namespace mindspore -#endif // DATASET_KERNELS_TEXT_UNICODE_CHAR_TOKENIZER_OP_H_ +#endif // DATASET_TEXT_KERNELS_UNICODE_CHAR_TOKENIZER_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.cc b/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.cc new file mode 100644 index 0000000000..97a4f1333d --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.cc @@ -0,0 +1,93 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "dataset/text/kernels/unicode_script_tokenizer_op.h" +#include +#include +#include +#include +#include + +#include "cppjieba/Unicode.hpp" +#include "unicode/errorcode.h" +#include "unicode/uchar.h" +#include "unicode/uscript.h" + +using cppjieba::DecodeRunesInString; +using cppjieba::RuneStrArray; + +namespace mindspore { +namespace dataset { + +const bool UnicodeScriptTokenizerOp::kDefKeepWhitespace = false; + +Status UnicodeScriptTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { + RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); + } + std::string_view str; + RETURN_IF_NOT_OK(input->GetItemAt(&str, {})); + RuneStrArray runes; + if (!DecodeRunesInString(str.data(), str.size(), runes)) { + RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); + } + + UScriptCode last_script = USCRIPT_INVALID_CODE; + icu::ErrorCode status; + int start = 0; + int len = 0; + std::vector splits; + + bool was_space = false; + for (size_t i = 0; i < runes.size(); i++) { + bool is_space = u_isUWhiteSpace(runes[i].rune); + UScriptCode script = uscript_getScript(runes[i].rune, status); + if (status.isFailure()) { + status.reset(); + script = USCRIPT_INVALID_CODE; + } + // 1) Seperate UTF-8 strings of different UScriptCode values + // (such as: "Chinese中国" should be splited to ["Chinese", "中国"]) + // 2) Seperate whitespace and non-whitespace UTF-8 strings + // (such as: " ." 
should be split to [" ", "."]) + if (len > 0 && (script != last_script || is_space != was_space)) { + // 3) If keep_whitespace_ is false, all the whitespace characters will be discard + if (keep_whitespace_ || !was_space) { + std::string temp(str.substr(start, len)); + splits.emplace_back(std::move(temp)); + } + start = runes[i].offset; + len = runes[i].len; + } else { + len += runes[i].len; + } + last_script = script; + was_space = is_space; + } + + if (len > 0 && (keep_whitespace_ || !was_space)) { + std::string temp(str.substr(start, len)); + splits.emplace_back(std::move(temp)); + } + // 4) If the input is empty scalar string, the output will be 1-D empty string. + if (splits.empty()) { + splits.emplace_back(""); + } + *output = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.h b/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.h new file mode 100644 index 0000000000..a77b0b3fa3 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.h @@ -0,0 +1,44 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_TEXT_KERNELS_UNICODE_SCRIPT_TOKENIZER_OP_H_ +#define DATASET_TEXT_KERNELS_UNICODE_SCRIPT_TOKENIZER_OP_H_ +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +class UnicodeScriptTokenizerOp : public TensorOp { + public: + static const bool kDefKeepWhitespace; + + explicit UnicodeScriptTokenizerOp(bool keep_whitespace = kDefKeepWhitespace) : keep_whitespace_(keep_whitespace) {} + + ~UnicodeScriptTokenizerOp() override = default; + + void Print(std::ostream &out) const override { out << "UnicodeScriptTokenizerOp"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + private: + bool keep_whitespace_; // If or not keep whitespace tokens +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_TEXT_KERNELS_UNICODE_SCRIPT_TOKENIZER_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.cc b/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.cc new file mode 100644 index 0000000000..35f3f8d0e2 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.cc @@ -0,0 +1,73 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "dataset/text/kernels/whitespace_tokenizer_op.h" +#include +#include +#include +#include +#include + +#include "cppjieba/Unicode.hpp" +#include "unicode/errorcode.h" +#include "unicode/uchar.h" +#include "unicode/uscript.h" + +using cppjieba::DecodeRunesInString; +using cppjieba::RuneStrArray; + +namespace mindspore { +namespace dataset { +Status WhitespaceTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { + RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); + } + std::string_view str; + RETURN_IF_NOT_OK(input->GetItemAt(&str, {})); + + RuneStrArray runes; + if (!DecodeRunesInString(str.data(), str.size(), runes)) { + RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); + } + std::vector splits; + int start = 0; + int len = 0; + for (size_t i = 0; i < runes.size(); i++) { + if (u_isUWhiteSpace(runes[i].rune)) { + if (len > 0) { + std::string temp(str.substr(start, len)); + splits.emplace_back(std::move(temp)); + len = 0; + } + } else { + if (len == 0) { + start = runes[i].offset; + } + len += runes[i].len; + } + } + if (len > 0) { + std::string temp(str.substr(start, len)); + splits.emplace_back(std::move(temp)); + } + if (splits.empty()) { + splits.emplace_back(""); + } + *output = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.h b/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.h new file mode 100644 index 0000000000..6d0bab0bea --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.h @@ -0,0 +1,39 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_TEXT_KERNELS_WHITESPACE_TOKENIZER_OP_H_ +#define DATASET_TEXT_KERNELS_WHITESPACE_TOKENIZER_OP_H_ +#include + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +class WhitespaceTokenizerOp : public TensorOp { + public: + WhitespaceTokenizerOp() {} + + ~WhitespaceTokenizerOp() override = default; + + void Print(std::ostream &out) const override { out << "WhitespaceTokenizerOp"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_TEXT_KERNELS_WHITESPACE_TOKENIZER_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.cc b/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.cc new file mode 100644 index 0000000000..48092d89cd --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.cc @@ -0,0 +1,117 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dataset/text/kernels/wordpiece_tokenizer_op.h" +#include +#include + +namespace mindspore { +namespace dataset { + +const char WordpieceTokenizerOp::kDefSuffixIndicator[] = "##"; +const int WordpieceTokenizerOp::kDefMaxBytesPerToken = 100; +const char WordpieceTokenizerOp::kDefUnknownToken[] = "[UNK]"; + +WordpieceTokenizerOp::WordpieceTokenizerOp(const std::shared_ptr &vocab, const std::string &suffix_indicator, + const int &max_bytes_per_token, const std::string &unknown_token) + : vocab_(vocab), + suffix_indicator_(suffix_indicator), + max_bytes_per_token_(max_bytes_per_token), + unknown_token_(unknown_token) {} + +Status WordpieceTokenizerOp::LookupWord(const std::string &input_token, const RuneStrArray &runes, const int start, + bool *out_found, int *out_end) const { + CHECK_FAIL_RETURN_UNEXPECTED(start >= 0 && start < input_token.size(), "Out of range"); + *out_found = false; + for (int i = runes.size() - 1; i >= 0; i--) { + *out_end = runes[i].offset + runes[i].len; + int len = *out_end - start; + std::string word = input_token.substr(start, len); + if (start > 0) { + word = suffix_indicator_ + word; + } + WordIdType default_id = -1; + if (vocab_->Lookup(word, default_id) != default_id) { + *out_found = true; + break; + } + } + return Status::OK(); +} + +Status WordpieceTokenizerOp::FoundNoToken(const std::string &input_token, std::vector *out_tokens) const { + out_tokens->clear(); + if (unknown_token_.empty()) { + out_tokens->emplace_back(input_token); + } else { + out_tokens->emplace_back(unknown_token_); + } + return Status::OK(); +} + +Status WordpieceTokenizerOp::AddSubword(const std::string &input_token, const int start, const int end, + std::vector *out_tokens) const { + CHECK_FAIL_RETURN_UNEXPECTED(start >= 0 && end > start && end <= input_token.size(), "Out of range"); + std::string subword = input_token.substr(start, end - 
start); + if (start > 0) { + subword = suffix_indicator_ + subword; + } + out_tokens->emplace_back(subword); + return Status::OK(); +} + +Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, std::vector *out_tokens) const { + if (input_token.size() > max_bytes_per_token_) { + return FoundNoToken(input_token, out_tokens); + } + RuneStrArray runes; + if (!DecodeRunesInString(input_token.data(), input_token.size(), runes)) { + RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); + } + int end; + for (int start = 0; start < input_token.size();) { + bool found; + RETURN_IF_NOT_OK(LookupWord(input_token, runes, start, &found, &end)); + if (found) { + RETURN_IF_NOT_OK(AddSubword(input_token, start, end, out_tokens)); + start = end; + } else { + return FoundNoToken(input_token, out_tokens); + } + } + return Status::OK(); +} + +Status WordpieceTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + if (input->Rank() > 1 || input->type() != DataType::DE_STRING) { + RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor"); + } + std::vector out_tokens; + for (auto iter = input->begin(); iter != input->end(); iter++) { + std::vector temp_tokens; + RETURN_IF_NOT_OK(GetTokens(std::string(*iter), &temp_tokens)); + out_tokens.insert(out_tokens.end(), temp_tokens.begin(), temp_tokens.end()); + } + if (out_tokens.empty()) { + out_tokens.emplace_back(""); + } + *output = std::make_shared(out_tokens, TensorShape({(dsize_t)out_tokens.size()})); + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.h b/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.h new file mode 100644 index 0000000000..c9a75025c6 --- /dev/null +++ b/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.h @@ -0,0 +1,66 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_TEXT_KERNELS_WORDPIECE_TOKENIZER_OP_H_ +#define DATASET_TEXT_KERNELS_WORDPIECE_TOKENIZER_OP_H_ +#include +#include +#include +#include + +#include "cppjieba/Unicode.hpp" + +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/text/vocab.h" +#include "dataset/util/status.h" + +using cppjieba::DecodeRunesInString; +using cppjieba::RuneStrArray; +namespace mindspore { +namespace dataset { + +class WordpieceTokenizerOp : public TensorOp { + public: + static const char kDefSuffixIndicator[]; + static const int kDefMaxBytesPerToken; + static const char kDefUnknownToken[]; + WordpieceTokenizerOp(const std::shared_ptr &vocab, const std::string &suffix_indicator = kDefSuffixIndicator, + const int &max_bytes_per_token = kDefMaxBytesPerToken, + const std::string &unknown_token = kDefUnknownToken); + + ~WordpieceTokenizerOp() override = default; + + void Print(std::ostream &out) const override { out << "WordpieceTokenizerOp"; } + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + protected: + Status AddSubword(const std::string &input_token, const int start, const int end, + std::vector *out_token) const; + Status FoundNoToken(const std::string &input_token, std::vector *out_tokens) const; + Status LookupWord(const std::string &input_token, const RuneStrArray &runes, const int start, bool *out_found, + int *out_end) const; + Status GetTokens(const std::string 
&input_token, std::vector *out_tokens) const; + + private: + const std::shared_ptr vocab_; + const std::string suffix_indicator_; + const int max_bytes_per_token_; + const std::string unknown_token_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_TEXT_KERNELS_WORDPIECE_TOKENIZER_OP_H_ diff --git a/mindspore/ccsrc/dataset/text/vocab.cc b/mindspore/ccsrc/dataset/text/vocab.cc index 893336c62a..100dc9d655 100644 --- a/mindspore/ccsrc/dataset/text/vocab.cc +++ b/mindspore/ccsrc/dataset/text/vocab.cc @@ -14,51 +14,53 @@ * limitations under the License. */ #include -#include +#include +#include #include #include "dataset/text/vocab.h" namespace mindspore { namespace dataset { -Vocab::Vocab(std::unordered_map word2id) { - word2id_ = std::move(word2id); - id2word_.resize(word2id_.size()); - for (auto p : word2id_) { - id2word_[p.second - kSpecialTokens::num_tokens] = p.first; - } -} +Vocab::Vocab(std::unordered_map word2id) { word2id_ = std::move(word2id); } WordIdType Vocab::Lookup(const WordType &word, WordIdType default_id) const { auto itr = word2id_.find(word); return itr == word2id_.end() ? 
default_id : itr->second; } -WordType Vocab::Lookup(WordIdType id) const { - if (id < kSpecialTokens::num_tokens) { - return reserved_token_str_[id]; - } else if (id - kSpecialTokens::num_tokens >= id2word_.size()) { - return reserved_token_str_[kSpecialTokens::unk]; - } else { - return id2word_[id - kSpecialTokens::num_tokens]; - } -} -Status Vocab::BuildFromPyList(const py::list &words, std::shared_ptr *vocab) { +Status Vocab::BuildFromPyList(const py::list &words, const py::list &special_tokens, bool prepend_special, + std::shared_ptr *vocab) { + // check of duplication on both words and special_tokens will be performed in python + // special_tokens and words both need to be unique, and shouldn't overlap std::unordered_map word2id; - WordIdType word_id = kSpecialTokens::num_tokens; + // if special is added in front, normal words id will start from number of special tokens + WordIdType word_id = prepend_special ? static_cast(special_tokens.size()) : 0; + for (auto word : words) { - const std::string s = py::str(word); - CHECK_FAIL_RETURN_UNEXPECTED(word2id.find(s) == word2id.end(), "duplicate word:" + s); - word2id[s] = word_id++; + word2id[py::str(word)] = word_id++; } + + word_id = prepend_special ? 0 : word2id.size(); + + for (auto special_token : special_tokens) { + word2id[py::str(special_token)] = word_id++; + } + *vocab = std::make_shared(std::move(word2id)); return Status::OK(); } Status Vocab::BuildFromFile(const std::string &path, const std::string &delimiter, int32_t vocab_size, - std::shared_ptr *vocab) { + const py::list &special_tokens, bool prepend_special, std::shared_ptr *vocab) { + // python validator checks special_tokens doesn't contain any duplicate words + std::unordered_set specials; + // used to check that words in file don't contain any special token that already exists + for (auto word : special_tokens) { + specials.insert(py::str(word)); + } + WordIdType word_id = prepend_special ? 
static_cast(special_tokens.size()) : 0; std::unordered_map word2id; - WordIdType word_id = kSpecialTokens::num_tokens; std::fstream handle(path, std::ios::in); CHECK_FAIL_RETURN_UNEXPECTED(handle.good() && handle.is_open(), "fail to open:" + path); std::string word; @@ -67,35 +69,36 @@ Status Vocab::BuildFromFile(const std::string &path, const std::string &delimite // if delimiter is not found, find_first_of would return std::string::npos which is -1 word = word.substr(0, word.find_first_of(delimiter)); } - CHECK_FAIL_RETURN_UNEXPECTED(word2id.find(word) == word2id.end(), "duplicate word:" + word); + CHECK_FAIL_RETURN_UNEXPECTED(word2id.find(word) == word2id.end(), "duplicate word:" + word + "."); + CHECK_FAIL_RETURN_UNEXPECTED(specials.find(word) == specials.end(), word + " is already in special_tokens."); word2id[word] = word_id++; // break if enough row is read, if vocab_size is smaller than 0 - if (word_id == vocab_size + kSpecialTokens::num_tokens) break; + if (word2id.size() == vocab_size) break; } + + word_id = prepend_special ? 
0 : word2id.size(); + + for (auto special_token : special_tokens) { + word2id[py::str(special_token)] = word_id++; + } + *vocab = std::make_shared(std::move(word2id)); return Status::OK(); } Status Vocab::BuildFromPyDict(const py::dict &words, std::shared_ptr *vocab) { std::unordered_map word2id; - std::map id2word; for (auto p : words) { - WordIdType word_id = py::reinterpret_borrow(p.second); - if (word_id < kSpecialTokens::num_tokens) continue; // skip id that are reserved - std::string word = py::str(p.first); - CHECK_FAIL_RETURN_UNEXPECTED(id2word.find(word_id) == id2word.end(), "duplicate id:" + word); - id2word[word_id] = word; - } - - WordIdType cnt = kSpecialTokens::num_tokens; - for (auto p : id2word) { - CHECK_FAIL_RETURN_UNEXPECTED(p.first == cnt++, "word id needs to be continuous starting from 2"); - word2id[p.second] = p.first; + word2id[py::str(p.first)] = py::reinterpret_borrow(p.second); } - *vocab = std::make_shared(std::move(word2id)); return Status::OK(); } -const std::vector Vocab::reserved_token_str_ = {"", ""}; + +void Vocab::append_word(const std::string &word) { + if (word2id_.find(word) == word2id_.end()) { + word2id_[word] = word2id_.size(); + } +} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/vocab.h b/mindspore/ccsrc/dataset/text/vocab.h index 3dcc88c434..fc21c380a2 100644 --- a/mindspore/ccsrc/dataset/text/vocab.h +++ b/mindspore/ccsrc/dataset/text/vocab.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef DATASET_NLP_VOCAB_H_ -#define DATASET_NLP_VOCAB_H_ +#ifndef DATASET_TEXT_VOCAB_H_ +#define DATASET_TEXT_VOCAB_H_ #include #include @@ -45,7 +45,8 @@ class Vocab { // @param const py::list &words - a list of string, used to build vocab, id starts from 2 // @param std::shared_ptr *vocab - return value, vocab object // @return error code - static Status BuildFromPyList(const py::list &words, std::shared_ptr *vocab); + static Status BuildFromPyList(const py::list &words, const py::list &special_tokens, bool prepend_special, + std::shared_ptr *vocab); // Build a vocab from reading a vocab file, id are automatically assigned, start from 2 // @param std::string &path - path to vocab file , each line is assumed to contain 1 word @@ -54,7 +55,7 @@ class Vocab { // @param std::shared_ptr *vocab - return value, vocab object // @return error code static Status BuildFromFile(const std::string &path, const std::string &delimiter, int32_t vocab_size, - std::shared_ptr *vocab); + const py::list &special_tokens, bool prepend_special, std::shared_ptr *vocab); // Lookup the id of a word, if word doesn't exist in vocab, return default_id // @param const WordType word - word to look up @@ -65,26 +66,26 @@ class Vocab { // reverse lookup, lookup the word based on its id // @param WordIdType id - word id to lookup to // @return WordType the word - WordType Lookup(WordIdType id) const; + WordType Lookup(WordIdType id); // constructor, shouldn't be called directly, can't be private due to std::make_unique() // @param std::unordered_map map - sanitized word2id map explicit Vocab(std::unordered_map map); - ~Vocab() = default; + Vocab() = default; - // enum type that holds all special tokens, add more if needed - enum kSpecialTokens : WordIdType { pad = 0, unk = 1, num_tokens = 2 }; + // add one word to vocab, increment it's index automatically + // @param std::string & word - word to be added will skip if word already exists + void append_word(const std::string &word); - // 
reversed lookup table for the reserved tokens - static const std::vector reserved_token_str_; + // destructor + ~Vocab() = default; private: std::unordered_map word2id_; - std::vector id2word_; // reverse lookup }; } // namespace dataset } // namespace mindspore -#endif // DATASET_NLP_VOCAB_H_ +#endif // DATASET_TEXT_VOCAB_H_ diff --git a/mindspore/ccsrc/dataset/util/README.md b/mindspore/ccsrc/dataset/util/README.md index f62d77d1df..7cad3c0d7d 100644 --- a/mindspore/ccsrc/dataset/util/README.md +++ b/mindspore/ccsrc/dataset/util/README.md @@ -1,72 +1,426 @@ -# Event -The header file WaitPost.h contains the implementation of an event which is a type of synchronization mechanism that is used to indicate to waiting processes when a particular condition has become true. +This folder contains miscellaneous utilities used by the dataset code. We will describe a couple important classes in this file. +## Thread Management +This picture summarizes a few important classes that we will cover in the next few sections. -An event is created with initial state set to false. It provides the following operations: -* `wait` - causes the suspension of the executing process until the state of the event is set to true. If the state is already set to true has no effect. -* `set` - sets the event's state to true, releasing all waiting processes. -* `clear` - sets the event's state to false. +![Thread management](https://images.gitee.com/uploads/images/2020/0601/220111_9b07c8fa_7342120.jpeg "task_manager.JPG") -# Counting Semaphore -The header file Semaphore.h contains the implementation of counting semaphore. Conceptually, a semaphore is a nonnegative integer count. Semaphores are typically used to coordinate access to resources, with the semaphore count initialized to the number of free resources. Threads then atomically increment the count when resources are added and atomically decrement the count when resources are removed. 
+## Task +A Task object corresponds to an instance of std::future returning from std::async. In general, a user will not create a Task object directly. Most work will go through TaskManager's TaskGroup interface which we will cover later in this document. Here are some important members and functions of Task class. +```cpp +std::function fnc_obj_; +``` +It is the entry function when the thead is spawned. The function does not take any input and will return a Status object. The returned Status object will be saved in this member +```cpp +Status rc_; +``` +To retrieve the executed result from the entry function, call the following function +```cpp +Status Task::GetTaskErrorIfAny(); +``` +Here is roughly the pseudo code of a lifetime of a Task. Some extra works needed to spawn the thread are omitted for the purpose of simplicity. As mentioned previously, a user never spawn a thread directly using a Task class without using any helper. + +```cpp +1 Task tk = Task("A name for this thread", []() -> Status { +2 return Status::OK(); +3 }); +4 RETURN_IF_NOT_OK(tk.Run()); +5 RETURN_IF_NOT_OK(tk.Join();) +6 RETURN_IF_NOT_OK(tk.GetTaskErrorIfAny()); +``` +In the above example line 1 to 3 we use Task constructor to prepare a thread that we are going to create and what it will be running. We also assign a name to this thread. The name is for eye catcher purpose. The second parameter is the real job for this thread to run. +
Line 4 we spawn the thread. In the above example, the thread will execute the lambda function which does nothing but return an OK Status object. +
Line 5 we wait for the thread to complete. +
Line 6 We retrieve the result from running the thread which should be the OK Status object. + +Another purpose of Task object is to wrap around the entry function and capture any possible exceptions thrown by running the entry function but not being caught within the entry function. +```cpp + try { + rc_ = fnc_obj_(); + } catch (const std::bad_alloc &e) { + rc_ = Status(StatusCode::kOutOfMemory, __LINE__, __FILE__, e.what()); + } catch (const std::exception &e) { + rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, e.what()); + } +``` +Note that +```cpp +Status Task::Run(); +``` +is not returning the Status of running the entry function func_obj_. It merely indicates if the spawn is successful or not. This function returns immediately. + +Another thing to point out that Task::Run() is not designed to re-run the thread repeatedly, say after it has returned. Result will be unexpected if a Task object is re-run. + +For the function +```cpp +Status Task::Join(WaitFlag wf = WaitFlag::kBlocking); +``` +where +```cpp +enum class WaitFlag : int { kBlocking, kNonBlocking }; +``` +is also not returning the Status of running the entry function func_obj_ like the function Run(). It can return some other unexpected error while waiting for the thread to return. + +This function blocks (kBlocking) by default until the spawned thread returns. + +As mentioned previously, use the function GetTaskErrorIfAny() to fetch the result from running the entry function func_obj_. + +The non-blocking version (kNonBlocking) of Join allows us to force the thread to return if timed out. + +```cpp +while (thrd_.wait_for(std::chrono::seconds(1)) != std::future_status::ready) { + // Do something if the thread is blocked on a conditional variable +} +``` +The main use of this form of Join() is after we have interrupted the thread. + +A design alternative is to use +```cpp +std::future +``` +to spawn the thread asynchronously and we can get the result using std::future::get(). 
But get() can only be called once and it is then more convenient to save the returned result in the rc_ member for unlimited number of retrieval. As we shall see later, the value of rc_ will be propagated to high level classes like TaskGroup, master thread. + +Currently it is how the thread is defined in Task class +```cpp +std::future thrd_; +``` +and spawned by this line of code. +```cpp +thrd_ = std::async(std::launch::async, std::ref(*this)); +``` +Every thread can access its own Task object using the FindMe() function. +```cpp +Task * TaskManager::FindMe(); +``` + +There are other attributes of Task such as interrupt which we will cover later in this document. + +## TaskGroup +The first helper in managing Task objects is TaskGroup. Technically speaking a TaskGroup is a collection of related Tasks. As of this writing, every Task must belong to a TaskGroup. We spawn a thread using the following function +```cpp +Status TaskGroup::CreateAsyncTask(const std::string &my_name, const std::function &f, Task **pTask = nullptr); +``` +The created Task object is added to the TaskGroup object. In many cases, user do not need to get a reference to the newly created Task object. But the CreateAsyncTask can return one if requested. + +There is no other way to add a Task object to a TaskGroup other than by calling TaskGroup::CreateAsyncTask. As a result, no Task object can belong to multiple TaskGroup's by design. Every Task object has a back pointer to the TaskGroup it belongs to : +```cpp +TaskGroup *Task::MyTaskGroup(); +``` + +Task objects in the same TaskGroup will form a linked list with newly created Task object appended to the end of the list. + +Globally we support multiple TaskGroups's running concurrently. TaskManager (discussed in the next section) will chain all Task objects from all TaskGroup's in a single LRU linked list. + +###### HandShaking +As of this writing, the following handshaking logic is required. 
Suppose a thread T1 create another thread, say T2 by calling TaskGroup::CreateAsyncTask. T1 will block on a WaitPost area until T2 post back signalling T1 can resume. +```cpp +// Entry logic of T2 +auto *myTask = TaskManager::FindMe(); +myTask->Post(); +``` +If T2 is going to spawn more threads, say T3 and T4, it is *highly recommended* that T2 wait for T3 and T4 to post before it posts back to T1. + +The purpose of the handshake is to provide a way for T2 to synchronize with T1 if necessary. + +TaskGroup provides similar functions as Task but at a group level. +```cpp +void TaskGroup::interrupt_all() noexcept; +``` +This interrupt all the threads currently running in the TaskGroup. The function returns immediately. We will cover more details on the mechanism of interrupt later in this document. +```cpp +Status TaskGroup::join_all(Task::WaitFlag wf = Task::WaitFlag::kBlocking); +``` +This performs Task::Join() on all the threads in the group. This is a blocking call by default. +```cpp +Status TaskGroup::GetTaskErrorIfAny(); +``` +A TaskGroup does not save records for all the Task::rc_ for all the threads in this group. Only the first error is saved. For example, if thread T1 reports error rc1 and later on T2 reports error rc2, only rc1 is saved in the TaskGroup and rc2 is ignored. TaskGroup::GetTaskErrorIfAny() will return rc1 in this case. +```cpp +int size() const noexcept; +``` +This returns the size of the TaskGroup. + +## TaskManager +TaskManager is a singleton, meaning there is only one such class object. It is created by another Services singleton object which we will cover it in the later section. +```cpp +TaskManager &TaskManager::GetInstance() +``` +provides the method to access the singleton. -When the semaphore count becomes zero, indicating that no more resources are present, threads trying to decrement the semaphore block wait until the count becomes greater than zero. +TaskManager manages all the TaskGroups and all the Tasks objects ever created. 
+```cpp + List lru_; + List free_lst_; + std::set grp_list_; +``` +As mentioned previously, all the Tasks in the same TaskGroup are linked in a linked list local to this TaskGroup. At the TaskManager level, all Task objects from all the TaskGroups are linked in the lru_ list. -Two operations are provided -* `P`(). Decrement the semaphore count. If the count is 0, the current thread is blocked. -* `V`(). Increment the semaphore count. Wake up one of the threads that are currently blocked. Note that the current implementation wakes up one of the blocked threads instead of waking up all of them. +When a thread finished its job and returned, its corresponding Task object is saved for reuse in the free_lst_. When a new thread is created, TaskManager will first look into the free_lst_ before allocating memory for the new Task object. -# List -It is a doubly linked structure used solely by Buffer Manager. List can used for general purpose. The reason we use a home grown linked list because Buffer Manager manages several linked lists and an element can simultaneously in more than one list. Using STL C++ container is not as efficient as the home grown linked list. +```cpp + std::shared_ptr master_; +``` +The master thread itself also has a corresponding **fake** Task object in the TaskManager singleton object. But this fake Task is not in any of the List -# Consumer/Producer Queue -The header file Queue.h contains a generic implementation of producer/consumer queue. The problem describes two processes, the producer and the consumer, who share a common, fixed-size buffer used as a queue. The producer's job is to generate data, put it into the buffer, and start again. At the same time, the consumer is consuming the data (i.e., removing it from the buffer), one piece at a time. 
+###### Passing error to the master thread +```cpp +void TaskManager::InterruptGroup(Task &); +void TaskManager::InterruptMaster(const Status &); +Status Status::GetMasterThreadRc(); +``` +When a thread encounters some unexpected error, it performs the following actions before returning +* It saves the error rc in the TaskGroup it belongs (assuming it is the first error reported in the TaskGroup). +* It interrupts every other threads in the TaskGroup by calling TaskManager::InterruptGroup. +* It interrupts the master thread and copy the error rc to the TaskManager::master_::rc_ by calling TaskManager::InterruptMaster(rc). However, because there can be many TaskGroups running in parallel or back to back, if the TaskManager::master_::rc_ is already set to some error from earlier TaskGroup run but not yet retrieved, the old error code will **not** be overwritten by the new error code. -It has the following template signature +Master thread can query the result using TaskGroup::GetTaskErrorIfAny or TaskManager::GetMasterThreadRc. The first form is the *preferred* method. For the second form, TaskManager::master_::rc_ will be reset to OK() once retrieved such that future call of TaskManager::InterruptMaster() will populate the error to the master thread again. + +###### WatchDog +TaskManager will spawn an additional thread with "Watchdog" as name catcher. It executes the following function once startup +```cpp +Status TaskManager::WatchDog() { + TaskManager::FindMe()->Post(); + errno_t err = sem_wait(&sem_); + if (err == -1) { + RETURN_STATUS_UNEXPECTED("Errno = " + std::to_string(errno)); + } + // We are woken up by control-c and we are going to stop all threads that are running. + // In addition, we also want to prevent new thread from creating. This can be done + // easily by calling the parent function. 
+ RETURN_IF_NOT_OK(ServiceStop()); + return Status::OK(); +} ``` - template - class Queue { +Its main purpose is to handle Control-C and stop all the threads from running by interrupting all of them. We will cover more on the function call ServiceStop() when we reach the section about Service class. + +WatchDog has its own TaskGroup to follow the protocol but it is not in the set of all the TaskGroup. +## Interrupt +C++ std::thread and std::async do not provide a way to stop a thread. So we implement interrupt mechanism to stop a thread from running and exit. + +The initial design can be considered as a polling method. A bit or a flag may be set in some global shared area. The running thread will periodically check this bit/flag. If it is set, interrupt has been sent and the thread will quit. This method has a requirement that even if the thread is waiting on a std::conditional_variable, it can't do an unconditional wait() call. That is, it must do a wait_for() with a time out. Once returned from the wait_for() call, the thread must check if it is woken up due to time out or due to the condition is satisfied. + +The cons of this approach is the performance cost and we design a pushing method approach. + +To begin with we define an abstract class that describe objects that are interruptible. + +```cpp +class IntrpResource { ... }; +``` +It has two states: +```cpp + enum class State : int { kRunning, kInterrupted }; ``` -_SIZE_ is the capacity of the queue. -_T_ is the object class that represents the data that are produced and consumed by the producer and consumer respectively. +either it is in the state of running or being interrupted. +There are two virtual functions that any class inherit can override +```cpp +virtual Status Interrupt(); +virtual void ResetIntrpState(); +``` +Interrupt() in the base class change the state of the object to kInterrupted. ResetIntrpState() is doing the opposite to reset the state. 
Any class that inherits the base class can implement its own Interrupt(), for example, we will later on see how a CondVar class (a wrapper for std::condition_variable) deals with interrupt on its own. -Initially the Queue is empty and all consumers are blocked. +All related IntrpResource can register to a +```cpp +class IntrpService {...} +``` +It provides the public method +```cpp + void InterruptAll() noexcept; +``` +which goes through all registered IntrpResource objects and call the corresponding Interrupt(). -The implementation of Queue is based on counting semaphore above. +A IntrpResource is always associated with a TaskGroup: +```cpp +class TaskGroup { + ... + std::shared_ptr intrp_svc_; + ... +}; +``` -The following operations are provided -* void `push_back`(const T&) used by producer to add data to the queue. -* T `pop_front`() used by consumer to retrieve the data from the queue. +As of this writing, both push and poll methods are used. There are still a few places (e.g. a busy while loop) where a thread must periodically check for interrupt. +## CondVar +A CondVar class is a wrapper of std::condition_variable +```cpp + std::condition_variable cv_; +``` +and is interruptible : +```cpp +class CondVar : public IntrpResource { ... } +``` +It overrides the Interrupt() method with its own +```cpp +void CondVar::Interrupt() { + IntrpResource::Interrupt(); + cv_.notify_all(); +} +``` +It provides a Wait() method and is equivalent to std::condition_variable::wait. +```cpp +Status Wait(std::unique_lock *lck, const std::function &pred); +``` +The main difference is Wait() is interruptible. Thread returning from Wait must check Status return code if it is being interrupted. -# Memory Pool -Two different kinds of memory pools are provided. While they behave differently, they have identical interfaces -* void * `allocate`(size_t reqSize). 
It allocates memory from the pool where reqSize is the size of memory requested -* void `deallocate`(void *p) returns the memory previously acquired by allocate pointed to by p back to the memory pool -* void `Reallocate`(void **pp, size_t oldSize, size_t newSize). Enlarge or shrink the memory acquired previously by allocate to the new size. The old pointer is passed in and a new pointer (or maybe the same ond) is returned. +Note that once a CondVar is interrupted, its state remains interrupted until it is reset. +## WaitPost +A WaitPost is an implementation of Event. In brief, it consists of a boolean state and provides methods to synchronize running threads. +* Wait(). If the boolean state is false, the calling threads will block until the boolean state becomes true or an interrupt has occurred. +* Set(). Change the boolean state to true. All blocking threads will be released. +* Clear(). Reset the boolean state back to false. -C++ operator **new** and **delete** are also overloaded to make use of the customized memory pools. +WaitPost is implemented on top of CondVar and hence is interruptible, that is, callers of +```cpp +Status Wait(); +``` +must check the return Status for interrupt. -Both functions allocate and deallocate can throw `std::bad_alloc` if running out of memory from the arena. It is user's responsibility to catch the out of memory exception. +The initial boolean state is false when a WaitPost object is created. Note that once a Set() call is invoked, the boolean state remains true until it is reset. +## List +A List is the implementation of a doubly linked list. It is not thread safe and so the user must provide methods to serialize the access to the list. -An allocator header file Allocator.h is created to provided additional support to hook into the C++ STL container such as vector or map to allocate memory from the customized memory pools. +The main feature of List is that it allows an element to be inserted into multiple Lists.
Take the Task class as an example. It can be in its TaskGroup list and at the same time linked in the global TaskManager task list. When a Task is done, it will be in the free list. +```cpp +class Task { + ... + Node node; + Node group; + Node free; + ... +}; +class TaskGroup { + ... + List grp_list_; + ... +}; +class TaskManager { + ... + List lru_; + List free_lst_; + ... +}; +``` +where Node is defined as +```cpp +template +struct Node { + using value_type = T; + using pointer = T *; + pointer prev; + pointer next; -## BuddyArena -The first kind of memory pool is BuddyArena. The corresponding header file is BuddyArena.h. + Node() { + prev = nullptr; + next = nullptr; + } +}; +``` +The constructor of the List class will take Node<> as input so it will follow this Node element to form a doubly linked chain. For example, List lru_ takes Task::node in its constructor while TaskGroup::grp_list_ takes Task::group in its constructor. This way we allow a Task to appear in two distinct linked lists. + +## Queue +A Queue is a thread safe solution to the producer-consumer problem. Every queue is of finite capacity and its size must be provided to the constructor of the Queue. A few methods are provided +* Add(). It appends an element to the queue and will be blocked if the queue is full or an interrupt has occurred. +* EmplaceBack(). Same as an Add() but constructs the element in place. +* PopFront(). Remove the first element from the queue and will be blocked if the queue is empty or an interrupt has occurred. + +Queue is implemented on top of the CondVar class and hence is interruptible. So callers of the above functions must check the Status return code for interrupt. + +## Locking +C++11 does not provide any shared lock support. So we implement some simple locking classes for our own benefit. +###### SpinLock +It is a simple exclusive lock based on CAS (compare and swap). The caller repeatedly tries (and hence the name spinning) to acquire the lock until successful.
It is best used when the critical section is very short. + +SpinLock is not interruptible. -BuddyArena is a general purpose arena and the constructor takes K (in unit of MB) as input. The default value is 4096 which is 4G if no value is given to the constructor. +There is a helper class LockGuard to ensure the lock is released if it is acquired. -BuddyArena is implemented based on Buddy System. +###### RWLock +It is a simple Read Write Lock where the implementation favors writers. A reader will acquire the lock in S (share) mode while a writer will acquire the lock in X (exclusive) mode. X mode is not compatible with S and X. S is compatible with S but not X. In addition, we provide the following functions +* Upgrade(). Upgrade an S lock to an X lock. +* Downgrade(). Downgrade an X lock to an S lock. -## CircularPool -The second kind of memory pool is CircularPool. The corresponding header file is CircularPool.h. +RWLock is not interruptible. -CircularPool is built upon multiple BuddyArena. Initially there is one BuddyArena. More BuddyArena are gradually added to the memory pool as needed until it reaches the specified maximum capacity. There is no guarantee the newly added BuddyArena is contiguous. Maximum size of allocated block in CircularPool is determined by the maximum block allowed by a BuddyArena. By default the maximum capacity is 32G and each BuddyArena is 4G.. The constructor takes unit of GB as input. +Like the LockGuard helper class, there are helper classes SharedLock and UniqueLock to release the lock when the lock goes out of scope. -There are one important assumption of this kind of memory pool -* Allocated memory is not kept for the whole duration of the memory pool and will be released soon. +## Treap +A Treap is the combination of BST (Binary Search Tree) and a heap. Each key is given a priority. The priority for any non-leaf node is greater than or equal to the priority of its children.
+ +Treap supports the following basic operations +* To search for a given key value. Standard binary search algorithm is applied, ignoring the priorities. +* To insert a new key X into the treap. Heap properties of the tree are maintained by tree rotation. +* To delete a key from a treap. Heap properties of the tree are maintained by tree rotation. + +## MemoryPool +A MemoryPool is an abstract class to allow memory blocks to be dynamically allocated from a designated memory region. Any class that implements MemoryPool must provide the following implementations. +```cpp + // Allocate a block of size n + virtual Status Allocate(size_t, void **) = 0; + + // Enlarge or shrink a block from oldSz to newSz + virtual Status Reallocate(void **, size_t old_sz, size_t new_sz) = 0; + + // Free a pointer + virtual void Deallocate(void *) = 0; +``` +There are several implementations of MemoryPool +###### Arena +Arena is a fixed size memory region which is allocated up front. Each Allocate() will sub-allocate a block from this region. -User allocates memory from the _logical_ end of the pool while allocated memory will be returned to the _logical_ head of the pool. When a new BuddyArena is added to the pool, it will become the new logical end. When a BuddyArena becomes full, the next BuddyArena (in a round robin fashion) will become the new tail. +Internally free blocks are organized into a Treap where the address of the block is the key and its block size is the priority. So the top of the tree is the biggest free block that can be found. Memory allocation is always fast and at a constant cost. Contiguous free blocks are merged into one single free block. A similar algorithm is used to enlarge a block to avoid memory copy. +The main advantage of Arena is that we do not need to free individual memory blocks and can simply free the whole region instead. +###### CircularPool +It is still an experimental class. It consists of one single Arena or multiple Arenas.
To allocate memory we circle through the Arenas before a new Arena is added. It has an assumption that memory is not kept for too long and will be released at some point in the future, and memory allocation strategy is based on this assumption. +## B+ tree +We also provide B+ tree support. Compared to std::map, we provide the following additional features +* Thread safe +* Concurrent insert/update/search support. + +As of this writing, no delete support has been implemented yet. +## Service +Many of the internal classes inherit from a Service abstract class. A Service class, simply speaking, provides a service. A Service class consists of four states +```cpp +enum class STATE : int { kStartInProg = 1, kRunning, kStopInProg, kStopped }; +``` +Any class that inherits from Service class must implement the following two methods. +```cpp + virtual Status DoServiceStart() = 0; + virtual Status DoServiceStop() = 0; +``` +###### Service::ServiceStart() +This function brings up the service and moves the state to kRunning. This function is thread safe. If another thread is bringing up the same service at the same time, only one of them will drive the service up. ServiceStart() will call DoServiceStart() provided by the child class when the state reaches kStartInProg. +An example will be TaskManager which inherits from Service. Its implementation of DoServiceStart will be to spawn off the WatchDog thread. +###### Service::ServiceStop() +This function shuts down the service and moves the state to kStopped. This function is thread safe. If another thread is bringing down the same service at the same time, only one of them will drive the service down. ServiceStop() will call DoServiceStop() provided by the child class when the state reaches kStopInProg. +As an example, both TaskManager and TaskGroup during service shutdown will generate interrupts to all the threads. +###### State checking +Another important use of Service is to synchronize operations.
For example, TaskGroup::CreateAsyncTask will return interrupt error if the current state of TaskGroup is not kRunning. This way we can assure no new thread is allowed to be created and added to a TaskGroup while the TaskGroup is going out of scope. Without this state check, we can have a Task running without its TaskGroup, and may run into a situation where the Task is blocked on a CondVar and not returning. +## Services +Services is a singleton and is the first and only singleton created as a result of calling +```cpp +mindspore::dataset::GlobalInit(); +``` +The first thing the Services singleton does is to create a small 16M circular memory pool. This pool is used by many important classes to ensure basic operation will not fail due to out of memory. The most important example is TaskManager. Each Task memory is allocated from this memory pool. + +The next thing Services does is to spawn other singletons in some specific order. One of the problems of multiple singletons is we have very limited control on the order of creation and destruction of singletons. Sometimes we need to control which singleton to allocate first and which one to deallocate last. One good example is logger. Logger is usually the last one to shutdown. + +Services singleton has a requirement on the list of singletons it brings up. They must inherit the Service class. Services singleton will bring each one up by calling the corresponding ServiceStart() function. The destructor of Services singleton will call ServiceStop() to bring down these singletons. TaskManager is a good example. It is invoked by Services singleton. + +Services singleton also provides other useful services like +* return the current hostname +* return the current username +* generate a random string + +## Path +Path class provides many operating system specific functions to shield the user from writing functions for different platforms. As of this writing, the following functions are provided.
+```cpp + bool Exists(); + bool IsDirectory(); + Status CreateDirectory(); + Status CreateDirectories(); + std::string Extension() const; + std::string ParentPath(); +``` +Simple "/" operators are also provided to allow folders and/or files to be concatenated and work on all platforms including Windows. diff --git a/mindspore/ccsrc/dataset/util/auto_index.h b/mindspore/ccsrc/dataset/util/auto_index.h index 2b4c2d6883..11a2e90b00 100644 --- a/mindspore/ccsrc/dataset/util/auto_index.h +++ b/mindspore/ccsrc/dataset/util/auto_index.h @@ -48,7 +48,7 @@ class AutoIndexObj : public BPlusTree { // @return Status insert(const value_type &val, key_type *key = nullptr) { key_type my_inx = inx_.fetch_add(1); - if (key) { + if (key != nullptr) { *key = my_inx; } return my_tree::DoInsert(my_inx, val); diff --git a/mindspore/ccsrc/dataset/util/btree.h b/mindspore/ccsrc/dataset/util/btree.h index df7cb8516f..ccf642e366 100644 --- a/mindspore/ccsrc/dataset/util/btree.h +++ b/mindspore/ccsrc/dataset/util/btree.h @@ -40,8 +40,6 @@ struct BPlusTreeTraits { static constexpr slot_type kLeafSlots = 256; // Number of slots in each inner node of the tree static constexpr slot_type kInnerSlots = 128; - // If kAppendMode is true, we will split high instead of 50/50 split - static constexpr bool kAppendMode = false; }; /// Implementation of B+ tree @@ -123,19 +121,14 @@ class BPlusTree { std::unique_ptr DoUpdate(const key_type &key, const value_type &new_value); std::unique_ptr DoUpdate(const key_type &key, std::unique_ptr &&new_value); - void PopulateNumKeys(); - - key_type KeyAtPos(uint64_t inx); - // Statistics struct tree_stats { std::atomic size_; uint32_t leaves_; uint32_t inner_nodes_; uint32_t level_; - bool num_keys_array_valid_; - tree_stats() : size_(0), leaves_(0), inner_nodes_(0), level_(0), num_keys_array_valid_(false) {} + tree_stats() : size_(0), leaves_(0), inner_nodes_(0), level_(0) {} }; private: @@ -160,10 +153,6 @@ class BPlusTree { Node lru_; }; - uint64_t 
PopulateNumKeys(BaseNode *n); - - key_type KeyAtPos(BaseNode *n, uint64_t inx); - // This control block keeps track of all the nodes we traverse on insert. // To maximize concurrency, internal nodes are latched S. If a node split // is required, we must releases all the latches and redo it again and change @@ -255,7 +244,6 @@ class BPlusTree { slot_type slot_dir_[traits::kInnerSlots] = {0}; key_type keys_[traits::kInnerSlots] = {0}; BaseNode *data_[traits::kInnerSlots + 1] = {nullptr}; - uint64_t num_keys_[traits::kInnerSlots + 1] = {0}; slot_type slotuse_; }; @@ -391,7 +379,6 @@ class BPlusTree { Iterator operator--(int); bool operator==(const Iterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); } - bool operator!=(const Iterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); } private: @@ -441,7 +428,6 @@ class BPlusTree { ConstIterator operator--(int); bool operator==(const ConstIterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); } - bool operator!=(const ConstIterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); } private: @@ -451,20 +437,17 @@ class BPlusTree { }; Iterator begin(); - Iterator end(); ConstIterator begin() const; - ConstIterator end() const; ConstIterator cbegin() const; - ConstIterator cend() const; // Locate the entry with key - ConstIterator Search(const key_type &key) const; - Iterator Search(const key_type &key); + std::pair Search(const key_type &key) const; + std::pair Search(const key_type &key); value_type operator[](key_type key); }; diff --git a/mindspore/ccsrc/dataset/util/btree_impl.tpp b/mindspore/ccsrc/dataset/util/btree_impl.tpp index 63117a0097..8148a8d12c 100644 --- a/mindspore/ccsrc/dataset/util/btree_impl.tpp +++ b/mindspore/ccsrc/dataset/util/btree_impl.tpp @@ -23,41 +23,39 @@ template typename BPlusTree::IndexRc BPlusTree::InnerNode::Sort() { // Build an inverse map. 
Basically it means keys[i] should be relocated to keys[inverse[i]]; slot_allocator alloc(this->alloc_); - slot_type *inverse = nullptr; try { - inverse = alloc.allocate(traits::kInnerSlots); + // We use a unique_ptr with a custom deleter to ensure the memory will be released when this + // function returns. + std::unique_ptr> memGuard( + alloc.allocate(traits::kInnerSlots), [&alloc](slot_type *p) { alloc.deallocate(p, traits::kInnerSlots); }); + slot_type *inverse = memGuard.get(); + for (slot_type i = 0; i < slotuse_; i++) { + inverse[slot_dir_[i]] = i; + } + for (slot_type i = 0; i < slotuse_; i++) { + while (inverse[i] != i) { + slot_type j = inverse[i]; + slot_type k = inverse[j]; + // Swap the key + std::swap(keys_[j], keys_[i]); + // Swap the pointers. + if ((j + 1) >= traits::kInnerSlots + 1 || (i + 1) >= traits::kInnerSlots + 1) { + return IndexRc::kUnexpectedError; + } + std::swap(data_[j + 1], data_[i + 1]); + // one key in order. + inverse[j] = j; + // continue to move + inverse[i] = k; + } + slot_dir_[i] = i; + } + return IndexRc::kOk; } catch (std::bad_alloc &e) { return IndexRc::kOutOfMemory; } catch (std::exception &e) { return IndexRc::kUnexpectedError; } - - for (slot_type i = 0; i < slotuse_; i++) { - inverse[slot_dir_[i]] = i; - } - for (slot_type i = 0; i < slotuse_; i++) { - while (inverse[i] != i) { - slot_type j = inverse[i]; - slot_type k = inverse[j]; - // Swap the key - std::swap(keys_[j], keys_[i]); - // Swap the pointers. - if ((j + 1) >= traits::kInnerSlots + 1 || (i + 1) >= traits::kInnerSlots + 1) { - return IndexRc::kUnexpectedError; - } - std::swap(data_[j + 1], data_[i + 1]); - // one key in order. - inverse[j] = j; - // continue to move - inverse[i] = k; - } - slot_dir_[i] = i; - } - if (inverse != nullptr) { - alloc.deallocate(inverse, traits::kInnerSlots); - inverse = nullptr; - } - return IndexRc::kOk; } template @@ -117,41 +115,39 @@ template typename BPlusTree::IndexRc BPlusTree::LeafNode::Sort() { // Build an inverse map.
Basically it means keys[i] should be relocated to keys[inverse[i]]; slot_allocator alloc(this->alloc_); - slot_type *inverse = nullptr; try { - inverse = alloc.allocate(traits::kLeafSlots); + // We use a unique_ptr with a custom deleter to ensure the memory will be released when this + // function returns. + std::unique_ptr> memGuard( + alloc.allocate(traits::kLeafSlots), [&alloc](slot_type *p) { alloc.deallocate(p, traits::kLeafSlots); }); + slot_type *inverse = memGuard.get(); + for (slot_type i = 0; i < slotuse_; i++) { + inverse[slot_dir_[i]] = i; + } + for (slot_type i = 0; i < slotuse_; i++) { + while (inverse[i] != i) { + slot_type j = inverse[i]; + slot_type k = inverse[j]; + // Swap the key + if (j >= traits::kLeafSlots || i >= traits::kLeafSlots) { + return IndexRc::kUnexpectedError; + } + std::swap(keys_[j], keys_[i]); + // Swap the shared pointers + std::swap(data_[j], data_[i]); + // one key in order. + inverse[j] = j; + // continue to move + inverse[i] = k; + } + slot_dir_[i] = i; + } + return IndexRc::kOk; } catch (std::bad_alloc &e) { return IndexRc::kOutOfMemory; } catch (std::exception &e) { return IndexRc::kUnexpectedError; } - - for (slot_type i = 0; i < slotuse_; i++) { - inverse[slot_dir_[i]] = i; - } - for (slot_type i = 0; i < slotuse_; i++) { - while (inverse[i] != i) { - slot_type j = inverse[i]; - slot_type k = inverse[j]; - // Swap the key - if (j >= traits::kLeafSlots || i >= traits::kLeafSlots) { - return IndexRc::kUnexpectedError; - } - std::swap(keys_[j], keys_[i]); - // Swap the shared pointers - std::swap(data_[j], data_[i]); - // one key in order.
- inverse[j] = j; - // continue to move - inverse[i] = k; - } - slot_dir_[i] = i; - } - if (inverse != nullptr) { - alloc.deallocate(inverse, traits::kLeafSlots); - inverse = nullptr; - } - return IndexRc::kOk; } template @@ -273,26 +269,17 @@ typename BPlusTree::IndexRc BPlusTree::LeafInsertK RETURN_IF_BAD_RC(rc); leaf_nodes_.InsertAfter(node, new_leaf); *split_node = new_leaf; - if (slot == node->slotuse_ && traits::kAppendMode) { - // Split high. Good for bulk load and keys are in asending order on insert - *split_key = key; - // Just insert the new key to the new leaf. No further need to move the keys - // from one leaf to the other. - rc = new_leaf->InsertIntoSlot(nullptr, 0, key, std::move(value)); + // 50/50 split + rc = node->Split(new_leaf); + RETURN_IF_BAD_RC(rc); + *split_key = new_leaf->keys_[0]; + if (LessThan(key, *split_key)) { + rc = node->InsertIntoSlot(nullptr, slot, key, std::move(value)); RETURN_IF_BAD_RC(rc); } else { - // 50/50 split - rc = node->Split(new_leaf); + slot -= node->slotuse_; + rc = new_leaf->InsertIntoSlot(nullptr, slot, key, std::move(value)); RETURN_IF_BAD_RC(rc); - *split_key = new_leaf->keys_[0]; - if (LessThan(key, *split_key)) { - rc = node->InsertIntoSlot(nullptr, slot, key, std::move(value)); - RETURN_IF_BAD_RC(rc); - } else { - slot -= node->slotuse_; - rc = new_leaf->InsertIntoSlot(nullptr, slot, key, std::move(value)); - RETURN_IF_BAD_RC(rc); - } } } return rc; @@ -313,25 +300,18 @@ typename BPlusTree::IndexRc BPlusTree::InnerInsert rc = AllocateInner(&new_inner); RETURN_IF_BAD_RC(rc); *split_node = new_inner; - if (slot == node->slotuse_ && traits::kAppendMode) { - *split_key = key; - new_inner->data_[0] = node->data_[node->slotuse_]; - rc = new_inner->InsertIntoSlot(0, key, ptr); + rc = node->Split(new_inner, split_key); + RETURN_IF_BAD_RC(rc); + if (LessThan(key, *split_key)) { + // Need to readjust the slot position since the split key is no longer in the two children. 
+ slot = FindSlot(node, key); + rc = node->InsertIntoSlot(slot, key, ptr); RETURN_IF_BAD_RC(rc); } else { - rc = node->Split(new_inner, split_key); + // Same reasoning as above + slot = FindSlot(new_inner, key); + rc = new_inner->InsertIntoSlot(slot, key, ptr); RETURN_IF_BAD_RC(rc); - if (LessThan(key, *split_key)) { - // Need to readjust the slot position since the split key is no longer in the two children. - slot = FindSlot(node, key); - rc = node->InsertIntoSlot(slot, key, ptr); - RETURN_IF_BAD_RC(rc); - } else { - // Same reasoning as above - slot = FindSlot(new_inner, key); - rc = new_inner->InsertIntoSlot(slot, key, ptr); - RETURN_IF_BAD_RC(rc); - } } } return rc; @@ -381,8 +361,7 @@ typename BPlusTree::IndexRc BPlusTree::InsertKeyVa } template -typename BPlusTree::IndexRc BPlusTree::Locate(RWLock *parent_lock, - bool forUpdate, +typename BPlusTree::IndexRc BPlusTree::Locate(RWLock *parent_lock, bool forUpdate, BPlusTree::BaseNode *top, const key_type &key, BPlusTree::LeafNode **ln, @@ -485,9 +464,6 @@ Status BPlusTree::DoInsert(const key_type &key, std::unique_ptr BPlusTree::DoUpdate(const key_type &key, std:: } } -template -void BPlusTree::PopulateNumKeys() { - // Start from the root and we calculate how many leaf nodes as pointed to by each inner node. - // The results are stored in the numKeys array in each inner node. - (void)PopulateNumKeys(root_); - // Indicate the result is accurate since we have the tree locked exclusive. 
- stats_.num_keys_array_valid_ = true; -} - -template -uint64_t BPlusTree::PopulateNumKeys(BPlusTree::BaseNode *n) { - if (n->is_leafnode()) { - auto *leaf = static_cast(n); - return leaf->slotuse_; - } else { - auto *inner = static_cast(n); - uint64_t num_keys = 0; - for (auto i = 0; i < inner->slotuse_ + 1; i++) { - inner->num_keys_[i] = PopulateNumKeys(inner->data_[i]); - num_keys += inner->num_keys_[i]; - } - return num_keys; - } -} - -template -typename BPlusTree::key_type BPlusTree::KeyAtPos(uint64_t inx) { - if (stats_.num_keys_array_valid_ == false) { - // We need exclusive access to the tree. If concurrent insert is going on, it is hard to get accurate numbers - UniqueLock lck(&rw_lock_); - // Check again. - if (stats_.num_keys_array_valid_ == false) { - PopulateNumKeys(); - } - } - // Now we know how many keys each inner branch contains, we can now traverse the correct node in log n time. - return KeyAtPos(root_, inx); -} - -template -typename BPlusTree::key_type BPlusTree::KeyAtPos(BPlusTree::BaseNode *n, - uint64_t inx) { - if (n->is_leafnode()) { - auto *leaf = static_cast(n); - return leaf->keys_[leaf->slot_dir_[inx]]; - } else { - auto *inner = static_cast(n); - if ((inx + 1) > inner->num_keys_[0]) { - inx -= inner->num_keys_[0]; - } else { - return KeyAtPos(inner->data_[0], inx); - } - for (auto i = 0; i < inner->slotuse_; i++) { - if ((inx + 1) > inner->num_keys_[inner->slot_dir_[i] + 1]) { - inx -= inner->num_keys_[inner->slot_dir_[i] + 1]; - } else { - return KeyAtPos(inner->data_[inner->slot_dir_[i] + 1], inx); - } - } - } - // If we get here, inx is way too big. Instead of throwing exception, we will just return the default value - // of key_type whatever it is. 
- return key_type(); -} } // namespace dataset } // namespace mindspore #endif diff --git a/mindspore/ccsrc/dataset/util/btree_iterator.tpp b/mindspore/ccsrc/dataset/util/btree_iterator.tpp index ef3a47f176..91ba2acd7a 100644 --- a/mindspore/ccsrc/dataset/util/btree_iterator.tpp +++ b/mindspore/ccsrc/dataset/util/btree_iterator.tpp @@ -286,7 +286,8 @@ typename BPlusTree::ConstIterator &BPlusTree::Cons } template -typename BPlusTree::ConstIterator BPlusTree::Search(const key_type &key) const { +std::pair::ConstIterator, bool> BPlusTree::Search( + const key_type &key) const { if (root_ != nullptr) { LeafNode *leaf = nullptr; slot_type slot; @@ -294,21 +295,15 @@ typename BPlusTree::ConstIterator BPlusTree::Searc // Lock the tree in S, pass the lock to Locate which will unlock it for us underneath. myLock->LockShared(); IndexRc rc = Locate(myLock, false, root_, key, &leaf, &slot); - if (rc == IndexRc::kOk) { - // All locks from the tree to the parent of leaf are all gone. We still have a S lock - // on the leaf. The unlock will be handled by the iterator when it goes out of scope. - return ConstIterator(leaf, slot, true); - } else { - MS_LOG(DEBUG) << "Key not found. rc = " << static_cast(rc) << "."; - return cend(); - } + bool find = (rc == IndexRc::kOk); + return std::make_pair(ConstIterator(leaf, slot, find), find); } else { - return cend(); + return std::make_pair(cend(), false); } } template -typename BPlusTree::Iterator BPlusTree::Search(const key_type &key) { +std::pair::Iterator, bool> BPlusTree::Search(const key_type &key) { if (root_ != nullptr) { LeafNode *leaf = nullptr; slot_type slot; @@ -316,23 +311,17 @@ typename BPlusTree::Iterator BPlusTree::Search(con // Lock the tree in S, pass the lock to Locate which will unlock it for us underneath. myLock->LockShared(); IndexRc rc = Locate(myLock, false, root_, key, &leaf, &slot); - if (rc == IndexRc::kOk) { - // All locks from the tree to the parent of leaf are all gone. 
We still have a S lock - // on the leaf. The unlock will be handled by the iterator when it goes out of scope. - return Iterator(leaf, slot, true); - } else { - MS_LOG(DEBUG) << "Key not found. rc = " << static_cast(rc) << "."; - return end(); - } + bool find = (rc == IndexRc::kOk); + return std::make_pair(Iterator(leaf, slot, find), find); } else { - return end(); + return std::make_pair(end(), false); } } template typename BPlusTree::value_type BPlusTree::operator[](key_type key) { - Iterator it = Search(key); - return it.value(); + auto r = Search(key); + return r.first.value(); } template diff --git a/mindspore/ccsrc/dataset/util/queue.h b/mindspore/ccsrc/dataset/util/queue.h index b97e6a5c28..9a51565861 100644 --- a/mindspore/ccsrc/dataset/util/queue.h +++ b/mindspore/ccsrc/dataset/util/queue.h @@ -230,6 +230,8 @@ class QueueList { std::unique_ptr> &operator[](const int index) { return queue_list_[index]; } + const std::unique_ptr> &operator[](const int index) const { return queue_list_[index]; } + ~QueueList() = default; private: diff --git a/mindspore/ccsrc/dataset/util/random.h b/mindspore/ccsrc/dataset/util/random.h index 6c70d6c7ef..957a4214a8 100644 --- a/mindspore/ccsrc/dataset/util/random.h +++ b/mindspore/ccsrc/dataset/util/random.h @@ -19,13 +19,16 @@ #if defined(_WIN32) || defined(_WIN64) #include #endif +#include #include #include #include #include +#include #include "dataset/core/config_manager.h" #include "dataset/core/global_context.h" +#include "utils/log_adapter.h" namespace mindspore { namespace dataset { @@ -35,6 +38,17 @@ inline std::mt19937 GetRandomDevice() { rand_s(&number); std::mt19937 random_device{static_cast(number)}; #else + int i = 0; + while (i < 5) { + try { + std::mt19937 random_device{std::random_device("/dev/urandom")()}; + return random_device; + } catch (const std::exception &e) { + MS_LOG(WARNING) << "Get std::random_device failed, retry: " << i << ", error: " << e.what(); + 
std::this_thread::sleep_for(std::chrono::milliseconds(10)); + i++; + } + } std::mt19937 random_device{std::random_device("/dev/urandom")()}; #endif return random_device; diff --git a/mindspore/ccsrc/dataset/util/status.cc b/mindspore/ccsrc/dataset/util/status.cc index 84d8ee582c..27e9dfbc83 100644 --- a/mindspore/ccsrc/dataset/util/status.cc +++ b/mindspore/ccsrc/dataset/util/status.cc @@ -45,6 +45,9 @@ std::string CodeAsString(const StatusCode c) { case StatusCode::kDuplicateKey: s = "Duplicate key"; break; + case StatusCode::kProfilingError: + s = "Error encountered while profiling"; + break; case StatusCode::kUnexpectedError: default: s = "Unexpected error"; diff --git a/mindspore/ccsrc/dataset/util/status.h b/mindspore/ccsrc/dataset/util/status.h index 38ed1fef89..7a480f4239 100644 --- a/mindspore/ccsrc/dataset/util/status.h +++ b/mindspore/ccsrc/dataset/util/status.h @@ -70,6 +70,9 @@ enum class StatusCode : char { kPythonInterpreterFailure = 7, kTDTPushFailure = 8, kFileNotExist = 9, + kProfilingError = 10, + kBoundingBoxOutOfBounds = 11, + kBoundingBoxInvalidShape = 12, // Make this error code the last one. Add new error code above it. kUnexpectedError = 127 }; diff --git a/mindspore/ccsrc/dataset/util/task.cc b/mindspore/ccsrc/dataset/util/task.cc index d9e0e73243..f00f26f5ce 100644 --- a/mindspore/ccsrc/dataset/util/task.cc +++ b/mindspore/ccsrc/dataset/util/task.cc @@ -69,7 +69,7 @@ void Task::ShutdownGroup() { // Wake up watch dog and shutdown the engine. 
vg->rc_ = rc_; rcLock.unlock(); TaskManager::InterruptMaster(rc_); - TaskManager::InterruptGroup(*gMyTask); + TaskManager::InterruptGroup(*this); } } } diff --git a/mindspore/ccsrc/debug/anf_ir_dump.cc b/mindspore/ccsrc/debug/anf_ir_dump.cc index 1fd3096e7c..fc32e0fb5f 100644 --- a/mindspore/ccsrc/debug/anf_ir_dump.cc +++ b/mindspore/ccsrc/debug/anf_ir_dump.cc @@ -111,9 +111,15 @@ void DumpGlobalInfoEntry(const FuncGraphPtr &graph, std::ostringstream &buffer) } buffer << "#IR entry : @" << graph->ToString() << "." << graph->debug_info()->get_id() << std::endl; - buffer << "#flags :" << std::endl; - for (const auto &flag : graph->flags()) { - buffer << flag.first << " : " << flag.second << std::endl; + buffer << "#attrs :" << std::endl; + for (const auto &attr : graph->attrs()) { + buffer << attr.first << " : "; + if (attr.second->isa()) { + buffer << GetValue(attr.second); + } else if (attr.second->isa()) { + buffer << GetValue(attr.second); + } + buffer << std::endl; } } @@ -417,10 +423,16 @@ void DumpSubgraph(const OrderedMap fout << std::endl; for (const auto &sg : *sub_graphs) { - fout << "subgraph flag:" << std::endl; + fout << "subgraph attr:" << std::endl; MS_EXCEPTION_IF_NULL(sg.first); - for (const auto &flag : sg.first->flags()) { - fout << flag.first << " : " << flag.second << std::endl; + for (const auto &attr : sg.first->attrs()) { + fout << attr.first << " : "; + if (attr.second->isa()) { + fout << GetValue(attr.second); + } else if (attr.second->isa()) { + fout << GetValue(attr.second); + } + fout << std::endl; } fout << "subgraph @" << sg.first->ToString() << "."; fout << sg.first->debug_info()->get_id() << "("; diff --git a/mindspore/ccsrc/debug/anf_ir_utils.cc b/mindspore/ccsrc/debug/anf_ir_utils.cc index 274cd43914..2b8e61ab15 100644 --- a/mindspore/ccsrc/debug/anf_ir_utils.cc +++ b/mindspore/ccsrc/debug/anf_ir_utils.cc @@ -30,6 +30,7 @@ #include "pipeline/parse/python_adapter.h" #include "pipeline/parse/resolve.h" #include 
"operator/composite/composite.h" +#include "operator/composite/map.h" #include "utils/ordered_map.h" #include "utils/ordered_set.h" #include "utils/utils.h" @@ -190,6 +191,8 @@ std::string AnfExporter::GetMultitypeFuncGraphText(const prim::MultitypeFuncGrap * ├── MultitypeGraph * ├── HyperMap * │ └── HyperMapPy + * ├── Map + * │ └── MapPy * ├── Tail * ├── MakeTupleGradient * ├── GradOperation @@ -208,17 +211,25 @@ std::string AnfExporter::GetMetaFuncGraphText(const MetaFuncGraphPtr &meta_func_ oss << GetMultitypeFuncGraphText(mt_func_graph); } else if (meta_func_graph ->isa()) { // this statement must before 'meta_graph->isa()' - prim::HyperMapPyPtr hyper_map = meta_func_graph->cast(); - MS_EXCEPTION_IF_NULL(hyper_map); + auto hyper_map = meta_func_graph->cast(); if (hyper_map->GetFnLeaf() != nullptr) { oss << "{fn_leaf=" << GetMetaFuncGraphText(hyper_map->GetFnLeaf()) << "}"; } } else if (meta_func_graph->isa()) { - prim::HyperMapPtr hyper_map = meta_func_graph->cast(); - MS_EXCEPTION_IF_NULL(hyper_map); + auto hyper_map = meta_func_graph->cast(); if (hyper_map->GetFnLeaf() != nullptr) { oss << "{fn_leaf=" << GetMetaFuncGraphText(hyper_map->GetFnLeaf()) << "}"; } + } else if (meta_func_graph->isa()) { // this statement must before 'meta_graph->isa()' + auto map = meta_func_graph->cast(); + if (map->GetFnLeaf() != nullptr) { + oss << "{fn_leaf=" << GetMetaFuncGraphText(map->GetFnLeaf()) << "}"; + } + } else if (meta_func_graph->isa()) { + auto map = meta_func_graph->cast(); + if (map->GetFnLeaf() != nullptr) { + oss << "{fn_leaf=" << GetMetaFuncGraphText(map->GetFnLeaf()) << "}"; + } } else if (meta_func_graph->isa()) { prim::GradOperationPtr grad_op = meta_func_graph->cast(); oss << "{get_all=" << grad_op->get_all_ << ", get_by_list=" << grad_op->get_by_list_ @@ -1555,7 +1566,7 @@ class IrParser { return lexer_.GetNextToken(); } else if (type == "Tuple") { return ParseTypeVector(func_graph, lexer_.GetNextToken(), type, ptr); - } else if (type == "Array") { + } 
else if (type == "Tensor") { return ParseTypeArray(func_graph, lexer_.GetNextToken(), ptr); } else if (type == "List") { return ParseTypeVector(func_graph, lexer_.GetNextToken(), type, ptr); @@ -1971,7 +1982,11 @@ class IrParser { MS_LOG(EXCEPTION) << "Cast to type 'PrimitivePyPtr' error"; } } else { - ptr = std::make_shared(id.substr(strlen("PrimitivePy::")), py_obj); + auto len = strlen("PrimitivePy::"); + if (id.size() < len) { + return TOK_ERROR; + } + ptr = std::make_shared(id.substr(len), py_obj); } *val_ptr = ptr; @@ -1988,7 +2003,7 @@ class IrParser { return next; } - Token ParseValueGraphAndNamespace(const std::string &id, ValuePtr *val_ptr) { + Token ParseValueGraphAndNamespace(const std::string &id, ValuePtr *const val_ptr) { if (Match(id, "MultitypeFuncGraph::")) { std::string name = id.substr(strlen("MultitypeFuncGraph::")); auto mt_func_graph = std::make_shared(name); @@ -2028,7 +2043,7 @@ class IrParser { } } - Token ParseValueBasic(const FuncGraphPtr &func_graph, const std::string &id, ValuePtr *val_ptr, + Token ParseValueBasic(const FuncGraphPtr &func_graph, const std::string &id, ValuePtr *const val_ptr, AnfNodePtr *const node_ptr = nullptr) { if (id == "None") { *val_ptr = std::make_shared(); diff --git a/mindspore/ccsrc/debug/anf_ir_utils.h b/mindspore/ccsrc/debug/anf_ir_utils.h index 6c8601c4af..4503692eb9 100644 --- a/mindspore/ccsrc/debug/anf_ir_utils.h +++ b/mindspore/ccsrc/debug/anf_ir_utils.h @@ -91,12 +91,12 @@ class AnfExporter { std::string GetMetaFuncGraphText(const MetaFuncGraphPtr &meta_func_graph); std::string GetAnfNodeText(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const std::map &apply_map); - void ExportOneFuncGraph(std::ofstream &ofs, const FuncGraphPtr &func_graph); + virtual void ExportOneFuncGraph(std::ofstream &ofs, const FuncGraphPtr &func_graph); void OutputParameters(std::ofstream &ofs, const std::vector ¶meters, OrderedMap *param_map); void OutputStatementComment(std::ofstream &ofs, const CNodePtr &node); - 
void OutputCNodes(std::ofstream &ofs, const std::vector &nodes, const FuncGraphPtr &func_graph); + virtual void OutputCNodes(std::ofstream &ofs, const std::vector &nodes, const FuncGraphPtr &func_graph); int param_index; OrderedSet func_graph_set{}; @@ -118,6 +118,8 @@ std::string GetFuncGraphProtoString(const FuncGraphPtr &func_graph); void DumpIRProto(const FuncGraphPtr &func_graph, const std::string &suffix); std::string GetOnnxProtoString(const FuncGraphPtr &func_graph); + +std::string GetBinaryProtoString(const FuncGraphPtr &func_graph); } // namespace mindspore #endif // MINDSPORE_CCSRC_DEBUG_ANF_IR_UTILS_H_ diff --git a/mindspore/ccsrc/debug/info.cc b/mindspore/ccsrc/debug/info.cc index 406bd11fab..f58522cf33 100644 --- a/mindspore/ccsrc/debug/info.cc +++ b/mindspore/ccsrc/debug/info.cc @@ -126,10 +126,10 @@ int64_t DebugInfo::debug_id() { } int64_t DebugInfo::unique_id_through_copy() const { - TraceInfoPtr trace_info = const_cast(this)->trace_info(); - if (trace_info != nullptr) { - if (trace_info->isa() && trace_info->debug_info() != nullptr) { - return trace_info->debug_info()->unique_id_through_copy(); + auto info = trace_info(); + if (info != nullptr) { + if (info->isa() && info->debug_info() != nullptr) { + return info->debug_info()->unique_id_through_copy(); } } return unique_id(); diff --git a/mindspore/ccsrc/debug/info.h b/mindspore/ccsrc/debug/info.h index 9ed216277e..c09c6031b3 100644 --- a/mindspore/ccsrc/debug/info.h +++ b/mindspore/ccsrc/debug/info.h @@ -118,7 +118,7 @@ class TraceContext { void set_location(const LocationPtr &loc) { location_ = loc; } LocationPtr location() { return location_; } void set_trace_info(const TraceInfoPtr &trace_info) { trace_info_ = trace_info; } - TraceInfoPtr trace_info() { return trace_info_; } + TraceInfoPtr trace_info() const { return trace_info_; } void set_func_name(const std::string &func_name) { func_name_ = func_name; } std::string func_name() { return func_name_; } }; @@ -139,7 +139,7 @@ class DebugInfo 
: public Base { std::string get_id() { return std::to_string(debug_id()); } void set_trace_info(const TraceInfoPtr &trace_info) { trace_info_ = trace_info; } - TraceInfoPtr trace_info() { return trace_info_; } + TraceInfoPtr trace_info() const { return trace_info_; } void set_location(const LocationPtr &loc) { location_ = loc; } virtual LocationPtr location() { return location_; } std::string name() { return name_; } diff --git a/mindspore/ccsrc/debug/trace.cc b/mindspore/ccsrc/debug/trace.cc index e5507a8c2b..e12a7b1209 100644 --- a/mindspore/ccsrc/debug/trace.cc +++ b/mindspore/ccsrc/debug/trace.cc @@ -37,6 +37,11 @@ namespace mindspore { // namespace to support debug trace infomation namespace trace { +using abstract::AbstractBasePtr; +using abstract::AnalysisContextPtr; +using abstract::AnalysisEnginePtr; +using abstract::AnfNodeConfigPtr; + std::string GetAbstractStr(const abstract::AbstractBasePtr &abs) { if (abs == nullptr) { return "Null Abstract"; @@ -117,8 +122,23 @@ class AnalyzedFuncGraphExporter : public AnfExporter { void ExportFuncGraph(const std::string &filename, const std::vector &node_cfgs); + void ExportOneFuncGraph(std::ofstream &ofs, const FuncGraphPtr &func_graph); + void OutputCNodes(std::ofstream &ofs, const std::vector &nodes, const FuncGraphPtr &func_graph); + void OutputCNode(std::ofstream &ofs, const CNodePtr &cnode, const FuncGraphPtr &func_graph, int *idx, + std::map *const apply_map); + private: std::string GetNodeType(const AnfNodePtr &nd) override; + AbstractBasePtr GetNodeAbstract(const AnfNodePtr &nd); + AnfNodeConfigPtr GetFordwardConfigPtr(const AnfNodeConfigPtr &cfg); + AnalysisContextPtr ProcessFuncGraphCall(const CNodePtr &node); + + // key: context, val: whether the context has already been printed + std::unordered_map context_map_; + std::vector context_vec_; + + AnalysisContextPtr cur_ctx_ = nullptr; + AnalysisEnginePtr engine_ = nullptr; }; std::unordered_map CalcTaggedFuncGraphs() { @@ -139,17 +159,20 @@ void 
OutputAnalyzedGraphWithType() { } std::string AnalyzedFuncGraphExporter::GetNodeType(const AnfNodePtr &node) { - if (node_cfg_ == nullptr) { + if (cur_ctx_ == nullptr) { return AnfExporter::GetNodeType(node); } - auto ctx = node_cfg_->context(); - auto engine = node_cfg_->engine(); - auto cfg = engine->MakeConfig(node, ctx); - auto eval_result = engine->cache().GetValue(cfg); - if (eval_result == nullptr || eval_result->abstract() == nullptr) { + + MS_EXCEPTION_IF_NULL(engine_); + auto cfg = engine_->MakeConfig(node, cur_ctx_); + auto ret = engine_->cache().GetValue(cfg); + if (ret == nullptr) { + return "Undefined"; + } + auto abs = ret->abstract(); + if (abs == nullptr) { return "Undefined"; } - auto abs = eval_result->abstract(); auto dtype = abs->BuildType(); auto shape = abs->BuildShape(); std::ostringstream oss; @@ -163,6 +186,176 @@ std::string AnalyzedFuncGraphExporter::GetNodeType(const AnfNodePtr &node) { return oss.str(); } +AbstractBasePtr AnalyzedFuncGraphExporter::GetNodeAbstract(const AnfNodePtr &node) { + if (cur_ctx_ == nullptr) { + return nullptr; + } + MS_EXCEPTION_IF_NULL(engine_); + auto cfg = engine_->MakeConfig(node, cur_ctx_); + auto ret = engine_->cache().GetValue(cfg); + return ret == nullptr ? 
nullptr : ret->abstract(); +} + +AnfNodeConfigPtr AnalyzedFuncGraphExporter::GetFordwardConfigPtr(const AnfNodeConfigPtr &cfg) { + AnfNodeConfigPtr cur_cfg = cfg; + auto iter = engine_->anfnode_config_map().find(cur_cfg); + while (iter != engine_->anfnode_config_map().end()) { + auto node = cur_cfg->node(); + cur_cfg = iter->second; + MS_LOG(DEBUG) << "Get forword node: " << node.get() << "[" << node->ToString() << "] --> " << cur_cfg->node().get() + << "[" << cur_cfg->node()->ToString() << "]"; + iter = engine_->anfnode_config_map().find(cur_cfg); + } + return cur_cfg; +} + +AnalysisContextPtr AnalyzedFuncGraphExporter::ProcessFuncGraphCall(const CNodePtr &node) { + if (node == nullptr) { + return nullptr; + } + auto cfg = engine_->MakeConfig(node, cur_ctx_); + cfg = GetFordwardConfigPtr(cfg); + auto cnode = dyn_cast(cfg->node()); + if (cnode == nullptr) { + MS_LOG(DEBUG) << "CNode is nullptr"; + return nullptr; + } + const auto &inputs = cnode->inputs(); + auto op_abs = GetNodeAbstract(inputs[0]); + if (op_abs == nullptr) { + MS_LOG(DEBUG) << "Abstract of inputs[0] of cnode " << cnode->ToString() << " is nullptr"; + return nullptr; + } + + if (!op_abs->isa() && !op_abs->isa()) { + MS_LOG(DEBUG) << "Inputs[0] of cnode " << cnode->ToString() << " is of type " << op_abs->type_name() + << ", not function, ignore it"; + return nullptr; + } + + auto evaluator = engine_->GetEvaluatorFor(dyn_cast(op_abs)); + if (!evaluator->isa()) { + MS_LOG(DEBUG) << "Evaluator for inputs[0] of cnode " << cnode->ToString() << " is of type " + << evaluator->type_name() << ", not BaseFuncGraphEvaluator, ignore it."; + return nullptr; + } + + auto base_fg_evaluator = dyn_cast(evaluator); + auto ctx = base_fg_evaluator->graph_context(); + if (ctx != nullptr && context_map_.insert({ctx, false}).second) { + MS_LOG(DEBUG) << "Add new context, ctx.addr = " << ctx.get() << "ctx = " << ctx->ToString(); + context_vec_.push_back(ctx); + } + return ctx; +} + +void 
AnalyzedFuncGraphExporter::OutputCNode(std::ofstream &ofs, const CNodePtr &cnode, const FuncGraphPtr &func_graph, + int *idx, std::map *const apply_map) { + auto &inputs = cnode->inputs(); + std::string op_text = GetAnfNodeText(func_graph, inputs[0], *apply_map); + // non-return node + if (cnode != func_graph->get_return()) { + int apply_idx = (*idx)++; + (*apply_map)[cnode] = apply_idx; + std::string type_info = GetNodeType(cnode); + if (type_info == "Undefined") { + ofs << " %" << apply_idx << " = " << op_text << "("; + } else { + ofs << " %" << apply_idx << " : " << type_info << " = " << op_text << "("; + } + } else { + ofs << " " << op_text << "("; + } + + for (size_t i = 1; i < inputs.size(); ++i) { + if (i != 1) { + ofs << ", "; + } + AnfNodePtr arg = inputs[i]; + ofs << GetAnfNodeText(func_graph, arg, *apply_map); + } + ofs << ")"; + + // process function graph call + auto ctx = ProcessFuncGraphCall(cnode); + + // output comment + OutputStatementComment(ofs, cnode); + if (ctx != nullptr) { + ofs << " @ctx.addr=" << ctx.get(); + } + ofs << "\n"; + + if (label_manage::GetGlobalTraceLabelType() == label_manage::TraceLabelType::kWithUniqueId) { + ofs << trace::GetDebugInfo(cnode->debug_info(), " # ", kSourceLineTipDiscard) << "#" + << label_manage::Label(cnode->debug_info()) << "\n"; + } else { + ofs << trace::GetDebugInfo(cnode->debug_info(), " # ", kSourceLineTipDiscard) << "\n"; + } +} + +void AnalyzedFuncGraphExporter::OutputCNodes(std::ofstream &ofs, const std::vector &nodes, + const FuncGraphPtr &func_graph) { + if (func_graph == nullptr) { + return; + } + + int idx = 1; + std::map apply_map; + for (const AnfNodePtr &node : nodes) { + MS_EXCEPTION_IF_NULL(node); + if (!node->isa()) { + continue; + } + + auto iter = tagged_cnodes_.find(node); + if (iter != tagged_cnodes_.end()) { + ofs << "\n#------------------------> " << iter->second << "\n"; + } + + auto cnode = node->cast(); + OutputCNode(ofs, cnode, func_graph, &idx, &apply_map); + } +} + +void 
AnalyzedFuncGraphExporter::ExportOneFuncGraph(std::ofstream &ofs, const FuncGraphPtr &func_graph) { + if (func_graph == nullptr) { + return; + } + + std::vector nodes = TopoSort(func_graph->get_return(), SuccIncoming, AlwaysInclude); + std::vector parameters = func_graph->parameters(); + OrderedMap param_map; + + ofs << "# [No." << (exported.size() + 1) << "] " << func_graph->DumpText() << "." + << func_graph->debug_info()->get_id(); + if (cur_ctx_ != nullptr) { + ofs << " @ctx.addr=" << cur_ctx_.get(); + } + ofs << "\n"; + if (label_manage::GetGlobalTraceLabelType() == label_manage::TraceLabelType::kWithUniqueId) { + ofs << trace::GetDebugInfo(func_graph->debug_info(), "# ", kSourceLineTipDiscard) << "#" + << label_manage::Label(func_graph->debug_info()) << "\n"; + } else { + ofs << trace::GetDebugInfo(func_graph->debug_info(), "# ", kSourceLineTipDiscard) << "\n"; + } + ofs << "funcgraph fg_" << func_graph->debug_info()->get_id(); + // output name of parent of graph if exists + if (func_graph->parent() != nullptr) { + ofs << "[fg_" << func_graph->parent()->debug_info()->get_id() << "]"; + } + ofs << "(\n"; + + OutputParameters(ofs, parameters, ¶m_map); + + exported[func_graph] = param_map; + ofs << (!parameters.empty() ? 
" " : "") << ") {\n"; + + OutputCNodes(ofs, nodes, func_graph); + + ofs << "}\n"; +} + void AnalyzedFuncGraphExporter::ExportFuncGraph(const std::string &filename, const std::vector &node_cfgs) { if (node_cfgs.empty()) { @@ -170,6 +363,9 @@ void AnalyzedFuncGraphExporter::ExportFuncGraph(const std::string &filename, return; } + context_map_.clear(); + context_vec_.clear(); + std::ofstream ofs(filename); if (!ofs.is_open()) { MS_LOG(ERROR) << "Open file '" << filename << "' failed!"; @@ -181,32 +377,47 @@ void AnalyzedFuncGraphExporter::ExportFuncGraph(const std::string &filename, // first output graph on the analysis stack for (const auto &node_cfg : node_cfgs) { - auto fg = node_cfg->context()->func_graph(); - // the graph is already output, skip it - if (exported.find(fg) != exported.end()) { + auto ctx = node_cfg->context(); + if (engine_ == nullptr) { + engine_ = node_cfg->engine(); + } + if (context_map_.insert({ctx, false}).second) { + context_vec_.push_back(ctx); + } + // the graph has already been printed + if (context_map_[ctx]) { continue; } - // set node_cfg info for getting type - node_cfg_ = node_cfg; + context_map_[ctx] = true; + + auto fg = ctx->func_graph(); + + // set current context + cur_ctx_ = ctx; tagged_cnodes_ = tagged_func_graphs[fg]; ExportOneFuncGraph(ofs, fg); ofs << "\n\n"; } - node_cfg_ = nullptr; tagged_cnodes_.clear(); // print seperator between function graphs on analyzed graph call stack and others ofs << "#===============================================================================\n\n\n"; // second output other graphs - while (!func_graph_set.empty()) { - FuncGraphPtr fg = *func_graph_set.begin(); - ExportOneFuncGraph(ofs, fg); + size_t ctx_idx = 0; + while (ctx_idx < context_vec_.size()) { + auto ctx = context_vec_[ctx_idx++]; + if (context_map_[ctx]) { + continue; + } + context_map_[ctx] = true; + cur_ctx_ = ctx; + ExportOneFuncGraph(ofs, ctx->func_graph()); ofs << "\n\n"; - (void)func_graph_set.erase(fg); } - ofs << "# num 
of total function graphs: " << exported.size(); + + ofs << "# num of total function graphs: " << context_map_.size() << "\n"; ofs.close(); } diff --git a/mindspore/ccsrc/debug/trace_info.h b/mindspore/ccsrc/debug/trace_info.h index 19c07bdbbc..cf4f0c080a 100644 --- a/mindspore/ccsrc/debug/trace_info.h +++ b/mindspore/ccsrc/debug/trace_info.h @@ -281,6 +281,16 @@ class TraceForceBool : public TraceInfo { TraceInfoPtr clone() override { return std::make_shared(*shared_from_base()); } }; +class TraceForceWhileCond : public TraceInfo { + public: + explicit TraceForceWhileCond(const DebugInfoPtr &info) : TraceInfo(info, "force_while_cond", "") {} + MS_DECLARE_PARENT(TraceForceWhileCond, TraceInfo); + ~TraceForceWhileCond() override = default; + TraceInfoPtr clone() override { + return std::make_shared(*shared_from_base()); + } +}; + class TraceExpandJ : public TraceInfo { public: explicit TraceExpandJ(const DebugInfoPtr &info) : TraceInfo(info, "expand_j", "") {} diff --git a/mindspore/ccsrc/device/CMakeLists.txt b/mindspore/ccsrc/device/CMakeLists.txt index 2ade0f0ef3..7178a01ce6 100644 --- a/mindspore/ccsrc/device/CMakeLists.txt +++ b/mindspore/ccsrc/device/CMakeLists.txt @@ -14,6 +14,17 @@ endif () if (ENABLE_CPU) file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cpu/*.cc") + if (NOT ENABLE_MPI) + list(REMOVE_ITEM CPU_SRC_LIST "cpu/mpi/mpi_adapter.cc") + endif () +endif () + +if (ENABLE_MPI) + # _ms_mpi + set_property(SOURCE "gpu/mpi/mpi_initializer.cc" + PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) + pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc") + target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi) endif () # gpu @@ -39,11 +50,6 @@ if (ENABLE_GPU) set_property(SOURCE ${GPU_COLLECTIVE_SRCS} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) add_library(gpu_collective SHARED ${GPU_COLLECTIVE_SRCS}) - # _ms_mpi - set_property(SOURCE 
"gpu/mpi/mpi_initializer.cc" - PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) - pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc") - target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi) target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl) endif () diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/device/ascend/ascend_device_address.cc index c9fb6bacd3..a47c482c0e 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/device/ascend/ascend_device_address.cc @@ -92,10 +92,29 @@ bool SyncDeviceToHostAndFloatToFloat64(void *dst, size_t dst_size, const void *s return true; } +void AscendDeviceAddress::SyncStream() const { + MS_LOG(INFO) << "Start!"; + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + if (ms_context->execution_mode() != kPynativeMode) { + MS_LOG(INFO) << "Finish!"; + return; + } + auto device_id = ms_context->device_id(); + auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id); + MS_EXCEPTION_IF_NULL(runtime_instance); + auto ret = runtime_instance->SyncStream(); + if (!ret) { + MS_LOG(EXCEPTION) << "Sync stream error!"; + } + MS_LOG(INFO) << "Finish!"; +} + bool AscendDeviceAddress::SyncDeviceToHost(const std::vector &shape, size_t size, mindspore::TypeId type, void *host_ptr) const { MS_LOG(INFO) << "SyncDeviceToHost, Device(format:" << format_ << ", type_id:" << TypeIdLabel(type_id_) << ", size:" << size_ << "), Host(type_id:" << TypeIdLabel(type) << ", size:" << size << ")"; + SyncStream(); bool sync_ok = false; std::vector host_shape; (void)std::transform(shape.begin(), shape.end(), std::back_inserter(host_shape), IntToSize); @@ -186,6 +205,7 @@ bool AscendDeviceAddress::SyncHostToDevice(const std::vector &shape, size_t const void *host_ptr) const { MS_LOG(INFO) << "SyncHostToDevice, Device(format:" << 
format_ << ", type_id:" << TypeIdLabel(type_id_) << ", size:" << size_ << "), Host(type_id:" << TypeIdLabel(type) << ", size:" << size << ")"; + SyncStream(); bool sync_ok = false; std::vector host_shape; (void)std::transform(shape.begin(), shape.end(), std::back_inserter(host_shape), IntToSize); diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.h b/mindspore/ccsrc/device/ascend/ascend_device_address.h index 93746082c1..364f9e95fd 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.h +++ b/mindspore/ccsrc/device/ascend/ascend_device_address.h @@ -35,6 +35,7 @@ class AscendDeviceAddress : public DeviceAddress { ~AscendDeviceAddress() override; bool SyncDeviceToHost(const std::vector &shape, size_t size, TypeId type, void *host_ptr) const override; bool SyncHostToDevice(const std::vector &shape, size_t size, TypeId type, const void *host_ptr) const override; + DeviceAddressType DeviceType() const override { return DeviceAddressType::kAscend; } #ifdef ENABLE_DUMP_E2E bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt, const std::vector &host_shape, TypeId host_type) const; @@ -43,6 +44,7 @@ class AscendDeviceAddress : public DeviceAddress { bool SyncDeviceToHostAndConvertFormat(const std::vector &shape, size_t size, TypeId type, void *host_ptr) const; bool ConvertFormatAndSyncHostToDevice(const std::vector &shape, size_t size, TypeId type, const void *host_ptr) const; + void SyncStream() const; }; using AscendDeviceAddressPtr = std::shared_ptr; } // namespace ascend diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc index 6ffa835204..fb2a3f350b 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc @@ -15,7 +15,6 @@ */ #include "device/ascend/ascend_kernel_runtime.h" - #include #include #include @@ -24,7 +23,9 @@ #include #include 
"device/ascend/ascend_device_address.h" +#include "device/cpu/mpi/mpi_adapter.h" #include "utils/context/ms_context.h" +#include "utils/mpi/mpi_config.h" #include "device/ascend/profiling/profiling_manager.h" #include "hccl/hcom.h" #include "common/trans.h" @@ -51,6 +52,38 @@ namespace mindspore { namespace device { namespace ascend { static const size_t PRAMATER_OUTPUT_INDEX = 0; +namespace { +std::string GetRankId() { + std::string rank_id_str; +#ifdef ENABLE_MPI + auto mpi_config_ptr = MpiConfig::GetInstance(); + MS_EXCEPTION_IF_NULL(mpi_config_ptr); + if (mpi_config_ptr->enable_mpi()) { + int rank_id = device::cpu::MPIAdapter::Instance().GetRankId(); + const char *offset = std::getenv("RANK_OFFSET"); + if (offset != nullptr) { + try { + int rank_offset = std::stoi(offset); + rank_id += rank_offset; + } catch (std::invalid_argument) { + MS_LOG(EXCEPTION) << "stoi invalid argument:" << offset; + } catch (std::out_of_range) { + MS_LOG(EXCEPTION) << "stoi out_of_range:" << offset; + } + } + rank_id_str = std::to_string(rank_id); + } else { + rank_id_str = std::getenv("RANK_ID"); + } +#else + rank_id_str = std::getenv("RANK_ID"); +#endif + if (rank_id_str.empty()) { + MS_LOG(ERROR) << "get hccl rankid failed, please set env RANK_ID"; + } + return rank_id_str; +} +} // namespace AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); } @@ -65,13 +98,13 @@ void AscendKernelRuntime::ClearGraphModelMap() { } void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { - MS_LOG(INFO) << "clear graph:" << graph_id << " runtime resource"; + MS_LOG(DEBUG) << "clear graph:" << graph_id << " runtime resource"; auto iter = graph_model_map_.find(graph_id); if (iter == graph_model_map_.end()) { - MS_LOG(WARNING) << "GraphId:" << graph_id << " not found"; + MS_LOG(DEBUG) << "GraphId:" << graph_id << " not found"; return; } - MS_LOG(INFO) << "Ge UnloadModel " << iter->first; + MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first; auto ret = 
ge::model_runner::ModelRunner::Instance().UnloadModel(iter->first); if (!ret) { MS_LOG(ERROR) << "UnloadModel failed"; @@ -124,6 +157,12 @@ bool AscendKernelRuntime::Init() { } #endif + // Start up profiling before rtSetDevice + ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); + if (!ret) { + MS_EXCEPTION(DeviceProcessError) << "StartupProfiling failed."; + } + ret = InitDevice(); if (!ret) { return ret; @@ -132,11 +171,6 @@ bool AscendKernelRuntime::Init() { MS_EXCEPTION_IF_NULL(mem_manager_); mem_manager_->MallocDeviceMemory(); - ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); - if (!ret) { - MS_EXCEPTION(DeviceProcessError) << "StartupProfiling failed."; - } - initialized_ = true; return ret; } @@ -259,6 +293,15 @@ bool AscendKernelRuntime::DumpData(mindspore::session::KernelGraph *graph) { return true; } +bool AscendKernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) { + if (AnfAlgo::OutputAddrExist(kernel, index)) { + auto address = AnfAlgo::GetOutputAddr(kernel, index); + MS_EXCEPTION_IF_NULL(address); + return address->DeviceType() == DeviceAddressType::kAscend; + } + return false; +} + DeviceAddressPtr AscendKernelRuntime::CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, TypeId type_id) { return std::make_shared(device_ptr, device_size, format, type_id); @@ -284,38 +327,34 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { vector> task_info_list; auto anf_node_list = graph->execution_order(); TaskGenerator::GenTasks(anf_node_list, &task_info_list, graph->graph_id()); - // Store the task_info_list auto insert_ret = task_map_.insert(std::make_pair(graph->graph_id(), task_info_list)); if (!insert_ret.second) { MS_LOG(EXCEPTION) << "Duplicate GraphId! Please check in ascend_session."; } - // Graph may have no compute node, such TensorAddGrad. 
if (task_info_list.empty()) { MS_LOG(WARNING) << "graph " << graph->graph_id() << " have no compute node"; return true; } - - AscendStreamAssign &stream_assign_instance = AscendStreamAssign::GetInstance(); + AscendStreamAssign &assign_instance = AscendStreamAssign::GetInstance(); + AscendStreamMng &stream_manager = AscendStreamMng::GetInstance(); AscendLabelAssign &label_assign_instance = AscendLabelAssign::GetInstance(); // the streams' flag not HEAD_STREAM std::vector wait_active_stream_list; - stream_assign_instance.GetWaitStreams(&wait_active_stream_list); - auto force_copy_stream_list = stream_assign_instance.hcom_streams(); - - MS_LOG(INFO) << "call DavinciModel total stream num:" << stream_assign_instance.GetTotalStreamNum() - << ", total event num:" << stream_assign_instance.total_event_num() + assign_instance.GetWaitStreams(&wait_active_stream_list); + std::vector force_copy_stream_list; + assign_instance.GetHcomStreams(&force_copy_stream_list); + MS_LOG(INFO) << "call DavinciModel total stream num:" << stream_manager.GetCurAllocStreamNum() + << ", total event num:" << assign_instance.total_event_num() << ", total label num:" << label_assign_instance.GetLabelNum(NOT_NULL(graph)) << ", wait_active_stream_list size:" << wait_active_stream_list.size() << ", force_copy_stream_list size:" << force_copy_stream_list.size(); - std::vector> empty_list; std::shared_ptr model = std::make_shared( task_info_list, empty_list, empty_list, empty_list, empty_list, wait_active_stream_list, force_copy_stream_list, 0, - 0, 0, 0, 0, 0, stream_assign_instance.GetTotalStreamNum(), label_assign_instance.GetLabelNum(NOT_NULL(graph)), - stream_assign_instance.total_event_num(), 0); - + 0, 0, 0, 0, 0, stream_manager.GetCurAllocStreamNum(), label_assign_instance.GetLabelNum(NOT_NULL(graph)), + assign_instance.total_event_num(), 0); auto ret = graph_model_map_.insert(std::make_pair(graph->graph_id(), model)); if (!ret.second) { MS_LOG(EXCEPTION) << "Duplicate GraphId! 
Please check in ascend_session."; @@ -356,7 +395,8 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) { } if (ProfilingManager::GetInstance().IsProfiling()) { auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(model_iter->first); - ProfilingUtils::ReportProfilingData(task_ids, NOT_NULL(graph)); + auto stream_ids = ge::model_runner::ModelRunner::Instance().GetStreamIdList(model_iter->first); + ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph)); } return true; } @@ -486,30 +526,23 @@ bool AscendKernelRuntime::HcclInit() { if (!context_ptr->IsTsdOpened()) { MS_LOG(EXCEPTION) << "Hccl dependent tsd is not open"; } - MS_LOG(INFO) << "do hcom init"; auto config_path_str = std::getenv("MINDSPORE_HCCL_CONFIG_PATH"); if (config_path_str == nullptr) { config_path_str = std::getenv("RANK_TABLE_FILE"); if (config_path_str == nullptr) { MS_LOG(ERROR) << "get hccl json config failed, please set env MINDSPORE_HCCL_CONFIG_PATH or RANK_TABLE_FILE"; + return false; } - return false; } + std::string rank_id_str = GetRankId(); auto full_path = realpath(config_path_str, nullptr); if (full_path == nullptr) { MS_LOG(ERROR) << "file path " << config_path_str << " does not exist"; return false; } - - const char *identify = std::getenv("RANK_ID"); - if (identify == nullptr) { - MS_LOG(ERROR) << "get hccl rankid failed, please set env RANK_ID"; - free(full_path); - return false; - } - MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << identify; - hcclResult_t res = hcom_init(full_path, identify); + MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << rank_id_str; + hcclResult_t res = hcom_init(full_path, rank_id_str.c_str()); free(full_path); if (res != HCCL_SUCCESS) { MS_LOG(ERROR) << "hcom init failed, res is " << static_cast(res); diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h index 
336cfdc9f2..28076f95b7 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h @@ -41,11 +41,12 @@ class AscendKernelRuntime : public KernelRuntime { bool RunTask(const session::KernelGraph *graph) override; bool LoadTask(const session::KernelGraph *graph) override; void ClearGraphRuntimeResource(uint32_t graph_id) override; + bool SyncStream() override; protected: DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, TypeId type_id) override; - bool SyncStream() override; + bool NodeOutputDeviceAddressExist(const AnfNodePtr &node, size_t index) override; private: bool InitDevice(); diff --git a/mindspore/ccsrc/device/ascend/ascend_label_assign.cc b/mindspore/ccsrc/device/ascend/ascend_label_assign.cc index 9908b5d03d..7af615f448 100644 --- a/mindspore/ccsrc/device/ascend/ascend_label_assign.cc +++ b/mindspore/ccsrc/device/ascend/ascend_label_assign.cc @@ -33,11 +33,9 @@ static void UpdateLabelGoto(NotNull node) { if (node->size() <= kLabelGotoLabelId) { MS_LOG(EXCEPTION) << "Node " << node->DebugString() << " has invalid input size " << node->size(); } - auto label_set = AnfAlgo::GetCNodePrimitive(node->input(kLabelGotoLabelId)); - MS_EXCEPTION_IF_NULL(label_set); - auto value = label_set->GetAttr(kAttrLabelIndex); - MS_EXCEPTION_IF_NULL(value); - uint32_t goto_label_id = GetValue(value); + + auto input = node->input(kLabelGotoLabelId); + uint32_t goto_label_id = AnfAlgo::GetNodeAttr(input, kAttrLabelIndex); AnfAlgo::SetNodeAttr(kAttrLabelIndex, MakeValue(goto_label_id), node.get()); MS_LOG(INFO) << "Node " << node->DebugString() << " goto label id " << goto_label_id; node->set_inputs({node->input(0)}); @@ -57,16 +55,12 @@ static void UpdateLabelSwitch(NotNull node) { break; } - auto label_set = AnfAlgo::GetCNodePrimitive(input); - MS_EXCEPTION_IF_NULL(label_set); - auto value = label_set->GetAttr(kAttrLabelIndex); - MS_EXCEPTION_IF_NULL(value); - 
uint32_t goto_label_id = GetValue(value); + uint32_t goto_label_id = AnfAlgo::GetNodeAttr(input, kAttrLabelIndex); label_list.push_back(goto_label_id); MS_LOG(INFO) << "Switch " << node->DebugString() << " case " << i - kLabelSwitchLabelId << ": id " << goto_label_id; } AnfAlgo::SetNodeAttr(kAttrLabelSwitchList, MakeValue>(label_list), node.get()); - node->set_inputs({node->input(0), node->input(1)}); + node->set_inputs({node->input(kAnfPrimitiveIndex), node->input(kFirstDataInputIndex)}); } static void AssignLabelForLabelSet(NotNull> graph, NotNull label_id, @@ -154,8 +148,8 @@ uint32_t AscendLabelAssign::GetLabelNum(NotNull gr std::lock_guard lock(label_num_mutex_); auto iter = label_num_.find(graph.get()); if (iter == label_num_.end()) { - MS_LOG(WARNING) << "Graph " << graph->ToString() << " has not assigned label."; - return 1; + MS_LOG(DEBUG) << "Graph " << graph->ToString() << " has not assigned label, defalut is 0."; + return 0; } return iter->second; } diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc index 4c7b897cac..42c611c3af 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - +#include #include "device/ascend/ascend_memory_manager.h" #include "device/ascend/ascend_memory_pool.h" #include "utils/context/ms_context.h" @@ -21,25 +21,52 @@ namespace mindspore { namespace device { namespace ascend { -const uint64_t kAscendDeviceMemGB = 26; -const uint64_t kAscendMemPoolGB = 4; -const uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << 30); -const uint64_t kAscendMemPoolSize = (kAscendMemPoolGB << 30); +constexpr uint64_t kAscendDeviceMemGB = 26; +constexpr uint64_t kAscendMemPoolGB = 4; +constexpr uint64_t kMemSizeGB = 30; +constexpr uint64_t kMaxMemSizeGB = 30; +constexpr uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << kMemSizeGB); +constexpr uint64_t kAscendMemPoolSize = (kAscendMemPoolGB << kMemSizeGB); void AscendMemoryManager::MallocDeviceMemory() { - device_mem_size_ = kAscendDeviceMemSize; + auto context_mem = GetDeviceMemSizeFromContext(); + device_mem_size_ = context_mem == 0 ? kAscendDeviceMemSize : context_mem; static_mem_offset_ = device_mem_size_; auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM); if (ret != RT_ERROR_NONE) { MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]"; } - device_mem_pool_size_ = kAscendMemPoolSize; - ret = rtMalloc(reinterpret_cast(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); - if (ret != RT_ERROR_NONE) { - MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; + + if (context_mem == 0) { + device_mem_pool_size_ = kAscendMemPoolSize; + ret = rtMalloc(reinterpret_cast(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); + if (ret != RT_ERROR_NONE) { + MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; + } + AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); + 
AscendMemoryPool::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); + } +} + +uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() { + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + auto variable_memory_max_size = context->variable_memory_max_size(); + if (variable_memory_max_size == "0") { + return 0; + } + MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size; + auto pos = variable_memory_max_size.find('*'); + if (pos == std::string::npos) { + MS_LOG(EXCEPTION) << "Invalid variable_memory_max_size"; + } + auto gb_str = variable_memory_max_size.substr(0, pos); + auto gb_var = std::stoull(gb_str); + MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var; + if (gb_var > kMaxMemSizeGB || gb_var == 0) { + MS_LOG(EXCEPTION) << "Invalid allocate memory size:" << gb_var << " which should be in (0-30]GB"; } - AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); - AscendMemoryPool::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); + return gb_var << kMemSizeGB; } void AscendMemoryManager::FreeDeviceMemory() { diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h index 90c8b2dfca..7fdd8f553e 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h @@ -32,6 +32,8 @@ class AscendMemoryManager : public MemoryManager { private: uint8_t *device_mem_pool_base_{nullptr}; uint64_t device_mem_pool_size_{0}; + + uint64_t GetDeviceMemSizeFromContext(); }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc index 5e15055a08..f0bad6b492 100644 --- a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc +++ b/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc @@ -33,238 +33,220 @@ namespace device { namespace ascend { 
const uint32_t kHcomMaxTask = 5; const uint32_t kCommonMaxTask = 350; -const uint32_t kIndependFirstStreamId = 1024; -bool AscendStreamAssign::IsHcom(const CNodePtr &apply_kernel) { - MS_EXCEPTION_IF_NULL(apply_kernel); - return AnfAlgo::GetKernelType(apply_kernel) == HCCL_KERNEL; -} - -void AscendStreamAssign::ResetNew() { - total_common_stream_num_ = 0; - total_independ_stream_num_ = 0; - total_event_num_ = 0; - first_physic_id_ = UINT32_MAX; - first_logic_id_ = UINT32_MAX; - independent_id_ = kIndependFirstStreamId; - logic_to_independent_map_.clear(); - processed_logic_id_.clear(); - logic_to_physic_map_.clear(); - independent_before_physic_id_.clear(); - inner_parallel_streams_.clear(); - processed_parallel_streams_.clear(); - hcom_stream_list_.clear(); - need_first_active_streams_.clear(); -} - -void AscendStreamAssign::AssignIndependentStreamId(const CNodePtr &cur_cnode_ptr, uint32_t processing_logic_id) { - MS_EXCEPTION_IF_NULL(cur_cnode_ptr); - auto it = logic_to_independent_map_.find(processing_logic_id); - if (it == logic_to_independent_map_.end()) { - (void)logic_to_independent_map_.insert(std::make_pair(processing_logic_id, independent_id_)); - AnfAlgo::SetStreamId(independent_id_, cur_cnode_ptr.get()); - independent_id_++; - } else { - AnfAlgo::SetStreamId(it->second, cur_cnode_ptr.get()); - } - - if (first_physic_id_ == UINT32_MAX) { - auto res = std::find(independent_before_physic_id_.begin(), independent_before_physic_id_.end(), - AnfAlgo::GetStreamId(cur_cnode_ptr)); - if (res == independent_before_physic_id_.end()) { - independent_before_physic_id_.push_back(AnfAlgo::GetStreamId(cur_cnode_ptr)); - } - } -} - -void AscendStreamAssign::AssignCommonStreamId(const CNodePtr &cur_cnode_ptr, CNodePtr *pre_cnode_ptr, - uint32_t *cur_index, uint32_t *cur_stream_id) { - MS_EXCEPTION_IF_NULL(cur_cnode_ptr); - MS_EXCEPTION_IF_NULL(*pre_cnode_ptr); - bool over_max_hcom_task = (IsHcom(cur_cnode_ptr) && (*cur_index) % kHcomMaxTask == 0); - bool 
over_max_common_task = (!IsHcom(cur_cnode_ptr) && (*cur_index) % kCommonMaxTask == 0); - bool pre_common_cur_hcom = (IsHcom(cur_cnode_ptr) && !IsHcom(*pre_cnode_ptr)); - bool pre_hcom_cur_common = (!IsHcom(cur_cnode_ptr) && IsHcom(*pre_cnode_ptr)); - if (over_max_hcom_task || over_max_common_task || pre_common_cur_hcom || pre_hcom_cur_common) { - *cur_index = 0; - ++(*cur_stream_id); - } - - if (over_max_hcom_task || pre_common_cur_hcom) { - hcom_stream_list_.emplace_back(*cur_stream_id); - } - ++(*cur_index); - AnfAlgo::SetStreamId(*cur_stream_id, cur_cnode_ptr.get()); - *pre_cnode_ptr = cur_cnode_ptr; -} - -bool AscendStreamAssign::IsProcessed(uint32_t logic_id) { - auto it = std::find(processed_logic_id_.begin(), processed_logic_id_.end(), logic_id); - if (it == processed_logic_id_.end()) { - return false; - } - - return true; -} +void AscendStreamAssign::AssignStream(const shared_ptr &graph_ptr) { + if (IsTaskSink()) { + Reset(); + ReorderIndependentOrders(graph_ptr); + AssignAllNodesStream(graph_ptr); + UpdateAtomicAddrCleanStreamId(graph_ptr); + FindHcomParallelStreams(graph_ptr); + InsertStreamActive(graph_ptr); + InsertSendRecvForHcomParallel(graph_ptr); + InsertSendRecvForIndependent(graph_ptr); + UpdateEventId(graph_ptr); + GetNeedActiveStreams(graph_ptr); + graph_ptr->PrintGraphExecuteOrder(); + CheckStreamAssign(graph_ptr); + MS_LOG(INFO) << "after finish stream assign"; -void AscendStreamAssign::RecordIdMap(uint32_t logic_id, uint32_t physic_id) { - auto it = logic_to_physic_map_.find(logic_id); - if (it == logic_to_physic_map_.end()) { - MS_LOG(INFO) << "New logic_id[" << logic_id << "] to physic_id[" << physic_id << "]"; - (void)logic_to_physic_map_.insert(std::make_pair(logic_id, physic_id)); + // Get info for D Model + AscendStreamMng &stream_manager = AscendStreamMng::GetInstance(); + generator::IRModelUtil::GetInstance().set_event_num(total_event_num()); + 
generator::IRModelUtil::GetInstance().set_stream_num(stream_manager.GetCurAllocStreamNum()); + // Init to 1,temporarily + generator::IRModelUtil::GetInstance().set_batch_num(1); } } -void AscendStreamAssign::RecordFirstCommonOp(const CNodePtr &cur_cnode_ptr, uint32_t cur_node_logic_id, - uint32_t cur_stream_id) { - AnfAlgo::SetStreamId(cur_stream_id, cur_cnode_ptr.get()); - RecordIdMap(cur_node_logic_id, cur_stream_id); - first_physic_id_ = cur_stream_id; - first_logic_id_ = cur_node_logic_id; -} +// section 0 +void AscendStreamAssign::CheckStreamAssign(const shared_ptr &graph_ptr) { + MS_EXCEPTION_IF_NULL(graph_ptr); + std::set streams; + uint32_t max_stream = 0; + uint32_t min_stream = kInvalidStreamId; + const std::vector &cnode_ptr_list = graph_ptr->execution_order(); + for (size_t i = 0; i < cnode_ptr_list.size(); ++i) { + CNodePtr cur_cnode_ptr = cnode_ptr_list[i]; + MS_EXCEPTION_IF_NULL(cur_cnode_ptr); + uint32_t stream_id = AnfAlgo::GetStreamId(cur_cnode_ptr); + if (stream_id == kInvalidStreamId) { + MS_LOG(EXCEPTION) << "node [" << AnfAlgo::GetCNodeName(cur_cnode_ptr) << "] had not been assigned streams"; + } -uint32_t AscendStreamAssign::GetLogicId(const CNodePtr &cur_cnode_ptr) { - uint32_t logic_id = AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()); - if (logic_id == kInvalidDistincLabel) { - MS_LOG(EXCEPTION) << "node[" << cur_cnode_ptr->DebugString() << "] logic id is invalid"; + streams.emplace(stream_id); + if (stream_id > max_stream) { + max_stream = stream_id; + } + if (stream_id < min_stream) { + min_stream = stream_id; + } } - return logic_id; -} -void AscendStreamAssign::SetCommonStreamNum(uint32_t cur_stream_id) { - if (first_physic_id_ == UINT32_MAX) { - MS_LOG(INFO) << "cur common node size is zero"; - total_common_stream_num_ = 0; - } else { - total_common_stream_num_ = cur_stream_id + 1; + if (!streams.empty()) { + if (min_stream != 0) { + MS_LOG(EXCEPTION) << "before stream assign, assigned stream should start from 0, now is from " 
<< min_stream; + } + if (max_stream != (streams.size() - 1)) { + MS_LOG(EXCEPTION) << "before stream assign, assigned stream should be consecutive"; + } } } +// section 1 void AscendStreamAssign::AssignAllNodesStream(const shared_ptr &graph_ptr) { MS_EXCEPTION_IF_NULL(graph_ptr); auto cnode_ptr_list = graph_ptr->execution_order(); CNodePtr pre_cnode_ptr = nullptr; uint32_t cur_index = 0; uint32_t cur_stream_id = 0; - uint32_t processing_logic_id = UINT32_MAX; + bool exit_independent = false; + AscendStreamMng &stream_manager = AscendStreamMng::GetInstance(); for (size_t i = 0; i < cnode_ptr_list.size(); ++i) { CNodePtr cur_cnode_ptr = cnode_ptr_list[i]; MS_EXCEPTION_IF_NULL(cur_cnode_ptr); - // get logic id - uint32_t cur_node_logic_id = GetLogicId(cur_cnode_ptr); + if (AnfAlgo::GetStreamId(cur_cnode_ptr) != kInvalidStreamId) { + continue; + } if (IsIndependentNode(cur_cnode_ptr)) { - AssignIndependentStreamId(cur_cnode_ptr, cur_node_logic_id); + exit_independent = true; continue; } + // first common node, only exe one time if (pre_cnode_ptr == nullptr) { - RecordFirstCommonOp(cur_cnode_ptr, cur_node_logic_id, cur_stream_id); - processing_logic_id = cur_node_logic_id; + uint32_t cur_stream_num = stream_manager.GetCurAllocStreamNum(); + if (cur_stream_num == 0) { + cur_stream_id = stream_manager.ApplyNewStream(); + } else { + cur_stream_id = stream_manager.GetCurAllocStream(); + } ++cur_index; pre_cnode_ptr = cur_cnode_ptr; + AnfAlgo::SetStreamId(cur_stream_id, cur_cnode_ptr.get()); + if (IsHcom(cur_cnode_ptr)) { + hcom_stream_list_.emplace(cur_stream_id); + } continue; } - // 1.has been processed - if (IsProcessed(cur_node_logic_id)) { - continue; - } + AssignCommonStreamId(cur_cnode_ptr, &pre_cnode_ptr, &cur_index, &cur_stream_id); + } - if (cur_node_logic_id == processing_logic_id) { - AssignCommonStreamId(cur_cnode_ptr, &pre_cnode_ptr, &cur_index, &cur_stream_id); - } else { - // 1.find other same logic id - for (size_t j = i; j < cnode_ptr_list.size(); ++j) { - 
CNodePtr cnode_ptr = cnode_ptr_list[j]; - MS_EXCEPTION_IF_NULL(cnode_ptr); - uint32_t logic_id = AnfAlgo::GetStreamDistinctionLabel(cnode_ptr.get()); - if (logic_id == processing_logic_id) { - AssignCommonStreamId(cnode_ptr, &pre_cnode_ptr, &cur_index, &cur_stream_id); - } + if (exit_independent) { + uint32_t first_independent_stream_id = stream_manager.ApplyNewStream(); + for (size_t i = 0; i < cnode_ptr_list.size(); ++i) { + CNodePtr cur_cnode_ptr = cnode_ptr_list[i]; + MS_EXCEPTION_IF_NULL(cur_cnode_ptr); + if (AnfAlgo::GetStreamId(cur_cnode_ptr) != kInvalidStreamId) { + continue; + } + if (IsIndependentNode(cur_cnode_ptr)) { + AssignIndependentStreamId(cur_cnode_ptr); } - // 2.after deal: - processed_logic_id_.push_back(processing_logic_id); - cur_cnode_ptr = cnode_ptr_list[i]; - // 3. new stream - ++cur_stream_id; - AnfAlgo::SetStreamId(cur_stream_id, cur_cnode_ptr.get()); - cur_index = 1; - - pre_cnode_ptr = cur_cnode_ptr; - processing_logic_id = cur_node_logic_id; - RecordIdMap(processing_logic_id, cur_stream_id); } + MS_LOG(INFO) << "independent start from :" << first_independent_stream_id; } - SetCommonStreamNum(cur_stream_id); - total_independ_stream_num_ = independent_id_ - kIndependFirstStreamId; - MS_LOG(INFO) << "stream nums:common:" << total_common_stream_num_ << ",independ:" << total_independ_stream_num_; + MS_LOG(INFO) << "total stream nums:" << stream_manager.GetCurAllocStreamNum(); } -void AscendStreamAssign::TransLogicToPhysic(const vector &logic_ids, vector *physic_ids) { - for (auto &id : logic_ids) { - auto it = logic_to_physic_map_.find(id); - if (it != logic_to_physic_map_.end()) { - MS_LOG(INFO) << "logic id[" << id << "] to physic id[" << it->second << "]"; - (*physic_ids).push_back(it->second); +void AscendStreamAssign::AssignIndependentStreamId(const CNodePtr &cur_cnode_ptr) { + MS_EXCEPTION_IF_NULL(cur_cnode_ptr); + AscendStreamMng &stream_manager = AscendStreamMng::GetInstance(); + uint32_t cur_independent_id = 
stream_manager.GetCurAllocStream(); + auto it = independent_stream_map_.find(cur_independent_id); + if (it == independent_stream_map_.end()) { + AnfAlgo::SetStreamId(cur_independent_id, cur_cnode_ptr.get()); + independent_stream_map_.emplace(cur_independent_id, 1); + } else { + if (it->second < kCommonMaxTask) { + AnfAlgo::SetStreamId(it->first, cur_cnode_ptr.get()); + it->second++; } else { - MS_LOG(EXCEPTION) << "logic id[" << id << "] has no correspond physic id"; + cur_independent_id = stream_manager.ApplyNewStream(); + AnfAlgo::SetStreamId(cur_independent_id, cur_cnode_ptr.get()); + independent_stream_map_.emplace(cur_independent_id, 1); } + } +} + +bool AscendStreamAssign::IsIndependentNode(const CNodePtr &node_ptr) { + MS_EXCEPTION_IF_NULL(node_ptr); + if (AnfAlgo::GetKernelType(node_ptr) != AICPU_KERNEL) { + return false; + } - auto it_independ = logic_to_independent_map_.find(id); - if (it_independ != logic_to_independent_map_.end()) { - MS_LOG(INFO) << "logic id[" << id << "] to independent id[" << it_independ->second << "]"; - (*physic_ids).push_back(it_independ->second); + if (AnfAlgo::GetCNodeName(node_ptr) == kGetNextOpName) { + MS_LOG(INFO) << "GetNext should not be independent node"; + return false; + } + + uint32_t input_nums = AnfAlgo::GetInputTensorNum(node_ptr); + if (input_nums == 0) { + MS_LOG(INFO) << "node " << node_ptr->fullname_with_scope() << " is independent, as inputs nums is zero"; + return true; + } + + const std::vector &inputs = node_ptr->inputs(); + for (size_t i = 1; i < inputs.size(); i++) { + if (!inputs[i]->isa()) { + return false; } } + MS_LOG(INFO) << "node " << node_ptr->fullname_with_scope() << " is independent, as inputs is all value node"; + return true; } -void AscendStreamAssign::UpdateStreamActive(const CNodePtr &active_ptr) { - MS_LOG(INFO) << "start update outter active op[" << active_ptr->DebugString() << "] "; - MS_EXCEPTION_IF_NULL(active_ptr); - auto primitive = AnfAlgo::GetCNodePrimitive(active_ptr); - 
MS_EXCEPTION_IF_NULL(primitive); - vector active_logic_ids = GetValue>(primitive->GetAttr(kAttrActiveStreamList)); - // out StreamAcitve active physic stream is not parallel now, if parallel, should deal here. - vector active_physic_ids; - TransLogicToPhysic(active_logic_ids, &active_physic_ids); - ValuePtr active_physic_value = MakeValue>(active_physic_ids); - AnfAlgo::SetNodeAttr(kAttrActiveStreamList, active_physic_value, active_ptr); -} +void AscendStreamAssign::AssignCommonStreamId(const CNodePtr &cur_cnode_ptr, CNodePtr *pre_cnode_ptr, + uint32_t *cur_index, uint32_t *cur_stream_id) { + MS_EXCEPTION_IF_NULL(cur_cnode_ptr); + MS_EXCEPTION_IF_NULL(pre_cnode_ptr); + MS_EXCEPTION_IF_NULL(*pre_cnode_ptr); + AscendStreamMng &stream_manager = AscendStreamMng::GetInstance(); + bool over_max_hcom_task = (IsHcom(cur_cnode_ptr) && (*cur_index) % kHcomMaxTask == 0); + bool over_max_common_task = (!IsHcom(cur_cnode_ptr) && (*cur_index) % kCommonMaxTask == 0); + bool pre_common_cur_hcom = (IsHcom(cur_cnode_ptr) && !IsHcom(*pre_cnode_ptr)); + bool pre_hcom_cur_common = (!IsHcom(cur_cnode_ptr) && IsHcom(*pre_cnode_ptr)); + if (over_max_hcom_task || over_max_common_task || pre_common_cur_hcom || pre_hcom_cur_common) { + *cur_index = 0; + *cur_stream_id = stream_manager.ApplyNewStream(); + } -void AscendStreamAssign::UpdateStreamSwitch(const CNodePtr &switch_ptr, const CNodePtr &active_ptr) { - MS_LOG(INFO) << "start update switch op[" << switch_ptr->DebugString() << "]"; - MS_EXCEPTION_IF_NULL(switch_ptr); - MS_EXCEPTION_IF_NULL(active_ptr); - auto primitive = AnfAlgo::GetCNodePrimitive(switch_ptr); - MS_EXCEPTION_IF_NULL(primitive); - auto true_logic_id = GetValue(primitive->GetAttr(kAttrTrueBranchStream)); - MS_LOG(INFO) << "streamswtich stream id[" << AnfAlgo::GetStreamId(switch_ptr) << "], true_logic_id[" << true_logic_id - << "]"; - vector logic_ids{true_logic_id}; - vector physic_ids; - TransLogicToPhysic(logic_ids, &physic_ids); - if (physic_ids.empty()) { - 
MS_LOG(EXCEPTION) << "stream switch true logic id[" << true_logic_id << "] has no physical id"; + ++(*cur_index); + AnfAlgo::SetStreamId(*cur_stream_id, cur_cnode_ptr.get()); + *pre_cnode_ptr = cur_cnode_ptr; + + // record ll hcom streams as hcom stream has different stream flag + if (IsHcom(cur_cnode_ptr)) { + auto it = std::find(hcom_stream_list_.begin(), hcom_stream_list_.end(), *cur_stream_id); + if (it == hcom_stream_list_.end()) { + MS_LOG(INFO) << "hcom stream id:" << *cur_stream_id; + hcom_stream_list_.emplace(*cur_stream_id); + } } - ValuePtr true_index = MakeValue(physic_ids[0]); - AnfAlgo::SetNodeAttr(kAttrTrueBranchStream, true_index, switch_ptr); +} - MS_LOG(INFO) << "start update StreamActive op[" << active_ptr->DebugString() << "]"; - AnfAlgo::SetStreamId(physic_ids[0], active_ptr.get()); - vector active_ids; - for (size_t i = 0; i < physic_ids.size(); i++) { - if (i == 0) { - MS_LOG(INFO) << "StreamActive op self stream id[" << physic_ids[i] << "]"; - } else { - MS_LOG(INFO) << "StreamActive op active stream id[" << physic_ids[i] << "]"; - active_ids.emplace_back(physic_ids[i]); +// section 2: +void AscendStreamAssign::UpdateAtomicAddrCleanStreamId(const shared_ptr &graph_ptr) { + MS_LOG(INFO) << "start"; + MS_EXCEPTION_IF_NULL(graph_ptr); + const std::vector &cnode_ptr_list = graph_ptr->execution_order(); + for (size_t i = 0; i < cnode_ptr_list.size(); ++i) { + CNodePtr cur_cnode_ptr = cnode_ptr_list[i]; + MS_EXCEPTION_IF_NULL(cur_cnode_ptr); + // update AtomicAddrClean stream same witch the next node + if (i > 0 && AnfAlgo::GetCNodeName(cnode_ptr_list[i - 1]) == kAtomicAddrCleanOpName) { + MS_LOG(INFO) << "update AtomicAddrClean stream id from[" << AnfAlgo::GetStreamId(cnode_ptr_list[i - 1]) + << "] to [" << AnfAlgo::GetStreamId(cur_cnode_ptr) << "]"; + AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(cur_cnode_ptr), cnode_ptr_list[i - 1].get()); } } - AnfAlgo::SetNodeAttr(kAttrActiveStreamList, MakeValue>(active_ids), active_ptr); + MS_LOG(INFO) << 
"end"; } -void AscendStreamAssign::FindAllReduceParallel(const shared_ptr &graph_ptr) { +// section 3 +void AscendStreamAssign::FindHcomParallelStreams(const shared_ptr &graph_ptr) { MS_EXCEPTION_IF_NULL(graph_ptr); CNodePtr cur_cnode_ptr = nullptr; CNodePtr pre_cnode_ptr = nullptr; @@ -280,9 +262,9 @@ void AscendStreamAssign::FindAllReduceParallel(const shared_ptr{pre_stream_id, cur_stream_id}); } @@ -291,45 +273,107 @@ void AscendStreamAssign::FindAllReduceParallel(const shared_ptr &graph_ptr) { +// section 4 +void AscendStreamAssign::UpdateStreamSwitch(const std::shared_ptr &graph_ptr, + const CNodePtr &switch_ptr, const vector &independent_stream, + vector *orders) { + MS_EXCEPTION_IF_NULL(orders); + orders->emplace_back(switch_ptr); + auto primitive = AnfAlgo::GetCNodePrimitive(switch_ptr); + MS_EXCEPTION_IF_NULL(primitive); + auto value_ptr = primitive->GetAttr(kStreamNeedActivedFirst); + if (value_ptr == nullptr) { + return; + } + + auto need_active = GetValue(value_ptr); + if (!need_active) { + return; + } + + MS_LOG(INFO) << "start update switch op[" << switch_ptr->DebugString() << "]"; + MS_EXCEPTION_IF_NULL(switch_ptr); + auto true_stream_id = GetValue(primitive->GetAttr(kAttrTrueBranchStream)); + MS_LOG(INFO) << "streamswtich stream id[" << AnfAlgo::GetStreamId(switch_ptr) << "], true_logic_id[" << true_stream_id + << "]"; + + CNodePtr active_ptr = KernelAdjust::GetInstance().CreateStreamActiveOp(graph_ptr); + MS_LOG(INFO) << "start update StreamActive op[" << active_ptr->DebugString() << "]"; + AnfAlgo::SetStreamId(true_stream_id, active_ptr.get()); + AnfAlgo::SetNodeAttr(kAttrActiveStreamList, MakeValue>(independent_stream), active_ptr); + independent_stream_activated_ = true; + + // update processed stream + for (auto &item : independent_stream) { + processed_streams_.emplace(item); + } + + orders->emplace_back(active_ptr); +} // namespace ascend + +void AscendStreamAssign::InsertStreamActive(const std::shared_ptr &graph_ptr) { MS_LOG(INFO) << 
"start"; MS_EXCEPTION_IF_NULL(graph_ptr); - auto cnode_ptr_list = graph_ptr->execution_order(); - vector cnodes = cnode_ptr_list; - uint32_t cur_event_id = 0; - auto it = cnodes.begin(); - while (it != cnodes.end() && (it + 1) != cnodes.end()) { - MS_EXCEPTION_IF_NULL(*it); - MS_EXCEPTION_IF_NULL(*(it + 1)); - if (IsHcom(*it) && !IsHcom(*(it + 1))) { - CNodePtr send_cnode_ptr = CreateSendApplyKernel(graph_ptr, cur_event_id, AnfAlgo::GetStreamId(*it)); - it = cnodes.insert(it + 1, send_cnode_ptr); + std::vector update_cnode_list; + CNodePtr cur_cnode_ptr = nullptr; + CNodePtr pre_cnode_ptr = nullptr; + uint32_t pre_stream_id = UINT32_MAX; + std::vector independent_stream; + MS_LOG(INFO) << "independent stream size:" << independent_stream_map_.size(); + for (auto item : independent_stream_map_) { + independent_stream.emplace_back(item.first); + } - auto target = FindTargetOp(it, cnodes.end(), *(it - 1)); - if (target == cnodes.end()) { - MS_LOG(WARNING) << "hcom node[" << (*(it - 1))->fullname_with_scope() - << "] can't find target for insert recv op, no insert send/recv"; - it = cnodes.erase(it); - continue; - } + bool independent_flag = !(independent_stream.empty()); - // deal recv op - uint32_t stream_id = AnfAlgo::GetStreamId(*target); - CNodePtr recv_cnode_ptr = CreateRecvApplyKernel(graph_ptr, cur_event_id, stream_id); - (void)cnodes.insert(target, recv_cnode_ptr); - ++cur_event_id; + const std::vector &cnode_ptr_list = graph_ptr->execution_order(); + for (size_t i = 0; i < cnode_ptr_list.size(); ++i) { + cur_cnode_ptr = cnode_ptr_list[i]; + MS_EXCEPTION_IF_NULL(cur_cnode_ptr); + uint32_t cur_stream_id = AnfAlgo::GetStreamId(cur_cnode_ptr); + if (IsIndependentNode(cur_cnode_ptr)) { + update_cnode_list.emplace_back(cur_cnode_ptr); + continue; } - ++it; + + bool inner_active = false; + if (pre_cnode_ptr != nullptr) { + inner_active = pre_stream_id != cur_stream_id && AnfAlgo::GetCNodeName(pre_cnode_ptr) != kStreamSwitchOpName && + 
AnfAlgo::GetCNodeName(pre_cnode_ptr) != kSendOpName; + } + + bool processed = IsProcessedStream(cur_stream_id); + // 1)inner stream assign, need insert active op + if (inner_active && !processed) { + MS_LOG(INFO) << "Inner insert active op, self stream id[" << pre_stream_id << "]"; + CNodePtr active_ptr = KernelAdjust::GetInstance().CreateStreamActiveOp(graph_ptr); + // 1.set stream id + AnfAlgo::SetStreamId(pre_stream_id, active_ptr.get()); + // 2.set active stream ids + std::vector active_index_list; + GetParallelStream(cur_stream_id, pre_stream_id, &active_index_list); + AnfAlgo::SetNodeAttr(kAttrActiveStreamList, MakeValue>(active_index_list), active_ptr); + update_cnode_list.emplace_back(active_ptr); + } + + if (independent_flag && (AnfAlgo::GetCNodeName(cur_cnode_ptr) == kStreamSwitchOpName)) { + MS_LOG(INFO) << "Insert StreamActive op after FP StreamSwitch for stream parallel"; + UpdateStreamSwitch(graph_ptr, cur_cnode_ptr, independent_stream, &update_cnode_list); + } else { + update_cnode_list.emplace_back(cur_cnode_ptr); + } + + processed_streams_.emplace(cur_stream_id); + pre_stream_id = cur_stream_id; + pre_cnode_ptr = cur_cnode_ptr; } - graph_ptr->set_execution_order(cnodes); - total_event_num_ = cur_event_id; - MS_LOG(INFO) << "after insert send/recv for hcom parallel, total event nums[" << total_event_num_ << "]"; + graph_ptr->set_execution_order(update_cnode_list); MS_LOG(INFO) << "end"; } -bool AscendStreamAssign::IsProcessedParallelStream(uint32_t stream_id) { - auto it = std::find(processed_parallel_streams_.begin(), processed_parallel_streams_.end(), stream_id); - if (it != processed_parallel_streams_.end()) { +bool AscendStreamAssign::IsProcessedStream(uint32_t stream_id) { + auto it = std::find(processed_streams_.begin(), processed_streams_.end(), stream_id); + if (it != processed_streams_.end()) { return true; } return false; @@ -337,8 +381,9 @@ bool AscendStreamAssign::IsProcessedParallelStream(uint32_t stream_id) { void 
AscendStreamAssign::GetParallelStream(uint32_t cur_stream_id, uint32_t stream_acitve_id, vector *parallel_streams) { + MS_EXCEPTION_IF_NULL(parallel_streams); for (size_t i = 0; i < inner_parallel_streams_.size(); i++) { - auto cur_parallel_streams = inner_parallel_streams_[i]; + const auto &cur_parallel_streams = inner_parallel_streams_[i]; auto it = std::find(cur_parallel_streams.begin(), cur_parallel_streams.end(), cur_stream_id); if (it != cur_parallel_streams.end()) { MS_LOG(INFO) << "stream id:" << cur_stream_id << " is parallel stream"; @@ -349,74 +394,118 @@ void AscendStreamAssign::GetParallelStream(uint32_t cur_stream_id, uint32_t stre continue; } (*parallel_streams).emplace_back(cur_parallel_streams[j]); + processed_streams_.emplace(cur_parallel_streams[j]); } - - // record processed parallel streams - (void)std::copy((*parallel_streams).begin(), (*parallel_streams).end(), - std::back_inserter(processed_parallel_streams_)); return; } } + processed_streams_.emplace(cur_stream_id); (*parallel_streams).push_back(cur_stream_id); } -void AscendStreamAssign::InsertActiveNew(const std::shared_ptr &graph_ptr) { +// section5 +void AscendStreamAssign::InsertSendRecvForDiffHcom(const shared_ptr &graph_ptr) { MS_LOG(INFO) << "start"; MS_EXCEPTION_IF_NULL(graph_ptr); - std::vector update_cnode_list; - CNodePtr cur_cnode_ptr = nullptr; - CNodePtr pre_cnode_ptr = nullptr; - uint32_t pre_stream_id = UINT32_MAX; - auto cnode_ptr_list = graph_ptr->execution_order(); - for (size_t i = 0; i < cnode_ptr_list.size(); ++i) { - cur_cnode_ptr = cnode_ptr_list[i]; - MS_EXCEPTION_IF_NULL(cur_cnode_ptr); - uint32_t cur_stream_id = AnfAlgo::GetStreamId(cur_cnode_ptr); - if (cur_stream_id >= kIndependFirstStreamId) { - update_cnode_list.emplace_back(cur_cnode_ptr); + vector fusion_hcom_index; + vector orders; + for (size_t i = 0; i < cnode_ptr_list.size(); i++) { + auto cur_cnode = cnode_ptr_list[i]; + if (IsFusionHcom(cur_cnode)) { + fusion_hcom_index.emplace_back(i); + } + } + if 
(fusion_hcom_index.size() < 2) { + MS_LOG(INFO) << "fusion hcom size is less than 2, no need insert event between them"; + return; + } + uint32_t first_index = fusion_hcom_index[0]; + uint32_t last_index = fusion_hcom_index[fusion_hcom_index.size() - 1]; + uint32_t cur_event_id = total_event_num_; + uint32_t pre_hcom_stream_id = kInvalidStreamId; + std::copy(cnode_ptr_list.begin(), cnode_ptr_list.begin() + first_index, std::back_inserter(orders)); + for (size_t i = first_index; i <= last_index; i++) { + auto cur_cnode = cnode_ptr_list[i]; + auto it = std::find(fusion_hcom_index.begin(), fusion_hcom_index.end(), i); + if (it == fusion_hcom_index.end()) { + orders.emplace_back(cur_cnode); continue; } - - bool inner_active = pre_stream_id != cur_stream_id && pre_stream_id < cur_stream_id && - AnfAlgo::GetCNodeName(pre_cnode_ptr) != kStreamSwitchOpName && - AnfAlgo::GetCNodeName(pre_cnode_ptr) != kStreamActiveOpName && - AnfAlgo::GetCNodeName(pre_cnode_ptr) != kSendOpName; - bool processed = IsProcessedParallelStream(cur_stream_id); - // 1)inner stream assign, need insert active op - if (inner_active && !processed) { - MS_LOG(INFO) << "Inner insert active op, self stream id[" << pre_stream_id << "]"; - CNodePtr active_ptr = KernelAdjust::GetInstance().CreateStreamActiveOp(graph_ptr); - update_cnode_list.emplace_back(active_ptr); - // 1.set stream id - AnfAlgo::SetStreamId(pre_stream_id, active_ptr.get()); - // 2.set active stream ids - std::vector active_index_list; - GetParallelStream(cur_stream_id, pre_stream_id, &active_index_list); - AnfAlgo::SetNodeAttr(kAttrActiveStreamList, MakeValue>(active_index_list), active_ptr); + auto cur_hcom_stream_id = AnfAlgo::GetStreamId(cur_cnode); + if (cur_hcom_stream_id == pre_hcom_stream_id) { + orders.emplace_back(cur_cnode); + continue; } - // inner_active is not a if/else relationship with the next if/else. 
such as:StreamActive(S7)-->StreamActive(S8) - if (AnfAlgo::GetCNodeName(cur_cnode_ptr) == kStreamActiveOpName && - AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()) != UINT32_MAX) { - // 2)outter stream assign, update active op - update_cnode_list.emplace_back(cur_cnode_ptr); - UpdateStreamActive(cur_cnode_ptr); - } else if (AnfAlgo::GetCNodeName(cur_cnode_ptr) == kStreamSwitchOpName) { - // 3)update switch op - MS_LOG(INFO) << "Insert active op after switch"; - CNodePtr active_ptr = KernelAdjust::GetInstance().CreateStreamActiveOp(graph_ptr); - update_cnode_list.emplace_back(cur_cnode_ptr); - update_cnode_list.emplace_back(active_ptr); - UpdateStreamSwitch(cur_cnode_ptr, active_ptr); + if (i == first_index) { + // first fusion hcom + orders.emplace_back(cur_cnode); + auto send = CreateSendApplyKernel(graph_ptr, cur_event_id, cur_hcom_stream_id); + orders.emplace_back(send); + } else if (i == last_index) { + // last fusion hcom + auto recv = CreateRecvApplyKernel(graph_ptr, cur_event_id, cur_hcom_stream_id); + orders.emplace_back(recv); + orders.emplace_back(cur_cnode); + cur_event_id++; } else { - update_cnode_list.emplace_back(cur_cnode_ptr); + auto recv = CreateRecvApplyKernel(graph_ptr, cur_event_id, cur_hcom_stream_id); + orders.emplace_back(recv); + cur_event_id++; + orders.emplace_back(cur_cnode); + auto send = CreateSendApplyKernel(graph_ptr, cur_event_id, cur_hcom_stream_id); + orders.emplace_back(send); } + pre_hcom_stream_id = cur_hcom_stream_id; + } + std::copy(cnode_ptr_list.begin() + last_index + 1, cnode_ptr_list.end(), std::back_inserter(orders)); + graph_ptr->set_execution_order(orders); + total_event_num_ = cur_event_id; + MS_LOG(INFO) << "after indsert between allreduce, total event nums[" << total_event_num_ << "]\n end"; +} - pre_stream_id = cur_stream_id; - pre_cnode_ptr = cur_cnode_ptr; +void AscendStreamAssign::InsertSendRecvForHcomParallel(const shared_ptr &graph_ptr) { + MS_LOG(INFO) << "start"; + MS_EXCEPTION_IF_NULL(graph_ptr); + 
auto cnode_ptr_list = graph_ptr->execution_order(); + vector cnodes = cnode_ptr_list; + uint32_t cur_event_id = 0; + auto it = cnodes.begin(); + while (it != cnodes.end() && (it + 1) != cnodes.end()) { + MS_EXCEPTION_IF_NULL(*it); + MS_EXCEPTION_IF_NULL(*(it + 1)); + if (IsHcom(*it) && !IsHcom(*(it + 1))) { + bool is_fusion = IsFusionHcom(*it); + if (!is_fusion) { + ++it; + continue; + } + CNodePtr send_cnode_ptr = CreateSendApplyKernel(graph_ptr, cur_event_id, AnfAlgo::GetStreamId(*it)); + it = cnodes.insert(it + 1, send_cnode_ptr); + + auto target = FindTargetOp(it, cnodes.end(), *(it - 1)); + if (target == cnodes.end()) { + MS_LOG(WARNING) << "hcom node[" << (*(it - 1))->fullname_with_scope() + << "] can't find target for insert recv op, no insert send/recv"; + it = cnodes.erase(it); + continue; + } + + // deal recv op + uint32_t stream_id = AnfAlgo::GetStreamId(*target); + CNodePtr recv_cnode_ptr = CreateRecvApplyKernel(graph_ptr, cur_event_id, stream_id); + (void)cnodes.insert(target, recv_cnode_ptr); + ++cur_event_id; + } + ++it; } - graph_ptr->set_execution_order(update_cnode_list); + graph_ptr->set_execution_order(cnodes); + total_event_num_ = cur_event_id; + MS_LOG(INFO) << "after insert send/recv for hcom parallel, total event nums[" << total_event_num_ << "]"; + + // Insert Send/Recv between Hcom(such as:AllReduce1 Send1 Common Recv1 AllReduce2) + InsertSendRecvForDiffHcom(graph_ptr); MS_LOG(INFO) << "end"; } @@ -451,70 +540,23 @@ void AscendStreamAssign::UpdateEventId(const shared_ptr &g } } -void AscendStreamAssign::UpdateStreamId(const shared_ptr &graph_ptr) { - MS_LOG(INFO) << "start"; - MS_EXCEPTION_IF_NULL(graph_ptr); - CNodePtr cur_cnode_ptr = nullptr; - auto cnode_ptr_list = graph_ptr->execution_order(); - for (size_t i = 0; i < cnode_ptr_list.size(); ++i) { - cur_cnode_ptr = cnode_ptr_list[i]; - MS_EXCEPTION_IF_NULL(cur_cnode_ptr); - uint32_t cur_stream_id = AnfAlgo::GetStreamId(cur_cnode_ptr); - if (cur_stream_id < kIndependFirstStreamId) { - 
if (AnfAlgo::GetCNodeName(cur_cnode_ptr) == kStreamActiveOpName) { - auto primitive = AnfAlgo::GetCNodePrimitive(cur_cnode_ptr); - MS_EXCEPTION_IF_NULL(primitive); - vector active_ids = GetValue>(primitive->GetAttr(kAttrActiveStreamList)); - for (size_t j = 0; j < active_ids.size(); j++) { - if (active_ids[j] >= kIndependFirstStreamId) { - active_ids[j] = active_ids[j] - kIndependFirstStreamId + total_common_stream_num_; - } - } - ValuePtr active_value = MakeValue>(active_ids); - AnfAlgo::SetNodeAttr(kAttrActiveStreamList, active_value, cur_cnode_ptr); - } - } else { - uint32_t update_id = cur_stream_id - kIndependFirstStreamId + total_common_stream_num_; - AnfAlgo::SetStreamId(update_id, cur_cnode_ptr.get()); - } - - // update AtomicAddrClean stream same witch the next node - if (i > 0 && AnfAlgo::GetCNodeName(cnode_ptr_list[i - 1]) == "AtomicAddrClean") { - MS_LOG(INFO) << "update AtomicAddrClean stream id from[" << AnfAlgo::GetStreamId(cnode_ptr_list[i - 1]) - << "] to [" << AnfAlgo::GetStreamId(cur_cnode_ptr) << "]"; - AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(cur_cnode_ptr), cnode_ptr_list[i - 1].get()); - } - } - - // update logic_to_independent_map_ - for (auto &indep : logic_to_independent_map_) { - if (indep.second >= kIndependFirstStreamId) { - indep.second = indep.second - kIndependFirstStreamId + total_common_stream_num_; - } - } - - // update independent_before_physic_id_ - for (auto &id : independent_before_physic_id_) { - if (id >= kIndependFirstStreamId) { - id = id - kIndependFirstStreamId + total_common_stream_num_; - } - } - - // update independent_id_ - independent_id_ = independent_id_ - kIndependFirstStreamId + total_common_stream_num_; - MS_LOG(INFO) << "end"; -} - void AscendStreamAssign::GetNeedActiveStreams(const shared_ptr &graph_ptr) { MS_EXCEPTION_IF_NULL(graph_ptr); CNodePtr cur_cnode_ptr = nullptr; auto cnode_ptr_list = graph_ptr->execution_order(); + // 1)stream witch kStreamNeedActivedFirst attr should be actived; for (size_t i = 0; 
i < cnode_ptr_list.size(); ++i) { cur_cnode_ptr = cnode_ptr_list[i]; MS_EXCEPTION_IF_NULL(cur_cnode_ptr); + ValuePtr value_ptr = nullptr; auto primitive = AnfAlgo::GetCNodePrimitive(cur_cnode_ptr); - MS_EXCEPTION_IF_NULL(primitive); - auto value_ptr = primitive->GetAttr(kStreamNeedActivedFirst); + if (primitive != nullptr) { + value_ptr = primitive->GetAttr(kStreamNeedActivedFirst); + } else { + auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(cur_cnode_ptr); + MS_EXCEPTION_IF_NULL(func_graph); + value_ptr = func_graph->get_attr(kStreamNeedActivedFirst); + } if (value_ptr == nullptr) { continue; } @@ -526,29 +568,15 @@ void AscendStreamAssign::GetNeedActiveStreams(const shared_ptr &graph_ptr) { - if (IsTaskSink()) { - ResetNew(); - ReorderIndependentOrders(graph_ptr); - AssignAllNodesStream(graph_ptr); - FindAllReduceParallel(graph_ptr); - InsertActiveNew(graph_ptr); - InsertSendRecvForHcomParallel(graph_ptr); - InsertSendRecvForIndependent(graph_ptr); - UpdateStreamId(graph_ptr); - UpdateEventId(graph_ptr); - GetNeedActiveStreams(graph_ptr); - MS_LOG(INFO) << "after finish stream assign"; - PrintGraphExeOrders(graph_ptr); + // 2)first stream 0 should be actived first; + need_first_active_streams_.emplace_back(0); - // Get info for D Model - generator::IRModelUtil::GetInstance().set_event_num(total_event_num()); - generator::IRModelUtil::GetInstance().set_stream_num(total_common_stream_num() + total_independ_stream_num()); - // Init to 1,temporarily - generator::IRModelUtil::GetInstance().set_batch_num(1); + // 3)independent stream:if has not been activate, push to need active vector + if (!independent_stream_activated_) { + for (auto &item : independent_stream_map_) { + need_first_active_streams_.emplace_back(item.first); + } } } @@ -659,33 +687,6 @@ void AscendStreamAssign::InsertSendRecvForIndependent(const shared_ptrfullname_with_scope() << " is independent, as inputs nums is zero"; - return true; - } - - auto inputs = node_ptr->inputs(); - for (size_t i = 1; i 
< inputs.size(); i++) { - if (!inputs[i]->isa()) { - return false; - } - } - MS_LOG(INFO) << "node " << node_ptr->fullname_with_scope() << " is independent, as inputs is all value node"; - return true; -} - bool AscendStreamAssign::IsTaskSink() { auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); @@ -699,63 +700,60 @@ bool AscendStreamAssign::IsTaskSink() { } void AscendStreamAssign::GetWaitStreams(vector *wait_active_stream_list) { - if (total_common_stream_num_ == 0) { + MS_EXCEPTION_IF_NULL(wait_active_stream_list); + AscendStreamMng &stream_manager = AscendStreamMng::GetInstance(); + uint32_t total_stream_num = stream_manager.GetCurAllocStreamNum(); + if (total_stream_num == 0) { MS_LOG(INFO) << "total_common_stream_num is zero"; return; } // common stream:active first common stream - MS_LOG(INFO) << "active physic id[" << first_physic_id_ << "]"; - for (uint32_t i = first_physic_id_ + 1; i < total_common_stream_num_; i++) { + for (uint32_t i = 0; i < total_stream_num; i++) { auto it = std::find(need_first_active_streams_.begin(), need_first_active_streams_.end(), i); if (it == need_first_active_streams_.end()) { MS_LOG(INFO) << "wait common stream id = " << i; (*wait_active_stream_list).push_back(i); } } +} - // all independ stream id before first physical stream id should be actived - auto it = logic_to_independent_map_.find(first_logic_id_); - if (it != logic_to_independent_map_.end()) { - uint32_t independent_id = it->second; - auto res = std::find(independent_before_physic_id_.begin(), independent_before_physic_id_.end(), independent_id); - if (res == independent_before_physic_id_.end()) { - // first physical to independ id may be not in independent_before_physic_id_ - independent_before_physic_id_.push_back(independent_id); - } - MS_LOG(INFO) << "active independent id[" << independent_id << "]"; +bool AscendStreamAssign::IsHcom(const CNodePtr &apply_kernel) { + MS_EXCEPTION_IF_NULL(apply_kernel); + return 
AnfAlgo::GetKernelType(apply_kernel) == HCCL_KERNEL; +} + +bool AscendStreamAssign::IsFusionHcom(const CNodePtr &cur_cnode_ptr) { + MS_EXCEPTION_IF_NULL(cur_cnode_ptr); + bool is_hcom = IsHcom(cur_cnode_ptr); + if (!is_hcom) { + return false; } - uint32_t max_before_physic = 0; - for (size_t i = 0; i < independent_before_physic_id_.size(); i++) { - if (independent_before_physic_id_[i] > max_before_physic) { - max_before_physic = independent_before_physic_id_[i]; - } - MS_LOG(INFO) << "independent id[" << independent_before_physic_id_[i] << "] before first physic is active"; + if (!AnfAlgo::HasNodeAttr(kAttrFusion, cur_cnode_ptr)) { + return false; } - for (uint32_t i = 0; i < total_independ_stream_num_; i++) { - if (i + total_common_stream_num_ <= max_before_physic) { - continue; - } - // all wait streams should not in need_first_active_streams_ - auto iter = - std::find(need_first_active_streams_.begin(), need_first_active_streams_.end(), i + total_common_stream_num_); - if (iter == need_first_active_streams_.end()) { - MS_LOG(INFO) << "wait independent stream id:" << i + total_common_stream_num_; - (*wait_active_stream_list).push_back(i + total_common_stream_num_); - } + if (AnfAlgo::GetNodeAttr(cur_cnode_ptr, kAttrFusion) == 0) { + return false; + } + + return true; +} + +void AscendStreamAssign::GetHcomStreams(std::vector *streams) { + MS_EXCEPTION_IF_NULL(streams); + for (const auto &stream : hcom_stream_list_) { + (*streams).emplace_back(stream); } } -uint32_t AscendStreamAssign::GetTotalStreamNum() const { return total_common_stream_num_ + total_independ_stream_num_; } void AscendStreamAssign::ReorderIndependentOrders(const shared_ptr &graph_ptr) { MS_EXCEPTION_IF_NULL(graph_ptr); CNodePtr cur_cnode_ptr = nullptr; std::vector exe_orders; std::vector independents; std::vector others; - auto cnode_ptr_list = graph_ptr->execution_order(); MS_LOG(INFO) << "before reorder, graph orders size:" << cnode_ptr_list.size(); for (size_t i = 0; i < cnode_ptr_list.size(); 
++i) { @@ -767,68 +765,52 @@ void AscendStreamAssign::ReorderIndependentOrders(const shared_ptrset_execution_order(exe_orders); + if (others.empty() || independents.empty()) { + MS_LOG(INFO) << "independent or others is empty, no need reorder"; return; } - if (independents.empty()) { - std::copy(others.begin(), others.end(), std::back_inserter(exe_orders)); - graph_ptr->set_execution_order(exe_orders); - return; - } - - std::vector processed; + std::set processed; for (size_t i = 0; i < others.size(); i++) { auto begin = others.begin() + i; auto end = begin + 1; bool flag = false; for (size_t j = 0; j < independents.size(); j++) { auto cur_independent = independents[j]; - auto it = std::find(processed.begin(), processed.end(), cur_independent); + auto it = std::find(processed.begin(), processed.end(), cur_independent.get()); if (it != processed.end()) { continue; } - auto res = FindTargetOp(begin, end, cur_independent); if (res != end) { flag = true; exe_orders.emplace_back(cur_independent); exe_orders.emplace_back(*begin); - processed.emplace_back(cur_independent); + processed.emplace(cur_independent.get()); break; } } - if (!flag) { exe_orders.emplace_back(*begin); } } - MS_LOG(INFO) << "after reorder, graph orders size:" << exe_orders.size(); + if (processed.size() != independents.size()) { + MS_LOG(WARNING) << "processed independent nodes size is not equal to exiting independent nodes size"; + return; + } + graph_ptr->set_execution_order(exe_orders); } -void AscendStreamAssign::PrintGraphExeOrders(const shared_ptr &graph_ptr) { - MS_EXCEPTION_IF_NULL(graph_ptr); - auto cnode_ptr_list = graph_ptr->execution_order(); - for (size_t i = 0; i < cnode_ptr_list.size(); ++i) { - CNodePtr cur_cnode_ptr = cnode_ptr_list[i]; - MS_EXCEPTION_IF_NULL(cur_cnode_ptr); - if (AnfAlgo::GetCNodeName(cur_cnode_ptr) == kSendOpName || AnfAlgo::GetCNodeName(cur_cnode_ptr) == kRecvOpName) { - auto primitive = AnfAlgo::GetCNodePrimitive(cur_cnode_ptr); - MS_LOG(INFO) << "node name[" << 
AnfAlgo::GetCNodeName(cur_cnode_ptr) << "], logic id[" - << AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()) << "], stream id[" - << AnfAlgo::GetStreamId(cur_cnode_ptr) << "], event_id[" - << GetValue(primitive->GetAttr(kAttrEventId)) << "]"; - } else { - MS_LOG(INFO) << "node name[" << cur_cnode_ptr->fullname_with_scope() << "], logic id[" - << AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()) << "], stream id[" - << AnfAlgo::GetStreamId(cur_cnode_ptr) << "]"; - } - } +void AscendStreamAssign::Reset() { + total_event_num_ = 0; + independent_stream_activated_ = false; + independent_stream_map_.clear(); + processed_streams_.clear(); + hcom_stream_list_.clear(); + need_first_active_streams_.clear(); + inner_parallel_streams_.clear(); } } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/ascend_stream_assign.h b/mindspore/ccsrc/device/ascend/ascend_stream_assign.h old mode 100755 new mode 100644 index b6f6bfd479..bb918cfc79 --- a/mindspore/ccsrc/device/ascend/ascend_stream_assign.h +++ b/mindspore/ccsrc/device/ascend/ascend_stream_assign.h @@ -19,6 +19,8 @@ #include #include +#include +#include #include #include #include @@ -36,6 +38,36 @@ using std::shared_ptr; using std::unordered_map; using std::unordered_set; using std::vector; +using CnodeKey = void *; +const uint32_t kInvalidStreamId = UINT32_MAX; +class AscendStreamMng { + public: + static AscendStreamMng &GetInstance() { + static AscendStreamMng instance; + return instance; + } + + void Reset() { + cur_stream_id = 0; + cur_stream_num = 0; + } + uint32_t ApplyNewStream() { + if (!cur_stream_num) { + cur_stream_num++; + return cur_stream_id; + } + cur_stream_num++; + cur_stream_id++; + return cur_stream_id; + } + + uint32_t GetCurAllocStream() { return cur_stream_id; } + uint32_t GetCurAllocStreamNum() { return cur_stream_num; } + + private: + uint32_t cur_stream_num{0}; + uint32_t cur_stream_id{0}; +}; class AscendStreamAssign { public: @@ -47,22 +79,11 @@ class 
AscendStreamAssign { AscendStreamAssign(const AscendStreamAssign &) = delete; AscendStreamAssign &operator=(const AscendStreamAssign &) = delete; - uint32_t GetTotalStreamNum() const; - // new stream policy - uint32_t total_common_stream_num() const { return total_common_stream_num_; } - uint32_t total_independ_stream_num() const { return total_independ_stream_num_; } uint32_t total_event_num() const { return total_event_num_; } + void GetHcomStreams(std::vector *streams); - void InsertActiveNew(const std::shared_ptr &graph_ptr); - void AssignAllNodesStream(const std::shared_ptr &graph_ptr); - void ResetNew(); - void AssignStreamNew(const std::shared_ptr &graph_ptr); - bool IsIndependentNode(const CNodePtr &node_ptr); - const std::unordered_map &logic_to_independent_map() { return logic_to_independent_map_; } - const std::unordered_map &logic_to_physic_map() { return logic_to_physic_map_; } - const std::vector> &inner_parallel_streams() { return inner_parallel_streams_; } + void AssignStream(const std::shared_ptr &graph_ptr); void GetWaitStreams(vector *wait_active_stream_list); - const std::vector &hcom_streams() { return hcom_stream_list_; } CNodePtr CreateSendApplyKernel(const std::shared_ptr &graph_ptr, uint32_t event_id, uint32_t stream_id); CNodePtr CreateRecvApplyKernel(const std::shared_ptr &graph_ptr, uint32_t event_id, @@ -71,49 +92,41 @@ class AscendStreamAssign { private: AscendStreamAssign() = default; ~AscendStreamAssign() = default; - - vector::iterator FindTargetOp(vector::iterator begin, vector::iterator end, - const CNodePtr &node); - - bool IsHcom(const CNodePtr &apply_kernel); - bool IsProcessed(uint32_t logic_id); - void TransLogicToPhysic(const vector &logic_ids, vector *physic_ids); + void Reset(); + void CheckStreamAssign(const std::shared_ptr &graph_ptr); + void AssignAllNodesStream(const std::shared_ptr &graph_ptr); void AssignCommonStreamId(const CNodePtr &cur_cnode_ptr, CNodePtr *pre_cnode_ptr, uint32_t *cur_index, uint32_t 
*cur_stream_id); - void RecordIdMap(uint32_t logic_id, uint32_t physic_id); - void UpdateStreamActive(const CNodePtr &active_ptr); - void UpdateStreamSwitch(const CNodePtr &switch_ptr, const CNodePtr &active_ptr); - bool IsTaskSink(); - void AssignIndependentStreamId(const CNodePtr &cur_cnode_ptr, uint32_t deal_logic_id); - void UpdateStreamId(const std::shared_ptr &graph_ptr); - void UpdateEventId(const std::shared_ptr &graph_ptr); - void PrintGraphExeOrders(const std::shared_ptr &graph_ptr); - void RecordFirstCommonOp(const CNodePtr &cur_cnode_ptr, uint32_t cur_node_logic_id, uint32_t cur_stream_id); - uint32_t GetLogicId(const CNodePtr &cur_cnode_ptr); - void SetCommonStreamNum(uint32_t cur_stream_id); - void FindAllReduceParallel(const std::shared_ptr &graph_ptr); - bool IsProcessedParallelStream(uint32_t stream_id); - void GetParallelStream(uint32_t cur_stream_id, uint32_t stream_acitve_id, std::vector *parallel_streams); + void AssignIndependentStreamId(const CNodePtr &cur_cnode_ptr); + void UpdateAtomicAddrCleanStreamId(const std::shared_ptr &graph_ptr); + void FindHcomParallelStreams(const std::shared_ptr &graph_ptr); + void InsertStreamActive(const std::shared_ptr &graph_ptr); + void UpdateStreamSwitch(const std::shared_ptr &graph_ptr, const CNodePtr &switch_ptr, + const vector &independent_stream, vector *orders); void InsertSendRecvForIndependent(const std::shared_ptr &graph_ptr); void InsertSendRecvForHcomParallel(const std::shared_ptr &graph_ptr); + void InsertSendRecvForDiffHcom(const shared_ptr &graph_ptr); + void UpdateEventId(const std::shared_ptr &graph_ptr); void GetNeedActiveStreams(const std::shared_ptr &graph_ptr); void ReorderIndependentOrders(const std::shared_ptr &graph_ptr); - uint32_t total_common_stream_num_{0}; - uint32_t total_independ_stream_num_{0}; - uint32_t total_event_num_{0}; + bool IsTaskSink(); + bool IsFusionHcom(const CNodePtr &cur_cnode_ptr); + bool IsHcom(const CNodePtr &cur_cnode_ptr); + bool IsIndependentNode(const 
CNodePtr &node_ptr); + bool IsProcessedStream(uint32_t stream_id); + vector::iterator FindTargetOp(vector::iterator begin, vector::iterator end, + const CNodePtr &node); + void GetParallelStream(uint32_t cur_stream_id, uint32_t stream_acitve_id, std::vector *parallel_streams); - uint32_t first_physic_id_{UINT32_MAX}; - uint32_t first_logic_id_{UINT32_MAX}; - uint32_t independent_id_{UINT32_MAX}; - vector processed_logic_id_{}; - std::unordered_map logic_to_physic_map_{}; // key:logic id, value: first physic id - std::unordered_map logic_to_independent_map_{}; // key:logic id, value: dependent id - std::vector independent_before_physic_id_{}; // record independent id before first physic id - std::vector> inner_parallel_streams_{}; - std::vector processed_parallel_streams_{}; - std::vector hcom_stream_list_{}; + uint32_t total_event_num_{0}; + bool independent_stream_activated_{false}; + std::map independent_stream_map_{}; + std::set processed_streams_{}; + std::set hcom_stream_list_{}; std::vector need_first_active_streams_{}; + std::vector> inner_parallel_streams_{}; + // new policy end }; } // namespace ascend diff --git a/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc index afce5f3607..81d5be6731 100644 --- a/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc +++ b/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc @@ -26,10 +26,12 @@ #include "kernel/kernel.h" #include "kernel/tbe/tbe_kernel_build.h" #include "kernel/tbe/tbe_kernel_parallel_build.h" +#include "kernel/akg/ascend/akg_ascend_kernel_build.h" #include "kernel/aicpu/aicpu_kernel_build.h" #include "kernel/hccl/hccl_kernel_build.h" #include "kernel/rts/rt_kernel_build.h" #include "kernel/tbe/tbe_utils.h" +#include "kernel/common_utils.h" #include "operator/ops.h" #include "session/anf_runtime_algorithm.h" #include "./common.h" @@ -62,9 +64,36 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) { return 
kernel_mod_ptr; } +static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) { + MS_EXCEPTION_IF_NULL(kernel_graph_ptr); + std::vector tbe_nodes; + for (const auto &anf_node : kernel_graph_ptr->execution_order()) { + MS_EXCEPTION_IF_NULL(anf_node); + if (!AnfAlgo::IsRealKernel(anf_node)) { + continue; + } + KernelType kernel_type = AnfAlgo::GetKernelType(anf_node); + switch (kernel_type) { + case KernelType::TBE_KERNEL: { + if (AnfAlgo::GetKernelMod(anf_node) == nullptr && + AnfAlgo::GetFusionType(anf_node) == kernel::FusionType::DYNAMIC) { + tbe_nodes.push_back(anf_node); + } + break; + } + default: { + break; + } + } + } + bool ret = kernel::TbeOpParallelPreBuild(tbe_nodes); + return ret; +} + static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) { MS_EXCEPTION_IF_NULL(kernel_graph_ptr); std::vector tbe_nodes; + std::vector akg_nodes; std::vector other_nodes; for (const auto &anf_node : kernel_graph_ptr->execution_order()) { MS_EXCEPTION_IF_NULL(anf_node); @@ -79,43 +108,52 @@ static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *ke } break; } + case KernelType::AKG_KERNEL: { + akg_nodes.push_back(anf_node); + break; + } default: { other_nodes.push_back(anf_node); break; } } } - bool ret = kernel::TbeOpParallelBuild(tbe_nodes); + bool tbe_ret = kernel::TbeOpParallelBuild(tbe_nodes); + bool akg_ret = kernel::AkgAscendKernelParallelBuild(akg_nodes); + auto bin_map = kernel::tbe::KernelMeta::GetInstance(); + (void)bin_map->ReadIndex(kernel::kCceKernelMeta); for (const auto &anf_node : other_nodes) { kernel::KernelModPtr kernel_mod_ptr = SerialCompileImpl(anf_node); MS_EXCEPTION_IF_NULL(kernel_mod_ptr); AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); } - return ret; + return tbe_ret && akg_ret; } -static std::vector CalCleanZerosSize(const CNodePtr &pre_node) { +static std::vector CalCleanZerosSize(const CNodePtr &pre_node) { 
MS_EXCEPTION_IF_NULL(pre_node); - std::vector clean_size_list; + auto kernel_mod = AnfAlgo::GetKernelMod(pre_node); + MS_EXCEPTION_IF_NULL(kernel_mod); + std::vector clean_size_list; // clean output - if (AnfAlgo::HasNodeAttr(kAttrAutomicOutputIndexs, pre_node)) { - auto clean_output_indexs = AnfAlgo::GetNodeAttr>(pre_node, kAttrAutomicOutputIndexs); - for (auto index : clean_output_indexs) { - TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(pre_node, index); - size_t type_size = GetTypeByte(TypeIdToType(output_type_id)); - std::vector shape = AnfAlgo::GetOutputDeviceShape(pre_node, index); - auto size = std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies()); - clean_size_list.push_back((size + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize); + if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, pre_node)) { + auto output_indexs = AnfAlgo::GetNodeAttr>(pre_node, kAttrAtomicOutputIndexs); + auto output_men_size = kernel_mod->GetOutputSizeList(); + for (auto index : output_indexs) { + auto clean_item = (output_men_size.at(index) + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize; + clean_size_list.emplace_back(clean_item); } } // clean workspace - auto workspaces_size = 0; - if (AnfAlgo::HasNodeAttr(kAttrAutomicWorkspaceSize, pre_node)) { - workspaces_size = AnfAlgo::GetNodeAttr(pre_node, kAttrAutomicWorkspaceSize); - clean_size_list.push_back(workspaces_size); + if (AnfAlgo::HasNodeAttr(kAttrAtomicWorkspaceIndexs, pre_node)) { + auto workspace_indexs = AnfAlgo::GetNodeAttr>(pre_node, kAttrAtomicWorkspaceIndexs); + auto workspace_men_sizes = kernel_mod->GetWorkspaceSizeList(); + for (const auto &index : workspace_indexs) { + auto clean_item = (workspace_men_sizes.at(index) + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize; + clean_size_list.emplace_back(clean_item); + } } - MS_LOG(INFO) << "clear output size:" << clean_size_list.size() << ", workspace size:" << workspaces_size - << ",pre_node:" << pre_node->fullname_with_scope(); 
+ MS_LOG(INFO) << "clear output size:" << clean_size_list.size() << ",pre_node:" << pre_node->fullname_with_scope(); return clean_size_list; } @@ -139,12 +177,12 @@ static void AddTbeClearZeroNode(mindspore::session::KernelGraph *const kernel_gr builder->SetKernelType(KernelType::TBE_KERNEL); AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), clear_zero.get()); auto clean_size = CalCleanZerosSize(pre_node); - AnfAlgo::SetNodeAttr(kAttrAutomicAddMemSize, MakeValue(clean_size), clear_zero); + AnfAlgo::SetNodeAttr(kAttrAtomicAddMemSize, MakeValue(clean_size), clear_zero); AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(pre_node.get()), clear_zero.get()); new_nodes->push_back(clear_zero); } -bool IsAtomicNode(const CNodePtr &kernel_node) { +static bool IsAtomicNode(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); auto kernel_mod = AnfAlgo::GetKernelMod(kernel_node); MS_EXCEPTION_IF_NULL(kernel_mod); @@ -152,40 +190,50 @@ bool IsAtomicNode(const CNodePtr &kernel_node) { if (parameters_indexs.empty()) { return false; } - auto atomic_flag = false; size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); - auto workspace_size_list = kernel_mod->GetWorkspaceSizeList(); size_t workspace_num = kernel_mod->GetWorkspaceSizeList().size(); - if (input_num + workspace_num + output_num > parameters_indexs.size()) { - size_t lossNum = (input_num + workspace_num + output_num) - parameters_indexs.size(); - for (size_t i = 0; i < lossNum; i++) { - parameters_indexs.push_back(0); - } + size_t param_num = parameters_indexs.size(); + size_t total_num = input_num + workspace_num + output_num; + MS_LOG(INFO) << "parameters size: " << param_num << ", input & workspace & output num: " << total_num; + size_t pad_index = param_num; + for (; pad_index < total_num; ++pad_index) { + parameters_indexs.emplace_back(0); } - std::vector clean_output_indexs; - // in parameters data sort as 
input->workspace->output - size_t index = 0; - while (index < output_num) { - if (parameters_indexs[input_num + workspace_num + index] == 1) { - atomic_flag = true; - clean_output_indexs.push_back(index); + // process input + for (size_t j = 0; j < input_num; ++j) { + if (parameters_indexs.at(j) == 1) { + MS_LOG(EXCEPTION) << "Atomic addr clean does't support clean input address, input index: " << j; } - index++; } - if (atomic_flag) { - AnfAlgo::SetNodeAttr(kAttrAutomicOutputIndexs, MakeValue(clean_output_indexs), kernel_node); + // process output + std::vector output_indexs; + for (size_t i = 0; i < output_num; ++i) { + auto param_output = parameters_indexs.at(input_num + workspace_num + i); + if (param_output == 1) { + output_indexs.emplace_back(i); + MS_LOG(INFO) << "Atomic clear output index: " << i; + } } - for (size_t i = 0; i < workspace_num; ++i) { - if (parameters_indexs[input_num + i] == 1) { - atomic_flag = true; - AnfAlgo::SetNodeAttr(kAttrAutomicWorkspaceSize, - MakeValue(std::accumulate(workspace_size_list.begin(), workspace_size_list.end(), 0)), - kernel_node); - break; + AnfAlgo::SetNodeAttr(kAttrAtomicOutputIndexs, MakeValue(output_indexs), kernel_node); + // process workspace + std::vector workspace_indexs; + for (size_t k = 0; k < workspace_num; ++k) { + auto param_workspace = parameters_indexs.at(input_num + k); + if (param_workspace == 1) { + workspace_indexs.emplace_back(k); + MS_LOG(INFO) << "Atomic clear workspace index: " << k; } } - return atomic_flag; + AnfAlgo::SetNodeAttr(kAttrAtomicWorkspaceIndexs, MakeValue(workspace_indexs), kernel_node); + + return !(workspace_indexs.empty() && output_indexs.empty()); +} + +bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) { + MS_EXCEPTION_IF_NULL(kernel_graph_ptr); + bool ret = device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr); + return ret; } bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) { @@ -202,7 +250,7 @@ void 
KernelBuildPreprocess(mindspore::session::KernelGraph *kernel_graph) { for (const auto &anf_node : kernel_graph->execution_order()) { std::string apply_function_name = AnfAlgo::GetCNodeName(anf_node); if (apply_function_name == prim::kPrimMaxPoolGrad->name() && - AnfAlgo::GetKernelType(anf_node) == KernelType::AUTO_DIFF_KERNEL) { + AnfAlgo::GetKernelType(anf_node) == KernelType::AKG_KERNEL) { auto clear_zero_prim = std::make_shared(kClearZeroOpName); MS_EXCEPTION_IF_NULL(clear_zero_prim); auto new_value_node = NewValueNode(clear_zero_prim); diff --git a/mindspore/ccsrc/device/ascend/kernel_build_ascend.h b/mindspore/ccsrc/device/ascend/kernel_build_ascend.h index 5dea36a183..d987b6ce7a 100644 --- a/mindspore/ccsrc/device/ascend/kernel_build_ascend.h +++ b/mindspore/ccsrc/device/ascend/kernel_build_ascend.h @@ -22,6 +22,10 @@ namespace mindspore { namespace device { namespace ascend { +/** + * @brief kernel pre build for ascend. + */ +bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr); /** * @brief kernel build for ascend. 
*/ diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc index 6e6e7419fd..4e56721fe0 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc +++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc @@ -15,18 +15,27 @@ */ #include "device/ascend/kernel_select_ascend.h" + #include #include #include #include +#include #include -#include "kernel/oplib/oplib.h" -#include "kernel/kernel_query.h" +#include +#include + +#include "common/utils.h" +#include "debug/anf_ir_dump.h" +#include "operator/ops.h" +#include "ir/func_graph.h" +#include "utils/context/ms_context.h" #include "session/anf_runtime_algorithm.h" +#include "device/kernel_info.h" +#include "kernel/common_utils.h" +#include "kernel/kernel_query.h" +#include "kernel/oplib/oplib.h" #include "kernel/kernel_build_info.h" -#include "utils/context/ms_context.h" -#include "operator/ops.h" -#include "debug/anf_ir_dump.h" namespace mindspore { namespace device { @@ -45,7 +54,6 @@ enum MatchCountPriority : int { MATCH_COUNT_PRIORITY_END }; -const size_t kMaxCount = 0xffffffff; const int kUnSupportMixedDataTypeIndex = -1; bool MatchInferOutputDataType(const CNodePtr &cnode, const kernel::KernelBuildInfo &kernel_build_info) { @@ -73,7 +81,7 @@ string GetPriorityMatchFormat(const CNodePtr &cnode) { for (size_t index = 0; index < AnfAlgo::GetInputTensorNum(cnode); ++index) { auto pre_output_format = AnfAlgo::GetPrevNodeOutputFormat(cnode, index); if (AnfAlgo::IsFeatureMapInput(cnode, index) && - kNeedTransFormatSet.find(pre_output_format) != kNeedTransFormatSet.end()) { + kHWSpecialFormatSet.find(pre_output_format) != kHWSpecialFormatSet.end()) { priority_matched_format = !is_init ? 
pre_output_format : priority_matched_format; is_init = true; } @@ -91,14 +99,14 @@ string GetPriorityMatchFormat(const CNodePtr &cnode) { return priority_matched_format; } /** - * compare two vector by priority, select a better vector, like compare two num, first compare highest num location, + * Compare two vector by priority, select a better vector, like compare two num, first compare highest num location, * if equal then next num location * example:[3,1,1,1] > [2,2,2,2] > [2,2,1,2] > [2,1,1,3] */ bool PriorityChooseItem(const std::vector &cur_item, std::vector *best_item) { MS_EXCEPTION_IF_NULL(best_item); if (cur_item.size() != best_item->size()) { - MS_LOG(ERROR) << "item size should be same!"; + MS_LOG(ERROR) << "Item size should be same!"; return false; } // Update the best_item by comparing the cur_item and best_item @@ -124,12 +132,23 @@ void UpdateCurMatchCounts(const kernel::KernelBuildInfo &kernel_build_info, cons } auto pri_match_format = GetPriorityMatchFormat(kernel_node); for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { + auto input_anf_node = kernel_node->input(input_index + 1); + // we do not take ValueNode into consideration in graph kernel. + if (kernel_build_info.kernel_type() == KernelType::AKG_KERNEL) { + if (input_anf_node->isa() && AnfAlgo::GetOutputDeviceDataType(input_anf_node, 0) == kTypeUnknown) { + continue; + } + } auto base_score = AnfAlgo::IsFeatureMapInput(kernel_node, input_index) ? kFeatureMapBaseScore : kWegihtBaseScore; if (kernel_build_info.GetInputFormat(input_index) == AnfAlgo::GetPrevNodeOutputFormat(kernel_node, input_index)) { (*cur_kernelinfo_match_counts)[MATCH_FORMAT_COUNT] += base_score; } - if (kernel_build_info.GetInputDeviceType(input_index) == - AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, input_index)) { + // we match output fix precision first. 
+ auto prev_device_type = AnfAlgo::GetPrevNodeOutputPrecision(kernel_node, input_index); + if (prev_device_type == kTypeUnknown) { + prev_device_type = AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, input_index); + } + if (kernel_build_info.GetInputDeviceType(input_index) == prev_device_type) { (*cur_kernelinfo_match_counts)[MATCH_DTYPE_COUNT] += base_score; } if (kernel_build_info.GetInputFormat(input_index) == pri_match_format) { @@ -149,40 +168,6 @@ void UpdateCurMatchCounts(const kernel::KernelBuildInfo &kernel_build_info, cons } } -void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, const CNodePtr &kernel_node) { - MS_EXCEPTION_IF_NULL(kernel_node); - for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { - auto input_kernel_node = AnfAlgo::GetInputNode(kernel_node, input_index); - MS_EXCEPTION_IF_NULL(input_kernel_node); - auto input_with_index = AnfAlgo::VisitKernel(input_kernel_node, 0); - MS_EXCEPTION_IF_NULL(input_with_index.first); - auto real_input_node = input_with_index.first; - if (real_input_node->isa()) { - continue; - } - std::shared_ptr builder = - std::make_shared(); - bool is_ref = false; - auto op_info = mindspore::kernel::OpLib::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel::kTBE); - if (op_info != nullptr) { - is_ref = op_info->is_ref(); - } - MS_EXCEPTION_IF_NULL(MsContext::GetInstance()); - if (MsContext::GetInstance()->execution_mode() == kPynativeMode && - AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) != kTypeUnknown) { - continue; - } - // we set special device info of a input tensor. 
- if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) { - std::vector output_format = {selected_kernel_info.GetInputFormat(input_index)}; - builder->SetOutputsFormat(output_format); - std::vector output_type = {AnfAlgo::GetOutputInferDataType(real_input_node, 0)}; - builder->SetOutputsDeviceType(output_type); - AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); - } - } -} - void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector *support_index) { MS_EXCEPTION_IF_NULL(support_index); int index = kUnSupportMixedDataTypeIndex; @@ -221,6 +206,7 @@ void AddNodeInputDataType(const CNodePtr &kernel_node, size_t input_index, std::vector *node_mix_precision_datatype) { AnfNodePtr cur_input = AnfAlgo::GetInputNode(kernel_node, input_index); MS_EXCEPTION_IF_NULL(cur_input); + MS_EXCEPTION_IF_NULL(node_mix_precision_datatype); TypeId input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index); AddSupportMixedPrecisionDataTypeIndex(input_origin_type, node_mix_precision_datatype_index); node_mix_precision_datatype->push_back(input_origin_type); @@ -229,6 +215,7 @@ void AddNodeInputDataType(const CNodePtr &kernel_node, size_t input_index, void AddNodeOutputDataType(const CNodePtr &kernel_node, size_t output_index, std::vector *node_mix_precision_datatype_index, std::vector *node_mix_precision_datatype) { + MS_EXCEPTION_IF_NULL(node_mix_precision_datatype); auto output_origin_type = AnfAlgo::GetOutputInferDataType(kernel_node, output_index); AddSupportMixedPrecisionDataTypeIndex(output_origin_type, node_mix_precision_datatype_index); node_mix_precision_datatype->push_back(output_origin_type); @@ -239,12 +226,12 @@ void CheckDataTypeInputs(const std::vector &node_mix_precision_datatype_ind const std::map> &kernel_support_datatypes, std::map> *kernel_match_datatype_idx) { if (node_mix_precision_datatype_index.size() != node_mix_precision_datatype.size()) { - MS_LOG(EXCEPTION) << "node 
datatype index size " << node_mix_precision_datatype_index.size() << " != datatype size " + MS_LOG(EXCEPTION) << "Node datatype index size " << node_mix_precision_datatype_index.size() << " != datatype size " << node_mix_precision_datatype.size(); } MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx); if (kernel_support_datatypes.size() != kernel_match_datatype_idx->size()) { - MS_LOG(EXCEPTION) << "kernel datatype index size " << kernel_match_datatype_idx->size() << " != datatype size " + MS_LOG(EXCEPTION) << "Kernel datatype index size " << kernel_match_datatype_idx->size() << " != datatype size " << kernel_support_datatypes.size(); } } @@ -265,10 +252,10 @@ bool RaiseDataTypePrecisionSelect(const std::vector &node_mix_precision_dat if (node_mix_precision_datatype_index[i] == kUnSupportMixedDataTypeIndex) { auto find_iter = kernel_support_datatypes.find(iter->first); if (find_iter == kernel_support_datatypes.end()) { - MS_LOG(EXCEPTION) << "kernel datatype index:%lu can not be found " << iter->first; + MS_LOG(EXCEPTION) << "Kernel datatype index:%lu can not be found " << iter->first; } if (i >= find_iter->second.size()) { - MS_LOG(EXCEPTION) << "node index " << i << "kernel datatype size " << find_iter->second.size(); + MS_LOG(EXCEPTION) << "Node index " << i << "kernel datatype size " << find_iter->second.size(); } if (node_mix_precision_datatype[i] != find_iter->second[i]) { iter = kernel_match_datatype_idx->erase(iter); @@ -279,7 +266,7 @@ bool RaiseDataTypePrecisionSelect(const std::vector &node_mix_precision_dat } auto datatype_indexes = iter->second; if (i >= datatype_indexes.size()) { - MS_LOG(EXCEPTION) << "node datatype index: " << i << " kernel support size " << datatype_indexes.size(); + MS_LOG(EXCEPTION) << "Node datatype index: " << i << " kernel support size " << datatype_indexes.size(); } if (datatype_indexes[i] < node_mix_precision_datatype_index[i]) { iter = kernel_match_datatype_idx->erase(iter); @@ -293,8 +280,12 @@ bool 
RaiseDataTypePrecisionSelect(const std::vector &node_mix_precision_dat bool CanDataTypeReduce(const std::vector &datatype_indexes, int check_index, const std::vector &node_mix_precision_datatype_index) { - return datatype_indexes[check_index] != kUnSupportMixedDataTypeIndex && - datatype_indexes[check_index] <= node_mix_precision_datatype_index[check_index]; + auto check_index_tmp = IntToSize(check_index); + if (check_index_tmp < datatype_indexes.size() && check_index_tmp < node_mix_precision_datatype_index.size()) { + return datatype_indexes[check_index] != kUnSupportMixedDataTypeIndex && + datatype_indexes[check_index] <= node_mix_precision_datatype_index[check_index]; + } + MS_LOG(EXCEPTION) << "Check index " << check_index << "is outof range"; } bool RaiseOrReduceDataTypePrecisionSelect(const std::vector &node_mix_precision_datatype_index, @@ -313,10 +304,10 @@ bool RaiseOrReduceDataTypePrecisionSelect(const std::vector &node_mix_preci if (node_mix_precision_datatype_index[i] == kUnSupportMixedDataTypeIndex) { auto find_iter = kernel_support_datatypes.find(iter->first); if (find_iter == kernel_support_datatypes.end()) { - MS_LOG(EXCEPTION) << "kernel datatype index:%lu can not be found " << iter->first; + MS_LOG(EXCEPTION) << "Kernel datatype index:%lu can not be found " << iter->first; } if (i >= find_iter->second.size()) { - MS_LOG(EXCEPTION) << "node index " << i << " >= kernel datatype size " << find_iter->second.size(); + MS_LOG(EXCEPTION) << "Node index " << i << " >= kernel datatype size " << find_iter->second.size(); } if (node_mix_precision_datatype[i] != find_iter->second[i]) { iter = kernel_match_datatype_idx->erase(iter); @@ -327,7 +318,7 @@ bool RaiseOrReduceDataTypePrecisionSelect(const std::vector &node_mix_preci } auto datatype_indexes = iter->second; if (i >= datatype_indexes.size()) { - MS_LOG(EXCEPTION) << "index " << i << "> kernel datatype indexes size " << datatype_indexes.size(); + MS_LOG(EXCEPTION) << "Index " << i << "> kernel datatype 
indexes size " << datatype_indexes.size(); } if (!CanDataTypeReduce(datatype_indexes, i, node_mix_precision_datatype_index)) { iter = kernel_match_datatype_idx->erase(iter); @@ -397,9 +388,9 @@ void PrintRaiseOrReducePrecisionSelectedInfo(const CNodePtr &cnode, std::ostringstream buffer; buffer << cnode->DebugString(); if (precision_reduce) { - buffer << " reduce precision, node datatype: \n"; + buffer << " Reduce precision, node datatype: \n"; } else { - buffer << " raise precision, node datatype: \n"; + buffer << " Raise precision, node datatype: \n"; } PrintInputAndOutputInferType(buffer, cnode); buffer << ", select kernel:" << selected_kernel_build_info->ToString(); @@ -415,8 +406,8 @@ std::shared_ptr ChooseMatchedKernelInfo( size_t selected_index = 0; for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) { std::vector cur_kernel_info_match_counts = {0, 0, 0, 0, 0}; - auto kernel_build_info = *(kernel_info_list[info_index]); - std::shared_ptr kernel_info_ptr = kernel_info_list[info_index]; + auto kernel_info_ptr = kernel_info_list[info_index]; + MS_EXCEPTION_IF_NULL(kernel_info_ptr); UpdateCurMatchCounts(*kernel_info_ptr, kernel_node, &cur_kernel_info_match_counts); // Currently the selection policy is the match format count first, and then is datatype counts. 
if (PriorityChooseItem(cur_kernel_info_match_counts, &most_match_counts)) { @@ -467,6 +458,51 @@ std::vector> FilterRaisedOrReducePrecis } } // namespace +void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { + auto input_kernel_node = AnfAlgo::GetInputNode(kernel_node, input_index); + MS_EXCEPTION_IF_NULL(input_kernel_node); + auto input_with_index = AnfAlgo::VisitKernel(input_kernel_node, 0); + MS_EXCEPTION_IF_NULL(input_with_index.first); + auto real_input_node = input_with_index.first; + if (real_input_node->isa()) { + continue; + } + if (real_input_node->isa() && !AnfAlgo::IsParameterWeight(real_input_node->cast())) { + continue; + } + auto builder = std::make_shared(); + if (IsValueNode(input_kernel_node) && + AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0) == kTypeUnknown) { + std::vector output_format = {selected_kernel_info.GetInputFormat(input_index)}; + builder->SetOutputsFormat(output_format); + std::vector output_type = {selected_kernel_info.GetInputDeviceType(input_index)}; + builder->SetOutputsDeviceType(output_type); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get()); + continue; + } + // we set special device info of a input tensor. 
+ bool is_ref = false; + auto op_info = kernel::OpLib::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel::kTBE); + if (op_info != nullptr) { + is_ref = op_info->is_ref(); + } + MS_EXCEPTION_IF_NULL(MsContext::GetInstance()); + if (MsContext::GetInstance()->execution_mode() == kPynativeMode && + AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) != kTypeUnknown) { + continue; + } + if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) { + std::vector output_format = {selected_kernel_info.GetInputFormat(input_index)}; + builder->SetOutputsFormat(output_format); + std::vector output_type = {selected_kernel_info.GetInputDeviceType(input_index)}; + builder->SetOutputsDeviceType(output_type); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); + } + } +} + KernelSelectStatus SetMatchedKernelInfo(const CNodePtr &kernel_node, const std::vector> &kernel_info_list) { MS_EXCEPTION_IF_NULL(kernel_node); @@ -498,11 +534,17 @@ KernelSelectStatus SetMatchedKernelInfo(const CNodePtr &kernel_node, return select_status; } -KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node) { +KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type) { std::vector> kernel_info_list; std::vector> aicpu_kernel_info_list; MS_EXCEPTION_IF_NULL(kernel_node); - kernel::KernelQuery(kernel_node, &kernel_info_list); + if (AnfAlgo::IsGraphKernel(kernel_node)) { + auto func_graph = GetValueNode(kernel_node->input(kAnfPrimitiveIndex)); + MS_EXCEPTION_IF_NULL(func_graph); + SelectGraphKernelInfo(kernel_node, func_graph); + return kStatusAllMatched; + } + kernel::KernelQuery(kernel_node, &kernel_info_list, kernel_type); auto select_status = SetMatchedKernelInfo(kernel_node, kernel_info_list); // If aicore not find valid kernel info reloading aicpu kernel info list to find it if (select_status == kNoMatched) { @@ -516,12 +558,12 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node) { if 
(select_status == kNoMatched) { std::ostringstream buffer; PrintInputAndOutputInferType(buffer, kernel_node); - MS_LOG(WARNING) << ">>> candidates kernel info list:"; + MS_LOG(WARNING) << ">>> Candidates kernel info list:"; for (size_t index = 0; index < kernel_info_list.size(); ++index) { - MS_LOG(WARNING) << "kernel [" << index << "] :" << kernel_info_list[index]->ToString(); + MS_LOG(WARNING) << "Kernel [" << index << "] :" << kernel_info_list[index]->ToString(); } for (size_t index = 0; index < aicpu_kernel_info_list.size(); ++index) { - MS_LOG(WARNING) << "kernel [" << (kernel_info_list.size() + index) + MS_LOG(WARNING) << "Kernel [" << (kernel_info_list.size() + index) << "] :" << aicpu_kernel_info_list[index]->ToString(); } MS_LOG(WARNING) << " <<<"; diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.h b/mindspore/ccsrc/device/ascend/kernel_select_ascend.h index c4c777c18a..7b7a7b9fb9 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.h +++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.h @@ -27,7 +27,10 @@ enum KernelSelectStatus { kStatusReducePrecision = 1, kStatusRaisePrecision = 2, }; -KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node); +KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, + KernelType kernel_type = KernelType::UNKNOWN_KERNEL_TYPE); +void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, const CNodePtr &kernel_node); +void SelectGraphKernelInfo(const CNodePtr &kernel_node, const FuncGraphPtr &func_graph); } // namespace ascend } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc b/mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc new file mode 100644 index 0000000000..b57ed1cd1b --- /dev/null +++ b/mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc @@ -0,0 +1,516 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 
(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/ascend/kernel_select_ascend.h" +#include "session/anf_runtime_algorithm.h" +#include "device/kernel_info.h" +#include "ir/func_graph.h" +#include "kernel/common_utils.h" +#include "kernel/kernel_query.h" +#include "kernel/kernel_build_info.h" + +namespace mindspore { +namespace device { +namespace ascend { + +TypeId GetPrimitivePrecision(const CNodePtr &cnode) { + auto primitive = AnfAlgo::GetCNodePrimitive(cnode); + MS_EXCEPTION_IF_NULL(primitive); + + TypeId except_type = kTypeUnknown; + if (primitive->GetAttr(kAttrFixPrecision) != nullptr) { + auto strExceptDtype = GetValue(primitive->GetAttr(kAttrFixPrecision)); + if (strExceptDtype == "float16") { + except_type = kNumberTypeFloat16; + } else if (strExceptDtype == "float32") { + except_type = kNumberTypeFloat32; + } else { + MS_LOG(EXCEPTION) << "The fix precision must be float16 or float32, but got" << strExceptDtype; + } + } + + return except_type; +} + +void ResetKernelBuildInfo(const CNodePtr &kernel_node) { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + for (size_t input_index = 0; input_index < input_num; ++input_index) { + auto input_kernel_node = AnfAlgo::GetInputNode(kernel_node, input_index); + MS_EXCEPTION_IF_NULL(input_kernel_node); + auto kernel_with_index = AnfAlgo::VisitKernel(input_kernel_node, 0); + if (!kernel::IsWeightBoundary(kernel_with_index.first)) { + continue; + } + // reset format and dtype. 
+ kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + builder.SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); + builder.SetOutputsDeviceType(std::vector{kTypeUnknown}); + AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), input_kernel_node.get()); + } +} + +void UpdateKernelInfo(const std::vector &node_list) { + for (size_t i = 0; i < node_list.size(); ++i) { + // select nodes in subgraph. + auto anf_node = node_list[i]; + MS_EXCEPTION_IF_NULL(anf_node); + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto fix_precision_type = GetPrimitivePrecision(cnode); + if (fix_precision_type != kTypeUnknown) { + std::vector> kernel_info_list; + kernel::KernelQuery(cnode, &kernel_info_list, KernelType::AKG_KERNEL); + + for (size_t index = 0; index < kernel_info_list.size(); ++index) + // only math the first input + if (kernel_info_list[index]->GetInputDeviceType(0) == fix_precision_type && + kernel_info_list[index]->GetInputFormat(0) == AnfAlgo::GetPrevNodeOutputFormat(cnode, 0) && + AnfAlgo::GetInputDeviceDataType(cnode, 0) != fix_precision_type) { + auto selected_kernel_info_ptr = kernel_info_list[index]; + ResetKernelBuildInfo(cnode); + AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info_ptr, cnode.get()); + SetTensorDeviceInfo(*selected_kernel_info_ptr, cnode); + break; + } + } + } +} + +bool CanConvertDefaultShapeToNZ(const std::vector &shape) { + for (size_t i = 1; i <= shape.size(); ++i) { + if (i > 2) { + break; + } + if (shape[shape.size() - i] != 1 && shape[shape.size() - i] % kCubeSize != 0) { + return false; + } + } + return true; +} + +std::vector DefaultToFracNZAxis(const std::vector &ori_shape, const std::vector &axis) { + std::vector frac_nz_axis = axis; + auto shape_len = ori_shape.size(); + for (size_t i = 0; i < axis.size(); ++i) { + auto axis_idx = (frac_nz_axis[i] + shape_len) % shape_len; + if (axis_idx == shape_len - 1) { + frac_nz_axis[i] = axis_idx - 1; + frac_nz_axis.push_back(axis_idx + 2); + } else if (axis_idx == 
shape_len - 2) { + frac_nz_axis[i] = axis_idx + 1; + frac_nz_axis.push_back(axis_idx + 2); + } else { + frac_nz_axis[i] = axis_idx; + } + } + return frac_nz_axis; +} + +std::vector GetReducedFracNZShape(const std::vector &ori_shape, const std::vector &axis, + bool keep_dims) { + std::vector result; + std::set positive_idx; + for (const auto &a : axis) { + positive_idx.insert(a >= 0 ? a : ori_shape.size() + a); + } + for (size_t i = 0; i < ori_shape.size(); ++i) { + if (positive_idx.count(i) == 0) { + result.push_back(ori_shape[i]); + } else if (keep_dims) { + result.push_back(1); + } + } + return result; +} + +void UpdateFracNZReduceOp(const CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(cnode); + auto input_format = AnfAlgo::GetPrevNodeOutputFormat(cnode, 0); + if (input_format == kOpFormat_FRAC_NZ) { + // Clone primitive to modify it + auto prim = GetCNodePrimitive(cnode); + auto new_prim = std::make_shared(*prim); + auto new_prim_node = NewValueNode(new_prim); + cnode->set_input(0, new_prim_node); + + auto axis_value = new_prim->GetAttr(kAttrAxis); + std::vector default_axis; + if (axis_value->isa()) { + auto value_list = dyn_cast(axis_value); + for (const auto &item : value_list->value()) { + if (item->isa()) { + default_axis.push_back(GetValue(item)); + } + } + } else if (axis_value->isa()) { + auto value_tuple = dyn_cast(axis_value); + for (const auto &item : value_tuple->value()) { + if (item->isa()) { + default_axis.push_back(GetValue(item)); + } + } + } else { + MS_LOG(ERROR) << "Axis attr type is not correct!"; + } + auto infer_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0); + std::vector frac_nz_axis = DefaultToFracNZAxis(infer_shape, default_axis); + AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue>(frac_nz_axis), cnode); + auto output_shape = AnfAlgo::GetOutputInferShape(cnode, 0); + if (output_shape.size() == 1) { + AnfAlgo::SetNodeAttr(kAttrOutputDefault, MakeValue(true), cnode); + } + } +} + +void GetDefaultFormat(const CNodePtr &kernel_node, 
std::string *default_format, bool *use_same_format) { + MS_EXCEPTION_IF_NULL(kernel_node); + MS_EXCEPTION_IF_NULL(default_format); + MS_EXCEPTION_IF_NULL(use_same_format); + std::unordered_map all_input_formats; + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + for (size_t i = 0; i < input_num; ++i) { + auto input_kernel_node = AnfAlgo::VisitKernel(kernel_node->input(i + 1), 0).first; + MS_EXCEPTION_IF_NULL(input_kernel_node); + if (!input_kernel_node->isa()) { + auto pre_format = AnfAlgo::GetPrevNodeOutputFormat(kernel_node, i); + ++all_input_formats[pre_format]; + continue; + } + auto para = input_kernel_node->cast(); + MS_EXCEPTION_IF_NULL(para); + if (AnfAlgo::GetOutputDeviceDataType(para, 0) != kTypeUnknown) { + auto pre_format = AnfAlgo::GetOutputFormat(para, 0); + ++all_input_formats[pre_format]; + continue; + } + *use_same_format = false; + } + + if (all_input_formats.empty()) { + // all inputs are parameter. + *default_format = kOpFormat_NC1HWC0; + } else { + std::vector> pairs; + for (auto iter = all_input_formats.begin(); iter != all_input_formats.end(); ++iter) { + pairs.push_back(std::make_pair(iter->first, iter->second)); + } + auto cmp_func = [](const std::pair &a, const std::pair &b) { + if (a.second != b.second) { + return a.second > b.second; + } else if (a.first == kOpFormat_DEFAULT) { + return a.second + 1 > b.second; + } else if (b.first == kOpFormat_DEFAULT) { + return a.second > b.second + 1; + } + return a.second > b.second; + }; + std::sort(pairs.begin(), pairs.end(), cmp_func); + *default_format = pairs.begin()->first; + } + + for (size_t i = 0; i < input_num; ++i) { + auto input_kernel_node = AnfAlgo::VisitKernel(kernel_node->input(i + 1), 0).first; + MS_EXCEPTION_IF_NULL(input_kernel_node); + if (!input_kernel_node->isa() || + AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0) != kTypeUnknown) { + continue; + } + auto weight_infer_shape = AnfAlgo::GetOutputInferShape(input_kernel_node, 0); + if 
(weight_infer_shape.size() < 2 && *default_format == kOpFormat_FRAC_NZ) { + *default_format = kOpFormat_DEFAULT; + *use_same_format = true; + break; + } + } +} + +void UpdateGraphKernelInputsKernelInfo(const CNodePtr &kernel_node, const std::vector &input_list, + const std::string &default_format, bool use_same_format, + std::vector *graph_input_format, + std::vector *graph_input_type) { + MS_EXCEPTION_IF_NULL(graph_input_format); + MS_EXCEPTION_IF_NULL(graph_input_type); + // We set same format to all inputs of graph kernel subgraph, and process this latter. + // We set dtype to inputs of graph kernel subgraph same as infer dtypes. + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + for (size_t i = 0; i < input_num; ++i) { + auto input_kernel_node = AnfAlgo::VisitKernel(kernel_node->input(i + 1), 0).first; + MS_EXCEPTION_IF_NULL(input_kernel_node); + if (use_same_format) { + bool can_convert = true; + if (default_format == kOpFormat_FRAC_NZ) { + auto infer_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); + if (!CanConvertDefaultShapeToNZ(infer_shape)) { + MS_LOG(WARNING) << "Shape can't be converted to frac nz shape, so use default format instead"; + can_convert = false; + } + } + if (can_convert) { + graph_input_format->push_back(default_format); + } else { + graph_input_format->push_back(kOpFormat_DEFAULT); + } + graph_input_type->push_back(AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, i)); + continue; + } + + if (!input_kernel_node->isa()) { + // subgraph parameter from output of other nodes. + graph_input_format->push_back(AnfAlgo::GetPrevNodeOutputFormat(kernel_node, i)); + graph_input_type->push_back(AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, i)); + continue; + } + + auto para = input_kernel_node->cast(); + MS_EXCEPTION_IF_NULL(para); + if (AnfAlgo::GetOutputDeviceDataType(para, 0) != kTypeUnknown) { + // parameter already selected. 
+ graph_input_format->push_back(AnfAlgo::GetOutputFormat(para, 0)); + graph_input_type->push_back(AnfAlgo::GetOutputDeviceDataType(para, 0)); + continue; + } + + // weight parameter. + graph_input_format->push_back(default_format); + graph_input_type->push_back(AnfAlgo::GetOutputInferDataType(input_kernel_node, 0)); + } + + for (size_t i = 0; i < input_num; ++i) { + kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + std::vector outputs_format = {(*graph_input_format)[i]}; + std::vector outputs_device_type = {(*graph_input_type)[i]}; + builder.SetOutputsFormat(outputs_format); + builder.SetOutputsDeviceType(outputs_device_type); + AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), input_list[i].get()); + } +} + +void UpdateEquivFormat(const std::vector> &output_index, + const std::vector &node_list, const FuncGraphPtr &func_graph, + const FuncGraphManagerPtr &mng) { + MS_EXCEPTION_IF_NULL(mng); + for (size_t i = 0; i < node_list.size(); ++i) { + // select nodes in subgraph. + auto anf_node = node_list[i]; + MS_EXCEPTION_IF_NULL(anf_node); + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + cnode->set_kernel_info(std::make_shared()); + SelectKernelInfo(cnode, KernelType::AKG_KERNEL); + // Update ReduceSum + if (!IsPrimitiveCNode(cnode, prim::kPrimReduceSum)) { + continue; + } + UpdateFracNZReduceOp(cnode); + // If ReduceSum's output is 1d and not Default format, convert it to Default format + auto out_format = AnfAlgo::GetOutputFormat(cnode, 0); + if (out_format == kOpFormat_DEFAULT || !AnfAlgo::HasNodeAttr(kAttrOutputDefault, cnode)) { + continue; + } + auto infer_shape = AnfAlgo::GetOutputInferShape(cnode, 0); + // Insert EquivFormat node, then select kernel info again + std::vector trans_inputs; + trans_inputs.push_back(NewValueNode(prim::kPrimEquivFormat)); + trans_inputs.push_back(cnode); + CNodePtr trans_node = func_graph->NewCNode(trans_inputs); + AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetPrevNodeOutputInferDataType(cnode, 0)}, + 
{AnfAlgo::GetOutputInferShape(cnode, 0)}, trans_node.get()); + AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue>({"x"}), trans_node); + + if (trans_node->kernel_info() == nullptr) { + trans_node->set_kernel_info(std::make_shared()); + } + SelectKernelInfo(trans_node, KernelType::AKG_KERNEL); + mng->Replace(cnode, trans_node); + } +} + +void UpdateFormatsAndDtypes(const CNodePtr &kernel_node, const std::vector &node_list, + const std::vector &input_list, const FuncGraphManagerPtr &mng, + const std::string &default_format, std::vector *graph_input_format, + std::vector *graph_input_type) { + MS_EXCEPTION_IF_NULL(kernel_node); + MS_EXCEPTION_IF_NULL(mng); + MS_EXCEPTION_IF_NULL(graph_input_format); + MS_EXCEPTION_IF_NULL(graph_input_type); + // update graph input format and dtype use inner ops. + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (graph_input_format->size() != input_num) { + MS_LOG(EXCEPTION) << "Graph input format size is not equal to input num of cnode[" << kernel_node->DebugString() + << "], [%" << graph_input_format->size() << "] != [%" << input_num << "]"; + } + std::vector need_update(input_num, false); + auto &node_users = mng->node_users(); + for (size_t i = 0; i < input_num; ++i) { + auto &input = input_list[i]; + auto iter = node_users.find(input); + if (iter == node_users.end() || iter->second.empty()) { + continue; + } + for (auto &node_user : iter->second) { + if (node_user.first->kernel_info() == nullptr || + node_user.first->kernel_info()->select_kernel_build_info() == nullptr) { + // maybe not a real kernel. + continue; + } + auto user_format = AnfAlgo::GetInputFormat(node_user.first, IntToSize(node_user.second - 1)); + if (user_format != (*graph_input_format)[i]) { + MS_LOG(WARNING) << "Users of input: [" << i << "][" << input->DebugString(2) << " of [" + << kernel_node->DebugString() + << "] selected different format. 
we use defult: " << default_format; + (*graph_input_format)[i] = default_format; + need_update[i] = true; + } + + if (kernel_node->input(i + 1)->isa()) { + auto user_dtype = AnfAlgo::GetInputDeviceDataType(node_user.first, IntToSize(node_user.second - 1)); + if (user_dtype != (*graph_input_type)[i]) { + TypeId default_dtype = AnfAlgo::GetOutputInferDataType(input, 0); + MS_LOG(WARNING) << "Users of input: [" << i << "][" << input->DebugString(2) << " of [" + << kernel_node->DebugString() + << "] selected different dtype. we use default: " << TypeIdLabel(default_dtype); + (*graph_input_type)[i] = default_dtype; + need_update[i] = true; + } + } + } + } + + for (size_t i = 0; i < input_num; ++i) { + if (!need_update[i]) { + continue; + } + need_update[i] = false; + + MS_LOG(DEBUG) << "Update input format: " << i << " of: [" << kernel_node->DebugString() + << "] to: " << (*graph_input_format)[i]; + MS_LOG(DEBUG) << "Update input dtype: " << i << " of: [" << kernel_node->DebugString() + << "] to: " << TypeIdLabel((*graph_input_type)[i]); + kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + std::vector outputs_format = {(*graph_input_format)[i]}; + std::vector outputs_device_type = {(*graph_input_type)[i]}; + builder.SetOutputsFormat(outputs_format); + builder.SetOutputsDeviceType(outputs_device_type); + AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), input_list[i].get()); + } + + ResetKernelBuildInfo(kernel_node); + // select nodes in subgraph again. 
+ for (size_t i = 0; i < node_list.size(); ++i) { + auto anf_node = node_list[i]; + MS_EXCEPTION_IF_NULL(anf_node); + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + size_t cnode_input_num = AnfAlgo::GetInputTensorNum(cnode); + for (size_t j = 0; j < cnode_input_num; ++j) { + auto input_node = cnode->input(j + 1); + MS_EXCEPTION_IF_NULL(input_node); + if (!IsValueNode(input_node)) { + continue; + } + // reset format and dtype of const tensor. + builder.SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); + builder.SetOutputsDeviceType(std::vector{kTypeUnknown}); + AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), input_node.get()); + } + SelectKernelInfo(node_list[i]->cast(), KernelType::AKG_KERNEL); + } +} + +void SetGraphKernelInfo(const CNodePtr &kernel_node, const std::vector> &output_index, + const std::vector &graph_input_format, + const std::vector &graph_input_type) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::vector graph_output_format; + std::vector graph_output_type; + for (size_t i = 0; i < output_index.size(); ++i) { + auto const &output = output_index[i]; + graph_output_format.push_back(AnfAlgo::GetOutputFormat(output.first, output.second)); + TypeId output_type(kTypeUnknown); + if (output.first->isa()) { + output_type = AnfAlgo::GetCNodeOutputPrecision(output.first); + } + if (output_type == kTypeUnknown) { + output_type = AnfAlgo::GetOutputDeviceDataType(output.first, output.second); + } + graph_output_type.push_back(output_type); + } + + kernel::KernelBuildInfo::KernelBuildInfoBuilder graph_info_builder; + graph_info_builder.SetInputsFormat(graph_input_format); + graph_info_builder.SetInputsDeviceType(graph_input_type); + graph_info_builder.SetOutputsFormat(graph_output_format); + graph_info_builder.SetOutputsDeviceType(graph_output_type); + graph_info_builder.SetProcessor(kernel::Processor::AICORE); + graph_info_builder.SetKernelType(KernelType::AKG_KERNEL); + 
graph_info_builder.SetFusionType(kernel::FusionType::OPAQUE); + auto graph_selected_info = graph_info_builder.Build(); + MS_EXCEPTION_IF_NULL(graph_selected_info); + AnfAlgo::SetSelectKernelBuildInfo(graph_selected_info, kernel_node.get()); + SetTensorDeviceInfo(*graph_selected_info, kernel_node); +} + +void SelectGraphKernelInfo(const CNodePtr &kernel_node, const FuncGraphPtr &func_graph) { + MS_EXCEPTION_IF_NULL(kernel_node); + MS_EXCEPTION_IF_NULL(func_graph); + + // collect input info of funcgraph + std::vector node_list; + std::vector input_list; + std::vector output_list; + kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); + if (input_list.size() != kernel_node->inputs().size() - 1) { + MS_EXCEPTION(ArgumentError) << "Input num of funcgraph[" << func_graph->ToString() << "] not equal input of cnode[" + << kernel_node->DebugString() << "], [%" << input_list.size() << "] != [" + << kernel_node->inputs().size() << "]"; + } + + std::string default_format; + bool use_same_format = true; + GetDefaultFormat(kernel_node, &default_format, &use_same_format); + MS_LOG(DEBUG) << "GraphKernel[" << func_graph->ToString() << "] use same input format[" << default_format + << "] for ParameterWeight."; + + std::vector graph_input_format; + std::vector graph_input_type; + UpdateGraphKernelInputsKernelInfo(kernel_node, input_list, default_format, use_same_format, &graph_input_format, + &graph_input_type); + + auto mng = func_graph->manager(); + if (mng == nullptr) { + mng = Manage(func_graph, true); + } + auto output_index = kernel::GetOutputIndex(node_list, input_list, output_list); + UpdateEquivFormat(output_index, node_list, func_graph, mng); + node_list.clear(); + input_list.clear(); + output_list.clear(); + kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); + + // update graph input format and dtype use inner ops. 
+ UpdateFormatsAndDtypes(kernel_node, node_list, input_list, mng, default_format, &graph_input_format, + &graph_input_type); + + // set fix_precision for kernel when the me prim has fix_precision attr + UpdateKernelInfo(node_list); + + output_index = kernel::GetOutputIndex(node_list, input_list, output_list); + SetGraphKernelInfo(kernel_node, output_index, graph_input_format, graph_input_type); +} +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc index 6cf3cad62f..fec1aac685 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc @@ -28,6 +28,7 @@ #include "utils/context/ms_context.h" #include "common/utils.h" #include "utils/convert_utils.h" +#include "runtime/base.h" using std::vector; using Json = nlohmann::json; @@ -120,7 +121,6 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) { MS_LOG(ERROR) << "Register profiling Engine failed."; return false; } - auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); const string prof_options_str = context->profiling_options(); @@ -129,7 +129,6 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) { MS_LOG(WARNING) << "Profiling is enabled, but profiling option is not set!"; return true; } - // current one docker only use one device` Json p_device; // JOBID @@ -148,7 +147,6 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) { // only one device, but sProfMgrStartUp API require for device list Json devices; devices[0] = p_device; - Json startCfg; startCfg["startCfg"] = devices; @@ -156,8 +154,12 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) { std::stringstream ss; ss << startCfg; std::string cfg = ss.str(); - MS_LOG(INFO) << "profiling config " << cfg; + auto ret = rtProfilerStart(); + if (ret != RT_ERROR_NONE) { + 
MS_LOG(INFO) << "Call rtProfilerStart failed, ret:" << ret; + return false; + } // call profiling startup API ProfMgrCfg prof_cfg = {cfg}; @@ -169,7 +171,7 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) { return true; } -bool ProfilingManager::StopProfiling() const { +bool ProfilingManager::StopProfiling() { MS_LOG(INFO) << "StopProfiling"; if (!IsProfiling()) { MS_LOG(INFO) << "No need profiling. please export PROFILING_MODE and in train mode."; @@ -180,12 +182,20 @@ bool ProfilingManager::StopProfiling() const { MS_LOG(INFO) << "report data end, ret = " << reporter->Flush(); } + auto rt_ret = rtProfilerStop(); + if (rt_ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "Call rtProfilerStop failed"; + return false; + } + if (prof_handle_ != nullptr) { int result = ProfMgrStop(prof_handle_); if (result != 0) { MS_LOG(ERROR) << "ProfMgr stop return fail:" << result << "."; + prof_handle_ = nullptr; return false; } + prof_handle_ = nullptr; } return true; diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h index f0c25d7f8a..c30c6898ea 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h @@ -39,7 +39,7 @@ class ProfilingManager { uint64_t GetJobId() const; bool ReportProfilingData(const map &op_taskId_map) const; bool StartupProfiling(uint32_t device_id); - bool StopProfiling() const; + bool StopProfiling(); inline bool IsProfiling() const { auto context = MsContext::GetInstance(); diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc index 62e18793b2..131a22805d 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc @@ -14,7 +14,6 @@ * limitations under the License. 
*/ -#include #include "device/ascend/profiling/reporter/graph_desc_reporter.h" #include "device/ascend/profiling/profiling_utils.h" #include "kernel/kernel.h" @@ -24,6 +23,7 @@ #include "utils/utils.h" #include "device/ascend/profiling/reporter/task_desc_reporter.h" #include "utils/context/ms_context.h" +#include "device/ascend/profiling/reporter/point_reporter.h" namespace mindspore { namespace device { @@ -33,8 +33,9 @@ constexpr char kCustomNode[] = "PROFILING_CUSTOM_"; constexpr char kFpStartNode[] = "PROFILING_FP_START"; constexpr char kBpEndNode[] = "PROFILING_BP_END"; constexpr char kIterEndNode[] = "PROFILING_ITER_END"; -std::unordered_map> ProfilingUtils::graph_profiling_cnode_; -std::unordered_map> ProfilingUtils::graph_kernel_name_; +std::map> ProfilingUtils::graph_profiling_cnode_; +std::map> ProfilingUtils::graph_kernel_name_; +std::map>> ProfilingUtils::graph_point_; uint32_t ProfilingUtils::custom_node_index_ = 1; ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull graph_ptr) { @@ -102,6 +103,7 @@ std::string ProfilingUtils::GetTraceBegin(const std::vector &cnode_exe void ProfilingUtils::GetCNodeOutputRealNode(const std::string &node_name, const std::vector &cnode_exec_order, NotNull *> getnext_outputs) { for (const auto &cnode : cnode_exec_order) { + MS_EXCEPTION_IF_NULL(cnode); for (const auto &input : cnode->inputs()) { auto prev_cnode = AnfAlgo::VisitKernel(input, 0); if (!prev_cnode.first->isa()) { @@ -203,6 +205,17 @@ NotNull ProfilingUtils::CreateProfilingCNode(const ProfilingContent &p return NOT_NULL(cnode_ptr); } +void ProfilingUtils::SaveProfilingPoint(uint32_t graph_id, const std::string &node_name, uint32_t point_id) { + std::shared_ptr prof_desc_ptr = std::make_shared(node_name, point_id); + auto iter = graph_point_.find(graph_id); + if (iter == graph_point_.end()) { + std::vector> tmp_vect = {prof_desc_ptr}; + graph_point_.insert({graph_id, tmp_vect}); + } else { + iter->second.emplace_back(prof_desc_ptr); + } +} + void 
ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, NotNull graph_ptr, @@ -213,6 +226,8 @@ void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0}; auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr); kernel_list->emplace_back(fp_profiling_node); + // insert ProfDesc + SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), kProfilingFpStartLogId); } } @@ -244,13 +259,16 @@ void ProfilingUtils::ProfilingCustomOp(const AnfNodePtr &anf_node, const Profili } MS_LOG(INFO) << "Profiling Match CustomOp:" << anf_node->fullname_with_scope(); // custom op profiling job start from 3. - ProfilingContent front_profiling_content = {false, 2 * custom_node_index_ + 1, 0}; + auto custom_point_id = 2 * custom_node_index_ + 1; + ProfilingContent front_profiling_content = {false, custom_point_id, 0}; CNodePtr front_node = CreateProfilingCNodeWithStream(anf_node, front_profiling_content, graph_ptr); kernel_list->insert(kernel_list->end() - 1, front_node); + SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), custom_point_id); - ProfilingContent back_profiling_content = {false, 2 * custom_node_index_ + 2, 0}; + ProfilingContent back_profiling_content = {false, custom_point_id + 1, 0}; CNodePtr back_node = CreateProfilingCNodeWithStream(anf_node, back_profiling_content, graph_ptr); kernel_list->insert(kernel_list->end(), back_node); + SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), custom_point_id + 1); ++custom_node_index_; } @@ -263,6 +281,7 @@ void ProfilingUtils::ProfilingTraceBpEnd(const AnfNodePtr &anf_node, const Profi ProfilingContent bp_end_profiling_content = {false, kProfilingBpEndLogId, 0}; CNodePtr bp_end_node = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr); 
kernel_list->emplace_back(bp_end_node); + SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), kProfilingBpEndLogId); } } @@ -276,6 +295,7 @@ void ProfilingUtils::ProfilingTraceEnd(const AnfNodePtr &anf_node, const Profili ProfilingContent bp_end_profiling_content = {true, kProfilingIterEndLogId, 0}; CNodePtr bp_kernel_ptr = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr); kernel_list->emplace_back(bp_kernel_ptr); + SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), kProfilingIterEndLogId); } } @@ -302,7 +322,7 @@ bool ProfilingUtils::ValidComputeGraph(NotNull gra return false; } -void ProfilingUtils::ReportProfilingData(const std::vector &task_ids, +void ProfilingUtils::ReportProfilingData(const std::vector &task_ids, const std::vector &stream_ids, NotNull graph) { if (!ValidComputeGraph(graph)) { MS_LOG(WARNING) << "Not a valid compute graph:" << graph->graph_id(); @@ -319,11 +339,24 @@ void ProfilingUtils::ReportProfilingData(const std::vector &task_ids, MS_EXCEPTION_IF_NULL(context); TaskDescReporter task_reporter(context->device_id(), "vm.task_desc_info", ret->second); task_reporter.set_task_ids(task_ids); + task_reporter.set_stream_ids(stream_ids); task_reporter.ReportData(); GraphDescReporter graph_reporter(context->device_id(), "vm.graph_desc_info", ret->second); graph_profiling_cnode_.erase(ret); graph_reporter.ReportData(); + + // Report profiling point + auto point_iter = graph_point_.find(graph->graph_id()); + if (point_iter == graph_point_.end()) { + MS_LOG(ERROR) << "Graph id not found in graph_point"; + return; + } + PointReporter point_reporter(context->device_id(), "vm.point"); + for (const auto &point : point_iter->second) { + point_reporter.AddReportData(point); + } + point_reporter.ReportData(); } } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h 
index 39ea80a2e9..a3c7739447 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h @@ -16,6 +16,7 @@ #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_PROFILING_UTILS_H_ #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_PROFILING_UTILS_H_ +#include #include #include #include @@ -23,6 +24,7 @@ #include #include "session/kernel_graph.h" #include "utils/contract.h" +#include "device/ascend/profiling/reporter/profiling_desc.h" namespace mindspore { namespace device { @@ -87,7 +89,8 @@ class ProfilingUtils { // Mapping task_id and kernel name for device to generate the time cost of specific kernel. // Device calculate the time cost of the task which is marked by task id. // But we need data of (kernel name , time cost) - static void ReportProfilingData(const std::vector &task_ids, NotNull graph); + static void ReportProfilingData(const std::vector &task_ids, const std::vector &stream_ids, + NotNull graph); // Get profiling trace point from envs. 
// export PROFILING_FP_START='full name of the first cnode to execute' @@ -103,7 +106,7 @@ class ProfilingUtils { NotNull graph_ptr, NotNull *> kernel_list); - static std::unordered_map> graph_kernel_name() { return graph_kernel_name_; } + static std::map> graph_kernel_name() { return graph_kernel_name_; } inline static constexpr char kProfiling[] = "Profiling"; inline static constexpr char kNotify[] = "notify"; @@ -125,10 +128,12 @@ class ProfilingUtils { NotNull *> getnext_outputs); static bool ValidComputeGraph(NotNull graph_ptr); + static void SaveProfilingPoint(uint32_t graph_id, const std::string &node_name, uint32_t point_id); // graph id --> (kernel name list) - static std::unordered_map> graph_profiling_cnode_; - static std::unordered_map> graph_kernel_name_; + static std::map> graph_profiling_cnode_; + static std::map> graph_kernel_name_; + static std::map>> graph_point_; static uint32_t custom_node_index_; }; } // namespace ascend diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.cc b/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.cc index bf61471827..cf80c07ca9 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.cc +++ b/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.cc @@ -42,22 +42,22 @@ void DescReporter::ReportByLine(const std::string &data, const std::string &file report_data.data = (unsigned char *)data.c_str() + cur_size; auto ret = memcpy_s(report_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, file_name.c_str(), file_name.length()); if (ret != 0) { - MS_LOG(EXCEPTION) << "memcpy_s report data tag failed"; + MS_LOG(EXCEPTION) << "Memcpy_s report data tag failed"; } auto report_ret = reporter->Report(&report_data); if (report_ret != 0) { - MS_LOG(EXCEPTION) << "report data failed"; + MS_LOG(EXCEPTION) << "Report data failed"; } if (report_size == 0) { - MS_LOG(WARNING) << "report_size is 0"; + MS_LOG(WARNING) << "Report_size is 0"; break; } cur_size += report_size; } } 
-void DescReporter::ReportData() { - for (const auto &desc : prof_desc_) { +void DescReporter::ReportAllLine() { + for (const auto &desc : prof_desc_list_) { auto data = desc->ToString(); ReportByLine(data, file_name_); } diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.h b/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.h index b8f0cd2f25..c8e1b3ed62 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.h +++ b/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.h @@ -32,16 +32,17 @@ namespace ascend { class DescReporter { public: virtual ~DescReporter() = 0; - DescReporter(int device_id, std::string file_name, std::vector cnode_list) - : device_id_(device_id), file_name_(std::move(file_name)), cnode_list_(std::move(cnode_list)) {} - virtual void ReportData(); + DescReporter(int device_id, std::string file_name) : device_id_(device_id), file_name_(std::move(file_name)) {} + + virtual void ReportData() = 0; protected: void ReportByLine(const std::string &data, const std::string &file_name) const; + void ReportAllLine(); + int device_id_; std::string file_name_; - std::vector cnode_list_; - std::vector> prof_desc_; + std::vector> prof_desc_list_; }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc b/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc index f4f4b3362c..1f2d1570bb 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc +++ b/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc @@ -24,12 +24,13 @@ namespace device { namespace ascend { void GraphDescReporter::ReportData() { for (const auto &node : cnode_list_) { - if (AnfAlgo::GetKernelType(node) != TBE_KERNEL) { + if (AnfAlgo::GetKernelType(node) != TBE_KERNEL && AnfAlgo::GetKernelType(node) != AKG_KERNEL) { MS_LOG(WARNING) << "Skip non tbe kernel"; continue; } std::vector 
input_data_list; std::vector output_data_list; + MS_EXCEPTION_IF_NULL(node); auto op_name = node->fullname_with_scope(); auto op_type = AnfAlgo::GetCNodeName(node); auto input_size = AnfAlgo::GetInputTensorNum(node); @@ -56,9 +57,9 @@ void GraphDescReporter::ReportData() { } auto graph_desc = std::make_shared(op_name, op_type, input_data_list, output_data_list); - prof_desc_.emplace_back(graph_desc); + prof_desc_list_.emplace_back(graph_desc); } - DescReporter::ReportData(); + ReportAllLine(); } } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.h b/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.h index 3c48a90efe..10f78092f2 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.h +++ b/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.h @@ -28,9 +28,12 @@ namespace ascend { class GraphDescReporter : public DescReporter { public: GraphDescReporter(uint32_t device_id, const std::string &file_name, std::vector cnode_list) - : DescReporter(device_id, file_name, std::move(cnode_list)) {} + : DescReporter(device_id, file_name), cnode_list_(std::move(cnode_list)) {} ~GraphDescReporter() override = default; void ReportData() override; + + private: + std::vector cnode_list_; }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.cc b/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.cc new file mode 100644 index 0000000000..0024ab9c22 --- /dev/null +++ b/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.cc @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/ascend/profiling/reporter/point_reporter.h" + +namespace mindspore { +namespace device { +namespace ascend { +void PointReporter::ReportData() { ReportAllLine(); } + +void PointReporter::AddReportData(const std::shared_ptr &prof_desc) { + prof_desc_list_.emplace_back(prof_desc); +} +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.h b/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.h new file mode 100644 index 0000000000..ae12672df6 --- /dev/null +++ b/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.h @@ -0,0 +1,37 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_POINT_REPORTER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_POINT_REPORTER_H_ + +#include +#include +#include "device/ascend/profiling/reporter/desc_reporter.h" + +namespace mindspore { +namespace device { +namespace ascend { +class PointReporter : public DescReporter { + public: + PointReporter(uint32_t device_id, const std::string &file_name) : DescReporter(device_id, file_name) {} + ~PointReporter() override = default; + void ReportData() override; + void AddReportData(const std::shared_ptr &prof_desc); +}; +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_POINT_REPORTER_H_ diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.cc b/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.cc index f28f133e1a..082cb81e42 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.cc +++ b/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.cc @@ -66,6 +66,12 @@ std::string GraphDesc::ToString() { return desc; } +std::string PointDesc::ToString() { + std::string desc; + desc.append(std::to_string(point_id_)).append(" ").append(op_name_).append("\n"); + return desc; +} + std::string GraphDesc::DataShapeToString(const std::vector &shape) { std::ostringstream oss; oss << "\""; diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.h b/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.h index 852bcf116b..6d0ed45bef 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.h +++ b/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.h @@ -71,6 +71,16 @@ class GraphDesc : public ProfDesc { std::vector output_data_list_; [[nodiscard]] static std::string DataShapeToString(const std::vector &shape); }; + +class PointDesc : public ProfDesc { + public: + 
PointDesc(std::string op_name, uint32_t point_id) : ProfDesc(std::move(op_name)), point_id_(point_id) {} + ~PointDesc() override = default; + std::string ToString() override; + + private: + uint32_t point_id_; +}; } // namespace ascend } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc b/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc index 8f59e72613..0bd66e31ef 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc +++ b/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc @@ -31,7 +31,7 @@ void TaskDescReporter::ReportData() { size_t task_index = 0; for (const auto &node : cnode_list_) { - if (AnfAlgo::GetKernelType(node) != TBE_KERNEL) { + if (AnfAlgo::GetKernelType(node) != TBE_KERNEL && AnfAlgo::GetKernelType(node) != AKG_KERNEL) { MS_LOG(WARNING) << "Skip non tbe kernel"; ++task_index; continue; @@ -40,11 +40,21 @@ void TaskDescReporter::ReportData() { auto ascend_kernel_mod = dynamic_cast(kernel_mod); MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(ascend_kernel_mod); - auto desc_ptr = std::make_shared(node->fullname_with_scope(), task_ids_[task_index++], - ascend_kernel_mod->block_dim(), ascend_kernel_mod->stream_id()); - prof_desc_.emplace_back(desc_ptr); + // Check task_id and stream_id valid + CheckStreamTaskValid(task_index, task_index); + auto desc_ptr = std::make_shared(node->fullname_with_scope(), task_ids_[task_index], + ascend_kernel_mod->block_dim(), stream_ids_[task_index]); + prof_desc_list_.emplace_back(desc_ptr); + ++task_index; + } + ReportAllLine(); +} + +void TaskDescReporter::CheckStreamTaskValid(uint32_t task_id, uint32_t stream_id) { + if (task_id >= task_ids_.size() || stream_id >= stream_ids_.size()) { + MS_LOG(EXCEPTION) << "Index invalid. 
task_id:" << task_id << ", task_ids.size:" << task_ids_.size() + << ", stream_id:" << stream_id << ", stream_ids.size:" << stream_ids_.size(); } - DescReporter::ReportData(); } } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.h b/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.h index c1f70cacaf..087c691a5f 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.h +++ b/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.h @@ -28,13 +28,17 @@ namespace ascend { class TaskDescReporter : public DescReporter { public: TaskDescReporter(int device_id, const std::string &file_name, std::vector cnode_list) - : DescReporter(device_id, file_name, std::move(cnode_list)) {} + : DescReporter(device_id, file_name), cnode_list_(std::move(cnode_list)) {} ~TaskDescReporter() override = default; void ReportData() override; void set_task_ids(const std::vector &task_ids) { task_ids_ = task_ids; } + void set_stream_ids(const std::vector &stream_ids) { stream_ids_ = stream_ids; } private: std::vector task_ids_; + std::vector stream_ids_; + void CheckStreamTaskValid(uint32_t task_id, uint32_t stream_id); + std::vector cnode_list_; }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc b/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc index 20084c0927..603dd989e5 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc +++ b/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc @@ -54,13 +54,13 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr &task_info MS_EXCEPTION_IF_NULL(task_info); hcclResult_t ret; static uint32_t task_counter = 0; - + auto hccl_group = task_info->group(); if (task_info->hccl_type() == kBroadcastOpName) { // call hcom broadcast interface to run op const string tag_broadcast = kHcomBroadcast + std::to_string(task_counter++) + kUnderline + 
std::to_string(0); ret = hcom_broadcast(tag_broadcast.c_str(), reinterpret_cast(task_info->input_data_addr()), static_cast(task_info->count()), static_cast(task_info->data_type()), - static_cast(task_info->root_id()), task_info->group().c_str(), stream); + static_cast(task_info->root_id()), hccl_group.c_str(), stream); if (ret != HCCL_SUCCESS) { MS_LOG(ERROR) << "hcom_broadcast fail, return ret: " << static_cast(ret); return false; @@ -70,7 +70,7 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr &task_info const string tag_all_gather = kHcomAllGather + std::to_string(task_counter++) + kUnderline + std::to_string(0); ret = hcom_all_gather(tag_all_gather.c_str(), reinterpret_cast(task_info->input_data_addr()), reinterpret_cast(task_info->output_data_addr()), static_cast(task_info->count()), - static_cast(task_info->data_type()), task_info->group().c_str(), stream); + static_cast(task_info->data_type()), hccl_group.c_str(), stream); if (ret != HCCL_SUCCESS) { MS_LOG(ERROR) << "hcom_all_gather fail, return ret: " << ret; return false; @@ -81,7 +81,7 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr &task_info ret = hcom_all_reduce(tag_all_reduce.c_str(), reinterpret_cast(task_info->input_data_addr()), reinterpret_cast(task_info->output_data_addr()), static_cast(task_info->count()), static_cast(task_info->data_type()), - static_cast(task_info->op_type()), task_info->group().c_str(), stream); + static_cast(task_info->op_type()), hccl_group.c_str(), stream); if (ret != HCCL_SUCCESS) { MS_LOG(ERROR) << "hcom_all_reduce fail, return ret: " << ret; return false; @@ -93,7 +93,7 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr &task_info ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), reinterpret_cast(task_info->input_data_addr()), reinterpret_cast(task_info->output_data_addr()), static_cast(task_info->count()), static_cast(task_info->data_type()), - static_cast(task_info->op_type()), task_info->group().c_str(), stream); + 
static_cast(task_info->op_type()), hccl_group.c_str(), stream); if (ret != HCCL_SUCCESS) { MS_LOG(ERROR) << "hcom_reduce_scatter fail, return ret: " << ret; return false; diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc index 18da966575..0cdf751801 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc +++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc @@ -43,13 +43,43 @@ bool TaskGenerator::GenTasks(const std::vector &anf_node_list, std::ve void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) { MS_EXCEPTION_IF_NULL(anf_node_ptr); if (anf_node_ptr->inputs().size() != 2) { - MS_LOG(EXCEPTION) << "atomic Addr clean Node Input nodes not equal 2."; + // akg process + // set atomic clean addr + if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, anf_node_ptr)) { + auto clean_output_indexs = AnfAlgo::GetNodeAttr>(anf_node_ptr, kAttrAtomicOutputIndexs); + auto graph = anf_node_ptr->func_graph(); + MS_EXCEPTION_IF_NULL(graph); + auto manager = graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + auto node_users = manager->node_users(); + if (node_users[anf_node_ptr].empty()) { + MS_LOG(EXCEPTION) << "Node users of " << anf_node_ptr->ToString() << " is empty."; + } + auto depend_node = node_users[anf_node_ptr].pop().first; + if (!IsPrimitiveCNode(depend_node, prim::kPrimDepend)) { + MS_LOG(EXCEPTION) << "Checking Depend node failed"; + } + if (node_users[depend_node].empty()) { + MS_LOG(EXCEPTION) << "Node users of " << depend_node->ToString() << " is empty."; + } + auto post_node = node_users[depend_node].pop().first; + for (auto index : clean_output_indexs) { + auto device_address = AnfAlgo::GetOutputAddr(post_node, index); + kernel::AddressPtr input = std::make_shared(); + input->addr = device_address->ptr_; + MS_EXCEPTION_IF_NULL(input->addr); + input->size = device_address->size_; + kernel_inputs->push_back(input); + 
} + MS_LOG(DEBUG) << "AtomicAddClean clean output size: " << clean_output_indexs.size(); + } + return; } MS_EXCEPTION_IF_NULL(anf_node_ptr->inputs()[1]); auto pre_node = (anf_node_ptr->inputs()[1])->cast(); // set clean output addr - if (AnfAlgo::HasNodeAttr(kAttrAutomicOutputIndexs, pre_node)) { - auto clean_output_indexs = AnfAlgo::GetNodeAttr>(pre_node, kAttrAutomicOutputIndexs); + if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, pre_node)) { + auto clean_output_indexs = AnfAlgo::GetNodeAttr>(pre_node, kAttrAtomicOutputIndexs); for (auto index : clean_output_indexs) { auto device_address = AnfAlgo::GetOutputAddr(pre_node, index); kernel::AddressPtr input = std::make_shared(); @@ -59,13 +89,13 @@ void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressP input->size = device_address->size_; kernel_inputs->push_back(input); } - MS_LOG(INFO) << "AtomicAddClean clean output size:" << clean_output_indexs.size(); + MS_LOG(DEBUG) << "AtomicAddClean clean output size:" << clean_output_indexs.size(); } // set clean workspace address - if (AnfAlgo::HasNodeAttr(kAttrAutomicWorkspaceSize, pre_node)) { - auto clean_workspaces = AnfAlgo::GetNodeAttr(pre_node, kAttrAutomicWorkspaceSize); - if (clean_workspaces != 0) { - auto device_address = AnfAlgo::GetWorkspaceAddr(pre_node, 0); + if (AnfAlgo::HasNodeAttr(kAttrAtomicWorkspaceIndexs, pre_node)) { + auto clean_workspace_indexs = AnfAlgo::GetNodeAttr>(pre_node, kAttrAtomicWorkspaceIndexs); + for (const auto &index : clean_workspace_indexs) { + auto device_address = AnfAlgo::GetWorkspaceAddr(pre_node, index); kernel::AddressPtr workspace = std::make_shared(); MS_EXCEPTION_IF_NULL(workspace); workspace->addr = device_address->ptr_; @@ -73,9 +103,8 @@ void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressP workspace->size = device_address->size_; kernel_inputs->push_back(workspace); } - MS_LOG(INFO) << "AtomicAddClean clean workspace size" << clean_workspaces; } - auto clear_mems = 
AnfAlgo::GetNodeAttr>(anf_node_ptr, kAttrAutomicAddMemSize); + auto clear_mems = AnfAlgo::GetNodeAttr>(anf_node_ptr, kAttrAtomicAddMemSize); if (kernel_inputs->size() != clear_mems.size()) { MS_LOG(EXCEPTION) << "AtomicAddClean kernel inputs size not equal clear memory size,kerenl_inputs size:" << kernel_inputs->size() << ",clean mem size" << clear_mems.size(); diff --git a/mindspore/ccsrc/device/cpu/cpu_device_address.cc b/mindspore/ccsrc/device/cpu/cpu_device_address.cc index 56e9b6d36e..09ab0da12b 100644 --- a/mindspore/ccsrc/device/cpu/cpu_device_address.cc +++ b/mindspore/ccsrc/device/cpu/cpu_device_address.cc @@ -22,10 +22,30 @@ namespace device { namespace cpu { bool CPUDeviceAddress::SyncDeviceToHost(const std::vector & /*shape*/, size_t size, TypeId type, void *host_ptr) const { - if (type == kNumberTypeFloat16) { + if (ptr_ == nullptr) { + MS_LOG(ERROR) << "The pointer ptr_ is null!"; + return false; + } + + if (host_ptr == ptr_) { + MS_LOG(DEBUG) << "host_ptr is equal to ptr_, request ignored."; + return true; + } + + if (type == type_id_) { + auto ret_code = memcpy_s(host_ptr, size, ptr_, size_); + if (ret_code != EOK) { + MS_LOG(ERROR) << "Failed to copy tensor!"; + return false; + } + } else if (type == kNumberTypeFloat16) { FloatToHalf(host_ptr, ptr_, size / 2); } else if (type == kNumberTypeFloat64) { FloatToDouble(host_ptr, ptr_, size / sizeof(double)); + } else { + MS_LOG(ERROR) << "Types not match. 
Device type: " << TypeIdLabel(type_id_) << ", host type: " << TypeIdLabel(type) + << "!"; + return false; } return true; } diff --git a/mindspore/ccsrc/device/cpu/cpu_device_address.h b/mindspore/ccsrc/device/cpu/cpu_device_address.h index 9d51abe625..a041567f47 100644 --- a/mindspore/ccsrc/device/cpu/cpu_device_address.h +++ b/mindspore/ccsrc/device/cpu/cpu_device_address.h @@ -34,6 +34,7 @@ class CPUDeviceAddress : public DeviceAddress { bool SyncDeviceToHost(const std::vector &shape, size_t size, TypeId type, void *host_ptr) const override; bool SyncHostToDevice(const std::vector &shape, size_t size, TypeId type, const void *host_ptr) const override; + DeviceAddressType DeviceType() const override { return DeviceAddressType::kCPU; } }; } // namespace cpu } // namespace device diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc b/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc index 67328f04c2..6725dff524 100644 --- a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc +++ b/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc @@ -21,18 +21,37 @@ #include #include #include +#include #include "kernel/kernel.h" #include "device/cpu/cpu_device_address.h" #include "utils/context/ms_context.h" #include "utils/config_manager.h" #include "common/utils.h" #include "session/anf_runtime_algorithm.h" +#include "session/session_basic.h" #include "operator/ops.h" namespace mindspore { namespace device { namespace cpu { const size_t INIT_NODE_REF = 1; +namespace { +TypeId GetCPUSupportOutputTypeId(const TypeId type_id) { + TypeId support_type_id = type_id; + if (type_id == kNumberTypeUInt32) { + support_type_id = kNumberTypeInt32; + } + if (type_id == kNumberTypeFloat || type_id == kNumberTypeFloat16 || type_id == kNumberTypeFloat32 || + type_id == kNumberTypeFloat64) { + support_type_id = kNumberTypeFloat32; + } + if (support_type_id != kNumberTypeInt32 && support_type_id != kNumberTypeFloat32) { + MS_LOG(EXCEPTION) << "Check output type failed."; + } + return 
support_type_id; +} +} // namespace + void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) { AssignValueNodeAddress(kernel_graph); AssignInputNodeAddress(kernel_graph); @@ -121,23 +140,25 @@ DeviceAddressPtr CPUKernelRuntime::CreateDeviceAddress(void *device_ptr, size_t return std::make_shared(device_ptr, device_size, format, type_id); } -BaseRef CPUKernelRuntime::CreatTensorForOutput(const AnfNodePtr &input_node, size_t index, - const std::unordered_map &input_map) { +BaseRef CPUKernelRuntime::CreatTensorForOutput(const session::KernelWithIndex &kernel_with_index, + const std::unordered_map &input_map, + std::set *bound_addresses, + std::vector *need_sync_outputs) { + auto &input_node = kernel_with_index.first; + auto index = kernel_with_index.second; MS_EXCEPTION_IF_NULL(input_node); - if (input_node->isa() && AnfAlgo::GetCNodeName(input_node) == prim::kPrimMakeTuple->name()) { - auto cnode = input_node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - VectorRef ret; - for (size_t i = 1; i < cnode->inputs().size(); i++) { - auto item_with_index = AnfAlgo::VisitKernelWithReturnType(cnode->input(i), 0); - auto out = CreatTensorForOutput(item_with_index.first, item_with_index.second, input_map); - ret.push_back(out); - } - return ret; - } if (input_node->isa()) { auto node = input_node->cast(); MS_EXCEPTION_IF_NULL(node); + if (AnfAlgo::GetCNodeName(input_node) == prim::kPrimMakeTuple->name()) { + VectorRef ret; + for (size_t i = 1; i < node->inputs().size(); i++) { + auto item_with_index = AnfAlgo::VisitKernelWithReturnType(node->input(i), 0); + auto out = CreatTensorForOutput(item_with_index, input_map, bound_addresses, need_sync_outputs); + ret.push_back(out); + } + return ret; + } size_t output_size = AnfAlgo::GetOutputTensorNum(node); if (index >= output_size) { MS_LOG(EXCEPTION) << "Invalid input index " << index; @@ -148,20 +169,17 @@ BaseRef CPUKernelRuntime::CreatTensorForOutput(const AnfNodePtr &input_node, siz std::vector temp_shape; 
(void)temp_shape.insert(temp_shape.end(), shape.begin(), shape.end()); TypeId type_id = AnfAlgo::GetOutputInferDataType(node, index); - if (type_id == kNumberTypeUInt32) { - type_id = kNumberTypeInt32; - } - if (type_id == kNumberTypeFloat || type_id == kNumberTypeFloat16 || type_id == kNumberTypeFloat32 || - type_id == kNumberTypeFloat64) { - type_id = kNumberTypeFloat32; - } - if (type_id != kNumberTypeInt32 && type_id != kNumberTypeFloat32) { - MS_LOG(EXCEPTION) << "Check output type failed."; - } + type_id = GetCPUSupportOutputTypeId(type_id); tensor::TensorPtr tensor = std::make_shared(type_id, temp_shape); MS_EXCEPTION_IF_NULL(tensor); - address->ptr_ = tensor->data_c(true); - address->ref_count_ = INIT_NODE_REF; + if (bound_addresses->find(address) != bound_addresses->end()) { + tensor->set_device_address(address); + need_sync_outputs->emplace_back(tensor); + } else { + address->ptr_ = tensor->data_c(true); + address->ref_count_ = INIT_NODE_REF; + (void)bound_addresses->insert(address); + } tensor->set_dirty(false); return tensor; } else if (input_node->isa() || input_node->isa()) { @@ -174,7 +192,8 @@ BaseRef CPUKernelRuntime::CreatTensorForOutput(const AnfNodePtr &input_node, siz } void CPUKernelRuntime::BindInputOutput(const session::KernelGraph *kernel_graph, - const std::vector &inputs, VectorRef *outputs) { + const std::vector &inputs, VectorRef *outputs, + std::vector *need_sync_outputs) { MS_EXCEPTION_IF_NULL(kernel_graph); MS_EXCEPTION_IF_NULL(outputs); // bind input ptr @@ -182,20 +201,23 @@ void CPUKernelRuntime::BindInputOutput(const session::KernelGraph *kernel_graph, if (input_nodes.size() != inputs.size()) { MS_LOG(EXCEPTION) << "Input size not equal to input node size!"; } - std::unordered_map input_map; size_t input_idx = 0; - size_t type_size = sizeof(float); for (auto &item : input_nodes) { MS_EXCEPTION_IF_NULL(item); input_map[item.get()] = inputs[input_idx]; if (item->isa()) { auto address = AnfAlgo::GetMutableOutputAddr(item, 0); auto 
tensor = inputs[input_idx]; + auto tensor_address = tensor->device_address(); MS_EXCEPTION_IF_NULL(address); MS_EXCEPTION_IF_NULL(tensor); + if (tensor_address != nullptr && tensor_address != address) { + (void)tensor->data_sync(); + } std::vector data_shape = tensor->shape(); - size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies()); + size_t tensor_size = + std::accumulate(data_shape.begin(), data_shape.end(), sizeof(float), std::multiplies()); if (tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32) { address->ptr_ = tensor->data_c(false); } else { @@ -211,12 +233,12 @@ void CPUKernelRuntime::BindInputOutput(const session::KernelGraph *kernel_graph, } input_idx++; } - // new output and bind ptr + std::set bound_addresses; auto output_nodes = kernel_graph->outputs(); for (const auto &item : output_nodes) { - auto item_with_index = AnfAlgo::VisitKernelWithReturnType(item, 0); - auto out = CreatTensorForOutput(item_with_index.first, item_with_index.second, input_map); + auto item_with_index = AnfAlgo::VisitKernelWithReturnType(item, 0, true); + auto out = CreatTensorForOutput(item_with_index, input_map, &bound_addresses, need_sync_outputs); outputs->push_back(std::move(out)); } } @@ -234,9 +256,18 @@ void CPUKernelRuntime::AddRuntimeAddress(DeviceAddress *address, std::vectorpush_back(input); } +void CPUKernelRuntime::IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) { + resource_manager_.IncreaseSummaryRefCount(summary_outputs); +} + +void CPUKernelRuntime::DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) { + resource_manager_.DecreaseSummaryRefCount(summary_outputs); +} + bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_graph); - resource_manager_.ResetAddressRefCount(kernel_graph); + resource_manager_.IncreaseAddressRefCount(kernel_graph); + auto kernels = 
kernel_graph->execution_order(); for (const auto &kernel : kernels) { std::vector kernel_inputs; diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.h b/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.h index 28e61c1479..27dcefdba9 100644 --- a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.h +++ b/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.h @@ -20,9 +20,12 @@ #include #include #include +#include #include "device/kernel_runtime.h" #include "session/kernel_graph.h" +#include "session/session_basic.h" #include "device/cpu/cpu_resource_manager.h" +#include "session/anf_runtime_algorithm.h" #include "utils/any.h" namespace mindspore { namespace device { @@ -36,7 +39,9 @@ class CPUKernelRuntime : public KernelRuntime { bool Run(session::KernelGraph *graph) override; void AssignKernelAddress(session::KernelGraph *kernel_graph); void BindInputOutput(const session::KernelGraph *kernel_graph, const std::vector &inputs, - VectorRef *outputs); + VectorRef *outputs, std::vector *need_sync_outputs); + void IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs); + void DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs); protected: bool SyncStream() override { return true; }; @@ -44,8 +49,10 @@ class CPUKernelRuntime : public KernelRuntime { TypeId type_id) override; private: - BaseRef CreatTensorForOutput(const AnfNodePtr &input_node, size_t index, - const std::unordered_map &input_map); + BaseRef CreatTensorForOutput(const session::KernelWithIndex &kernel_with_index, + const std::unordered_map &input_map, + std::set *bound_addresses, + std::vector *need_sync_outputs); void AssignValueNodeAddress(session::KernelGraph *kernel_graph); void AssignInputNodeAddress(const session::KernelGraph *kernel_graph); void AssignKernelOutputAddress(const session::KernelGraph *kernel_graph); diff --git a/mindspore/ccsrc/device/cpu/cpu_resource_manager.cc b/mindspore/ccsrc/device/cpu/cpu_resource_manager.cc index 45b9ea5bed..c69ef35305 
100644 --- a/mindspore/ccsrc/device/cpu/cpu_resource_manager.cc +++ b/mindspore/ccsrc/device/cpu/cpu_resource_manager.cc @@ -76,7 +76,47 @@ void CPUResourceManager::MemFree(void *ptr) { } } -void CPUResourceManager::ResetAddressRefCount(const session::KernelGraph *graph) { +void CPUResourceManager::IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) { + if (!dynamic_malloc_) { + return; + } + + if (summary_outputs.empty()) { + return; + } + + for (auto &output_item : summary_outputs) { + auto node = output_item.second.first; + size_t index = IntToSize(output_item.second.second); + auto address = AnfAlgo::GetMutableOutputAddr(node, index); + MS_EXCEPTION_IF_NULL(address); + address->ref_count_++; + } +} + +void CPUResourceManager::DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) { + if (!dynamic_malloc_) { + return; + } + + if (summary_outputs.empty()) { + return; + } + + for (auto &output_item : summary_outputs) { + auto node = output_item.second.first; + size_t index = IntToSize(output_item.second.second); + auto address = AnfAlgo::GetMutableOutputAddr(node, index); + MS_EXCEPTION_IF_NULL(address); + address->ref_count_--; + if (address->ref_count_ == 0 && address->ptr_ != nullptr) { + MemFree(address->ptr_); + address->ptr_ = nullptr; + } + } +} + +void CPUResourceManager::IncreaseAddressRefCount(const session::KernelGraph *graph) { if (!dynamic_malloc_) { return; } diff --git a/mindspore/ccsrc/device/cpu/cpu_resource_manager.h b/mindspore/ccsrc/device/cpu/cpu_resource_manager.h index 96cf00f3d8..d130241464 100644 --- a/mindspore/ccsrc/device/cpu/cpu_resource_manager.h +++ b/mindspore/ccsrc/device/cpu/cpu_resource_manager.h @@ -19,6 +19,7 @@ #include #include #include "session/kernel_graph.h" +#include "session/session_basic.h" #include "device/device_address.h" #include "device/cpu/cpu_simple_mem_plan.h" namespace mindspore { @@ -31,10 +32,12 @@ class CPUResourceManager { void MemPlan(const 
session::KernelGraph *graph); void MemMalloc(const session::KernelGraph *graph); - void ResetAddressRefCount(const session::KernelGraph *graph); + void IncreaseAddressRefCount(const session::KernelGraph *graph); void DecreaseAddressRefCount(const AnfNodePtr &kernel); void *MemMalloc(size_t mem_size); void MemFree(void *ptr); + void IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs); + void DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs); private: void MemFree(); diff --git a/mindspore/ccsrc/device/cpu/kernel_select_cpu.cc b/mindspore/ccsrc/device/cpu/kernel_select_cpu.cc index 76e91e059a..9d72bcab89 100644 --- a/mindspore/ccsrc/device/cpu/kernel_select_cpu.cc +++ b/mindspore/ccsrc/device/cpu/kernel_select_cpu.cc @@ -71,9 +71,6 @@ void GetInputFormatsAndDtypes(const CNodePtr &kernel_node, std::vector *output_formats, std::vector *output_types) { size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); - if (kernel_attr.GetOutputSize() != output_num) { - MS_LOG(EXCEPTION) << "Output num is not equal!"; - } for (size_t output_index = 0; output_index < output_num; ++output_index) { output_formats->emplace_back(kernel_attr.GetOutputAttr(output_index).second); auto dtype = kernel_attr.GetOutputAttr(output_index).first; @@ -145,6 +142,11 @@ void SetKernelInfo(const CNodePtr &kernel_node) { ExpandKernelAttr(kernel_node, &kernel_attr); } if (IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) { + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + if (kernel_attr.GetOutputSize() != output_num) { + MS_LOG(DEBUG) << "Output num is not equal!"; + continue; + } MS_LOG(INFO) << "Input format and dtype is matched, index: " << index; GetOutputFormatsAndDtypes(kernel_node, kernel_attr, &output_formats, &output_types); UpdatePrevNotCNodeFormatDtype(kernel_attr, input_not_cnode_indexes, kernel_node); diff --git a/mindspore/ccsrc/device/cpu/kernel_select_cpu.h 
b/mindspore/ccsrc/device/cpu/kernel_select_cpu.h index d2138ec66d..b707c55e2c 100644 --- a/mindspore/ccsrc/device/cpu/kernel_select_cpu.h +++ b/mindspore/ccsrc/device/cpu/kernel_select_cpu.h @@ -33,7 +33,7 @@ void SetKernelInfo(const CNodePtr &apply_kernel_ptr); class KernelAttr { public: using DataType = std::pair; - KernelAttr() = default; + KernelAttr() : all_same_(0) {} ~KernelAttr() = default; KernelAttr &AddInputAttr(const TypeId &ms_type, const std::string &format = kOpFormat_DEFAULT) { diff --git a/mindspore/ccsrc/device/cpu/mpi/mpi_adapter.cc b/mindspore/ccsrc/device/cpu/mpi/mpi_adapter.cc new file mode 100644 index 0000000000..0d49846bf7 --- /dev/null +++ b/mindspore/ccsrc/device/cpu/mpi/mpi_adapter.cc @@ -0,0 +1,259 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "device/cpu/mpi/mpi_adapter.h" +#include +#include "utils/mpi/mpi_config.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace device { +namespace cpu { +namespace { +MPI_Op GetMpiOp(const std::string &op_type) { + if (op_type == "sum") { + return MPI_SUM; + } else if (op_type == "max") { + return MPI_MAX; + } else if (op_type == "min") { + return MPI_MIN; + } else if (op_type == "prod") { + return MPI_PROD; + } + MS_LOG(EXCEPTION) << "unsupport op_type:" << op_type; + return MPI_SUM; +} + +int GetScatterIndex(int rankid, const std::vector &ranks_group) { + int scatter_index = -1; + for (size_t i = 0; i < ranks_group.size(); ++i) { + if (ranks_group[i] == rankid) { + scatter_index = static_cast(i); + break; + } + } + if (scatter_index == -1) { + MS_LOG(EXCEPTION) << "process rankid " << rankid << " does not in the input rank group!"; + } + return scatter_index; +} +} // namespace + +MPIAdapter::MPIAdapter() : rank_id_(0), rank_size_(0), comm_group_world_(MPI_GROUP_NULL) { Init(); } + +MPIAdapter::~MPIAdapter() { + for (auto iter = ranks_group_.begin(); iter != ranks_group_.end(); ++iter) { + MPI_Group_free(&iter->second); + } + if (comm_group_world_ != MPI_GROUP_NULL) { + MPI_Group_free(&comm_group_world_); + } + int finalized; + MPI_Finalized(&finalized); + if (finalized == 0) { + MPI_Finalize(); + } +} + +MPIAdapter &MPIAdapter::Instance() { + static MPIAdapter instance; + return instance; +} + +int MPIAdapter::GetRankId() const { return rank_id_; } + +void MPIAdapter::Init() { + static bool init = false; + if (init) { + return; + } + auto mpi_config_ptr = MpiConfig::GetInstance(); + MS_EXCEPTION_IF_NULL(mpi_config_ptr); + if (!mpi_config_ptr->enable_mpi()) { + MS_LOG(EXCEPTION) << "MPI is disabled now!Please enable mpi with mpi config first."; + } + int init_flag = 0; + if (MPI_Initialized(&init_flag) != MPI_SUCCESS) { + MS_LOG(EXCEPTION) << "Check mpi initialized fail!"; + } + if (init_flag == 0) { + auto ret = 
MPI_Init(nullptr, nullptr); + if (ret != MPI_SUCCESS) { + MS_LOG(EXCEPTION) << "Failed to init mpi!"; + } + } + + MPI_Comm_group(MPI_COMM_WORLD, &comm_group_world_); + if (comm_group_world_ == MPI_GROUP_NULL) { + MS_LOG(EXCEPTION) << "comm_group_world_ init fail!"; + } + auto ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank_id_); + if (ret != MPI_SUCCESS) { + MS_LOG(EXCEPTION) << "Failed to init mpi rank id!"; + } + + ret = MPI_Comm_size(MPI_COMM_WORLD, &rank_size_); + if (ret != MPI_SUCCESS) { + MS_LOG(EXCEPTION) << "Failed to init mpi rank size!rankid:" << rank_id_; + } + init = true; +} + +MPI_Group MPIAdapter::AddGroup(const std::vector &ranks) { + if (ranks.size() > static_cast(rank_size_) || ranks.empty()) { + MS_LOG(EXCEPTION) << "input rank size: " << ranks.size() << ", max rank size: " << rank_size_; + } + + if (std::find(ranks.begin(), ranks.end(), rank_id_) == ranks.end()) { + MS_LOG(ERROR) << "rankid:" << rank_id_ << " is not in the input group."; + return MPI_GROUP_NULL; + } + std::lock_guard lock(group_mutex_); + auto iter = ranks_group_.find(ranks); + if (iter != ranks_group_.end()) { + return iter->second; + } + const auto ranks_size = ranks.size(); + std::vector ranks_input(ranks_size, 0); + for (size_t i = 0; i < ranks_size; ++i) { + ranks_input[i] = ranks[i]; + } + + MPI_Group group = MPI_GROUP_NULL; + MPI_Group_incl(comm_group_world_, ranks.size(), ranks_input.data(), &group); + if (group == MPI_GROUP_NULL) { + MS_LOG(EXCEPTION) << "create mpi group fail!rankid:" << rank_id_; + } + + ranks_group_[ranks] = group; + MS_LOG(INFO) << "rank:" << rank_id_ << " add group:" << group; + return group; +} + +bool MPIAdapter::ReduceScatter(const float *input, float *output, const std::vector &ranks_group, size_t data_num, + const std::string &op_type) { + if (ranks_group.empty()) { + MS_LOG(ERROR) << "input rank group is empty!"; + return false; + } + + auto group = AddGroup(ranks_group); + if (group == MPI_GROUP_NULL) { + MS_LOG(EXCEPTION) << "Get mpi group 
fail!rankid:" << rank_id_; + } + MPI_Comm comm; + MPI_Comm_create_group(MPI_COMM_WORLD, group, 0, &comm); + if (comm == MPI_COMM_NULL) { + MS_LOG(EXCEPTION) << "create mpi comm fail!rankid:" << rank_id_; + } + std::vector receive_count(ranks_group.size(), 0); + for (size_t i = 0; i < ranks_group.size(); ++i) { + receive_count[i] = data_num; + } + + auto op = GetMpiOp(op_type); + auto ret = MPI_Reduce_scatter(input, output, receive_count.data(), MPI_FLOAT, op, comm); + bool result = true; + if (ret != MPI_SUCCESS) { + MS_LOG(ERROR) << "mpi reduce_scatter fail!ret = " << ret << ", rankid:" << rank_id_; + result = false; + } + + ret = MPI_Comm_free(&comm); + if (ret != MPI_SUCCESS) { + MS_LOG(WARNING) << "mpi comm free fail! ret = " << ret << ", rankid:" << rank_id_; + } + return result; +} + +bool MPIAdapter::ReduceScatterOverwriteInput(float *input, const std::vector &ranks_group, size_t input_data_num, + size_t output_size, const std::string &op_type, float *output) { + int scatter_index = GetScatterIndex(rank_id_, ranks_group); + auto group = AddGroup(ranks_group); + if (group == MPI_GROUP_NULL) { + MS_LOG(EXCEPTION) << "Get mpi group fail!rankid:" << rank_id_; + } + MPI_Comm comm; + MPI_Comm_create_group(MPI_COMM_WORLD, group, 0, &comm); + if (comm == MPI_COMM_NULL) { + MS_LOG(EXCEPTION) << "create mpi comm fail!rankid:" << rank_id_; + } + + MPI_Win window; + auto ret = MPI_Win_create(input, input_data_num * sizeof(float), sizeof(float), MPI_INFO_NULL, comm, &window); + if (ret != MPI_SUCCESS) { + MS_LOG(ERROR) << "mpi window create fail! 
ret = " << ret; + return false; + } + MPI_Win_fence(0, window); + for (size_t i = 0; i < ranks_group.size(); ++i) { + int remote_rank = ranks_group[i]; + if (rank_id_ == remote_rank) { + continue; + } + auto op = GetMpiOp(op_type); + ret = MPI_Accumulate(input + i * input_data_num, input_data_num, MPI_FLOAT, remote_rank, i * input_data_num, + input_data_num, MPI_FLOAT, op, window); + if (ret != MPI_SUCCESS) { + MS_LOG(EXCEPTION) << "mpi accumulate " << op_type << " fail!ret = " << ret; + } + } + MPI_Win_fence(0, window); + if (output != nullptr) { + auto data_size = input_data_num * sizeof(float); + if (output_size < data_size) { + MS_LOG(EXCEPTION) << "output buffer size " << output_size << " < input size " << data_size; + } + auto copy_ret = memcpy_s(output, output_size, input + scatter_index * input_data_num, data_size); + if (copy_ret != 0) { + MS_LOG(EXCEPTION) << "copy output memory fail!ret = " << copy_ret; + } + } + MPI_Win_free(&window); + MPI_Comm_free(&comm); + return true; +} + +bool MPIAdapter::AllGather(const float *input, float *output, const std::vector &ranks_group, size_t data_num) { + if (ranks_group.empty()) { + MS_LOG(ERROR) << "input rank group is empty!"; + return false; + } + auto group = AddGroup(ranks_group); + if (group == MPI_GROUP_NULL) { + MS_LOG(EXCEPTION) << "Get mpi group fail! rankid:" << rank_id_; + } + MPI_Comm comm; + MPI_Comm_create_group(MPI_COMM_WORLD, group, 0, &comm); + if (comm == MPI_COMM_NULL) { + MS_LOG(EXCEPTION) << "create mpi comm fail! 
rankid:" << rank_id_; + } + + auto ret = MPI_Allgather(input, data_num, MPI_FLOAT, output, data_num, MPI_FLOAT, comm); + bool result = true; + if (ret != MPI_SUCCESS) { + MS_LOG(ERROR) << "mpi allgater fail!ret = " << ret << ", rankid:" << rank_id_; + result = false; + } + ret = MPI_Comm_free(&comm); + if (ret != MPI_SUCCESS) { + MS_LOG(WARNING) << "mpi comm free fail!ret = " << ret << ",rankid:" << rank_id_; + } + return result; +} +} // namespace cpu +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/mpi/mpi_adapter.h b/mindspore/ccsrc/device/cpu/mpi/mpi_adapter.h new file mode 100644 index 0000000000..8265e89eab --- /dev/null +++ b/mindspore/ccsrc/device/cpu/mpi/mpi_adapter.h @@ -0,0 +1,58 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_DEVICE_CPU_MPI_MPI_ADAPTER_H_ +#define MINDSPORE_CCSRC_DEVICE_CPU_MPI_MPI_ADAPTER_H_ +#ifdef ENABLE_MPI +#include +#include +#include +#include +#include + +namespace mindspore { +namespace device { +namespace cpu { +constexpr auto kOpTypeSum = "sum"; +class MPIAdapter { + public: + ~MPIAdapter(); + static MPIAdapter &Instance(); + int GetRankId() const; + bool ReduceScatter(const float *input, float *output, const std::vector &ranks_group, size_t data_num, + const std::string &op_type = kOpTypeSum); + bool ReduceScatterOverwriteInput(float *input, const std::vector &ranks_group, size_t input_data_num, + size_t output_size, const std::string &op_type = kOpTypeSum, + float *output = nullptr); + bool AllGather(const float *input, float *output, const std::vector &ranks_group, size_t data_num); + + private: + MPIAdapter(); + void Init(); + MPI_Group AddGroup(const std::vector &ranks); + + int rank_id_; + int rank_size_; + MPI_Group comm_group_world_; + // key:ranks group, value: mpi group + std::map, MPI_Group> ranks_group_; + std::mutex group_mutex_; +}; +} // namespace cpu +} // namespace device +} // namespace mindspore +#endif // ENABLE_MPI +#endif // MINDSPORE_CCSRC_DEVICE_CPU_MPI_MPI_ADAPTER_H_ diff --git a/mindspore/ccsrc/device/device_address.h b/mindspore/ccsrc/device/device_address.h index fd3188e0f2..e02d231dd5 100644 --- a/mindspore/ccsrc/device/device_address.h +++ b/mindspore/ccsrc/device/device_address.h @@ -48,6 +48,7 @@ class GPUMemoryManager; namespace mindspore { namespace device { enum class DeviceAddressStatus { kInDevice, kInHost, kInDeviceToHost, kInHostToDevice }; +enum class DeviceAddressType { kUnknown, kAscend, kCPU, kGPU }; class DeviceAddress { public: @@ -64,6 +65,7 @@ class DeviceAddress { TypeId type_id() const { return type_id_; } virtual void set_status(DeviceAddressStatus status) {} virtual DeviceAddressStatus status() const { return DeviceAddressStatus::kInDevice; } + virtual DeviceAddressType 
DeviceType() const { return DeviceAddressType::kUnknown; } protected: const void *ptr() const { return ptr_; } diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_fake_init.h b/mindspore/ccsrc/device/gpu/distribution/collective_fake_init.h index 65467139c0..c8405f12f6 100644 --- a/mindspore/ccsrc/device/gpu/distribution/collective_fake_init.h +++ b/mindspore/ccsrc/device/gpu/distribution/collective_fake_init.h @@ -20,7 +20,6 @@ namespace mindspore { namespace device { namespace gpu { - class CollectiveFakeInitializer { public: CollectiveFakeInitializer() = default; diff --git a/mindspore/ccsrc/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/device/gpu/gpu_device_address.cc index c4c1094293..24097f3637 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_address.cc +++ b/mindspore/ccsrc/device/gpu/gpu_device_address.cc @@ -15,9 +15,7 @@ */ #include "device/gpu/gpu_device_address.h" - #include - #include "device/gpu/gpu_device_manager.h" #include "utils/log_adapter.h" #include "utils/context/ms_context.h" @@ -28,6 +26,13 @@ namespace device { namespace gpu { bool GPUDeviceAddress::SyncDeviceToHost(const std::vector &, size_t size, TypeId, void *host_ptr) const { MS_EXCEPTION_IF_NULL(host_ptr); + auto &stream = GPUDeviceManager::GetInstance().default_stream(); + MS_EXCEPTION_IF_NULL(stream); + auto ret = GPUDeviceManager::GetInstance().SyncStream(stream); + if (!ret) { + MS_LOG(ERROR) << "SyncStream failed"; + return ret; + } if (size != size_) { MS_LOG(WARNING) << "SyncDeviceToHost ignored, host size: " << size << ", device size " << size_; return true; diff --git a/mindspore/ccsrc/device/gpu/gpu_device_address.h b/mindspore/ccsrc/device/gpu/gpu_device_address.h index f5c6b6e36b..4074cb6ce9 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_address.h +++ b/mindspore/ccsrc/device/gpu/gpu_device_address.h @@ -35,6 +35,7 @@ class GPUDeviceAddress : public DeviceAddress { bool SyncHostToDevice(const std::vector &shape, size_t size, TypeId type, const void 
*host_ptr) const override; void set_status(DeviceAddressStatus status) { status_ = status; } DeviceAddressStatus status() const { return status_; } + DeviceAddressType DeviceType() const override { return DeviceAddressType::kGPU; } private: DeviceAddressStatus status_{DeviceAddressStatus::kInDevice}; diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc index f9d2cb878f..19d2284510 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc @@ -16,18 +16,17 @@ #include "device/gpu/gpu_kernel_build.h" #include #include "kernel/kernel.h" -#include "kernel/akg/akgkernelbuild.h" +#include "kernel/akg/akg_kernel_build.h" #include "kernel/akg/gpu/akg_gpu_kernel_build.h" #include "kernel/gpu/gpu_kernel_factory.h" #include "operator/ops.h" -#include "pybind11/stl.h" #include "session/anf_runtime_algorithm.h" namespace mindspore { namespace device { namespace gpu { -namespace py = pybind11; void GpuBuild(const KernelGraphPtr &kernel_graph) { kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance(); + MS_EXCEPTION_IF_NULL(bin_map); bin_map->Initialize(); MS_EXCEPTION_IF_NULL(kernel_graph); auto kernels = kernel_graph->execution_order(); @@ -38,7 +37,7 @@ void GpuBuild(const KernelGraphPtr &kernel_graph) { continue; } - if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) == KernelType::AUTO_DIFF_KERNEL) { + if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) == KernelType::AKG_KERNEL) { auto gpu_kernel_ptr = kernel::AkgGpuKernelBuild(kernel); if (!gpu_kernel_ptr) { MS_LOG(EXCEPTION) << "Build akg kernel op[" << kernel_name << "] failed"; diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc index 6c658f12e8..8095a503e3 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc @@ -39,6 +39,7 @@ bool GPUKernelRuntime::SyncStream() { return 
GPUDeviceManager::GetInstance().Syn bool GPUKernelRuntime::Init() { if (device_init_ == true) { + GPUMemoryAllocator::GetInstance().CheckMaxDeviceMemory(); return true; } auto ret = InitDevice(); @@ -105,7 +106,7 @@ void GPUKernelRuntime::ReleaseDeviceRes() { CHECK_OP_RET_WITH_EXCEPT(GpuBufferMgr::GetInstance().Destroy(), "Could not destroy gpu data queue."); } - // destroy remaining memory swap events and free host memory + // Destroy remaining memory swap events and free host memory. for (auto &item : mem_swap_map_) { auto &mem_swap_manager = item.second; MS_EXCEPTION_IF_NULL(mem_swap_manager); @@ -119,7 +120,10 @@ void GPUKernelRuntime::ReleaseDeviceRes() { if (mem_manager_ != nullptr) { mem_manager_->FreeDeviceMemory(); } - kernel::KernelMeta::GetInstance()->RemoveKernelCache(); + + kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance(); + MS_EXCEPTION_IF_NULL(bin_map); + bin_map->RemoveKernelCache(); } void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) { @@ -171,7 +175,7 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph) { const uint64_t kUSecondInSecond = 1000000; uint64_t cost = kUSecondInSecond * static_cast(end_time.tv_sec - start_time.tv_sec); cost += static_cast(end_time.tv_usec - start_time.tv_usec); - MS_LOG(DEBUG) << "kernel runtime run graph in " << cost << " us"; + MS_LOG(DEBUG) << "GPU kernel runtime run graph in " << cost << " us"; return ret; } @@ -187,6 +191,8 @@ void GPUKernelRuntime::InitKernelRefCount(const session::KernelGraph *graph) { mem_reuse_util_ptr->SetReuseRefCount(); // Can't free the device address of graph output, so set the reference count of graph output specially. mem_reuse_util_ptr->SetGraphOutputRefCount(); + // Can't free the device address of summary nodes, so set the reference count of summary nodes specially. 
+ mem_reuse_util_ptr->SetSummaryNodesRefCount(); auto graph_id = graph->graph_id(); mem_reuse_util_map_[graph_id] = mem_reuse_util_ptr; } @@ -222,7 +228,7 @@ void GPUKernelRuntime::ClearKernelOutputAddress(const session::KernelGraph *grap continue; } - auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i); + auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i, false); if (device_address->ptr_) { mem_manager_->FreeMemFromMemPool(device_address); } @@ -233,6 +239,7 @@ void GPUKernelRuntime::ClearKernelOutputAddress(const session::KernelGraph *grap bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(mem_swap_manager_); auto graph_id = graph->graph_id(); auto mem_reuse_util_ptr = mem_reuse_util_map_[graph_id]; MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); @@ -277,11 +284,12 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph) { } bool GPUKernelRuntime::AddMemSwapTask(const AnfNodePtr &kernel) { + MS_EXCEPTION_IF_NULL(mem_swap_manager_); auto &mem_swap_info_list = mem_swap_manager_->QueryKernelMemSwapInfo(kernel); for (auto &mem_swap_info : mem_swap_info_list) { auto &kernel_exec_info = mem_swap_manager_->SearchKernelExecutionInfo(mem_swap_info.kernel_); const HostAddress &host_address = kernel_exec_info.host_addrs_[mem_swap_info.output_idx_]; - auto device_address = AnfAlgo::GetMutableOutputAddr(mem_swap_info.kernel_, mem_swap_info.output_idx_); + auto device_address = AnfAlgo::GetMutableOutputAddr(mem_swap_info.kernel_, mem_swap_info.output_idx_, false); if (mem_swap_info.swap_kind_ == SwapKind::kDeviceToHost) { mem_swap_manager_->AddMemSwapTask(SwapKind::kDeviceToHost, device_address, host_address); @@ -304,6 +312,7 @@ bool GPUKernelRuntime::AddMemSwapTask(const AnfNodePtr &kernel) { } bool GPUKernelRuntime::AttemptMallocMem(const DeviceAddressPtr &device_address, size_t size) { + MS_EXCEPTION_IF_NULL(mem_manager_); auto ret = 
mem_manager_->MallocMemFromMemPool(device_address, size); if (!ret) { if (!mem_swap_manager_->trigger_swap()) { @@ -327,6 +336,7 @@ bool GPUKernelRuntime::AttemptMallocMem(const DeviceAddressPtr &device_address, } void *GPUKernelRuntime::AttemptMallocMem(size_t size) { + MS_EXCEPTION_IF_NULL(mem_manager_); auto device_ptr = mem_manager_->MallocMemFromMemPool(size); if (!device_ptr) { if (!mem_swap_manager_->trigger_swap()) { @@ -367,8 +377,10 @@ bool GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod bool GPUKernelRuntime::AllocKernelInputDynamicRes(const mindspore::AnfNodePtr &kernel, AddressPtrList *kernel_inputs) { MS_EXCEPTION_IF_NULL(kernel); MS_EXCEPTION_IF_NULL(kernel_inputs); + MS_EXCEPTION_IF_NULL(mem_swap_manager_); for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) { - auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i); + // Graph may be all nop nodes and not remove nop node, so this can not skip nop node. + auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i, false); MS_EXCEPTION_IF_NULL(device_address); if (mem_swap_manager_->trigger_swap()) { while (auto device_address_swap_in = mem_swap_manager_->UpdateSwapQueue(SwapKind::kHostToDevice)) { @@ -415,6 +427,7 @@ bool GPUKernelRuntime::AllocKernelOutputDynamicRes(const mindspore::kernel::Kern MS_EXCEPTION_IF_NULL(kernel); MS_EXCEPTION_IF_NULL(kernel_outputs); MS_EXCEPTION_IF_NULL(mem_manager_); + MS_EXCEPTION_IF_NULL(mem_swap_manager_); if (mem_swap_manager_->trigger_swap()) { while (auto device_address_swap_out = mem_swap_manager_->UpdateSwapQueue(SwapKind::kDeviceToHost)) { if (!mem_swap_manager_->FindInSwapInBlackList(device_address_swap_out->ptr_) && device_address_swap_out->ptr_) { @@ -425,7 +438,7 @@ bool GPUKernelRuntime::AllocKernelOutputDynamicRes(const mindspore::kernel::Kern } auto output_sizes = kernel_mod.GetOutputSizeList(); for (size_t i = 0; i < output_sizes.size(); ++i) { - auto device_address = 
AnfAlgo::GetMutableOutputAddr(kernel, i); + auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i, false); MS_EXCEPTION_IF_NULL(device_address); if (device_address->ptr_ == nullptr && !AttemptMallocMem(device_address, output_sizes[i])) { return false; @@ -444,7 +457,6 @@ bool GPUKernelRuntime::AllocKernelWorkspaceDynamicRes(const mindspore::kernel::K AddressPtrList *kernel_workspaces) { MS_EXCEPTION_IF_NULL(kernel); MS_EXCEPTION_IF_NULL(kernel_workspaces); - MS_EXCEPTION_IF_NULL(mem_manager_); auto workspace_sizes = kernel_mod.GetWorkspaceSizeList(); for (size_t i = 0; i < workspace_sizes.size(); ++i) { if (workspace_sizes[i] == 0) { @@ -478,14 +490,13 @@ void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(kernel); - MS_EXCEPTION_IF_NULL(mem_manager_); bool is_need_alloc_memory = false; bool is_need_free_memory = false; size_t total_size = 0; std::vector size_list; DeviceAddressPtrList addr_list; for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) { - auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i); + auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i, false); MS_EXCEPTION_IF_NULL(device_address); if (device_address->ptr_ == nullptr) { is_need_alloc_memory = true; @@ -501,7 +512,6 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(kernel); - MS_EXCEPTION_IF_NULL(mem_manager_); bool is_need_alloc_memory = false; bool is_need_free_memory = false; size_t total_size = 0; @@ -511,7 +521,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf MS_EXCEPTION_IF_NULL(kernel_mod); auto output_sizes = kernel_mod->GetOutputSizeList(); for (size_t i = 0; i < output_sizes.size(); ++i) { - auto 
device_address = AnfAlgo::GetMutableOutputAddr(kernel, i); + auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i, false); MS_EXCEPTION_IF_NULL(device_address); if (device_address->ptr_ == nullptr) { is_need_alloc_memory = true; @@ -528,6 +538,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf void GPUKernelRuntime::AllocCommunicationOpMemory(bool is_need_alloc_memory, bool is_need_free_memory, const DeviceAddressPtrList addr_list, size_t total_size, std::vector size_list) { + MS_EXCEPTION_IF_NULL(mem_manager_); if (!is_need_alloc_memory) { return; } @@ -568,7 +579,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, MS_LOG(EXCEPTION) << "Check dynamic reference count failed."; } if (kernel_ref_count_ptr->ref_count_dynamic_use_ == 0) { - auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i); + auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i, false); mem_manager_->FreeMemFromMemPool(device_address); device_address->set_status(DeviceAddressStatus::kInDevice); } @@ -580,7 +591,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, continue; } if (kernel_ref_count_ptr->ref_count_dynamic_use_ == 0) { - auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i); + auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i, false); mem_manager_->FreeMemFromMemPool(device_address); device_address->set_status(DeviceAddressStatus::kInDevice); } diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc b/mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc index 3a1a53c600..9137945661 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc +++ b/mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc @@ -14,25 +14,45 @@ * limitations under the License. 
*/ +#include #include "device/gpu/gpu_memory_allocator.h" #include "device/gpu/cuda_driver.h" #include "utils/log_adapter.h" +#include "utils/context/ms_context.h" +#include "utils/convert_utils_base.h" namespace mindspore { namespace device { namespace gpu { bool GPUMemoryAllocator::Init() { size_t total_size = total_mem_size(); - size_t free_size = free_mem_size(); - if (total_size > 0 && free_size > 0) { - MS_LOG(INFO) << "GPU device total memory size " << total_size << ", current free memory size " << free_size; + size_t free_size = CudaDriver::free_mem_size(); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + limited_device_memory_ = context_ptr->max_device_memory(); + available_device_memory_ = FloatToSize(limited_device_memory_ * 1024 * 1024 * 1024); + if (total_size > 0 && free_size > 0 && available_device_memory_ > 0) { + MS_LOG(INFO) << "GPU device total memory size " << total_size << ", current free memory size " << free_size + << ", set max available memory size " << available_device_memory_ << "."; } else { MS_LOG(EXCEPTION) << "GPU device memory error, total memory size " << total_size << ", current free memory size " - << free_size; + << free_size << ", set max available memory size " << available_device_memory_ << "."; } return true; } +void GPUMemoryAllocator::CheckMaxDeviceMemory() const { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + auto max_device_memory = context_ptr->max_device_memory(); + // Currently not support modifying the max device memory. 
+ if (limited_device_memory_ != max_device_memory) { + MS_LOG(EXCEPTION) + << "Can't change context param max_device_memory in runtime, currently effective max_device_memory(" + << limited_device_memory_ << "GB), set new max_device_memory(" << max_device_memory << "GB) failed."; + } +} + bool GPUMemoryAllocator::Finalize() { if (buffer_q_addr_ != nullptr) { if (!CudaDriver::FreeDeviceMem(buffer_q_addr_)) { @@ -64,13 +84,16 @@ size_t GPUMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr *addr) { if (alloc_size == 0) { MS_LOG(EXCEPTION) << "Alloc device memory[" << size << "] failed."; } - MS_LOG(INFO) << "Current free memory size[" << free_size << "], current alloc size[" << alloc_size << "]."; + total_used_device_memory_ += alloc_size; + available_device_memory_ -= alloc_size; + MS_LOG(INFO) << "Current free memory size[" << free_size - alloc_size << "], current alloc size[" << alloc_size + << "], total used size[" << total_used_device_memory_ << "]."; return alloc_size; } bool GPUMemoryAllocator::FreeDeviceMem(const DeviceMemPtr &addr) { return CudaDriver::FreeDeviceMem(addr); } -size_t GPUMemoryAllocator::free_mem_size() { return CudaDriver::free_mem_size(); } +size_t GPUMemoryAllocator::free_mem_size() { return std::min(CudaDriver::free_mem_size(), available_device_memory_); } size_t GPUMemoryAllocator::total_mem_size() { return CudaDriver::total_mem_size(); } } // namespace gpu diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.h b/mindspore/ccsrc/device/gpu/gpu_memory_allocator.h index 36374bfaad..90d7791057 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.h +++ b/mindspore/ccsrc/device/gpu/gpu_memory_allocator.h @@ -28,6 +28,7 @@ class GPUMemoryAllocator : public DynamicMemPoolBestFit { public: ~GPUMemoryAllocator() override = default; bool Init(); + void CheckMaxDeviceMemory() const; bool Finalize(); bool AllocBufferQueueMem(size_t size, DeviceMemPtr *addr); @@ -48,6 +49,10 @@ class GPUMemoryAllocator : public 
DynamicMemPoolBestFit { // Used to track address of data buffer queue. DeviceMemPtr buffer_q_addr_{nullptr}; + + float limited_device_memory_{0.0}; + size_t total_used_device_memory_{0}; + size_t available_device_memory_{0}; }; } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.cc b/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.cc index 8443e4799f..80206f309d 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.cc +++ b/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.cc @@ -104,12 +104,12 @@ DeviceAddressPtr GPUMemCopyManager::UpdateSwapInQueue() { return device_address; } -bool GPUMemCopyManager::AllocHostPinnedMem(size_t size, void **addr) { +bool GPUMemCopyManager::AllocHostPinnedMem(size_t size, void **addr) const { auto alloc_size = CudaDriver::AllocHostPinnedMem(size, addr); return alloc_size == size; } -void GPUMemCopyManager::FreeHostPinnedMem(void *addr) { CudaDriver::FreeHostPinnedMem(addr); } +void GPUMemCopyManager::FreeHostPinnedMem(void *addr) const { CudaDriver::FreeHostPinnedMem(addr); } void GPUMemCopyManager::ClearSwapQueue() { CHECK_OP_RET_WITH_EXCEPT(SyncMemCopyStream(SwapKind::kDeviceToHost), "Failed to sync swap out stream"); diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.h b/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.h index a7cd8d4d8f..36ff273015 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.h +++ b/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.h @@ -48,9 +48,9 @@ class GPUMemCopyManager : public MemCopyManager { DeviceAddressPtr UpdateSwapInQueue() override; - bool AllocHostPinnedMem(size_t size, void **addr) override; + bool AllocHostPinnedMem(size_t size, void **addr) const override; - void FreeHostPinnedMem(void *addr) override; + void FreeHostPinnedMem(void *addr) const override; void ClearSwapQueue() override; @@ -61,7 +61,6 @@ class GPUMemCopyManager : public MemCopyManager { std::queue> swap_in_queue_; }; using 
GPUMemCopyManagerPtr = std::shared_ptr; - } // namespace gpu } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/gpu/gpu_stream_assign.cc b/mindspore/ccsrc/device/gpu/gpu_stream_assign.cc index 3594081cc7..42cdcf29ec 100644 --- a/mindspore/ccsrc/device/gpu/gpu_stream_assign.cc +++ b/mindspore/ccsrc/device/gpu/gpu_stream_assign.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "device/gpu/gpu_stream_assign.h" #include #include #include @@ -21,7 +22,6 @@ #include "device/gpu/gpu_common.h" #include "device/gpu/kernel_info_setter.h" #include "device/gpu/gpu_device_manager.h" -#include "device/gpu/gpu_stream_assign.h" namespace mindspore { namespace device { @@ -36,18 +36,19 @@ void AssignGpuStream(const std::shared_ptr &kernel_graph) allreduce_kernels.emplace_back(kernel_node); } else { DeviceStream compute_stream = GPUDeviceManager::GetInstance().default_stream(); - AnfAlgo::SetNodeAttr("stream_id", MakeValue(reinterpret_cast(compute_stream)), kernel_node); + MS_EXCEPTION_IF_NULL(compute_stream); + AnfAlgo::SetNodeAttr(kAttrStreamId, MakeValue(reinterpret_cast(compute_stream)), kernel_node); } } if (allreduce_kernels.size() > 1) { - // Assign multiple streams only when there's Recv node for AllReduce. + // Assign multiple streams only when there're multiple AllReduce nodes. 
std::vector send_recv_pairs; if (FindAllReduceStreamSwitchPos(kernel_graph, &send_recv_pairs)) { DeviceStream comm_stream = nullptr; GPUDeviceManager::GetInstance().CreateStream(&comm_stream); std::transform( allreduce_kernels.begin(), allreduce_kernels.end(), allreduce_kernels.begin(), [&](CNodePtr allreduce_kernel) { - AnfAlgo::SetNodeAttr("stream_id", MakeValue(reinterpret_cast(comm_stream)), allreduce_kernel); + AnfAlgo::SetNodeAttr(kAttrStreamId, MakeValue(reinterpret_cast(comm_stream)), allreduce_kernel); return allreduce_kernel; }); InsertStreamSwitchNode(kernel_graph, send_recv_pairs); @@ -161,25 +162,28 @@ bool GenSendRecvCNodesForAllReduce(const std::shared_ptr & cudaEvent_t event = nullptr; CHECK_CUDA_RET_WITH_EXCEPT(cudaEventCreate(&event, cudaEventDisableTiming), "Creating cuda event failed."); - AnfAlgo::SetNodeAttr("record_event", MakeValue(reinterpret_cast(event)), *send_node); - AnfAlgo::SetNodeAttr("wait_event", MakeValue(reinterpret_cast(event)), *recv_node); + AnfAlgo::SetNodeAttr(kAttrRecordEvent, MakeValue(reinterpret_cast(event)), *send_node); + AnfAlgo::SetNodeAttr(kAttrWaitEvent, MakeValue(reinterpret_cast(event)), *recv_node); - uintptr_t send_stream = AnfAlgo::GetNodeAttr(mock_send_node, "stream_id"); - AnfAlgo::SetNodeAttr("record_event_stream", MakeValue(send_stream), *send_node); - uintptr_t recv_stream = AnfAlgo::GetNodeAttr(mock_recv_node, "stream_id"); - AnfAlgo::SetNodeAttr("wait_event_stream", MakeValue(recv_stream), *recv_node); + uintptr_t send_stream = AnfAlgo::GetNodeAttr(mock_send_node, kAttrStreamId); + AnfAlgo::SetNodeAttr(kAttrRecordEventStream, MakeValue(send_stream), *send_node); + uintptr_t recv_stream = AnfAlgo::GetNodeAttr(mock_recv_node, kAttrStreamId); + AnfAlgo::SetNodeAttr(kAttrWaitEventStream, MakeValue(recv_stream), *recv_node); return true; } CNodePtr CreateStreamSwitchNode(const std::shared_ptr &kernel_graph, const std::string &name) { auto op = std::make_shared(name); + MS_EXCEPTION_IF_NULL(op); auto apply = 
std::make_shared(op); + MS_EXCEPTION_IF_NULL(apply); std::vector input_list = {apply}; CNodePtr node = kernel_graph->NewCNode(input_list); MS_EXCEPTION_IF_NULL(node); kernel::KernelBuildInfo::KernelBuildInfoBuilder selected_kernel_builder; AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_builder.Build(), node.get()); auto abstract_none = std::make_shared(); + MS_EXCEPTION_IF_NULL(abstract_none); node->set_abstract(abstract_none); SetKernelInfo(node); return node; diff --git a/mindspore/ccsrc/device/gpu/kernel_info_setter.cc b/mindspore/ccsrc/device/gpu/kernel_info_setter.cc index 2ba154b87b..42e76e2483 100644 --- a/mindspore/ccsrc/device/gpu/kernel_info_setter.cc +++ b/mindspore/ccsrc/device/gpu/kernel_info_setter.cc @@ -82,11 +82,16 @@ std::string SupportedTypeList(const CNodePtr &kernel_node) { (void)ParseMetadata(kernel_node, op_info_ptr, kernel::Processor::CUDA, &kernel_info_list); for (size_t i = 0; i < kernel_info_list.size(); i++) { auto supported_akg_type = kernel_info_list[i]->GetAllInputDeviceTypes(); - std::string supported_akg_type_list = "["; + auto supported_akg_type_out = kernel_info_list[i]->GetAllOutputDeviceTypes(); + std::string supported_akg_type_list = "in["; for (auto type : supported_akg_type) { supported_akg_type_list = supported_akg_type_list + mindspore::kernel::TypeId2String(type); } - supported_type_lists = supported_type_lists + supported_akg_type_list + "] "; + supported_type_lists = supported_type_lists + supported_akg_type_list + "], out["; + for (auto type : supported_akg_type_out) { + supported_akg_type_list = supported_akg_type_list + mindspore::kernel::TypeId2String(type); + } + supported_type_lists += "]; "; } return supported_type_lists; } @@ -179,7 +184,7 @@ void SetKernelInfo(const CNodePtr &kernel_node) { if (!result) { result = SelectAkgKernel(kernel_node, builder->Build()); - kernel_type = AUTO_DIFF_KERNEL; + kernel_type = AKG_KERNEL; } if (!result) { diff --git a/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.cc 
b/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.cc index f2dbd4491b..bcad74e5b5 100644 --- a/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.cc +++ b/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.cc @@ -24,10 +24,28 @@ namespace mindspore { namespace device { namespace gpu { MPIInitializer::MPIInitializer() { + int init_flag = 0; + if (MPI_Initialized(&init_flag) != MPI_SUCCESS) { + return; + } + if (init_flag == 0) { + auto ret = MPI_Init(nullptr, nullptr); + if (ret != MPI_SUCCESS) { + return; + } + } MPI_Comm_rank(MPI_COMM_WORLD, &rank_id_); MPI_Comm_size(MPI_COMM_WORLD, &rank_size_); } +MPIInitializer::~MPIInitializer() { + int finalized_flag = 0; + (void)MPI_Finalized(&finalized_flag); + if (finalized_flag == 0) { + (void)MPI_Finalize(); + } +} + MPIInitializer &MPIInitializer::GetInstance() { static MPIInitializer instance; return instance; diff --git a/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.h b/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.h index 00f3b9d713..bd0a4aa948 100644 --- a/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.h +++ b/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.h @@ -30,7 +30,7 @@ class MPIInitializer { private: MPIInitializer(); - ~MPIInitializer() = default; + ~MPIInitializer(); int rank_id_; int rank_size_; diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/device/kernel_adjust.cc index 1bd384ff13..93007764af 100644 --- a/mindspore/ccsrc/device/kernel_adjust.cc +++ b/mindspore/ccsrc/device/kernel_adjust.cc @@ -37,24 +37,6 @@ namespace mindspore { namespace device { using device::ascend::ProfilingUtils; -void KernelAdjust::Reorder(const std::shared_ptr &kernel_graph_ptr) { - MS_EXCEPTION_IF_NULL(kernel_graph_ptr); - const std::vector &origin_cnode_list = kernel_graph_ptr->execution_order(); - std::vector momentum_list; - std::vector other_list; - for (const auto &cnode : origin_cnode_list) { - if (kOptOperatorSet.find(AnfAlgo::GetCNodeName(cnode)) != kOptOperatorSet.end()) { - 
momentum_list.emplace_back(cnode); - } else { - other_list.emplace_back(cnode); - } - } - std::vector new_order_list; - new_order_list.insert(new_order_list.end(), other_list.begin(), other_list.end()); - new_order_list.insert(new_order_list.end(), momentum_list.begin(), momentum_list.end()); - kernel_graph_ptr->set_execution_order(new_order_list); -} - void KernelAdjust::ReorderGetNext(const std::shared_ptr &kernel_graph_ptr) { MS_EXCEPTION_IF_NULL(kernel_graph_ptr); const std::vector &origin_cnode_list = kernel_graph_ptr->execution_order(); @@ -80,23 +62,6 @@ bool KernelAdjust::NeedInsertSwitch() { ConfigManager::GetInstance().iter_num() > 1); } -uint32_t KernelAdjust::FindFirstStreamSwitchLabel(const std::shared_ptr &kernel_graph_ptr) { - MS_EXCEPTION_IF_NULL(kernel_graph_ptr); - auto cnode_ptr_list = kernel_graph_ptr->execution_order(); - CNodePtr cur_cnode_ptr = nullptr; - uint32_t label = kInvalidDistincLabel; - for (uint32_t i = 0; i < cnode_ptr_list.size(); ++i) { - cur_cnode_ptr = cnode_ptr_list[i]; - MS_EXCEPTION_IF_NULL(cur_cnode_ptr); - if (AnfAlgo::GetCNodeName(cur_cnode_ptr) == kStreamSwitchOpName) { - label = AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()); - break; - } - } - - return label; -} - CNodePtr KernelAdjust::CreateSendApplyKernel(const std::shared_ptr &graph_ptr, uint32_t event_id) { MS_EXCEPTION_IF_NULL(graph_ptr); @@ -138,6 +103,8 @@ CNodePtr KernelAdjust::CreateRecvApplyKernel(const std::shared_ptr &kernel_graph_ptr) { + device::ascend::AscendStreamMng &stream_manager = device::ascend::AscendStreamMng::GetInstance(); + stream_manager.Reset(); if (!NeedInsertSwitch()) { return; } @@ -166,68 +133,62 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr if (orders.empty()) { MS_LOG(EXCEPTION) << "graph execution order is empty"; } - uint32_t first_cnode_stream_label = AnfAlgo::GetStreamDistinctionLabel(orders[0].get()); std::vector exec_order; - CNodePtr first_stream_switch_app = CreateStreamSwitchOp(kernel_graph_ptr, 
switch_loop_input); - MS_EXCEPTION_IF_NULL(first_stream_switch_app); - AnfAlgo::SetStreamDistinctionLabel(kFirstStreamSwitchLabel, first_stream_switch_app.get()); - AnfAlgo::SetNodeAttr(kAttrTrueBranchStream, MakeValue(kGetNextLabel), first_stream_switch_app); - - CNodePtr second_stream_switch_app = CreateStreamSwitchOp(kernel_graph_ptr, switch_loop_input); - MS_EXCEPTION_IF_NULL(second_stream_switch_app); - AnfAlgo::SetStreamDistinctionLabel(kSecondStreamSwitchLabel, second_stream_switch_app.get()); - AnfAlgo::SetNodeAttr(kAttrTrueBranchStream, MakeValue(first_cnode_stream_label), second_stream_switch_app); - // add attr "stream_need_active" - AnfAlgo::SetNodeAttr(kStreamNeedActivedFirst, MakeValue(true), second_stream_switch_app); - - CNodePtr first_stream_active_app = CreateStreamActiveOp(kernel_graph_ptr); - MS_EXCEPTION_IF_NULL(first_stream_active_app); - AnfAlgo::SetStreamDistinctionLabel(first_cnode_stream_label, first_stream_active_app.get()); - std::vector first_active_streams = {kFirstStreamSwitchLabel}; - AnfAlgo::SetNodeAttr(kAttrActiveStreamList, MakeValue>(first_active_streams), - first_stream_active_app); - - CNodePtr second_stream_active_app = CreateStreamActiveOp(kernel_graph_ptr); - MS_EXCEPTION_IF_NULL(second_stream_active_app); - // specific deal for common ctrl stream policy - uint32_t first_common_stream_switch_label = FindFirstStreamSwitchLabel(kernel_graph_ptr); - if (first_common_stream_switch_label == kInvalidDistincLabel) { - AnfAlgo::SetStreamDistinctionLabel(first_cnode_stream_label, second_stream_active_app.get()); - } else { - AnfAlgo::SetStreamDistinctionLabel(first_common_stream_switch_label, second_stream_active_app.get()); - } - std::vector second_active_streams = {kSecondStreamSwitchLabel}; - AnfAlgo::SetNodeAttr(kAttrActiveStreamList, MakeValue>(second_active_streams), - second_stream_active_app); + // getnext loop process + // getnext loop stream switch op + CNodePtr getnext_switch_app = CreateStreamSwitchOp(kernel_graph_ptr, 
switch_loop_input); + MS_EXCEPTION_IF_NULL(getnext_switch_app); + uint32_t getnext_switch_stream_id = stream_manager.ApplyNewStream(); + AnfAlgo::SetStreamId(getnext_switch_stream_id, getnext_switch_app.get()); + exec_order.push_back(getnext_switch_app); - CNodePtr assign_add_one = CreateStreamAssignAddnOP(kernel_graph_ptr, switch_loop_input); - MS_EXCEPTION_IF_NULL(assign_add_one); - AnfAlgo::SetStreamDistinctionLabel(first_cnode_stream_label, assign_add_one.get()); - - CNodePtr send = CreateSendApplyKernel(kernel_graph_ptr, kFirstEventId); - AnfAlgo::SetStreamDistinctionLabel(kGetNextLabel, send.get()); - CNodePtr recv = CreateRecvApplyKernel(kernel_graph_ptr, kFirstEventId); - AnfAlgo::SetStreamDistinctionLabel(first_cnode_stream_label, recv.get()); - - // reorder graph orders - exec_order.push_back(first_stream_switch_app); + // getnext op + uint32_t getnext_stream_id = stream_manager.ApplyNewStream(); size_t i = 0; for (; i < orders.size(); i++) { auto node = orders[i]; exec_order.push_back(node); - AnfAlgo::SetStreamDistinctionLabel(kGetNextLabel, exec_order[exec_order.size() - 1].get()); + AnfAlgo::SetStreamId(getnext_stream_id, exec_order[exec_order.size() - 1].get()); if (AnfAlgo::GetCNodeName(node) == kGetNextOpName) { break; } } + // update getnext loop stream switch true_branch_stream attr + AnfAlgo::SetNodeAttr(kAttrTrueBranchStream, MakeValue(getnext_stream_id), getnext_switch_app); + + // getnext loop send + CNodePtr send = CreateSendApplyKernel(kernel_graph_ptr, kFirstEventId); + AnfAlgo::SetStreamId(getnext_stream_id, send.get()); exec_order.push_back(send); - exec_order.push_back(second_stream_switch_app); + + // fpbp loop process + // fpbp loop stream switch + CNodePtr fpbp_switch_app = CreateStreamSwitchOp(kernel_graph_ptr, switch_loop_input); + MS_EXCEPTION_IF_NULL(fpbp_switch_app); + uint32_t fpbp_switch_stream_id = stream_manager.ApplyNewStream(); + AnfAlgo::SetStreamId(fpbp_switch_stream_id, fpbp_switch_app.get()); + 
AnfAlgo::SetNodeAttr(kStreamNeedActivedFirst, MakeValue(true), fpbp_switch_app); + exec_order.push_back(fpbp_switch_app); + + // fpbp loop recv + CNodePtr recv = CreateRecvApplyKernel(kernel_graph_ptr, kFirstEventId); + uint32_t fpbp_stream_id = stream_manager.ApplyNewStream(); + AnfAlgo::SetStreamId(fpbp_stream_id, recv.get()); exec_order.push_back(recv); + + // update fpbp loop stream switch true_branch_stream attr + AnfAlgo::SetNodeAttr(kAttrTrueBranchStream, MakeValue(fpbp_stream_id), fpbp_switch_app); + + // fpbp loop AssignAdd + CNodePtr assign_add_one = CreateStreamAssignAddnOP(kernel_graph_ptr, switch_loop_input); + MS_EXCEPTION_IF_NULL(assign_add_one); + AnfAlgo::SetStreamId(fpbp_stream_id, assign_add_one.get()); exec_order.push_back(assign_add_one); + // fpbp memcpy std::vector memcpy_list; std::vector before_list; std::vector after_list; @@ -244,12 +205,28 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr before_list.emplace_back(cur_cnode); } } - (void)std::copy(before_list.begin(), before_list.end(), std::back_inserter(exec_order)); (void)std::copy(memcpy_list.begin(), memcpy_list.end(), std::back_inserter(exec_order)); - exec_order.push_back(first_stream_active_app); + + // stream active to activate getnext loop + CNodePtr getnext_active_app = CreateStreamActiveOp(kernel_graph_ptr); + MS_EXCEPTION_IF_NULL(getnext_active_app); + std::vector getnext_active_streams = {getnext_switch_stream_id}; + AnfAlgo::SetNodeAttr(kAttrActiveStreamList, MakeValue>(getnext_active_streams), + getnext_active_app); + exec_order.push_back(getnext_active_app); + + // fpbp loop other ops (void)std::copy(after_list.begin(), after_list.end(), std::back_inserter(exec_order)); - exec_order.push_back(second_stream_active_app); + + // stream active to activate fpbp loop + CNodePtr fpbp_active_app = CreateStreamActiveOp(kernel_graph_ptr); + MS_EXCEPTION_IF_NULL(fpbp_active_app); + // specific deal for common ctrl stream policy + std::vector fpbp_active_streams = 
{fpbp_switch_stream_id}; + AnfAlgo::SetNodeAttr(kAttrActiveStreamList, MakeValue>(fpbp_active_streams), fpbp_active_app); + exec_order.push_back(fpbp_active_app); + kernel_graph_ptr->set_execution_order(exec_order); } diff --git a/mindspore/ccsrc/device/kernel_adjust.h b/mindspore/ccsrc/device/kernel_adjust.h index 87195ecfc4..1a7436b396 100644 --- a/mindspore/ccsrc/device/kernel_adjust.h +++ b/mindspore/ccsrc/device/kernel_adjust.h @@ -39,9 +39,9 @@ constexpr auto kZeroParamName = "zero"; constexpr auto kOneParamName = "one"; constexpr auto kStreamNeedActivedFirst = "stream_need_active_first"; -const uint32_t kFirstStreamSwitchLabel = kInvalidDistincLabel - 1; -const uint32_t kGetNextLabel = kInvalidDistincLabel - 2; -const uint32_t kSecondStreamSwitchLabel = kInvalidDistincLabel - 3; +const uint32_t kFirstStreamSwitchLabel = 0; +const uint32_t kGetNextLabel = 1; +const uint32_t kSecondStreamSwitchLabel = 2; const uint32_t kInvalidEventId = UINT32_MAX; const uint32_t kFirstEventId = kInvalidEventId / 2; namespace device { @@ -51,7 +51,7 @@ class KernelAdjust { static KernelAdjust instance; return instance; } - void Reorder(const std::shared_ptr &kernel_graph_ptr); + void InsertSwitchLoop(const std::shared_ptr &kernel_graph_ptr); bool StepLoadCtrlInputs(const std::shared_ptr &kernel_graph_ptr); void Profiling(NotNull kernel_graph_ptr); @@ -65,7 +65,6 @@ class KernelAdjust { void ReorderGetNext(const std::shared_ptr &kernel_graph_ptr); CNodePtr CreateRecvApplyKernel(const std::shared_ptr &graph_ptr, uint32_t event_id); CNodePtr CreateSendApplyKernel(const std::shared_ptr &graph_ptr, uint32_t event_id); - uint32_t FindFirstStreamSwitchLabel(const std::shared_ptr &kernel_graph_ptr); void CreateSwitchOpParameters(const std::shared_ptr &kernel_graph_ptr, std::map *switch_loop_input); CNodePtr CreateStreamSwitchOp(const std::shared_ptr &kernel_graph_ptr, diff --git a/mindspore/ccsrc/device/kernel_info.h b/mindspore/ccsrc/device/kernel_info.h index 33ddda83c9..84cfaa0fa3 
100644 --- a/mindspore/ccsrc/device/kernel_info.h +++ b/mindspore/ccsrc/device/kernel_info.h @@ -35,7 +35,7 @@ class KernelInfo { select_kernel_build_info_ = nullptr; output_address_list_ = {}; workspace_address_list_ = {}; - stream_id_ = 0; + stream_id_ = UINT32_MAX; stream_distinction_label_ = kInvalidDistincLabel; graph_id_ = kInvalidGraphId; } diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc index 9a8e65b474..cc1e3ab8f3 100644 --- a/mindspore/ccsrc/device/kernel_runtime.cc +++ b/mindspore/ccsrc/device/kernel_runtime.cc @@ -102,6 +102,14 @@ bool KernelRuntime::RunTask(const session::KernelGraph *graph) { return false; } +bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) { + MS_EXCEPTION_IF_NULL(kernel); + if (AnfAlgo::OutputAddrExist(kernel, index)) { + return true; + } + return false; +} + size_t KernelRuntime::CountNodeDeviceMemorySize(const mindspore::AnfNodePtr &node, size_t output_index) { MS_EXCEPTION_IF_NULL(node); if (output_index >= AnfAlgo::GetOutputTensorNum(node)) { @@ -146,6 +154,34 @@ void KernelRuntime::RunOpAssignMemory(const std::vector &inpu UpdateRefNodeOutputMem(graph); } +void KernelRuntime::RunOpClearMemory(session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); + // clear input parameter memory resource + for (const auto &input_node : graph->inputs()) { + MS_EXCEPTION_IF_NULL(input_node); + AnfAlgo::SetOutputAddr(nullptr, 0, input_node.get()); + } + // clear input value node memory resource + for (const auto &value_node : graph->graph_value_nodes()) { + MS_EXCEPTION_IF_NULL(value_node); + AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get()); + } + for (const auto &cnode : graph->execution_order()) { + MS_EXCEPTION_IF_NULL(cnode); + // clear output memory resource + for (size_t index = 0; index < AnfAlgo::GetOutputTensorNum(cnode); ++index) { + AnfAlgo::SetOutputAddr(nullptr, index, cnode.get()); + } + // clear workspace memory resource + auto 
kernel_mod = AnfAlgo::GetKernelMod(cnode); + MS_EXCEPTION_IF_NULL(kernel_mod); + auto workspace_lists = kernel_mod->GetWorkspaceSizeList(); + for (size_t index = 0; index < workspace_lists.size(); ++index) { + AnfAlgo::SetWorkspaceAddr(nullptr, index, cnode.get()); + } + } +} + void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) { AssignStaticMemoryInput(graph); AssignStaticMemoryValueNode(graph); @@ -182,6 +218,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector auto device_address = CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); MS_EXCEPTION_IF_NULL(device_address); + MS_EXCEPTION_IF_NULL(mem_manager_); auto ret = mem_manager_->MallocMemFromMemPool(device_address, tensor_size); if (!ret) { MS_LOG(EXCEPTION) << "Malloc device memory failed."; @@ -246,18 +283,37 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(mem_manager_); auto graph_inputs = graph->inputs(); auto graph_valid_input = graph->valid_inputs(); - for (size_t i = 0; i < graph_inputs.size(); i++) { + std::vector need_alloc_nodes; + for (size_t i = 0; i < graph_inputs.size(); ++i) { auto item = graph_inputs[i]; MS_EXCEPTION_IF_NULL(item); - if (!item->isa()) { + if (i < graph_valid_input.size() && !graph_valid_input[i]) { continue; } - if (i < graph_valid_input.size() && !graph_valid_input[i]) { + + if (AnfAlgo::CheckPrimitiveType(item, prim::kPrimMakeTuple)) { + auto outs = AnfAlgo::GetAllOutput(item); + for (auto &out : outs) { + MS_EXCEPTION_IF_NULL(out); + if (!out->isa()) { + continue; + } + if (NodeOutputDeviceAddressExist(out, 0)) { + continue; + } + need_alloc_nodes.push_back(out); + } + } + if (!item->isa()) { continue; } - if (AnfAlgo::OutputAddrExist(item, 0)) { + if (NodeOutputDeviceAddressExist(item, 0)) { continue; } + need_alloc_nodes.push_back(item); + } + + for (auto &item : need_alloc_nodes) { auto output_size = AnfAlgo::GetOutputTensorNum(item); for 
(size_t index = 0; index < output_size; index++) { TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(item, index); @@ -431,7 +487,7 @@ void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int in if ((kGetAllOuts != index) && (SizeToInt(i) != index)) { continue; } - if (AnfAlgo::OutputAddrExist(node, i)) { + if (NodeOutputDeviceAddressExist(node, i)) { MS_LOG(INFO) << "Already malloc index:" << i; continue; } @@ -493,7 +549,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(ms_context); for (auto &value_node : graph->graph_value_nodes()) { MS_EXCEPTION_IF_NULL(value_node); - if (AnfAlgo::OutputAddrExist(value_node, 0)) { + if (NodeOutputDeviceAddressExist(value_node, 0)) { MS_LOG(INFO) << "value_node[" << value_node->DebugString() << "] address already exist"; continue; } @@ -583,6 +639,7 @@ void KernelRuntime::GenLaunchArgs(const mindspore::kernel::KernelMod &kernel_mod for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) { auto real_input = AnfAlgo::GetRealInputIndex(kernel, i); auto device_address = AnfAlgo::GetPrevNodeOutputAddr(kernel, real_input); + MS_EXCEPTION_IF_NULL(device_address); kernel::AddressPtr input = std::make_shared(); MS_EXCEPTION_IF_NULL(input); input->addr = device_address->ptr_; @@ -619,8 +676,8 @@ void KernelRuntime::GenAddrCleanLaunchArgs(const CNodePtr &cnode, AddressPtrList MS_EXCEPTION_IF_NULL(cnode->inputs()[1]); auto pre_node = (cnode->inputs()[1])->cast(); // set clean output address - if (AnfAlgo::HasNodeAttr(kAttrAutomicOutputIndexs, pre_node)) { - auto clean_output_indexs = AnfAlgo::GetNodeAttr>(pre_node, kAttrAutomicOutputIndexs); + if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, pre_node)) { + auto clean_output_indexs = AnfAlgo::GetNodeAttr>(pre_node, kAttrAtomicOutputIndexs); for (auto index : clean_output_indexs) { auto device_address = AnfAlgo::GetOutputAddr(pre_node, index); kernel::AddressPtr input = std::make_shared(); @@ -633,10 
+690,10 @@ void KernelRuntime::GenAddrCleanLaunchArgs(const CNodePtr &cnode, AddressPtrList MS_LOG(INFO) << "AtomicAddClean clean output size:" << clean_output_indexs.size(); } // set clean workspace address - if (AnfAlgo::HasNodeAttr(kAttrAutomicWorkspaceSize, pre_node)) { - auto clean_workspaces = AnfAlgo::GetNodeAttr(pre_node, kAttrAutomicWorkspaceSize); - if (clean_workspaces != 0) { - auto device_address = AnfAlgo::GetWorkspaceAddr(pre_node, 0); + if (AnfAlgo::HasNodeAttr(kAttrAtomicWorkspaceIndexs, pre_node)) { + auto clean_workspaces_indexs = AnfAlgo::GetNodeAttr>(pre_node, kAttrAtomicWorkspaceIndexs); + for (const auto &index : clean_workspaces_indexs) { + auto device_address = AnfAlgo::GetWorkspaceAddr(pre_node, index); kernel::AddressPtr workspace = std::make_shared(); MS_EXCEPTION_IF_NULL(workspace); workspace->addr = device_address->ptr_; @@ -644,7 +701,6 @@ void KernelRuntime::GenAddrCleanLaunchArgs(const CNodePtr &cnode, AddressPtrList workspace->size = device_address->size_; kernel_inputs->emplace_back(workspace); } - MS_LOG(INFO) << "AtomicAddClean clean workspace size" << clean_workspaces; } } @@ -673,10 +729,6 @@ bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph) { MS_LOG(ERROR) << "LaunchKernelMod failed!"; return false; } - if (!SyncStream()) { - MS_LOG(ERROR) << "SyncStream failed!"; - return false; - } return true; } diff --git a/mindspore/ccsrc/device/kernel_runtime.h b/mindspore/ccsrc/device/kernel_runtime.h index 668fb2580f..bfe857f61b 100644 --- a/mindspore/ccsrc/device/kernel_runtime.h +++ b/mindspore/ccsrc/device/kernel_runtime.h @@ -47,6 +47,7 @@ class KernelRuntime { virtual bool Init() = 0; virtual void AssignMemory(session::KernelGraph *graph); void RunOpAssignMemory(const std::vector &input_tensors, session::KernelGraph *graph); + void RunOpClearMemory(session::KernelGraph *graph); virtual bool Run(session::KernelGraph *graph); virtual bool DumpData(session::KernelGraph *graph); virtual bool RunTask(const 
session::KernelGraph *graph); @@ -55,6 +56,7 @@ class KernelRuntime { virtual void AssignStaticMemoryInput(const session::KernelGraph *graph); virtual void AssignStaticMemoryValueNode(session::KernelGraph *graph); virtual void ClearGraphRuntimeResource(uint32_t graph_id); + virtual bool SyncStream() = 0; #ifdef ENABLE_DUMP_E2E DumpConfPtr GetDumpConf(); @@ -67,7 +69,7 @@ class KernelRuntime { protected: virtual DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, TypeId type_id) = 0; - virtual bool SyncStream() = 0; + virtual bool NodeOutputDeviceAddressExist(const AnfNodePtr &node, size_t index); void AssignStaticMemory(session::KernelGraph *graph); void AssignDynamicMemory(session::KernelGraph *graph); void ReuseAssignDynamicMemory(session::KernelGraph *graph); diff --git a/mindspore/ccsrc/device/kernel_runtime_manager.cc b/mindspore/ccsrc/device/kernel_runtime_manager.cc index ca6f386b50..0f95f3e79b 100644 --- a/mindspore/ccsrc/device/kernel_runtime_manager.cc +++ b/mindspore/ccsrc/device/kernel_runtime_manager.cc @@ -54,8 +54,9 @@ KernelRuntime *KernelRuntimeManager::GetSingleKernelRuntime(const std::string &d return runtime_iter->second.get(); } else if (runtime_map_.size() > 0) { auto cur_runtime_key = runtime_map_.begin()->first; - if (cur_runtime_key.rfind('_') != std::string::npos) { - auto cur_device_id = cur_runtime_key.substr(cur_runtime_key.rfind('_') + 1); + auto find_pos = cur_runtime_key.rfind('_'); + if (find_pos != std::string::npos) { + auto cur_device_id = cur_runtime_key.substr(find_pos + 1); MS_LOG(EXCEPTION) << "Can't change device id in runtime, already set device id: " << cur_device_id << ", set device id: " << device_id << " failed"; } diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/device/memory_manager.cc index d2a38038c6..5efbcd8a36 100644 --- a/mindspore/ccsrc/device/memory_manager.cc +++ b/mindspore/ccsrc/device/memory_manager.cc @@ -68,6 +68,7 @@ uint8_t 
*MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, in } else if (flag == kDynamicMem) { ptr = MallocDynamicMem(size, false); } else if (flag == kReuseDynamicMem) { + MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_); ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index); } return ptr; @@ -75,6 +76,7 @@ uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, in uint8_t *MemoryManager::MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size) { if (flag == kReuseDynamicMem) { + MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr_); return mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index); } return MallocDynamicMem(size, false); diff --git a/mindspore/ccsrc/ir/CMakeLists.txt b/mindspore/ccsrc/ir/CMakeLists.txt index 77bc1b7661..2a0b81ae04 100644 --- a/mindspore/ccsrc/ir/CMakeLists.txt +++ b/mindspore/ccsrc/ir/CMakeLists.txt @@ -1,3 +1,7 @@ file(GLOB_RECURSE _IR_SRC_LIST ./*.cc dtype/*.cc) +file(GLOB_RECURSE _IR_LITE_SRC_FILES + ./lite/tensor.cc + ) +list(REMOVE_ITEM _IR_SRC_LIST ${_IR_LITE_SRC_FILES}) set_property(SOURCE ${_IR_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_IR) add_library(_mindspore_ir_obj OBJECT ${_IR_SRC_LIST}) diff --git a/mindspore/ccsrc/ir/anf.cc b/mindspore/ccsrc/ir/anf.cc index 29a74b79ba..3b2402172b 100644 --- a/mindspore/ccsrc/ir/anf.cc +++ b/mindspore/ccsrc/ir/anf.cc @@ -26,6 +26,8 @@ #include "ir/func_graph.h" #include "ir/primitive_base.h" +#include "operator/ops.h" + namespace mindspore { // namespace to support intermediate representation definition CNode::CNode(const std::vector &inputs, const FuncGraphPtr &func_graph) @@ -106,10 +108,14 @@ std::string ValueNode::fullname_with_scope() { bool IsPrimitiveCNode(const AnfNodePtr &node, const PrimitivePtr &value) { MS_EXCEPTION_IF_NULL(node); auto cnode = node->cast(); - if (cnode != nullptr) { + if (cnode == nullptr) { + return false; + } + if (value != nullptr) { return cnode->IsApply(value); } - return 
false; + const auto &prim = GetValueNode(cnode->input(0)); + return prim != nullptr; } PrimitivePtr GetCNodePrimitive(const AnfNodePtr &node) { diff --git a/mindspore/ccsrc/ir/anf.h b/mindspore/ccsrc/ir/anf.h index c2db17aec5..95a018af06 100644 --- a/mindspore/ccsrc/ir/anf.h +++ b/mindspore/ccsrc/ir/anf.h @@ -124,6 +124,7 @@ class AnfNode : public Base { const KernelInfoDevice *kernel_info() const { return kernel_info_.get(); } KernelInfoDevice *kernel_info() { return kernel_info_.get(); } + const KernelInfoDevicePtr &kernel_info_ptr() { return kernel_info_; } void set_kernel_info(const KernelInfoDevicePtr &kernel_info) { kernel_info_ = kernel_info; } AbstractBasePtr abstract() const { return abstract_; } @@ -216,6 +217,7 @@ class CNode : public AnfNode { void set_stop_gradient(bool stop_gradient) { stop_gradient_ = stop_gradient; } std::string fullname_with_scope() override; + void set_fullname_with_scope(const std::string full_name) { fullname_with_scope_ = full_name; } std::string DebugString(int recursive_level = 1) const override; std::string DebugString(bool recursive) const override { return DebugString(recursive ? 
1 : 0); } @@ -395,9 +397,9 @@ static S GetValue(const ValuePtr &value) { std::string GetCNodeFuncName(CNodePtr cnode); // used to check whether an AnfNode is a cnode with a kind of Primitive as first input -bool IsPrimitiveCNode(const AnfNodePtr &node, const PrimitivePtr &value); +bool IsPrimitiveCNode(const AnfNodePtr &node, const PrimitivePtr &value = nullptr); -// used to check whether an AnfNode is a cnode with a Primitive as first input +// used to get PrimitivePtr from a cnode first input PrimitivePtr GetCNodePrimitive(const AnfNodePtr &node); // used to check whether an AnfNode is a valuenode having some Primitive value diff --git a/mindspore/ccsrc/ir/anf_extends.cc b/mindspore/ccsrc/ir/anf_extends.cc index 0345ad29f5..432ffdb606 100644 --- a/mindspore/ccsrc/ir/anf_extends.cc +++ b/mindspore/ccsrc/ir/anf_extends.cc @@ -70,7 +70,7 @@ std::string CNode::fullname_with_scope() { } fullname_with_scope_ = name; } else { - // cnode input 0 should be primitive ptr + // cnode input 0 should be primitive ptr or funcgraph ptr auto value_ptr = input(0)->cast(); if (value_ptr == nullptr) { MS_LOG(WARNING) << "Input 0 of cnode is not a value node, its type is " << input(0)->type_name() << "."; @@ -84,11 +84,23 @@ std::string CNode::fullname_with_scope() { return fullname_with_scope_; } - PrimitivePtr prim = GetValue(input_value); + auto prim = input_value->cast(); MS_EXCEPTION_IF_NULL(scope()); - MS_EXCEPTION_IF_NULL(prim); - fullname_with_scope_ = - scope()->name() + "/" + prim->name() + "-op" + id_generator::get_id(shared_from_base()); + fullname_with_scope_ = scope()->name() + "/"; + if (prim != nullptr) { + fullname_with_scope_ += prim->name(); + } else { + auto func_graph = input_value->cast(); + MS_EXCEPTION_IF_NULL(func_graph); + auto fg_flag = func_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); + if (fg_flag != nullptr) { + auto fg_name = GetValue(fg_flag); + fullname_with_scope_ += "GraphKernel_" + fg_name; + } else { + fullname_with_scope_ += 
func_graph->ToString(); + } + } + fullname_with_scope_ += "-op" + id_generator::get_id(shared_from_base()); } return fullname_with_scope_; diff --git a/mindspore/ccsrc/ir/dtype/number.h b/mindspore/ccsrc/ir/dtype/number.h index 3930f51d73..f8a746f8d6 100644 --- a/mindspore/ccsrc/ir/dtype/number.h +++ b/mindspore/ccsrc/ir/dtype/number.h @@ -77,9 +77,9 @@ class Bool : public Number { TypeId generic_type_id() const override { return kNumberTypeBool; } TypePtr DeepCopy() const override { return std::make_shared(); } - std::string ToString() const override { return "Bool_"; } - std::string ToReprString() const override { return "bool_"; } - std::string DumpText() const override { return "Bool_"; } + std::string ToString() const override { return "Bool"; } + std::string ToReprString() const override { return "bool"; } + std::string DumpText() const override { return "Bool"; } }; // Int diff --git a/mindspore/ccsrc/ir/dtype/type.h b/mindspore/ccsrc/ir/dtype/type.h index a4035abf50..bfe39af43c 100644 --- a/mindspore/ccsrc/ir/dtype/type.h +++ b/mindspore/ccsrc/ir/dtype/type.h @@ -34,65 +34,9 @@ #include "ir/base.h" #include "ir/named.h" +#include "ir/dtype/type_id.h" namespace mindspore { -// -// Supported meta type -// -enum TypeId : int { - kTypeUnknown = 0, - kMetaTypeBegin = kTypeUnknown, - kMetaTypeType, // Type - kMetaTypeAnything, - kMetaTypeObject, - kMetaTypeTypeType, // TypeType - kMetaTypeProblem, - kMetaTypeExternal, - kMetaTypeNone, - kMetaTypeNull, - kMetaTypeEllipsis, - kMetaTypeEnd, - // - // Object types - // - kObjectTypeBegin = kMetaTypeEnd, - kObjectTypeNumber, - kObjectTypeString, - kObjectTypeList, - kObjectTypeTuple, - kObjectTypeSlice, - kObjectTypeKeyword, - kObjectTypeTensorType, - kObjectTypeClass, - kObjectTypeDictionary, - kObjectTypeFunction, - kObjectTypeJTagged, - kObjectTypeSymbolicKeyType, - kObjectTypeEnvType, - kObjectTypeRefKey, - kObjectTypeRef, - kObjectTypeEnd, - // - // Number Types - // - kNumberTypeBegin = kObjectTypeEnd, - 
kNumberTypeBool, - kNumberTypeInt, - kNumberTypeInt8, - kNumberTypeInt16, - kNumberTypeInt32, - kNumberTypeInt64, - kNumberTypeUInt, - kNumberTypeUInt8, - kNumberTypeUInt16, - kNumberTypeUInt32, - kNumberTypeUInt64, - kNumberTypeFloat, - kNumberTypeFloat16, - kNumberTypeFloat32, - kNumberTypeFloat64, - kNumberTypeEnd -}; TypeId IntBitsToTypeId(const int nbits); TypeId UIntBitsToTypeId(const int nbits); diff --git a/mindspore/ccsrc/ir/dtype/type_id.h b/mindspore/ccsrc/ir/dtype/type_id.h new file mode 100644 index 0000000000..17862ad798 --- /dev/null +++ b/mindspore/ccsrc/ir/dtype/type_id.h @@ -0,0 +1,91 @@ +/** + * This is the C++ adaptation and derivative work of Myia (https://github.com/mila-iqia/myia/). + * + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_IR_DTYPE_TYPE_ID_H_ +#define MINDSPORE_CCSRC_IR_DTYPE_TYPE_ID_H_ + +#include +#include + +namespace mindspore { +// +// Supported meta type +// +enum TypeId : int { + kTypeUnknown = 0, + kMetaTypeBegin = kTypeUnknown, + kMetaTypeType, // Type + kMetaTypeAnything, + kMetaTypeObject, + kMetaTypeTypeType, // TypeType + kMetaTypeProblem, + kMetaTypeExternal, + kMetaTypeNone, + kMetaTypeNull, + kMetaTypeEllipsis, + kMetaTypeEnd, + // + // Object types + // + kObjectTypeBegin = kMetaTypeEnd, + kObjectTypeNumber, + kObjectTypeString, + kObjectTypeList, + kObjectTypeTuple, + kObjectTypeSlice, + kObjectTypeKeyword, + kObjectTypeTensorType, + kObjectTypeClass, + kObjectTypeDictionary, + kObjectTypeFunction, + kObjectTypeJTagged, + kObjectTypeSymbolicKeyType, + kObjectTypeEnvType, + kObjectTypeRefKey, + kObjectTypeRef, + kObjectTypeEnd, + // + // Number Types + // + kNumberTypeBegin = kObjectTypeEnd, + kNumberTypeBool, + kNumberTypeInt, + kNumberTypeInt8, + kNumberTypeInt16, + kNumberTypeInt32, + kNumberTypeInt64, + kNumberTypeUInt, + kNumberTypeUInt8, + kNumberTypeUInt16, + kNumberTypeUInt32, + kNumberTypeUInt64, + kNumberTypeFloat, + kNumberTypeFloat16, + kNumberTypeFloat32, + kNumberTypeFloat64, + kNumberTypeEnd +}; +// +// TypeId name map +// +const std::unordered_map type_name_map = { + {kNumberTypeBool, "Bool"}, {kNumberTypeInt8, "Int8"}, {kNumberTypeUInt8, "UInt8"}, + {kNumberTypeInt16, "Int16"}, {kNumberTypeInt32, "Int32"}, {kNumberTypeInt64, "Int64"}, + {kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat32, "Float32"}, {kNumberTypeFloat64, "Float64"}}; +} // namespace mindspore +#endif // MINDSPORE_CCSRC_IR_DTYPE_TYPE_ID_H_ diff --git a/mindspore/ccsrc/ir/func_graph.cc b/mindspore/ccsrc/ir/func_graph.cc index d5d80eb2f0..cdca98fc61 100644 --- a/mindspore/ccsrc/ir/func_graph.cc +++ b/mindspore/ccsrc/ir/func_graph.cc @@ -34,7 +34,7 @@ namespace mindspore { * Methods of Graph */ FuncGraph::FuncGraph() - : flags_(), + : attrs_(), 
transforms_(), parameter_default_value_(), seen_(0), @@ -95,13 +95,27 @@ ParameterPtr FuncGraph::AddWeightParameter(const std::string &name) { return p; } -bool FuncGraph::has_flag(const std::string &flag) { - if (flags_.count(flag)) { - return flags_[flag]; +bool FuncGraph::has_flag(const std::string &key) { + auto iter = attrs_.find(key); + if (iter != attrs_.cend()) { + if (iter->second->isa()) { + return GetValue(iter->second); + } + MS_LOG(WARNING) << "key " << key << " is not a flag, please use has_attr function."; } return false; } +bool FuncGraph::has_attr(const std::string &key) { + auto iter = attrs_.find(key); + return !(iter == attrs_.cend()); +} + +ValuePtr FuncGraph::get_attr(const std::string &key) { + auto iter = attrs_.find(key); + return iter == attrs_.cend() ? nullptr : iter->second; +} + CNodePtr FuncGraph::NewCNode(const std::vector &inputs) { CNodePtr cnode = std::make_shared(inputs, shared_from_base()); if (has_flag(GRAPH_FLAG_HAS_EFFECT)) { diff --git a/mindspore/ccsrc/ir/func_graph.h b/mindspore/ccsrc/ir/func_graph.h index 1a367bde92..5f09dfe6b5 100644 --- a/mindspore/ccsrc/ir/func_graph.h +++ b/mindspore/ccsrc/ir/func_graph.h @@ -38,6 +38,32 @@ namespace mindspore { using BaseRefCounterMap = OrderedMap; using FuncGraphCounterMap = OrderedMap; +struct CNodeIndexHasher { + std::size_t operator()(const CNodeIndexPairPtr pair) const { + MS_EXCEPTION_IF_NULL(pair); + MS_EXCEPTION_IF_NULL(pair->first); + return hash_combine(pair->first->hash(), std::hash()(pair->second)); + } +}; + +struct CNodeIndexEqual { + bool operator()(const CNodeIndexPairPtr lhs, const CNodeIndexPairPtr rhs) const { + if (lhs == nullptr || rhs == nullptr) { + return false; + } + if (lhs == rhs) { + return true; + } + if (lhs->first != rhs->first) { + return false; + } + if (lhs->second != rhs->second) { + return false; + } + return true; + } +}; + template , class CounterEqual = std::equal_to> using CounterOrderedMap = OrderedMap; using AnfNodeCounterMap = 
CounterOrderedMap; @@ -48,6 +74,7 @@ using FuncGraphMap = OrderedMap; const char FUNC_GRAPH_FLAG_IGNORE_VALUES[] = "ignore_values"; const char FUNC_GRAPH_FLAG_DEFER_INLINE[] = "defer_inline"; const char FUNC_GRAPH_FLAG_CORE[] = "core"; +const char FUNC_GRAPH_ATTR_GRAPH_KERNEL[] = "graph_kernel"; const char FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER[] = "spec_param"; namespace abstract { @@ -57,9 +84,6 @@ class AbstractFunction; using AbstractFunctionPtr = std::shared_ptr; } // namespace abstract -class FuncGraphManager; -using FuncGraphManagerPtr = std::shared_ptr; - // ANF transform class // either a primitive or a func_graph class FuncGraphTransform { @@ -172,10 +196,19 @@ class FuncGraph : public FuncGraphBase { void set_is_generate(bool generated) { is_generated_ = generated; } bool is_generated() const { return is_generated_; } - bool has_flag(const std::string &flag); - std::unordered_map &flags() { return flags_; } - void set_flags(const std::unordered_map &flags) { flags_ = flags; } - void set_flags(const std::string &key, const bool value) { flags_[key] = value; } + std::unordered_map &attrs() { return attrs_; } + void set_attrs(const std::unordered_map &attrs) { + for (auto &attr : attrs) { + attrs_[attr.first] = attr.second; + } + } + bool has_flag(const std::string &key); + void set_flag(const std::string &key, bool flag) { attrs_[key] = MakeValue(flag); } + void erase_flag(const std::string &key) { (void)attrs_.erase(key); } + + bool has_attr(const std::string &key); + ValuePtr get_attr(const std::string &key); + void set_attr(const std::string &key, const ValuePtr &value) { attrs_[key] = value; } std::unordered_map &transforms() { return transforms_; } void set_transforms(const std::unordered_map &transforms) { @@ -294,7 +327,7 @@ class FuncGraph : public FuncGraphBase { std::unordered_map &make_ref_params() { return make_ref_params_; } - std::unordered_map flags_; + std::unordered_map attrs_; std::unordered_map transforms_; // parameter default value 
std::map parameter_default_value_; diff --git a/mindspore/ccsrc/ir/func_graph_cloner.cc b/mindspore/ccsrc/ir/func_graph_cloner.cc index 4622bf9ea2..4a0c69d99a 100644 --- a/mindspore/ccsrc/ir/func_graph_cloner.cc +++ b/mindspore/ccsrc/ir/func_graph_cloner.cc @@ -90,6 +90,7 @@ void Cloner::CloneCNode(const AnfNodePtr &node, const FuncGraphPtr &target) { new_node->set_abstract(old_node->abstract()); ScopePtr scope = (node->scope() != kDefaultScope) ? node->scope() : this->scope(); new_node->set_scope(scope); + new_node->set_kernel_info(old_node->kernel_info_ptr()); repl_node_[old_node] = new_node; nodes_.emplace_back(old_node, new_node); TraceManager::EndTrace(); @@ -211,7 +212,7 @@ void Cloner::SetFuncGraphInfo(const FuncGraphPtr &func_graph, FuncGraphPtr *cons MS_EXCEPTION_IF_NULL(target_func_graph); TraceManager::DebugTrace(func_graph->debug_info(), target_relation_); *target_func_graph = std::make_shared(); - (*target_func_graph)->set_flags(func_graph->flags()); + (*target_func_graph)->set_attrs(func_graph->attrs()); (*target_func_graph)->set_transforms(func_graph->transforms()); (*target_func_graph)->set_has_vararg(func_graph->has_vararg()); (*target_func_graph)->set_has_kwarg(func_graph->has_kwarg()); @@ -636,9 +637,14 @@ FuncGraphPtr TransformableClone(const FuncGraphPtr &func_graph, const TraceInfoP if (MsContext::GetInstance()->is_multi_graph_sink()) { if (func_graph->has_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES)) { - new_func_graph->set_flags(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); + new_func_graph->set_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); } } + + if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + new_func_graph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, func_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); + } + return new_func_graph; } } // namespace mindspore diff --git a/mindspore/ccsrc/ir/func_graph_extends.cc b/mindspore/ccsrc/ir/func_graph_extends.cc index 14998a1eaa..ad7aa6ee0c 100644 --- a/mindspore/ccsrc/ir/func_graph_extends.cc +++ 
b/mindspore/ccsrc/ir/func_graph_extends.cc @@ -399,8 +399,8 @@ void FuncGraph::ReleaseFullOrderToEffectOrder() { depend_inputs.push_back(*iter); } } - set_flags(GRAPH_FLAG_HAS_EFFECT, false); - set_flags(GRAPH_FLAG_EFFECT_PATIAL_ORDER, true); + set_flag(GRAPH_FLAG_HAS_EFFECT, false); + set_flag(GRAPH_FLAG_EFFECT_PATIAL_ORDER, true); if (!depend_inputs.empty()) { SetEffectDepends(depend_inputs); } diff --git a/mindspore/ccsrc/minnie/param_value_minnie.h b/mindspore/ccsrc/ir/lite/param_value_lite.h similarity index 72% rename from mindspore/ccsrc/minnie/param_value_minnie.h rename to mindspore/ccsrc/ir/lite/param_value_lite.h index 684d8abd5d..2b249cfa4f 100644 --- a/mindspore/ccsrc/minnie/param_value_minnie.h +++ b/mindspore/ccsrc/ir/lite/param_value_lite.h @@ -14,18 +14,18 @@ * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_MINNIE_PARAM_VALUE_MINNIE_H_ -#define MINDSPORE_CCSRC_MINNIE_PARAM_VALUE_MINNIE_H_ +#ifndef MINDSPORE_CCSRC_MINNIE_PARAM_VALUE_LITE_H_ +#define MINDSPORE_CCSRC_MINNIE_PARAM_VALUE_LITE_H_ #include #include "ir/anf.h" namespace mindspore { -class ParamValueMinnie : public ParamValue { +class ParamValueLite : public ParamValue { public: - ParamValueMinnie() : tensor_addr_(nullptr), tensor_size_(0) {} - virtual ~ParamValueMinnie() = default; + ParamValueLite() : tensor_addr_(nullptr), tensor_size_(0) {} + virtual ~ParamValueLite() = default; size_t tensor_size() const { return tensor_size_; } void set_tensor_size(size_t size) { tensor_size_ = size; } @@ -38,7 +38,6 @@ class ParamValueMinnie : public ParamValue { size_t tensor_size_; }; -using ParamValueMinniePtr = std::shared_ptr; - +using ParamValueLitePtr = std::shared_ptr; } // namespace mindspore -#endif // MINDSPORE_CCSRC_MINNIE_PARAM_VALUE_MINNIE_H_ +#endif // MINDSPORE_CCSRC_MINNIE_PARAM_VALUE_LITE_H_ diff --git a/mindspore/ccsrc/ir/lite/tensor.cc b/mindspore/ccsrc/ir/lite/tensor.cc new file mode 100644 index 0000000000..2957495aa4 --- /dev/null +++ 
b/mindspore/ccsrc/ir/lite/tensor.cc @@ -0,0 +1,152 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "ir/lite/tensor.h" +#include "securec/include/securec.h" + +namespace mindspore { +namespace tensor { +#define kMaxMallocSize 1024 * 1024 * 100 +Tensor::Tensor(const TypeId data_type, const std::vector &shape) : MetaTensor(data_type, shape) {} + +Tensor::Tensor(const TypePtr &type_ptr, const std::vector &shape) : MetaTensor(type_ptr, shape) {} + +Tensor::Tensor(const Tensor &tensor) : MetaTensor(tensor) { + this->data_type_ = tensor.data_type_; + this->shape_ = tensor.shape_; + auto ret = CopyTensorData(tensor); + if (0 != ret) { + MS_LOG(EXCEPTION) << "CopyTensorData error"; + } +} + +int Tensor::CopyTensorData(const Tensor &srcTensor) { + if (srcTensor.data_ == nullptr) { + MS_LOG(ERROR) << "data of srcTensor is nullptr"; + return -1; + } + size_t data_size = this->Size(); + MS_ASSERT(data_size == tensor.Size()); + if (this->data_ == nullptr) { + if (data_size > kMaxMallocSize) { + MS_LOG(ERROR) << "Malloc size is too big while coping data, " << data_size << " bytes"; + return -1; + } + this->data_ = malloc(data_size); + } + memcpy_s(this->data_, data_size, tensor.data_, tensor.Size()); + return 0; +} + +Tensor::~Tensor() { + if (nullptr != this->data_) { + free(this->data_); + } +} + +Tensor &Tensor::operator=(const Tensor &tensor) { + if (&tensor == this) { + 
return *this; + } + this->shape_ = tensor.shape_; + this->data_type_ = tensor.data_type_; + auto ret = CopyTensorData(tensor); + if (0 != ret) { + MS_LOG(EXCEPTION) << "CopyTensorData error"; + } + return *this; +} + +bool Tensor::operator==(const Tensor &tensor) { + return data_ == tensor.data_ && shape_ == tensor.shape_ && data_type_ == tensor.data_type_; +} + +bool Tensor::operator==(const Value &other) const { + if (other.isa()) { + auto other_ = static_cast(other); + return *this == other_; + } else { + return false; + } +} +} // namespace tensor + +namespace inference { +MSTensor *MSTensor::CreateTensor(TypeId data_type, const std::vector &shape) { + return new Tensor(data_type, shape); +} + +Tensor::Tensor() { this->tensor_impl_ = std::make_shared(); } + +Tensor::Tensor(TypeId data_type, const std::vector &shape) { + this->tensor_impl_ = std::make_shared(data_type, shape); +} + +Tensor::Tensor(std::shared_ptr tensor_ptr) { this->tensor_impl_ = std::move(tensor_ptr); } + +TypeId Tensor::data_type() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->data_type(); +} + +TypeId Tensor::set_data_type(TypeId data_type) { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->set_data_type(data_type); +} + +std::vector Tensor::shape() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->shape(); +} + +size_t Tensor::set_shape(const std::vector &shape) { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->set_shape(shape); +} + +int Tensor::DimensionSize(size_t index) const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->DimensionSize(index); +} + +int Tensor::ElementsNum() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->ElementsNum(); +} + +std::size_t Tensor::hash() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->hash(); +} + +std::shared_ptr Tensor::tensor() const { + 
MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_; +} + +size_t Tensor::Size() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->Size(); +} + +void *Tensor::MutableData() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->data(); +} +} // namespace inference +} // namespace mindspore diff --git a/mindspore/ccsrc/ir/lite/tensor.h b/mindspore/ccsrc/ir/lite/tensor.h new file mode 100644 index 0000000000..0dcf5cc0ee --- /dev/null +++ b/mindspore/ccsrc/ir/lite/tensor.h @@ -0,0 +1,97 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_IR_LITE_TENSOR_H_ +#define MINDSPORE_CCSRC_IR_LITE_TENSOR_H_ + +#include +#include +#include "ir/meta_tensor.h" +#include "ir/dtype/type.h" + +namespace mindspore { +namespace tensor { +class Tensor : public MetaTensor { + public: + Tensor() : MetaTensor() {} + + Tensor(const TypeId data_type, const std::vector &shape); + + Tensor(const TypePtr &type_ptr, const std::vector &shape); + + Tensor(const Tensor &tensor); + + ~Tensor(); + + int CopyTensorData(const Tensor &srcTensor); + + MS_DECLARE_PARENT(Tensor, MetaTensor) + + virtual Tensor &operator=(const Tensor &tensor); + + virtual bool operator==(const Tensor &tensor); + + bool operator==(const Value &other) const override; + + size_t Size() const { return MetaTensor::ElementsNum() * GetTypeByte(TypeIdToType(this->data_type_)); } + + void *Data() const { return data_; } + + protected: + void *data_; +}; + +using TensorPtr = std::shared_ptr; +} // namespace tensor + +namespace inference { +class Tensor : public MSTensor { + public: + Tensor(); + + Tensor(TypeId data_type, const std::vector &shape); + + explicit Tensor(std::shared_ptr tensor_ptr); + + ~Tensor() = default; + + TypeId data_type() const override; + + TypeId set_data_type(const TypeId data_type) override; + + std::vector shape() const override; + + size_t set_shape(const std::vector &shape) override; + + int DimensionSize(size_t index) const override; + + int ElementsNum() const override; + + std::size_t hash() const override; + + std::shared_ptr tensor() const; + + size_t Size() const override; + + void *MutableData() const override; + + protected: + std::shared_ptr tensor_impl_; +}; +} // namespace inference +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_IR_LITE_TENSOR_H_ diff --git a/mindspore/ccsrc/ir/manager.cc b/mindspore/ccsrc/ir/manager.cc index 4b9f0c22e9..291a752405 100644 --- a/mindspore/ccsrc/ir/manager.cc +++ b/mindspore/ccsrc/ir/manager.cc @@ -328,9 +328,6 @@ void 
FuncGraphManager::ProcessEdge(AnfNodePtr node, int index, AnfNodePtr inp, E DropEdge(node, index, inp); } else { MS_LOG(DEBUG) << "Add node " << node->ToString() << " input[" << index << "] " << inp->ToString(); - if (inp->func_graph() != nullptr) { - AddFuncGraph(inp->func_graph()); - } if (IsValueNode(inp)) { MS_LOG(DEBUG) << "Input[" << index << "] is const graph " << inp->ToString(); AddFuncGraph(GetValueNode(inp)); @@ -372,9 +369,8 @@ void FuncGraphManager::AcquireNodes(const std::vector &nodes) { for (auto &node : acq) { MS_EXCEPTION_IF_NULL(node); - FuncGraphPtr fg = node->func_graph(); + auto fg = node->func_graph(); if (fg != nullptr) { - AddFuncGraph(fg); fg->AddNode(node); } ProcessInputs(node, kIncEdge); @@ -468,7 +464,7 @@ void FuncGraphManager::MoveAllCNodeDropGraph(FuncGraphPtr source, FuncGraphPtr t } } -inline void FuncGraphManager::AddEdge(AnfNodePtr node, int index, AnfNodePtr input) { +void FuncGraphManager::AddEdge(AnfNodePtr node, int index, AnfNodePtr input) { auto fg = node->func_graph(); if (input->isa()) { fg->AddValueNode(input); @@ -489,7 +485,7 @@ inline void FuncGraphManager::AddEdge(AnfNodePtr node, int index, AnfNodePtr inp } } -inline void FuncGraphManager::DropEdge(AnfNodePtr node, int index, AnfNodePtr input) { +void FuncGraphManager::DropEdge(AnfNodePtr node, int index, AnfNodePtr input) { auto fg = node->func_graph(); if (input->isa()) { fg->DropValueNode(input); @@ -510,7 +506,7 @@ inline void FuncGraphManager::DropEdge(AnfNodePtr node, int index, AnfNodePtr in } } -inline void FuncGraphManager::MoveAllNodes(FuncGraphPtr source, FuncGraphPtr target) { +void FuncGraphManager::MoveAllNodes(FuncGraphPtr source, FuncGraphPtr target) { target->CopyNodes(source); target->CopyValueNodes(source); target->CopyFuncGraphCNodesIndex(source); @@ -637,103 +633,7 @@ void FuncGraphTransaction::Commit() { manager_->CommitChanges(changes); } -FuncGraphAnalysis::FuncGraphAnalysis(const FuncGraphManager *const manager) - : manager_(manager), 
include_func_graph_none_(false) {} - -DepCollector::DepCollector(const FuncGraphManager *const manager) : FuncGraphAnalysis(manager) { - MS_EXCEPTION_IF_NULL(manager_); -} - -void DepCollector::OnAddEdge(AnfNodePtr node, int index, AnfNodePtr inp) { OnModEdge(node, index, inp, kIncEdge); } - -void DepCollector::OnDropEdge(AnfNodePtr node, int index, AnfNodePtr inp) { OnModEdge(node, index, inp, kDecEdge); } - -template -bool CounterAnfNodeCollector::Inc(const FuncGraphPtr &func_graph, - const ValueT &key, int count) { - auto &d = count_nodes_map_[func_graph]; - if (d.count(key) == 0) { - d[key] = count; - return true; - } else { - d[key] += count; - } - return false; -} - -template -bool CounterAnfNodeCollector::Dec(const FuncGraphPtr &func_graph, - const ValueT &key, int count) { - MS_EXCEPTION_IF_NULL(func_graph); - auto &d = count_nodes_map_[func_graph]; - if (d.count(key) != 0) { - if (d[key] == count) { - (void)d.erase(key); - return true; - } else { - d[key] -= count; - if (d[key] < 0) { - MS_LOG(EXCEPTION) << "Count of key '" << key - << "' dec from 0. NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info()); - } - } - } - return false; -} - -template -bool CounterAnfNodeCollector::Mod(const FuncGraphPtr &func_graph, - const ValueT &key, int count) { - if (count > 0) { - return Inc(func_graph, key, count); - } else if (count < 0) { - return Dec(func_graph, key, -count); - } else { - MS_LOG(EXCEPTION) << "Count of key '" << key - << "' cannot be 0. 
NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info()); - } -} - -bool CounterFuncGraphCollector::Inc(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count = 1) { - auto &d = count_func_graphs_map_[func_graph]; - if (d.count(key) == 0) { - d[key] = count; - return true; - } else { - d[key] += count; - } - return false; -} - -bool CounterFuncGraphCollector::Dec(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count = 1) { - auto &d = count_func_graphs_map_[func_graph]; - if (d.count(key) != 0) { - if (d[key] == count) { - (void)d.erase(key); - return true; - } else { - d[key] -= count; - if (d[key] < 0) { - MS_LOG(EXCEPTION) << "Count of key '" << key->ToString() - << "' dec from 0. NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info()); - } - } - } - return false; -} - -bool CounterFuncGraphCollector::Mod(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count) { - if (count > 0) { - return Inc(func_graph, key, count); - } else if (count < 0) { - return Dec(func_graph, key, -count); - } else { - MS_LOG(EXCEPTION) << "Count of key '" << key->ToString() - << "' cannot be 0. 
NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info()); - } -} - -DepComputer::DepComputer(const FuncGraphManager *const manager) : FuncGraphAnalysis(manager) { +DepComputer::DepComputer(const FuncGraphManager *const manager) : manager_(manager) { MS_EXCEPTION_IF_NULL(manager_); manager_->signals()->InvalidateComputer.connect(this, &DepComputer::OnInvalidateComputer); validate_ = false; @@ -843,16 +743,15 @@ void FVTotalComputer::RealRecompute() { for (auto &fg : manager->func_graphs()) { fv_total_analysis_[fg] = OrderedMap(); - count_nodes_map_[fg] = OrderedMap(); - count_func_graphs_map_[fg] = OrderedMap(); } for (auto &fg : manager->func_graphs()) { + // add all free variable nodes AnfNodeCounterMap items = fg->free_variables(); for (auto &iter : items) { auto curr = fg; while (curr != nullptr) { - (void)CounterAnfNodeCollector::Mod(curr, iter.first, iter.second); + fv_total_analysis_[curr][iter.first] = iter.second; curr = manager->parent(curr); if (curr != nullptr) { const AnfNodeSet &all_nodes = curr->nodes(); @@ -863,6 +762,7 @@ void FVTotalComputer::RealRecompute() { } } + // add all FGs of free variables auto &used = fg->func_graphs_used(); for (auto &iter : used) { auto p = manager->parent(iter.first); @@ -871,21 +771,11 @@ void FVTotalComputer::RealRecompute() { } auto curr = fg; while (curr != p) { - (void)CounterFuncGraphCollector::Mod(curr, iter.first, iter.second); + fv_total_analysis_[curr][iter.first] = iter.second; curr = manager->parent(curr); } } } - for (auto &fg : manager->func_graphs()) { - auto &fvp = count_nodes_map_[fg]; - auto &fvg = count_func_graphs_map_[fg]; - for (auto &item : fvp) { - fv_total_analysis_[fg][item.first] = item.second; - } - for (auto &item : fvg) { - fv_total_analysis_[fg][item.first] = item.second; - } - } } void FuncGraphsUsedTotalComputer::RealRecompute(FuncGraphPtr fg) { diff --git a/mindspore/ccsrc/ir/manager.h b/mindspore/ccsrc/ir/manager.h index e4e5a1fba8..5da3812d25 100644 --- 
a/mindspore/ccsrc/ir/manager.h +++ b/mindspore/ccsrc/ir/manager.h @@ -88,14 +88,6 @@ FuncGraphManagerPtr Manage(const std::vector &func_graphs, bool ma FuncGraphManagerPtr MakeManager(const std::vector &func_graphs = {}, bool manage = true); struct Signals { - Signal AddFuncGraph; - Signal DropFuncGraph; - Signal AddNode; - Signal DropNode; - Signal AddEdge; - Signal DropEdge; - Signal MoveAllCNode; - Signal InvalidateCollector; Signal InvalidateComputer; }; @@ -103,136 +95,15 @@ enum EdgeProcessDirection { kDecEdge = -1, kIncEdge = 1 }; using CNodeIndexPair = std::pair; using CNodeIndexPairPtr = std::shared_ptr; - -using FuncGraphToFuncGraphCounterMap = OrderedMap>; -template , class CollectorEqual = std::equal_to> -using FuncGraphToAnfNodeCounterMap = OrderedMap>; - -// analysis base class -class FuncGraphAnalysis { - public: - explicit FuncGraphAnalysis(const FuncGraphManager *const manager); - - virtual ~FuncGraphAnalysis() { manager_ = nullptr; } - - virtual size_t size() const { return 0; } - - virtual void OnAddFuncGraph(FuncGraphPtr) {} - - virtual void OnDropFuncGraph(FuncGraphPtr) {} - - virtual void OnMoveAllCNode(FuncGraphPtr, FuncGraphPtr) {} - - protected: - // subclass can reset their own member; - virtual void ExtraReset() {} - - virtual void OnAddNode(AnfNodePtr n) {} - - virtual void OnDropNode(AnfNodePtr n) {} - - virtual void OnAddEdge(AnfNodePtr, int, AnfNodePtr) {} - - virtual void OnDropEdge(AnfNodePtr, int, AnfNodePtr) {} - - const FuncGraphManager *manager_; - bool include_func_graph_none_; -}; - -using FuncGraphToAnfNodeMap = OrderedMap; - -struct CNodeIndexHasher { - std::size_t operator()(const CNodeIndexPairPtr pair) const { - MS_EXCEPTION_IF_NULL(pair); - MS_EXCEPTION_IF_NULL(pair->first); - return hash_combine(pair->first->hash(), std::hash()(pair->second)); - } -}; - -struct CNodeIndexEqual { - bool operator()(const CNodeIndexPairPtr lhs, const CNodeIndexPairPtr rhs) const { - if (lhs == nullptr || rhs == nullptr) { - return false; - 
} - if (lhs == rhs) { - return true; - } - if (lhs->first != rhs->first) { - return false; - } - if (lhs->second != rhs->second) { - return false; - } - return true; - } -}; - -// graphs analysis which compute in write, read needn't recompute -class DepCollector : public FuncGraphAnalysis { - public: - explicit DepCollector(const FuncGraphManager *manager); - ~DepCollector() override = default; - - void Reset() { ExtraReset(); } - void OnInvalidateCollector() { Reset(); } - - protected: - // inherit from FuncGraphAnalysis - void OnAddEdge(AnfNodePtr node, int index, AnfNodePtr inp) override; - void OnDropEdge(AnfNodePtr node, int index, AnfNodePtr inp) override; - // subclass can override; - virtual void OnModEdge(AnfNodePtr, int, AnfNodePtr, EdgeProcessDirection) {} -}; - -class CounterFuncGraphCollector : public DepCollector { - public: - explicit CounterFuncGraphCollector(const FuncGraphManager *m) : DepCollector(m) {} - ~CounterFuncGraphCollector() override = default; - FuncGraphToFuncGraphCounterMap &count_func_graphs_map() { return count_func_graphs_map_; } - // inherit from FuncGraphAnalysis - size_t size() const override { return count_func_graphs_map_.size(); } - void OnAddFuncGraph(FuncGraphPtr fg) final { count_func_graphs_map_[fg] = OrderedMap(); } - void OnDropFuncGraph(FuncGraphPtr fg) final { (void)count_func_graphs_map_.erase(fg); } - bool Inc(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count); - bool Dec(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count); - bool Mod(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count); - - FuncGraphToFuncGraphCounterMap count_func_graphs_map_; - - protected: - void ExtraReset() override { count_func_graphs_map_.clear(); } -}; - -template , class CollectorEqual = std::equal_to> -class CounterAnfNodeCollector : public DepCollector { - public: - explicit CounterAnfNodeCollector(const FuncGraphManager *m) : DepCollector(m) {} - ~CounterAnfNodeCollector() override = 
default; - FuncGraphToAnfNodeCounterMap &count_nodes_map() { return count_nodes_map_; } - - size_t size() const override { return count_nodes_map_.size(); } - void OnAddFuncGraph(FuncGraphPtr fg) final { - count_nodes_map_[fg] = OrderedMap(); - } - void OnDropFuncGraph(FuncGraphPtr fg) final { (void)count_nodes_map_.erase(fg); } - - bool Inc(const FuncGraphPtr &func_graph, const ValueT &key, int count); - bool Dec(const FuncGraphPtr &func_graph, const ValueT &key, int count); - bool Mod(const FuncGraphPtr &func_graph, const ValueT &key, int count); - - FuncGraphToAnfNodeCounterMap count_nodes_map_; - - protected: - void ExtraReset() override { count_nodes_map_.clear(); } -}; - using FuncGraphToFuncGraphSetMap = OrderedMap; -// graphs analysis which need dynamic compute by DepCollector in each read -class DepComputer : public FuncGraphAnalysis { +// analysis base class, graphs analysis which need dynamic compute by DepCollector in each read +class DepComputer { public: explicit DepComputer(const FuncGraphManager *manager); - ~DepComputer() override = default; + virtual ~DepComputer() { manager_ = nullptr; } + + virtual size_t size() const { return 0; } void Reset() { ExtraReset(); @@ -250,15 +121,14 @@ class DepComputer : public FuncGraphAnalysis { bool IsValidate(const FuncGraphPtr &fg) { return func_graphs_validate_[fg]; } - void OnAddFuncGraph(FuncGraphPtr) final { Reset(); } - - void OnDropFuncGraph(FuncGraphPtr) final { Reset(); } - protected: + // subclass can reset their own member; + virtual void ExtraReset() {} // subclass do the real compute virtual void RealRecompute() {} virtual void RealRecompute(FuncGraphPtr) {} + const FuncGraphManager *manager_; bool validate_; OrderedMap func_graphs_validate_; @@ -345,12 +215,9 @@ class ScopeComputer final : public DepComputer { using FVTotalMap = OrderedMap>; -class FVTotalComputer final : public DepComputer, - public CounterAnfNodeCollector, - public CounterFuncGraphCollector { +class FVTotalComputer final : 
public DepComputer { public: - explicit FVTotalComputer(const FuncGraphManager *m) - : DepComputer(m), CounterAnfNodeCollector(m), CounterFuncGraphCollector(m) {} + explicit FVTotalComputer(const FuncGraphManager *m) : DepComputer(m) {} ~FVTotalComputer() override = default; FVTotalMap &fv_total_analysis() { return fv_total_analysis_; } diff --git a/mindspore/ccsrc/ir/meta_tensor.h b/mindspore/ccsrc/ir/meta_tensor.h index a85ef77e83..d78caf3b5d 100644 --- a/mindspore/ccsrc/ir/meta_tensor.h +++ b/mindspore/ccsrc/ir/meta_tensor.h @@ -29,7 +29,7 @@ // brief mindspore namespace. // -// mindspore namespace is the top level namespace of Mindsporeession project. +// mindspore namespace is the top level namespace of MindSpore project. // Other namespace should be a sub namespace of mindspore namespace in the ME project. namespace mindspore { diff --git a/mindspore/ccsrc/ir/optimizer_caller.h b/mindspore/ccsrc/ir/optimizer_caller.h new file mode 100644 index 0000000000..bd30454147 --- /dev/null +++ b/mindspore/ccsrc/ir/optimizer_caller.h @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_IR_OPTIMIZER_CALLER_H_ +#define MINDSPORE_CCSRC_IR_OPTIMIZER_CALLER_H_ + +#include "ir/anf.h" +#include "optimizer/opt.h" + +namespace mindspore { +class OptimizerCaller { + public: + virtual AnfNodePtr operator()(const opt::OptimizerPtr &, const AnfNodePtr &) { return nullptr; } +}; +} // namespace mindspore +#endif // MINDSPORE_CCSRC_IR_OPTIMIZER_CALLER_H_ diff --git a/mindspore/ccsrc/ir/param_value_py.h b/mindspore/ccsrc/ir/param_value_py.h index 6841f4c040..a03e34ac6e 100644 --- a/mindspore/ccsrc/ir/param_value_py.h +++ b/mindspore/ccsrc/ir/param_value_py.h @@ -28,7 +28,7 @@ namespace py = pybind11; class ParamValuePy : public ParamValue { public: ParamValuePy() : value_(py::none()) {} - explicit ParamValuePy(py::object value) : value_(value) {} + explicit ParamValuePy(const py::object &value) : value_(value) {} ~ParamValuePy() override = default; py::object value() { return value_; } diff --git a/mindspore/ccsrc/ir/pattern_matcher.h b/mindspore/ccsrc/ir/pattern_matcher.h new file mode 100644 index 0000000000..6605b9ce4c --- /dev/null +++ b/mindspore/ccsrc/ir/pattern_matcher.h @@ -0,0 +1,310 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_IR_PATTERN_MATCHER_H_ +#define MINDSPORE_CCSRC_IR_PATTERN_MATCHER_H_ + +#include +#include + +#include "ir/anf.h" +#include "operator/ops.h" + +namespace mindspore { + +/// +/// Base class for all recognizable patterns. +/// We implement an Expression Template approach using static polymorphism based on +/// the Curiously Recurring Template Pattern (CRTP) which "achieves a similar effect +/// to the use of virtual functions without the costs..." as described in: +/// https://en.wikipedia.org/wiki/Expression_templates and +/// https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern +/// The TryCapture function tries to capture the pattern with the given node. +/// The GetNode function builds a new node using the captured values. +/// + +template +class PBase { + public: + bool CheckFunc(const opt::PredicateFuncType &func, const AnfNodePtr &node) { + return func(get_object().GetNode(node)); + } + + const T &get_object() const { return *static_cast(this); } + + template + bool TryCapture(const TN &value) const { + get_object().Reset(); + return get_object().TryCapture_(value); + } + + using Internal = T; +}; + +template +class PIsEqual { + public: + bool operator()(const T &lhs, const T &rhs) const { return lhs == rhs; } +}; + +template +class PatternNode : public PBase > { + public: + T GetNode(const AnfNodePtr &node) const { + if (!captured_) { + MS_EXCEPTION(ValueError) << "A Pattern wasn't captured for this Token before the call to GetNode."; + } + return captured_node_; + } + + bool TryCapture_(const T &node) const { + if (!captured_) { + captured_node_ = node; + captured_ = true; + return true; + } + return PIsEqual()(captured_node_, node); + } + + void Reset() const { captured_ = false; } + using Internal = const PatternNode &; + + protected: + mutable T captured_node_; + mutable bool captured_{false}; +}; + +template +class PBinOperation : public PBase > { + public: + PBinOperation(const PrimitivePtr &prim, const T 
&x, const T2 &y) : prim_(prim), x_(x), y_(y) {} + + AnfNodePtr GetNode(const AnfNodePtr &node) const { + AnfNodePtr lhs = x_.GetNode(node->func_graph()); + AnfNodePtr rhs = y_.GetNode(node->func_graph()); + AnfNodePtrList list = {prim_->cast(), lhs, rhs}; + return NewCNode(list, node->func_graph()); + } + + bool TryCapture_(const AnfNodePtr &node) const { + if (IsPrimitiveCNode(node, prim_)) { + auto cnode = node->cast(); + auto inputs = cnode->inputs(); + if (inputs.size() == 3) { + // Binary Prim assumes only two inputs + if (!x_.TryCapture_(inputs[1]) || !y_.TryCapture_(inputs[2])) { + return false; + } + return true; + } + } + return false; + } + + void Reset() const { + x_.Reset(); + y_.Reset(); + } + + private: + const PrimitivePtr prim_; + typename T::Internal x_; + typename T2::Internal y_; +}; + +/// +/// Helper functions to apply a pattern function on all elements of a tuple +/// +namespace tuple_utils { +template +struct apply_func_tuple_item { + template + static void apply(Func *func, const TTuple &tuple) { + (*func)(Index, std::get(tuple)); + apply_func_tuple_item<(Index + 1) == std::tuple_size::value, (Index + 1), Func>::apply(func, tuple); + } +}; + +template +struct apply_func_tuple_item { + template + static void apply(Func *func, const TTuple &tuple) {} +}; + +template +inline void apply_func_tuple(Func *func, const TTuple &tuple) { + apply_func_tuple_item::value == 0, 0, Func>::apply(func, tuple); +} + +struct PTupleResetCapture { + template + void operator()(size_t i, const T &pattern) const { + pattern.Reset(); + } +}; + +struct PTupleCapture { + explicit PTupleCapture(const AnfNodePtrList tuple) : tuple_(tuple) {} + + template + void operator()(size_t i, const TPattern &pattern) { + // Check if the first node is a Primitive + if (i == 0 && tuple_[i]->isa()) { + auto prim = tuple_[i]->cast(); + if (tuple_[i] != pattern.GetNode(tuple_[i])) { + captured_ = false; + } + } else { + captured_ = captured_ && pattern.TryCapture_(tuple_[i]); + } + } + 
+ const AnfNodePtrList tuple_; + bool captured_{true}; +}; + +struct PTupleGetNode { + explicit PTupleGetNode(const AnfNodePtr &node) : node_(node) {} + + template + void operator()(size_t, const TPattern &pattern) { + args_.push_back(pattern.GetNode(node_)); + } + + const AnfNodePtr &node_; + std::vector args_; +}; +} // namespace tuple_utils + +template +class PCNode : public PBase > { + public: + explicit PCNode(const TArgs &... args) : args_(args...) {} + + AnfNodePtr GetNode(const AnfNodePtr &node) const { + tuple_utils::PTupleGetNode get_node(node); + tuple_utils::apply_func_tuple(&get_node, args_); + return NewCNode(get_node.args_, node->func_graph()); + } + + bool TryCapture_(const AnfNodePtr &node) const { + if (node->isa()) { + auto cnode = node->cast(); + auto inputs = cnode->inputs(); + if (inputs.size() != sizeof...(TArgs)) { + return false; + } + tuple_utils::PTupleCapture capture_func(inputs); + tuple_utils::apply_func_tuple(&capture_func, args_); + return capture_func.captured_; + } + + return false; + } + + void Reset() const { + tuple_utils::PTupleResetCapture reset; + tuple_utils::apply_func_tuple(&reset, args_); + } + + private: + std::tuple args_; +}; + +template +class PPrimitive : public PBase > { + public: + explicit PPrimitive(const PrimitivePtr &prim, const TArgs &... args) : prim_(prim), args_(args...) 
{} + + AnfNodePtr GetNode(const AnfNodePtr &node) const { + tuple_utils::PTupleGetNode get_node(node); + tuple_utils::apply_func_tuple(&get_node, args_); + auto prim_cnode = get_node.args_; + prim_cnode.insert(prim_cnode.begin(), NewValueNode(prim_)); + return NewCNode(prim_cnode, node->func_graph()); + } + + bool TryCapture_(const AnfNodePtr &node) const { + if (IsPrimitiveCNode(node, prim_)) { + auto cnode = node->cast(); + auto inputs = cnode->inputs(); + if ((inputs.size() - 1) != sizeof...(TArgs)) { + return false; + } + + AnfNodePtrList rest(inputs.begin() + 1, inputs.end()); + tuple_utils::PTupleCapture capture_func(rest); + tuple_utils::apply_func_tuple(&capture_func, args_); + + return capture_func.captured_; + } + + return false; + } + + void Reset() const { + tuple_utils::PTupleResetCapture reset; + tuple_utils::apply_func_tuple(&reset, args_); + } + + private: + const PrimitivePtr prim_; + std::tuple args_; +}; + +// Macro for binary operation functions +#define BIN_OPERATION_PATTERN(Operator, MSPrimitive) \ + template \ + inline PBinOperation Operator(const PBase &x, const PBase &y) { \ + return PBinOperation(MSPrimitive, x.get_object(), y.get_object()); \ + } + +// Arithmetic operations +BIN_OPERATION_PATTERN(operator+, prim::kPrimTensorAdd); +BIN_OPERATION_PATTERN(operator*, prim::kPrimMul); + +// Macros for match and replace +#define MATCH_REPLACE(OrigNode, CaptureNode, ReplaceWith) \ + if ((CaptureNode).TryCapture(OrigNode)) { \ + return (ReplaceWith).GetNode(OrigNode); \ + } + +#define MATCH_REPLACE_IF(OrigNode, CaptureNode, ReplaceWith, Condition) \ + if ((CaptureNode).TryCapture(OrigNode) && (Condition)) { \ + return (ReplaceWith).GetNode(OrigNode); \ + } + +#define MATCH_REPLACE_IF_ELSE(OrigNode, CaptureNode, ReplaceWith, Condition, ElseNode) \ + if ((CaptureNode).TryCapture(OrigNode)) { \ + if ((Condition)) { \ + return (ReplaceWith).GetNode(OrigNode); \ + } \ + return (ElseNode).GetNode(OrigNode); \ + } + +#define 
MATCH_REPLACE_LAMBDA(OrigNode, CaptureNode, Lambda) \ + if ((CaptureNode).TryCapture(OrigNode)) { \ + return (Lambda)(); \ + } + +#define MATCH_REPLACE_LAMBDA_IF(OrigNode, CaptureNode, Lambda, Condition) \ + if ((CaptureNode).TryCapture(OrigNode) && (Condition)) { \ + return (Lambda)(); \ + } + +} // namespace mindspore + +#endif // #ifndef MINDSPORE_CCSRC_IR_PATTERN_MATCHER_H_ diff --git a/mindspore/ccsrc/ir/primitive.cc b/mindspore/ccsrc/ir/primitive.cc index 4be4489d5b..59497affd5 100644 --- a/mindspore/ccsrc/ir/primitive.cc +++ b/mindspore/ccsrc/ir/primitive.cc @@ -52,9 +52,6 @@ py::function PrimitivePy::GetBpropFunction() { return fn; } else { auto fn = GetBpropFunctionByObj(python_obj_); - if (fn.is_none()) { - MS_LOG(WARNING) << "Can't find bprop function for " << name(); - } return fn; } } @@ -75,7 +72,7 @@ py::function PrimitivePy::GetComputeFunction() { py::function vm_fn = get_fn(python_obj_); if (py::isinstance(vm_fn)) { - MS_LOG(DEBUG) << "Cannot find " << python_obj_.attr("__class__").attr("__name__").cast(); + MS_LOG(WARNING) << "Cannot find " << python_obj_.attr("__class__").attr("__name__").cast(); vm_fn = mindspore::GetComputeFunction(Primitive::name()); } return vm_fn; diff --git a/mindspore/ccsrc/ir/primitive.h b/mindspore/ccsrc/ir/primitive.h index 1dd867fd1f..257302c0c4 100644 --- a/mindspore/ccsrc/ir/primitive.h +++ b/mindspore/ccsrc/ir/primitive.h @@ -49,6 +49,8 @@ class PrimitivePy : public Primitive { void AddPyAttr(const py::str &name, const py::object &obj); py::dict GetAttrDict(); + void set_hook(const py::function &hook) { hook_ = hook; } + py::function hook() const { return hook_; } const bool parse_info_ = true; const py::object &GetPyObj() const { return python_obj_; } @@ -56,6 +58,7 @@ class PrimitivePy : public Primitive { private: py::object python_obj_; + py::function hook_; std::vector signatures_; }; diff --git a/mindspore/ccsrc/ir/primitive_base.h b/mindspore/ccsrc/ir/primitive_base.h index 78623f8542..b34c43d00e 100644 --- 
a/mindspore/ccsrc/ir/primitive_base.h +++ b/mindspore/ccsrc/ir/primitive_base.h @@ -89,11 +89,8 @@ class Primitive : public Named { return iter == attrs_.cend() ? nullptr : iter->second; } - void set_hook(const py::function &hook) { hook_ = hook; } - py::function hook() const { return hook_; } - const std::unordered_map &attrs() const { return attrs_; } - std::unordered_map &evaluate_added_attrs() { return evaluate_added_attrs_; } + const std::unordered_map &evaluate_added_attrs() const { return evaluate_added_attrs_; } // if Primitive has any attribute, for Primitives like scalar_add, return, etc, don't have any attribute. bool HasAttr() const { return !attrs_.empty(); } @@ -124,7 +121,6 @@ class Primitive : public Named { private: std::string instance_name_; - py::function hook_; bool is_base_; bool has_signature_; PrimType prim_type_; @@ -145,7 +141,10 @@ struct PrimitiveEqual { }; struct PrimitiveHasher { - std::size_t operator()(PrimitivePtr const &prim) const { return prim->Hash(); } + std::size_t operator()(PrimitivePtr const &prim) const { + MS_EXCEPTION_IF_NULL(prim); + return prim->Hash(); + } }; } // namespace mindspore #endif // MINDSPORE_CCSRC_IR_PRIMITIVE_BASE_H_ diff --git a/mindspore/ccsrc/ir/tensor.cc b/mindspore/ccsrc/ir/tensor.cc index 566f9396e6..e5212e922d 100644 --- a/mindspore/ccsrc/ir/tensor.cc +++ b/mindspore/ccsrc/ir/tensor.cc @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -28,9 +29,8 @@ #include "pipeline/static_analysis/abstract_value.h" namespace mindspore { - namespace tensor { - +static uint64_t count = 0; void DataBuf2Contiguous(const py::array &src, py::array *const dest) { if (dest == nullptr) { MS_LOG(EXCEPTION) << "Failed to copy data to a contiguous buffer as dest is nullptr!"; @@ -81,6 +81,7 @@ Tensor::Tensor(const Tensor &tensor, const TypePtr &data_type) : MetaTensor(tensor), device_address_(tensor.device_address_) { init(tensor.data_, data_type); dirty_ = tensor.is_dirty(); + id_ = tensor.id(); } 
Tensor &Tensor::operator=(const Tensor &tensor) { @@ -89,9 +90,14 @@ Tensor &Tensor::operator=(const Tensor &tensor) { dirty_ = tensor.is_dirty(); device_address_ = tensor.device_address(); data_ = tensor.data_; + id_ = tensor.id(); } return *this; } +Tensor &Tensor::AssignValue(const Tensor &tensor) { + *this = tensor; + return *this; +} bool Tensor::operator==(const Tensor &tensor) const { return (MetaTensor::operator==(tensor) && data_ == tensor.data_); @@ -208,6 +214,7 @@ void Tensor::init(const py::array &input, const TypeId &data_type) { data_ = input; } dirty_ = true; + id_ = std::to_string((uintptr_t)(this)) + std::to_string(count++); } void Tensor::init(TypeId data_type, const std::vector &shape, py::array *const data) { @@ -254,6 +261,7 @@ void Tensor::init(TypeId data_type, const std::vector &shape, py::array *co MS_LOG(EXCEPTION) << "Cannot construct Tensor because of unsupported data type: " << data_type << "."; break; } + id_ = std::to_string((uintptr_t)(this)) + std::to_string(count++); } TypePtr Tensor::SetDtype(const TypePtr type_ptr) { @@ -382,6 +390,28 @@ REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) { .def(py::init(), py::arg("input"), py::arg("dtype") = nullptr) .def(py::init(), py::arg("input"), py::arg("dtype") = nullptr) .def_readonly(PYTHON_TENSOR_FLAG, &Tensor::parse_info_) + .def_property_readonly("dtype", &Tensor::Dtype, R"mydelimiter( + Get the tensor's data type. + + Returns: + type, the data type of tensor. + + Examples: + >>> data = mindspore.Tensor(np.ones((2, 1), np.int32)) + >>> data.dtype + Int32 + )mydelimiter") + .def_property_readonly("shape", &Tensor::GetPyTupleShape, R"mydelimiter( + Get the tensor's shape. + + Returns: + tuple[int], the shape of tensor. + + Examples: + >>> data = mindspore.Tensor(np.ones((3, 3))) + >>> data.shape() + (3, 3) + )mydelimiter") .def("asnumpy", &Tensor::data_sync, R"mydelimiter( Convert tensor to numpy.ndarray. 
@@ -435,17 +465,6 @@ REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) { >>> data.dim() 2 )mydelimiter") - .def("dtype", &Tensor::Dtype, R"mydelimiter( - Get the tensor's data type. - - Returns: - type, the data type of tensor. - - Examples: - >>> data = mindspore.Tensor(np.ones((2, 1), np.int32)) - >>> data.dtype() - Int32 - )mydelimiter") .def("set_dtype", &Tensor::SetDtype, R"mydelimiter( Set the tensor's data type. @@ -457,16 +476,18 @@ REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) { >>> data.set_dtype(mindspore.int32) mindspore.int32 )mydelimiter") - .def("shape", &Tensor::GetPyTupleShape, R"mydelimiter( - Get the tensor's shape. + .def("assign_value", &Tensor::AssignValue, R"mydelimiter( + Assign another tensor value to this. - Returns: - tuple[int], the shape of tensor. + Arg: + value (:class:`mindspore.tensor`): The value tensor. Examples: - >>> data = mindspore.Tensor(np.ones((3, 3))) - >>> data.shape() - (3, 3) + >>> data = mindspore.Tensor(np.ones((1, 2), np.float32)) + >>> data2 = mindspore.Tensor(np.ones((2, 2), np.float32)) + >>> data.assign_value(data2) + >>> data.shape + (2, 2) )mydelimiter") .def("__str__", &Tensor::ToString) .def("__repr__", &Tensor::ToStringRepr) @@ -485,10 +506,86 @@ REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) { })); (void)py::class_>(*m, "MetaTensor") .def(py::init>(), py::arg("dtype"), py::arg("shape")) + .def(py::pickle( + [](const MetaTensor &t) { // __getstate__ + /* Return a tuple that fully encodes the state of the object */ + return py::make_tuple(static_cast(t.data_type()), t.shape()); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 2) { + throw std::runtime_error("Invalid state!"); + } + /* Create a new C++ instance */ + MetaTensor tensor(TypeId(t[0].cast()), t[1].cast>()); + return tensor; + })) .def_readonly(PYTHON_META_TENSOR_FLAG, &MetaTensor::parse_info_) - .def("dtype", &MetaTensor::Dtype, "Get the MetaTensor's dtype.") - .def("shape", &MetaTensor::shape, "Get the 
MetaTensor's shape."); + .def_property_readonly("dtype", &MetaTensor::Dtype, "Get the MetaTensor's dtype.") + .def_property_readonly("shape", &MetaTensor::shape, "Get the MetaTensor's shape."); })); - } // namespace tensor + +namespace inference { +MSTensor *MSTensor::CreateTensor(TypeId data_type, const std::vector &shape) { + return new Tensor(data_type, shape); +} + +Tensor::Tensor() { this->tensor_impl_ = std::make_shared(); } + +Tensor::Tensor(TypeId data_type, const std::vector &shape) { + this->tensor_impl_ = std::make_shared(data_type, shape); +} + +Tensor::Tensor(std::shared_ptr tensor_ptr) { this->tensor_impl_ = std::move(tensor_ptr); } + +TypeId Tensor::data_type() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->data_type(); +} + +TypeId Tensor::set_data_type(TypeId data_type) { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->set_data_type(data_type); +} + +std::vector Tensor::shape() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->shape(); +} + +size_t Tensor::set_shape(const std::vector &shape) { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->set_shape(shape); +} + +int Tensor::DimensionSize(size_t index) const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->DimensionSize(index); +} + +int Tensor::ElementsNum() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->ElementsNum(); +} + +std::size_t Tensor::hash() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->hash(); +} + +std::shared_ptr Tensor::tensor() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_; +} + +size_t Tensor::Size() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->data().nbytes(); +} + +void *Tensor::MutableData() const { + MS_ASSERT(this->tensor_impl_ != nullptr); + return this->tensor_impl_->data_c(true); +} +} // namespace 
inference } // namespace mindspore diff --git a/mindspore/ccsrc/ir/tensor.h b/mindspore/ccsrc/ir/tensor.h index 690fb83f55..1ce657143b 100644 --- a/mindspore/ccsrc/ir/tensor.h +++ b/mindspore/ccsrc/ir/tensor.h @@ -27,6 +27,7 @@ #include "Eigen/Core" #include "device/device_address.h" #include "ir/meta_tensor.h" +#include "include/ms_tensor.h" #include "utils/log_adapter.h" namespace py = pybind11; @@ -34,9 +35,7 @@ namespace py = pybind11; using float16 = Eigen::half; namespace pybind11 { - namespace detail { - // Similar to enums in `pybind11/numpy.h`. Determined by doing: // python3 -c 'import numpy as np; print(np.dtype(np.float16).num)' constexpr int NPY_FLOAT16 = 23; @@ -85,7 +84,6 @@ template <> struct type_caster : public npy_scalar_caster { static constexpr auto name = "float16"; }; - } // namespace detail } // namespace pybind11 @@ -93,10 +91,9 @@ using mindspore::device::DeviceAddress; using DeviceAddressPtr = std::shared_ptr; // brief mindspore namespace. // -// mindspore namespace is the top level namespace of Mindsporeession project. +// mindspore namespace is the top level namespace of MindSpore project. // Other namespace should be a sub namespace of mindspore namespace in the ME project. namespace mindspore { - // brief mindspore::tensor namespace // // A sub namespace in ME to support tensor related definition. @@ -177,6 +174,9 @@ class Tensor : public MetaTensor { // It is different from 'operator==' which just compare shape/type/address, it do real value comparison. 
bool ValueEqual(const Tensor &other) const; + // assgin value to this tensor + Tensor &AssignValue(const Tensor &tensor); + bool operator==(const Value &other) const override { if (other.isa()) { auto other_ = static_cast(other); @@ -219,6 +219,11 @@ class Tensor : public MetaTensor { // return The pointer to the object void *data_c(bool writable = false); + // brief Get Tensor data byte-size for c++ type + // + // return byte size of Tensor data + size_t Size() const { return this->data().nbytes(); } + // brief Get data type from tensor data. // // param buf The buffer info of the py::array data. @@ -263,16 +268,52 @@ class Tensor : public MetaTensor { DeviceAddressPtr device_address() const { return device_address_; } void set_device_address(const DeviceAddressPtr &device_address) { device_address_ = device_address; } py::array data_sync(); + std::string id() const { return id_; } private: bool dirty_{true}; + std::string id_{""}; DeviceAddressPtr device_address_{nullptr}; }; - using TensorPtr = std::shared_ptr; using TensorPtrList = std::vector>; - } // namespace tensor + +namespace inference { +class Tensor : public MSTensor { + public: + Tensor(); + + Tensor(TypeId data_type, const std::vector &shape); + + explicit Tensor(std::shared_ptr tensor_ptr); + + ~Tensor() = default; + + TypeId data_type() const override; + + TypeId set_data_type(const TypeId data_type) override; + + std::vector shape() const override; + + size_t set_shape(const std::vector &shape) override; + + int DimensionSize(size_t index) const override; + + int ElementsNum() const override; + + std::size_t hash() const override; + + std::shared_ptr tensor() const; + + size_t Size() const override; + + void *MutableData() const override; + + protected: + std::shared_ptr tensor_impl_; +}; +} // namespace inference } // namespace mindspore #endif // MINDSPORE_CCSRC_IR_TENSOR_H_ diff --git a/mindspore/ccsrc/ir/visitor.cc b/mindspore/ccsrc/ir/visitor.cc index efebe3124a..9e63f4f9c1 100644 --- 
a/mindspore/ccsrc/ir/visitor.cc +++ b/mindspore/ccsrc/ir/visitor.cc @@ -14,11 +14,10 @@ * limitations under the License. */ -#include "ir/visitor.h" #include "ir/func_graph.h" +#include "ir/visitor.h" namespace mindspore { -AnfNodePtr AnfVisitor::operator()(const opt::OptimizerPtr &, const AnfNodePtr &) { return nullptr; } void AnfVisitor::Visit(const AnfNodePtr &node) { node->accept(this); } void AnfVisitor::Visit(const CNodePtr &cnode) { diff --git a/mindspore/ccsrc/ir/visitor.h b/mindspore/ccsrc/ir/visitor.h index e771f7ad28..6dcf28249a 100644 --- a/mindspore/ccsrc/ir/visitor.h +++ b/mindspore/ccsrc/ir/visitor.h @@ -18,14 +18,12 @@ #define MINDSPORE_CCSRC_IR_VISITOR_H_ #include -#include "ir/anf.h" -#include "optimizer/opt.h" +#include "ir/optimizer_caller.h" namespace mindspore { using VisitFuncType = std::function; -class AnfVisitor { +class AnfVisitor : public OptimizerCaller { public: - virtual AnfNodePtr operator()(const opt::OptimizerPtr &, const AnfNodePtr &); virtual void Visit(const AnfNodePtr &); virtual void Visit(const CNodePtr &); virtual void Visit(const ValueNodePtr &); diff --git a/mindspore/ccsrc/kernel/CMakeLists.txt b/mindspore/ccsrc/kernel/CMakeLists.txt index 76e1631d57..ceea6b1a99 100644 --- a/mindspore/ccsrc/kernel/CMakeLists.txt +++ b/mindspore/ccsrc/kernel/CMakeLists.txt @@ -9,6 +9,10 @@ if (ENABLE_D) file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel_query.cc" "kernel_fusion.cc" + "akg/ascend/*.cc" + "akg/akg_kernel_build.cc" + "akg/akg_kernel_attrs_process.cc" + "akg/akg_kernel_metadata.cc" "tbe/*.cc" "aicpu/*.cc" "rts/*.cc" @@ -21,13 +25,19 @@ if (ENABLE_CPU) file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cpu/*.cc" ) + + if (NOT ENABLE_MPI) + list(REMOVE_ITEM CPU_SRC_LIST "cpu/allgather_cpu_kernel.cc") + list(REMOVE_ITEM CPU_SRC_LIST "cpu/reduce_scatter_cpu_kernel.cc") + list(REMOVE_ITEM CPU_SRC_LIST "cpu/embedding_look_up_comm_grad_cpu_kernel.cc") + endif () endif () if (ENABLE_GPU) 
file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cu" "akg/gpu/*.cc" - "akg/akgkernelbuild.cc" + "akg/akg_kernel_build.cc" "akg/akg_kernel_attrs_process.cc" ) diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc index d6217ff1cc..c83994b5f2 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc +++ b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.cc @@ -24,7 +24,7 @@ #include #include "device/kernel_runtime.h" #include "kernel/aicpu/aicpu_kernel_mod.h" -#include "kernel/akg/akgkernelbuild.h" +#include "kernel/akg/akg_kernel_build.h" #include "proto/tensor.pb.h" #include "proto/tensor_shape.pb.h" #include "proto/attr.pb.h" @@ -50,7 +50,13 @@ bool SetIOIputSize(const std::shared_ptr &anf_node, const size_t &input MS_LOG(EXCEPTION) << "anf_node is not CNode."; } auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (cnode->inputs().size() < (i + 1)) { + MS_LOG(ERROR) << "cnode inputs size " << cnode->inputs().size() << " is smaller than " << i + 1; + return false; + } auto input_node = cnode->inputs()[i + 1]; + MS_EXCEPTION_IF_NULL(input_node); if (input_node->isa()) { auto value_ptr = GetValueNode(input_node); auto value = GetValue(value_ptr); @@ -103,13 +109,13 @@ bool SetIOSize(const std::shared_ptr &anf_node, const std::shared_ptrSetOutputSizeList(output_size_list); - return true; } void ParseAttrValue(const std::string &type, const std::string &attr_name, const mindspore::ValuePtr &value, ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr) { MS_EXCEPTION_IF_NULL(node_attr); + MS_EXCEPTION_IF_NULL(value); if (type == "int") { auto attr_value = GetValue(value); (*node_attr)[attr_name].set_i(attr_value); @@ -146,6 +152,8 @@ void ParseAttrValue(const std::string &type, const std::string &attr_name, const } void SetNodeAttr(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { + MS_EXCEPTION_IF_NULL(anf_node); + 
MS_EXCEPTION_IF_NULL(proto); std::string op_name = AnfAlgo::GetCNodeName(anf_node); if (op_name == kInitDataSetQueue) { op_name = kInitData; @@ -161,15 +169,16 @@ void SetNodeAttr(const std::shared_ptr &anf_node, mindspore::NodeDef *p MS_EXCEPTION_IF_NULL(primitive); ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs(); for (const auto &attr_ptr : attrs_ptr) { + MS_EXCEPTION_IF_NULL(attr_ptr); std::string attr_name = attr_ptr->name(); auto value = primitive->GetAttr(attr_name); if (value != nullptr) { if (attr_name == kQueueName || attr_name == kSharedName) { attr_name = kChannelName; - } else if (attr_name == kSeed) { - attr_name = "seed"; - } else if (attr_name == kSeed2) { - attr_name = "seed2"; + } else if (attr_name == kSeed0) { + attr_name = kSeed; + } else if (attr_name == kSeed1) { + attr_name = kSeed2; } std::string type = attr_ptr->type(); ParseAttrValue(type, attr_name, value, node_attr); @@ -179,6 +188,8 @@ void SetNodeAttr(const std::shared_ptr &anf_node, mindspore::NodeDef *p } void SetNodeInputs(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { + MS_EXCEPTION_IF_NULL(proto); + MS_EXCEPTION_IF_NULL(anf_node); size_t input_num = AnfAlgo::GetInputTensorNum(anf_node); if (input_num == 0) { MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have input."; @@ -193,6 +204,7 @@ void SetNodeInputs(const std::shared_ptr &anf_node, mindspore::NodeDef int32_t input_data_type; if (input_type == kObjectTypeString) { auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); auto input_node = cnode->inputs()[input_index + 1]; auto value_ptr = GetValueNode(input_node); auto value = GetValue(value_ptr); @@ -203,19 +215,20 @@ void SetNodeInputs(const std::shared_ptr &anf_node, mindspore::NodeDef input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index); input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type); } + mindspore::TensorShape *tensorShape = 
node_inputs->mutable_tensor_shape(); for (auto item : input_shape) { mindspore::TensorShape_Dim *dim = tensorShape->add_dim(); dim->set_size((::google::protobuf::int64)item); } - node_inputs->set_tensor_type((mindspore::DataType)input_data_type); - node_inputs->set_mem_device("HBM"); } } void SetNodeOutputs(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { + MS_EXCEPTION_IF_NULL(proto); + MS_EXCEPTION_IF_NULL(anf_node); size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node); if (output_num == 0) { MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have output. "; @@ -224,63 +237,55 @@ void SetNodeOutputs(const std::shared_ptr &anf_node, mindspore::NodeDef for (size_t output_index = 0; output_index < output_num; output_index++) { ::mindspore::Tensor *node_outputs = proto->add_outputs(); + MS_EXCEPTION_IF_NULL(node_outputs); std::vector output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index); mindspore::TensorShape *tensorShape = node_outputs->mutable_tensor_shape(); + MS_EXCEPTION_IF_NULL(tensorShape); for (auto item : output_shape) { mindspore::TensorShape_Dim *dim = tensorShape->add_dim(); + MS_EXCEPTION_IF_NULL(dim); dim->set_size((::google::protobuf::int64)item); } - TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index); - int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type); node_outputs->set_tensor_type((mindspore::DataType)output_data_type); - node_outputs->set_mem_device("HBM"); } } void SetNodedefProto(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { - MS_LOG(INFO) << "SetNodedefProto entry"; MS_EXCEPTION_IF_NULL(anf_node); MS_EXCEPTION_IF_NULL(proto); - + MS_LOG(INFO) << "SetNodedefProto entry"; std::string op_name = AnfAlgo::GetCNodeName(anf_node); - if (op_name == "InitDataSetQueue") { - op_name = "InitData"; + if (op_name == kInitDataSetQueue) { + op_name = kInitData; } // set op name proto->set_op(op_name); - // set inputs tensor 
SetNodeInputs(anf_node, proto); - // set outputs tensor SetNodeOutputs(anf_node, proto); - // set node attr SetNodeAttr(anf_node, proto); - MS_LOG(INFO) << "SetNodedefProto end!"; } bool CreateNodeDefBytes(const std::shared_ptr &anf_node, const std::shared_ptr &kernel_mod_ptr) { - MS_LOG(INFO) << "CreateNodeDefBytes entry"; - MS_EXCEPTION_IF_NULL(anf_node); MS_EXCEPTION_IF_NULL(kernel_mod_ptr); - mindspore::NodeDef proto; + MS_EXCEPTION_IF_NULL(anf_node); + MS_LOG(INFO) << "CreateNodeDefBytes entry"; + mindspore::NodeDef proto; SetNodedefProto(anf_node, &proto); - std::string nodeDefStr; if (!proto.SerializeToString(&nodeDefStr)) { MS_LOG(ERROR) << "Serialize nodeDef to string failed."; return false; } - kernel_mod_ptr->SetNodeDef(nodeDefStr); - MS_LOG(INFO) << "CreateNodeDefBytes end!"; return true; } @@ -288,8 +293,8 @@ bool CreateNodeDefBytes(const std::shared_ptr &anf_node, KernelModPtr AicpuOpBuild(const std::shared_ptr &anf_node) { MS_EXCEPTION_IF_NULL(anf_node); std::string op_name = AnfAlgo::GetCNodeName(anf_node); - if (op_name == "InitDataSetQueue") { - op_name = "InitData"; + if (op_name == kInitDataSetQueue) { + op_name = kInitData; } auto kernel_mod_ptr = std::make_shared(); MS_EXCEPTION_IF_NULL(kernel_mod_ptr); diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc index 7875baaf0e..2213f176cc 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc +++ b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc @@ -110,8 +110,8 @@ bool AicpuOpKernelMod::Launch(const std::vector &inputs, const std:: } CreateCpuKernelInfo(inputs, outputs); - if (node_name_ == "TopK") { - node_name_ = "TopKV2"; + if (node_name_ == kTopK) { + node_name_ = kTopKV2; } MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_ << ", args_size:" << args_.length(); @@ -141,8 +141,8 @@ std::vector AicpuOpKernelMod::GenTask(const std::vector (void)std::transform(std::begin(outputs), 
std::end(outputs), std::back_inserter(output_data_addrs), [](const AddressPtr &output) -> void * { return output->addr; }); - if (node_name_ == "TopK") { - node_name_ = "TopKV2"; + if (node_name_ == kTopK) { + node_name_ = kTopKV2; } AicpuTaskInfoPtr task_info_ptr = make_shared( stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs); diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_util.h b/mindspore/ccsrc/kernel/aicpu/aicpu_util.h index 3938cfbdea..f2092abbe2 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_util.h +++ b/mindspore/ccsrc/kernel/aicpu/aicpu_util.h @@ -37,9 +37,12 @@ constexpr auto kSharedName = "shared_name"; constexpr auto kShapes = "shapes"; constexpr auto kTypes = "types"; constexpr auto kQueueName = "queue_name"; - -constexpr auto kSeed = "Seed0"; -constexpr auto kSeed2 = "Seed1"; +constexpr auto kSeed = "seed"; +constexpr auto kSeed0 = "Seed0"; +constexpr auto kSeed1 = "Seed1"; +constexpr auto kSeed2 = "seed2"; +constexpr auto kTopK = "TopK"; +constexpr auto kTopKV2 = "TopKV2"; struct AicpuParamHead { uint32_t length; // Total length: include cunstom message diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc b/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc index c9ff41dc55..3a0cc3eb25 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc +++ b/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc @@ -79,6 +79,10 @@ void SetAkgAttrsForCast(const AnfNodePtr &anf_node) { dst_type = "float32"; } else if (output_type == kFloat16->type_id()) { dst_type = "float16"; + } else if (output_type == kInt32->type_id()) { + dst_type = "int32"; + } else { + MS_LOG(WARNING) << "Unknown cast_to type: " << TypeIdToType(output_type)->ToString(); } AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node); } diff --git a/mindspore/ccsrc/kernel/akg/akgkernelbuild.cc b/mindspore/ccsrc/kernel/akg/akg_kernel_build.cc similarity index 78% rename from mindspore/ccsrc/kernel/akg/akgkernelbuild.cc 
rename to mindspore/ccsrc/kernel/akg/akg_kernel_build.cc index c0759172a5..1f88bbb89a 100644 --- a/mindspore/ccsrc/kernel/akg/akgkernelbuild.cc +++ b/mindspore/ccsrc/kernel/akg/akg_kernel_build.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/akg/akgkernelbuild.h" +#include "kernel/akg/akg_kernel_build.h" #include #include #include @@ -43,7 +43,9 @@ namespace kernel { constexpr int ME_MAX_KERNEL_NAME_LENGTH = 200; constexpr int32_t ARGS_SIZE = 1; constexpr auto kCompileWithJsonFunc = "compilewithjson"; + // json key +constexpr auto kOpDesc = "op_desc"; constexpr auto kInputDesc = "input_desc"; constexpr auto kShape = "shape"; constexpr auto kDataType = "data_type"; @@ -51,13 +53,24 @@ constexpr auto kOutputDesc = "output_desc"; constexpr auto kName = "name"; constexpr auto kTensorName = "tensor_name"; constexpr auto kValue = "value"; -constexpr auto KInpputNames = "input_names"; +constexpr auto KDynInputSizes = "dyn_input_sizes"; +constexpr auto KInputNames = "input_names"; constexpr auto KInput = "input"; constexpr auto KDtype = "dtype"; -int AkgKernelBuild::op_cnt_ = 0; -std::mutex AkgKernelBuild::op_cnt_mtx_; +namespace { +template +std::string Vector2Str(const std::vector &inputs) { + if (!inputs.empty()) { + std::ostringstream oss; + (void)std::copy(inputs.begin(), inputs.end() - 1, std::ostream_iterator(oss, ", ")); + oss << inputs.back(); + return oss.str(); + } + return ""; +} +} // namespace -std::string PyObjectToStr(PyObject *const PyObj) { +std::string AkgKernelBuild::PyObjectToStr(PyObject *const PyObj) { char *pChar = nullptr; std::string str_res; if (PyObj == nullptr) { @@ -76,6 +89,72 @@ std::string PyObjectToStr(PyObject *const PyObj) { return str_res; } +std::string GetTensorName(const nlohmann::json &node_json, const std::string &tag, + const std::pair &position) { + if (node_json.count(tag) == 0) { + MS_LOG(ERROR) << "Node [" << node_json.dump() << "] has no key [" << tag << "]."; + return ""; + } + + auto const 
&tag_desc = node_json[tag]; + nlohmann::json first_index; + if (tag == kOutputDesc) { + first_index = tag_desc; + } else if (!tag_desc.is_array() || tag_desc.size() <= position.first) { + MS_LOG(ERROR) << "Node [" << tag_desc.dump() << "] has no enough value [" << position.first << "]."; + return ""; + } else { + first_index = tag_desc[position.first]; + } + + if (!first_index.is_array() || first_index.size() <= position.second) { + MS_LOG(ERROR) << "Node [" << first_index.dump() << "] has no enough value [" << position.second << "]."; + return ""; + } + auto const &second_index = first_index[position.second]; + if (second_index.count(kTensorName) == 0) { + MS_LOG(ERROR) << "Node [" << second_index.dump() << "] has no key [" << kTensorName << "]."; + return ""; + } + + return second_index[kTensorName]; +} + +void SetTensorName(const std::string &tag, const std::string &new_name, const std::pair &position, + nlohmann::json *const node_json) { + MS_EXCEPTION_IF_NULL(node_json); + if (node_json->count(tag) == 0) { + MS_LOG(ERROR) << "Node [" << node_json->dump() << "] has no key [" << tag << "]."; + return; + } + + nlohmann::json *tag_desc = &((*node_json)[tag]); + nlohmann::json *first_index; + if (tag == kOutputDesc) { + first_index = tag_desc; + } else if (!tag_desc->is_array() || tag_desc->size() <= position.first) { + MS_LOG(ERROR) << "Node [" << tag_desc->dump() << "] has no enough value [" << position.first << "]."; + return; + } else { + first_index = &((*tag_desc)[position.first]); + } + + if (!first_index->is_array() || first_index->size() <= position.second) { + MS_LOG(ERROR) << "Node [" << first_index->dump() << "] has no enough value [" << position.second << "]."; + return; + } + nlohmann::json *second_index = &((*first_index)[position.second]); + if (second_index->count(kTensorName) == 0) { + MS_LOG(ERROR) << "Node [" << second_index->dump() << "] has no key [" << kTensorName << "]."; + return; + } + (*second_index)[kTensorName] = new_name; + return; +} 
+ +int AkgKernelBuild::op_cnt_ = 0; +std::mutex AkgKernelBuild::op_cnt_mtx_; + std::string AkgKernelBuild::GetProcessor(const AnfNodePtr &anf_node) { MS_EXCEPTION_IF_NULL(anf_node); std::string device; @@ -187,10 +266,7 @@ bool AkgKernelBuild::CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::j for (size_t input_i = 0; input_i < input_tensor_num; input_i++) { // dtype : float16 auto type_id = AnfAlgo::GetInputDeviceDataType(anf_node, real_input_index); - TypePtr type_ptr = TypeIdToType(type_id); - MS_EXCEPTION_IF_NULL(type_ptr); - std::string dtype = type_ptr->ToString(); - dtype = Dtype2String(dtype); + std::string dtype = TypeId2String(type_id); if (dtype.empty()) { MS_LOG(ERROR) << "Op [" << op_name << "] input [" << input_i << "] data type is null. "; return false; @@ -198,13 +274,23 @@ bool AkgKernelBuild::CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::j nlohmann::json input_desc_json; input_desc_json[kDataType] = dtype; input_desc_json[kName] = op_input_name; - input_desc_json[kTensorName] = - op_input_name + "_" + std::to_string(real_input_index) + "_" + std::to_string(input_i); - input_desc_json[kShape] = AnfAlgo::GetInputDeviceShape(anf_node, real_input_index); + input_desc_json[kTensorName] = "input_" + std::to_string(GetInputTensorIdxInc(anf_node, real_input_index)); + auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, real_input_index); + if (GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) { + MS_LOG(WARNING) << "we take input[" << real_input_index << "] of [" << anf_node->DebugString(2) + << "] as const tensor, shape: [" << Vector2Str(input_shape) + << "], value: " << input_desc_json[kValue]; + + input_shape.clear(); + } + if (input_shape.empty()) { + input_shape.push_back(1); + } + input_desc_json[kShape] = input_shape; input_list.emplace_back(input_desc_json); + real_input_index++; } inputs_json->emplace_back(input_list); - real_input_index++; } return true; } @@ -220,10 +306,7 @@ bool 
AkgKernelBuild::CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann:: for (size_t i = 0; i < output_tensor_num; i++) { nlohmann::json output_json; auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, i); - TypePtr type_ptr = TypeIdToType(type_id); - MS_EXCEPTION_IF_NULL(type_ptr); - std::string dtype = type_ptr->ToString(); - dtype = Dtype2String(dtype); + std::string dtype = TypeId2String(type_id); if (dtype.empty()) { MS_LOG(ERROR) << "Op [" << op_name << "] output [" << i << "] data type is null. "; return false; @@ -232,7 +315,7 @@ bool AkgKernelBuild::CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann:: std::string output_name = outputs[i]->name(); output_json[kDataType] = dtype; output_json[kName] = output_name; - output_json[kTensorName] = output_name + "_" + std::to_string(i); + output_json[kTensorName] = "output_" + std::to_string(i) + "_" + std::to_string(GetOutputTensorIdxInc()); output_json[kShape] = AnfAlgo::GetOutputDeviceShape(anf_node, i); outputs_json->push_back(output_json); } @@ -358,15 +441,14 @@ bool AkgKernelBuild::GenerateSingleKernelJson(const AnfNodePtr &anf_node, const MS_EXCEPTION_IF_NULL(op_info_ptr); // get basic params from currentNodeOpDesc - (*node_json)["platform"] = "AKG"; (*node_json)[kName] = op_name; - (*node_json)["fusion_type"] = AnfAlgo::GetFusionType(anf_node); (*node_json)["impl_path"] = op_info_ptr->impl_path(); (*node_json)["process"] = AkgKernelBuild::GetProcessor(anf_node); + (*node_json)["composite"] = false; auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); MS_EXCEPTION_IF_NULL(primitive); - ValuePtr input_names_v = primitive->GetAttr(KInpputNames); + ValuePtr input_names_v = primitive->GetAttr(KInputNames); if (input_names_v == nullptr) { MS_LOG(ERROR) << "ApplyKernel has no input_names, op[" << op_name << "]."; return false; @@ -465,12 +547,12 @@ KernelPackPtr AkgKernelBuild::OpBuild(const std::string &node_json, const AnfNod (void)alarm(0); if (pRes == nullptr) { MS_LOG(ERROR) << "No ret got, 
failed to call function [" << kCompileWithJsonFunc << "], args:\n(" - << PyObjectToStr(pArg) << ")."; + << AkgKernelBuild::PyObjectToStr(pArg) << ")."; return nullptr; } if (PyObject_IsTrue(pRes) != 1) { MS_LOG(ERROR) << "Illegal ret, failed to call function [" << kCompileWithJsonFunc << "], args:\n(" - << PyObjectToStr(pArg) << ")."; + << AkgKernelBuild::PyObjectToStr(pArg) << ")."; return nullptr; } @@ -513,5 +595,29 @@ KernelPackPtr AkgKernelBuild::BuildByJson(const AnfNodePtr &anf_node, std::vecto << "]"; return kernel_pack; } + +size_t AkgKernelBuild::GetInputTensorIdxInc(const AnfNodePtr &anf_node, size_t input_idx) { + MS_EXCEPTION_IF_NULL(anf_node); + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (input_idx + 1 >= cnode->inputs().size()) { + MS_EXCEPTION(ArgumentError) << "input_idx [" << input_idx << "] is out of index of inputs of [" + << cnode->inputs().size() - 1 << "][" << cnode->DebugString() << "]"; + } + + auto input_node = cnode->input(input_idx + 1); + if (input_tensor_idx_.find(input_node) == input_tensor_idx_.end()) { + size_t index = input_tensor_idx_.size(); + input_tensor_idx_[input_node] = index; + } + + return input_tensor_idx_[input_node]; +} + +size_t AkgKernelBuild::GetOutputTensorIdxInc() { + size_t idx = output_tensor_idx_++; + return idx; +} + } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/akg/akgkernelbuild.h b/mindspore/ccsrc/kernel/akg/akg_kernel_build.h similarity index 70% rename from mindspore/ccsrc/kernel/akg/akgkernelbuild.h rename to mindspore/ccsrc/kernel/akg/akg_kernel_build.h index f8127843bd..d32bd48ce6 100644 --- a/mindspore/ccsrc/kernel/akg/akgkernelbuild.h +++ b/mindspore/ccsrc/kernel/akg/akg_kernel_build.h @@ -32,29 +32,45 @@ namespace mindspore { namespace kernel { class AkgKernelBuild { public: - AkgKernelBuild() = default; + AkgKernelBuild() { + input_tensor_idx_ = {}; + output_tensor_idx_ = 0; + } ~AkgKernelBuild() = default; KernelPackPtr BuildByJson(const 
AnfNodePtr &anf_node, std::vector *const input_size, std::vector *const output_size); + static std::string GetProcessor(const AnfNodePtr &anf_node); + static std::string PyObjectToStr(PyObject *const PyObj); - private: + protected: bool CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const inputs_json); bool CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const outputs_json); bool CreateAttrDescJson(const AnfNodePtr &anf_node, const std::string &op_name, const std::shared_ptr &op_info, nlohmann::json *const attrs_json); + KernelPackPtr OpBuild(const std::string &node_json, const AnfNodePtr &anf_node); + int GetOpCntInc(); + size_t GetInputTensorIdxInc(const AnfNodePtr &anf_node, size_t input_idx); + size_t GetOutputTensorIdxInc(); bool GenerateSingleKernelJson(const AnfNodePtr &anf_node, const std::string &op_name, nlohmann::json *const node_json); - KernelPackPtr OpBuild(const std::string &node_json, const AnfNodePtr &anf_node); - int GetOpCntInc(); - std::string GetProcessor(const AnfNodePtr &anf_node); static int op_cnt_; // lock for variable fusionOpCnt in singleton mode static std::mutex op_cnt_mtx_; std::string json_name_; std::string json_info_; + std::unordered_map input_tensor_idx_; + size_t output_tensor_idx_; }; + +bool GetIOSize(const nlohmann::json &node_json, std::vector *const input_size, + std::vector *const output_size); +void SetTensorName(const std::string &tag, const std::string &new_name, const std::pair &position, + nlohmann::json *const node_json); +std::string GetTensorName(const nlohmann::json &node_json, const std::string &tag, + const std::pair &position); + } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.cc b/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.cc new file mode 100644 index 0000000000..3515add1e0 --- /dev/null +++ b/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.cc @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + 
* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/akg/akg_kernel_metadata.h" +#include +#include "session/anf_runtime_algorithm.h" +#include "kernel/oplib/oplib.h" +#include "kernel/common_utils.h" + +namespace mindspore { +namespace kernel { +void AkgMetadataInfo(const CNodePtr &kernel_node, + std::vector> *const kernel_info_list) { + MS_EXCEPTION_IF_NULL(kernel_node); + MS_EXCEPTION_IF_NULL(kernel_info_list); + + std::string op_name = AnfAlgo::GetCNodeName(kernel_node); + for (size_t i = 0; i < support_devices.size(); i++) { + auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG); + if (op_info_ptr == nullptr) { + continue; + } + + if (!ParseMetadata(kernel_node, op_info_ptr, Processor(i), kernel_info_list)) { + MS_LOG(WARNING) << "Akg parsed metadata of op[" << op_name << "], device[" << support_devices[i] << "] failed."; + } else { + MS_LOG(DEBUG) << "Akg parsed metadata of op[" << op_name << "], device[" << support_devices[i] << "]."; + break; + } + } + + if (kernel_info_list->empty()) { + MS_LOG(WARNING) << "Akg dose not has metadata of op[" << op_name << "]."; + } +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.h b/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.h similarity index 71% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_select.h rename to mindspore/ccsrc/kernel/akg/akg_kernel_metadata.h index 3ce66b5148..5e329f0080 100644 --- 
a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.h +++ b/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,19 +14,18 @@ * limitations under the License. */ -#ifndef MINDSPORE_TBE_KERNEL_SELECT_H -#define MINDSPORE_TBE_KERNEL_SELECT_H +#ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_ +#define MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_ #include #include +#include #include -#include "kernel/oplib/opinfo.h" #include "kernel/kernel_build_info.h" namespace mindspore { namespace kernel { -void TbeMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list); +void AkgMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list); } // namespace kernel } // namespace mindspore - -#endif // MINDSPORE_TBE_KERNEL_SELECT_H +#endif // MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_ diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc new file mode 100644 index 0000000000..454b8052ab --- /dev/null +++ b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc @@ -0,0 +1,385 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernel/akg/ascend/akg_ascend_kernel_build.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include "ir/dtype.h" +#include "ir/func_graph.h" +#include "kernel/kernel.h" +#include "kernel/common_utils.h" +#include "kernel/tbe/tbe_utils.h" +#include "kernel/akg/ascend/akg_ascend_kernel_mod.h" +#include "kernel/akg/akg_kernel_attrs_process.h" +#include "session/anf_runtime_algorithm.h" + +namespace mindspore { +namespace kernel { + +constexpr int32_t PARALLEL_ARGS_SIZE = 3; +constexpr int32_t PROCESS_NUM = 16; +constexpr int32_t TIME_OUT = 300; + +constexpr auto kOpDesc = "op_desc"; +constexpr auto kShape = "shape"; +constexpr auto kDataType = "data_type"; +constexpr auto kInputDesc = "input_desc"; +constexpr auto kOutputDesc = "output_desc"; +constexpr auto kTensorName = "tensor_name"; +constexpr auto kCompileAkgKernelParallelFunc = "compile_akg_kernel_parallel"; +constexpr auto kMultiProcModule = "mindspore._extends.parallel_compile.akg_compiler.multi_process_compiler"; + +bool AkgAscendKernelBuilder::CollectJson(const AnfNodePtr &anf_node) { + MS_EXCEPTION_IF_NULL(anf_node); + std::string op_name = AnfAlgo::GetCNodeName(anf_node); + MS_LOG(INFO) << "AKG start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; + auto it = kAkgKernelAttrsProcessMap.find(op_name); + if (it != kAkgKernelAttrsProcessMap.end()) { + it->second(anf_node); + } + MS_LOG(INFO) << "Akg start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; + nlohmann::json node_json; + if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { + MS_LOG(ERROR) << "Op[" << op_name << "] create single kernel json failed."; + } + + kernel_json_ = node_json.dump(); + + if (!GetIOSize(node_json, &input_size_list_, &output_size_list_)) { + MS_LOG(ERROR) << "Cal mem size failed."; + return false; + } + + return true; +} + +bool AkgAscendKernelBuilder::CollectFusedJson(const 
std::vector &anf_nodes, + const std::vector &input_list, + const std::vector &output_list) { + if (anf_nodes.empty() || input_list.empty()) { + MS_LOG(ERROR) << "Invalid input size, anf_nodes [" << anf_nodes.size() << "], input_list [" << input_list.size() + << "]."; + return false; + } + MS_LOG(INFO) << "anf_nodes [" << output_list.size() << "], input_list [" << anf_nodes.size() << "], output_list [" + << input_list.size() << "]."; + + std::map node_json_map; + + for (auto const &anf_node : anf_nodes) { + MS_EXCEPTION_IF_NULL(anf_node); + std::string op_name = AnfAlgo::GetCNodeName(anf_node); + if (!AnfAlgo::IsRealKernel(anf_node)) { + MS_LOG(ERROR) << "Invalid anf node to build [" << anf_node->fullname_with_scope() << "]."; + return false; + } + auto it = kAkgKernelAttrsProcessMap.find(op_name); + if (it != kAkgKernelAttrsProcessMap.end()) { + it->second(anf_node); + } + + nlohmann::json node_json; + if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { + MS_LOG(ERROR) << "Op [" << op_name << "] create single kernel json failed."; + return false; + } + // No need for composite op. + node_json.erase("id"); + node_json.erase("op"); + node_json.erase("composite"); + + auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); + MS_EXCEPTION_IF_NULL(primitive); + + if (primitive->GetAttr("fusion") != nullptr) { + node_json["fusion"] = primitive->GetAttr("fusion")->ToString(); + } + + node_json_map[anf_node] = node_json; + } + + for (auto const &anf_node : anf_nodes) { + std::vector dyn_input_sizes; + auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); + MS_EXCEPTION_IF_NULL(primitive); + + if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) { + dyn_input_sizes = GetValue>(primitive->GetAttr(kAttrDynInputSizes)); + } + + bool is_dynamic_input = !dyn_input_sizes.empty(); + size_t input_num = is_dynamic_input ? 
dyn_input_sizes.size() : AnfAlgo::GetInputTensorNum(anf_node); + size_t real_input_index = 0; + for (size_t i = 0; i < input_num; ++i) { + size_t input_tensor_num = is_dynamic_input ? IntToSize(dyn_input_sizes[i]) : 1; + for (size_t j = 0; j < input_tensor_num; ++j) { + auto tmp_input = GetKernelInput(anf_node, real_input_index); + std::string tensor_name = GetTensorName(node_json_map[anf_node], kInputDesc, std::make_pair(i, j)); + if (node_json_map.find(tmp_input.first) != node_json_map.end()) { + std::string new_tensor_name = + GetTensorName(node_json_map[tmp_input.first], kOutputDesc, std::make_pair(0, tmp_input.second)); + SetTensorName(kInputDesc, new_tensor_name, std::make_pair(i, j), &(node_json_map[anf_node])); + MS_LOG(DEBUG) << "Update [" << real_input_index << "] input [" << tensor_name << "] of [" + << anf_node->fullname_with_scope() << "] to [" << tmp_input.second << "] output [" + << new_tensor_name << "] of [" << tmp_input.first->fullname_with_scope() << "]."; + } else { + MS_LOG(DEBUG) << "[" << real_input_index << "] input " << tensor_name << "] of [" + << anf_node->fullname_with_scope() << "] is out input."; + } + real_input_index++; + } + } + } + + nlohmann::json fused_node_json; + std::vector node_json_desc; + std::transform(anf_nodes.begin(), anf_nodes.end(), std::back_inserter(node_json_desc), + [&node_json_map](const AnfNodePtr &anf_node) { return node_json_map[anf_node]; }); + fused_node_json[kOpDesc] = node_json_desc; + + nlohmann::json inputs_json; + auto input_index = GetInputIndex(anf_nodes, input_list); + for (size_t i = 0; i < input_index.size(); ++i) { + auto tmp_input = input_index[i]; + auto type_id = AnfAlgo::GetInputDeviceDataType(tmp_input.first, tmp_input.second.first); + std::string dtype = TypeId2String(type_id); + nlohmann::json input_desc_json; + input_desc_json[kTensorName] = GetTensorName(node_json_map[tmp_input.first], kInputDesc, tmp_input.second); + input_desc_json[kDataType] = dtype; + input_desc_json[kShape] = 
AnfAlgo::GetInputDeviceShape(tmp_input.first, tmp_input.second.first); + inputs_json.emplace_back(std::vector{input_desc_json}); + } + fused_node_json[kInputDesc] = inputs_json; + + nlohmann::json outputs_json; + auto output_index = GetOutputIndex(anf_nodes, input_list, output_list); + for (size_t i = 0; i < output_index.size(); ++i) { + auto tmp_output = output_index[i]; + bool found = false; + nlohmann::json output_desc_json; + for (size_t input_i = 0; input_i < input_list.size(); ++input_i) { + if (tmp_output.first == input_list[input_i]) { + output_desc_json = inputs_json[input_i][0]; + found = true; + break; + } + } + if (!found) { + auto type_id = AnfAlgo::GetOutputDeviceDataType(tmp_output.first, tmp_output.second); + std::string dtype = TypeId2String(type_id); + output_desc_json[kTensorName] = + GetTensorName(node_json_map[tmp_output.first], kOutputDesc, std::make_pair(0, tmp_output.second)); + output_desc_json[kDataType] = dtype; + auto output_shape = AnfAlgo::GetOutputDeviceShape(tmp_output.first, tmp_output.second); + if (output_shape.empty()) { + output_shape.push_back(1); + } + output_desc_json[kShape] = output_shape; + } + outputs_json.emplace_back(output_desc_json); + } + fused_node_json[kOutputDesc] = outputs_json; + + size_t hash_id = std::hash()(fused_node_json.dump()); + json_name_ = "Fused_"; + auto fg = anf_nodes[0]->func_graph(); + MS_EXCEPTION_IF_NULL(fg); + auto attr_val = fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); + if (attr_val != nullptr) { + auto fg_attr = GetValue(attr_val); + (void)json_name_.append(fg_attr).append("_"); + } + (void)json_name_.append(std::to_string(hash_id)); + fused_node_json["composite_graph"] = fg->ToString(); + fused_node_json["op"] = json_name_; + fused_node_json["platform"] = "AKG"; + fused_node_json["process"] = "aicore"; + fused_node_json["composite"] = true; + + kernel_json_ = fused_node_json.dump(); + + if (!GetIOSize(fused_node_json, &input_size_list_, &output_size_list_)) { + MS_LOG(ERROR) << "Cal mem size 
failed."; + return false; + } + + return true; +} + +void GenParallelCompileFuncArgs(const std::vector &kernel_jsons, PyObject **p_args) { + MS_EXCEPTION_IF_NULL(p_args); + *p_args = PyTuple_New(PARALLEL_ARGS_SIZE); + + PyObject *arg1 = PyList_New(kernel_jsons.size()); + for (int i = 0; i < PyList_Size(arg1); ++i) { + PyList_SetItem(arg1, i, Py_BuildValue("s", kernel_jsons[i].c_str())); + } + PyObject *arg2 = Py_BuildValue("i", PROCESS_NUM); + PyObject *arg3 = Py_BuildValue("i", TIME_OUT); + + (void)PyTuple_SetItem(*p_args, 0, arg1); + (void)PyTuple_SetItem(*p_args, 1, arg2); + (void)PyTuple_SetItem(*p_args, 2, arg3); +} + +bool AkgOpParallelBuild(const std::vector> &build_args) { + // Remove cached nodes, gether unique nodes, and collect repeated nodes which need postprecess. + std::vector jsons; + std::unordered_set json_name_set; + std::vector> repeat_nodes; + for (const auto &[builder, anf_node] : build_args) { + MS_EXCEPTION_IF_NULL(anf_node); + auto json_name = builder.json_name(); + MS_LOG(DEBUG) << "Akg start compile op: " << json_name; + auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); + if (cached_kernel_pack != nullptr) { + MS_LOG(DEBUG) << "Use cached kernel, json_name_[" << json_name << "], fullname_with_scope[" + << anf_node->fullname_with_scope() << "]."; + auto kernel_mod_ptr = std::make_shared(cached_kernel_pack); + kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); + kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); + AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); + continue; + } + + if (json_name_set.count(json_name) != 0) { + repeat_nodes.push_back({builder, anf_node}); + continue; + } + json_name_set.insert(json_name); + auto node_json = builder.kernel_json(); + kernel::SaveJsonInfo(json_name, node_json); + jsons.push_back(node_json); + } + + // No nodes need to be compiled! 
+ if (jsons.empty()) { + return true; + } + + // Try to call python method to compile nodes parallely. + PyObject *p_module = nullptr; + PyObject *p_func = nullptr; + PyObject *p_arg = nullptr; + PyObject *p_res = nullptr; + + p_module = PyImport_ImportModule(kMultiProcModule); + if (p_module == nullptr) { + MS_LOG(ERROR) << "Failed to import [" << kMultiProcModule << "]."; + return false; + } + + p_func = PyObject_GetAttrString(p_module, kCompileAkgKernelParallelFunc); + GenParallelCompileFuncArgs(jsons, &p_arg); + MS_LOG(DEBUG) << "Call function [" << kCompileAkgKernelParallelFunc << "], try to compile " << jsons.size() + << " Akg kernels parallelly."; + p_res = PyEval_CallObject(p_func, p_arg); + if (p_res == nullptr) { + PyErr_Print(); + MS_LOG(ERROR) << "No ret got, failed to call function [" << kCompileAkgKernelParallelFunc << "], args:\n(" + << AkgKernelBuild::PyObjectToStr(p_arg) << ")."; + return false; + } + if (PyObject_IsTrue(p_res) != 1) { + PyErr_Print(); + MS_LOG(ERROR) << "Illegal ret, failed to call function [" << kCompileAkgKernelParallelFunc << "], args:\n(" + << AkgKernelBuild::PyObjectToStr(p_arg) << ")."; + return false; + } + + // All unique done here, cache them and set kernel. + for (const auto &[builder, anf_node] : build_args) { + auto json_name = builder.json_name(); + auto new_kernel_pack = tbe::TbeUtils::InsertCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); + if (new_kernel_pack == nullptr) { + MS_LOG(ERROR) << "Insert to cache failed, json_name_[" << json_name << "], fullname_with_scope[" + << anf_node->fullname_with_scope() << "]."; + return false; + } + auto kernel_mod_ptr = std::make_shared(new_kernel_pack); + kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); + kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); + AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); + MS_LOG(DEBUG) << "Akg compile " << json_name << " kernel and insert cache successfully!"; + } + + // Handle repeated nodes. 
+ for (const auto &[builder, anf_node] : repeat_nodes) { + auto node_json = builder.kernel_json(); + auto json_name = builder.json_name(); + auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); + if (cached_kernel_pack == nullptr) return false; + MS_LOG(INFO) << "Use just compiled kernel, json_name_[" << json_name << "], fullname_with_scope[" + << anf_node->fullname_with_scope() << "]."; + auto kernel_mod_ptr = std::make_shared(cached_kernel_pack); + kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); + kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); + AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); + } + + return true; +} + +bool AkgAscendKernelParallelBuild(const std::vector &anf_nodes) { + std::vector> json_and_node; + for (const auto &anf_node : anf_nodes) { + MS_EXCEPTION_IF_NULL(anf_node); + AkgAscendKernelBuilder akg_cce_kernel_builder; + KernelPackPtr kernel_pack = nullptr; + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::IsGraphKernel(cnode)) { + auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(cnode); + auto mng = func_graph->manager(); + if (mng == nullptr) { + mng = Manage(func_graph, true); + func_graph->set_manager(mng); + } + MS_EXCEPTION_IF_NULL(func_graph); + std::vector node_list; + std::vector input_list; + std::vector output_list; + std::string op_name = AnfAlgo::GetCNodeName(anf_node); + MS_LOG(INFO) << "Akg start compile composite op[" << op_name << "]"; + GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); + if (!akg_cce_kernel_builder.CollectFusedJson(node_list, input_list, output_list)) { + MS_EXCEPTION(UnknownError) << "Akg build failed composite op[" << op_name << "]."; + } + } else { + if (!akg_cce_kernel_builder.CollectJson(anf_node)) { + MS_EXCEPTION(UnknownError) << "Akg build failed op[" << AnfAlgo::GetCNodeName(anf_node) << "]."; + } + } + json_and_node.push_back({akg_cce_kernel_builder, anf_node}); + } 
+ + if (json_and_node.empty()) { + MS_LOG(DEBUG) << "There is no kernel needed to be compiled."; + return true; + } + + return AkgOpParallelBuild(json_and_node); +} + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h new file mode 100644 index 0000000000..619b583fde --- /dev/null +++ b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_ +#define MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_ + +#include +#include +#include +#include "ir/anf.h" +#include "kernel/kernel.h" +#include "kernel/akg/akg_kernel_build.h" + +namespace mindspore { +namespace kernel { +class AkgAscendKernelBuilder : public AkgKernelBuild { + public: + AkgAscendKernelBuilder() = default; + ~AkgAscendKernelBuilder() = default; + + bool CollectJson(const AnfNodePtr &anf_node); + bool CollectFusedJson(const std::vector &anf_nodes, const std::vector &input_list, + const std::vector &output_list); + std::string json_name() const { return json_name_; } + std::string kernel_json() const { return kernel_json_; } + const std::vector &input_size_list() const { return input_size_list_; } + const std::vector &output_size_list() const { return output_size_list_; } + + private: + std::string kernel_json_; + std::vector input_size_list_; + std::vector output_size_list_; +}; + +bool AkgAscendKernelParallelBuild(const std::vector &anf_nodes); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_ diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc new file mode 100644 index 0000000000..24324f70e0 --- /dev/null +++ b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc @@ -0,0 +1,181 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/akg/ascend/akg_ascend_kernel_mod.h" +#include +#include +#include +#include +#include +#include +#include +#include "nlohmann/json.hpp" +#include "runtime/rt.h" +#include "utils/log_adapter.h" +#include "utils/convert_utils.h" + +namespace mindspore { +namespace kernel { +using std::fstream; +using std::map; +using std::mutex; +using std::string; +using TbeTaskInfoPtr = std::shared_ptr; +using tbe::KernelManager; +constexpr uint32_t DEFAULT_BLOCK_DIM = 1; +/** + * @brief infotable contain func_stub\blockdim\kernel file buffer + */ +AkgKernelMod::AkgKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack_(kernel_pack) {} + +void AkgKernelMod::SetInputSizeList(const std::vector &size_list) { input_size_list_ = size_list; } + +void AkgKernelMod::SetOutputSizeList(const std::vector &size_list) { output_size_list_ = size_list; } + +void AkgKernelMod::SetWorkspaceSizeList(const std::vector &size_list) { workspace_size_list_ = size_list; } + +const std::vector &AkgKernelMod::GetInputSizeList() const { return input_size_list_; } + +const std::vector &AkgKernelMod::GetOutputSizeList() const { return output_size_list_; } + +const std::vector &AkgKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; } + +void DumpData(const std::vector &inputs, const std::vector &outputs) { + const char *dump_data = getenv("MS_KERNEL_DUMP_DATA"); + if (dump_data) { + int idx = 0; + for (const auto &x : inputs) { + std::vector buf(x->size); + if (RT_ERROR_NONE != rtMemcpy(buf.data(), buf.size(), reinterpret_cast(x->addr), x->size, + RT_MEMCPY_DEVICE_TO_HOST)) { + MS_LOG(WARNING) << "Call runtime rtMemcpy error."; + return; + } + + std::string file_name("input_"); + file_name += std::to_string(idx); + std::ofstream file(file_name, std::ios::binary); + if (file.is_open()) { + (void)file.write(buf.data(), SizeToLong(buf.size())); + file.close(); + 
idx++; + } else { + MS_LOG(ERROR) << "Open file failed."; + return; + } + } + idx = 0; + for (const auto &x : outputs) { + std::vector buf(x->size); + if (RT_ERROR_NONE != rtMemcpy(buf.data(), buf.size(), reinterpret_cast(x->addr), x->size, + RT_MEMCPY_DEVICE_TO_HOST)) { + MS_LOG(WARNING) << "Call runtime rtMemcpy error."; + return; + } + + std::string file_name("output_"); + file_name += std::to_string(idx); + std::ofstream file(file_name, std::ios::binary); + if (file.is_open()) { + (void)file.write(buf.data(), SizeToLong(buf.size())); + file.close(); + idx++; + } else { + MS_LOG(ERROR) << "Open file failed."; + return; + } + } + } +} + +bool AkgKernelMod::Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) { + if (stream_ptr == 0) { + MS_LOG(ERROR) << "stream_ptr should not be nullptr."; + return false; + } + + if (kernel_pack_ == nullptr) { + MS_LOG(ERROR) << "kernel pack should not be nullptr."; + return false; + } + + uint32_t block_dim = DEFAULT_BLOCK_DIM; // default blockdim equal to 1. + auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim); + if (func_stub == 0) { + MS_LOG(ERROR) << "GenFuncStub failed."; + return false; + } + + // pack all addresses into a vector. 
+ std::vector runtime_args; + (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtime_args), + [](const AddressPtr &input) -> void * { return input->addr; }); + (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtime_args), + [](const AddressPtr &output) -> void * { return output->addr; }); + + rtL2Ctrl_t *l2ctrl = nullptr; + auto stream = reinterpret_cast(stream_ptr); + if (RT_ERROR_NONE != rtKernelLaunch(reinterpret_cast(func_stub), block_dim, runtime_args.data(), + SizeToUint(sizeof(void *) * runtime_args.size()), l2ctrl, stream)) { + MS_LOG(ERROR) << "Call runtime rtKernelLaunch error."; + return false; + } + + DumpData(inputs, outputs); + + return true; +} + +std::vector AkgKernelMod::GenTask(const std::vector &inputs, const std::vector &, + const std::vector &outputs, uint32_t stream_id) { + if (kernel_pack_ == nullptr) { + MS_LOG(EXCEPTION) << "kernel pack should not be nullptr."; + } + + std::vector args; + uint32_t args_size = 0; + std::vector sm_desc; + void *binary = nullptr; + uint32_t binary_size = 0; + std::vector meta_data; + std::vector input_data_addrs; + std::vector output_data_addrs; + std::vector workspace_addrs; + + // pack all addresses into a vector. + (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs), + [](const AddressPtr &input) -> void * { return input->addr; }); + (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs), + [](const AddressPtr &output) -> void * { return output->addr; }); + + uint32_t block_dim = DEFAULT_BLOCK_DIM; // default blockdim equal to 1. 
+ auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim); + if (func_stub == 0) { + MS_LOG(EXCEPTION) << "GenFuncStub failed."; + } + + std::string stub_func = KernelManager::GetStubFuncName(kernel_pack_); + + MS_LOG(DEBUG) << "The block_dim is:" << block_dim; + + TbeTaskInfoPtr task_info_ptr = make_shared( + stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs, + output_data_addrs, workspace_addrs); + return {task_info_ptr}; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.h b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.h new file mode 100644 index 0000000000..18d342f629 --- /dev/null +++ b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.h @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_ +#define MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_ +#include +#include +#include +#include "kernel/ascend_kernel_mod.h" +#include "kernel/tbe/tbe_utils.h" + +namespace mindspore { +namespace kernel { +class AkgKernelMod : public AscendKernelMod { + public: + explicit AkgKernelMod(const KernelPackPtr &kernel_pack); + ~AkgKernelMod() final {} + + void SetInputSizeList(const std::vector &size_list); + void SetOutputSizeList(const std::vector &size_list); + void SetWorkspaceSizeList(const std::vector &size_list); + const std::vector &GetInputSizeList() const override; + const std::vector &GetOutputSizeList() const override; + const std::vector &GetWorkspaceSizeList() const override; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override; + std::vector GenTask(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, uint32_t stream_id) override; + + private: + KernelPackPtr kernel_pack_; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; + +using AkgKernelModPtr = std::shared_ptr; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_ diff --git a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc b/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc index 2bb2cfd267..534e355802 100644 --- a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc +++ b/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc @@ -18,7 +18,7 @@ #include #include #include "kernel/kernel.h" -#include "kernel/akg/akgkernelbuild.h" +#include "kernel/akg/akg_kernel_build.h" #include "kernel/akg/gpu/akg_gpu_kernel_mod.h" #include "common/utils.h" diff --git a/mindspore/ccsrc/kernel/common_utils.cc b/mindspore/ccsrc/kernel/common_utils.cc index 2769e0c42a..868abeb1cc 100644 --- 
a/mindspore/ccsrc/kernel/common_utils.cc +++ b/mindspore/ccsrc/kernel/common_utils.cc @@ -18,10 +18,17 @@ #include #include #include +#include #include +#include #include "nlohmann/json.hpp" #include "session/anf_runtime_algorithm.h" #include "common/utils.h" +#include "ir/manager.h" +#include "ir/meta_tensor.h" +#include "ir/func_graph.h" +#include "operator/ops.h" +#include "utils/graph_utils.h" namespace mindspore { namespace kernel { @@ -47,12 +54,6 @@ const std::map type_id_str_map = { {TypeId::kNumberTypeBool, "bool"}, }; -const std::map DATATYPE_STRING_MAP{ - {"Float32", "float32"}, {"Float16", "float16"}, {"Int8", "int8"}, {"Int16", "int16"}, - {"UInt16", "uint16"}, {"UInt8", "uint8"}, {"Int32", "int32"}, {"UInt32", "uint32"}, - {"Int64", "int64"}, {"UInt64", "uint64"}, {"Bool_", "bool"}, {"Float64", "double"}, -}; - const std::unordered_map dtype_shortdtype_map_ = { {"float16", "f16"}, {"float32", "f32"}, {"float64", "f64"}, {"int8", "i8"}, {"int16", "i16"}, {"int32", "i32"}, {"int64", "i64"}, {"uint8", "u8"}, {"uint16", "u16"}, {"uint32", "u32"}, {"uint64", "u64"}, {"bool", "bool"}, @@ -70,50 +71,6 @@ const std::unordered_map fusion_type_maps = { {"SEGMENT", FusionType::SEGMENT}, {"OPAQUE", FusionType::OPAQUE}, }; -bool IsAtomicNode(const CNodePtr &kernel_node) { - MS_EXCEPTION_IF_NULL(kernel_node); - auto kernel_mod = AnfAlgo::GetKernelMod(kernel_node); - MS_EXCEPTION_IF_NULL(kernel_mod); - auto parameters_indexs = kernel_mod->GenParameters(); - if (parameters_indexs.empty()) { - return false; - } - auto atomic_flag = false; - size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); - size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); - auto workspace_size_list = kernel_mod->GetWorkspaceSizeList(); - size_t workspace_num = kernel_mod->GetWorkspaceSizeList().size(); - if (input_num + workspace_num + output_num > parameters_indexs.size()) { - size_t lossNum = (input_num + workspace_num + output_num) - parameters_indexs.size(); - for (size_t i 
= 0; i < lossNum; i++) { - parameters_indexs.push_back(0); - } - } - std::vector clean_output_indexs; - // in parameters data sort as input->workspace->output - size_t index = 0; - while (index < output_num) { - if (parameters_indexs[input_num + workspace_num + index] == 1) { - atomic_flag = true; - clean_output_indexs.push_back(SizeToInt(index)); - } - index++; - } - if (atomic_flag) { - AnfAlgo::SetNodeAttr(kAttrAutomicOutputIndexs, MakeValue(clean_output_indexs), kernel_node); - } - for (size_t i = 0; i < workspace_num; ++i) { - if (parameters_indexs[input_num + i] == 1) { - atomic_flag = true; - AnfAlgo::SetNodeAttr(kAttrAutomicWorkspaceSize, - MakeValue(std::accumulate(workspace_size_list.begin(), workspace_size_list.end(), 0)), - kernel_node); - break; - } - } - return atomic_flag; -} - void KernelMeta::Initialize() { kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(getpid()) + "/"; // remove old kernel cache @@ -242,14 +199,6 @@ TypeId DtypeToTypeId(const std::string &dtypes) { } } -std::string Dtype2String(const std::string &dtypes) { - auto iter = DATATYPE_STRING_MAP.find(dtypes); - if (iter == DATATYPE_STRING_MAP.end()) { - MS_EXCEPTION(ArgumentError) << "Illegal input dtype:" << dtypes; - } - return iter->second; -} - std::string TypeId2String(TypeId type_id) { auto iter = type_id_str_map.find(type_id); if (iter == type_id_str_map.end()) { @@ -360,7 +309,7 @@ bool SetOutputKernelBuilderInfo(const std::vector> &ou output_num = 1; } else { if (output_idx < real_output_num) { - MS_LOG(INFO) << "Set output kernel builder info, output type is optional, output index is :" << output_idx; + MS_LOG(DEBUG) << "Set output kernel builder info, output type is optional, output index is :" << output_idx; output_num = 1; } } @@ -402,7 +351,7 @@ void SetKernelBuildInfo(const std::shared_ptrSetKernelType(AUTO_DIFF_KERNEL); + builder->SetKernelType(AKG_KERNEL); } else if (imply_type == kAICPU) { builder->SetKernelType(AICPU_KERNEL); } else { @@ -525,5 
+474,429 @@ std::string GetProcessor(const AnfNodePtr &anf_node) { } return device; } + +bool IsSameShape(const std::vector &shape_a, const std::vector &shape_b) { + if (shape_a.size() != shape_b.size()) { + return false; + } + for (size_t i = 0; i < shape_a.size(); ++i) { + if (shape_a[i] != shape_b[i]) { + return false; + } + } + return true; +} + +int Sign(float x) { + if (x > 0) { + return 1; + } + if (x < 0) { + return -1; + } + return 0; +} + +void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, + size_t outer_dim) { + MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_); + MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_); + MS_EXCEPTION_IF_NULL(unique_grad); + MS_EXCEPTION_IF_NULL(unique_grad->value_); + MS_EXCEPTION_IF_NULL(unique_grad->indices_); + std::unordered_map index_map; + size_t unique_indices_size = 0; + for (size_t i = 0; i < origin_sparse_grad.indices_size_; ++i) { + int index = origin_sparse_grad.indices_[i]; + if (index < 0 || IntToSize(index) >= first_dim) { + continue; + } + auto iter = index_map.find(index); + if (iter == index_map.end()) { + index_map[index] = unique_indices_size; + unique_grad->indices_[unique_indices_size] = index; + size_t start_index = unique_indices_size * outer_dim; + size_t end_index = start_index + outer_dim; + for (size_t j = start_index, k = i * outer_dim; j < end_index; ++j, ++k) { + unique_grad->value_[j] = origin_sparse_grad.value_[k]; + } + unique_indices_size++; + } else { + size_t first_index = iter->second; + size_t start_index = first_index * outer_dim; + size_t end_index = start_index + outer_dim; + for (size_t j = start_index, k = i * outer_dim; j < end_index; ++j, ++k) { + unique_grad->value_[j] += origin_sparse_grad.value_[k]; + } + } + } + unique_grad->indices_size_ = unique_indices_size; +} + +struct WorkerParamsForReduceSparseGradient { + size_t slice_start_{0}; + size_t slice_end_{0}; + size_t max_length_{0}; + size_t outer_dim_{0}; + 
std::vector> *sorted_indices_{nullptr}; + std::vector *slice_positions_{nullptr}; + float *src_value_{nullptr}; + SparseGradient *unique_grad_{nullptr}; +}; + +void WorkerForReduceSparseGradient(WorkerParamsForReduceSparseGradient param) { + MS_EXCEPTION_IF_NULL(param.sorted_indices_); + MS_EXCEPTION_IF_NULL(param.slice_positions_); + MS_EXCEPTION_IF_NULL(param.src_value_); + MS_EXCEPTION_IF_NULL(param.unique_grad_); + auto outer_dim = param.outer_dim_; + auto &sorted_indices = *(param.sorted_indices_); + auto &slice_positions = *(param.slice_positions_); + auto unique_grad = param.unique_grad_; + for (size_t slice_id = param.slice_start_; slice_id < param.slice_end_; ++slice_id) { + size_t cur_pos = slice_positions[slice_id]; + int index = sorted_indices[cur_pos].first; + unique_grad->indices_[slice_id] = index; + size_t start_index = slice_id * outer_dim; + auto ret_code = memcpy_s(unique_grad->value_ + start_index, (param.max_length_ - start_index) * sizeof(float), + param.src_value_ + sorted_indices[cur_pos].second, outer_dim * sizeof(float)); + if (ret_code != EOK) { + MS_LOG(EXCEPTION) << "Failed to copy data!"; + } + cur_pos++; + size_t end_pos; + if (slice_id + 1 < slice_positions.size()) { + end_pos = slice_positions[slice_id + 1]; + } else { + end_pos = sorted_indices.size(); + } + while (cur_pos < end_pos) { + for (size_t i = 0; i < outer_dim; ++i) { + unique_grad->value_[start_index + i] += param.src_value_[sorted_indices[cur_pos].second + i]; + } + cur_pos++; + } + } +} + +void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, + size_t outer_dim) { + MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_); + MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_); + MS_EXCEPTION_IF_NULL(unique_grad); + MS_EXCEPTION_IF_NULL(unique_grad->value_); + MS_EXCEPTION_IF_NULL(unique_grad->indices_); + std::vector> sorted_indices; + sorted_indices.reserve(origin_sparse_grad.indices_size_); + for (size_t i = 0; i < 
origin_sparse_grad.indices_size_; ++i) { + int index = origin_sparse_grad.indices_[i]; + if (index >= 0 && IntToSize(index) < first_dim) { + sorted_indices.emplace_back(std::pair(index, i * outer_dim)); + } + } + std::sort( + sorted_indices.begin(), sorted_indices.end(), + [](const std::pair &left, const std::pair &right) { return left.first < right.first; }); + int last_index = 0; + std::vector slice_positions; + for (size_t i = 0; i < sorted_indices.size(); ++i) { + if (i == 0 || last_index != sorted_indices[i].first) { + slice_positions.emplace_back(i); + } + last_index = sorted_indices[i].first; + } + size_t thread_num = 8; + if (slice_positions.size() < thread_num) { + thread_num = slice_positions.size(); + } + size_t stride = (slice_positions.size() + thread_num - 1) / thread_num; + thread_num = (slice_positions.size() + stride - 1) / stride; + std::vector threads; + size_t max_length = sorted_indices.size() * outer_dim; + for (size_t i = 0; i < thread_num; ++i) { + size_t slice_start = i * stride; + size_t slice_end = 0; + if (i == thread_num - 1) { + slice_end = slice_positions.size(); + } else { + slice_end = slice_start + stride; + } + WorkerParamsForReduceSparseGradient params{ + slice_start, slice_end, max_length, outer_dim, &sorted_indices, &slice_positions, origin_sparse_grad.value_, + unique_grad}; + threads.emplace_back(std::thread(WorkerForReduceSparseGradient, params)); + } + for (size_t i = 0; i < thread_num; ++i) { + threads[i].join(); + } + unique_grad->indices_size_ = slice_positions.size(); +} + +std::pair GetKernelInput(const AnfNodePtr &anf_node, size_t index) { + MS_EXCEPTION_IF_NULL(anf_node); + + if (index >= AnfAlgo::GetInputTensorNum(anf_node)) { + MS_EXCEPTION(ArgumentError) << "Index is out of the size of anf_node inputs."; + } + + auto cnode = anf_node->cast(); + if (cnode == nullptr) { + return AnfAlgo::VisitKernel(anf_node, 0); + } else { + return AnfAlgo::VisitKernel(anf_node->cast()->input(index + 1), 0); + } +} + +std::vector>> 
GetInputIndex(const std::vector &node_list, + const std::vector &input_list) { + std::vector>> input_index; + for (size_t i = 0; i < input_list.size(); ++i) { + auto const &input = input_list[i]; + MS_EXCEPTION_IF_NULL(input); + bool found = false; + // using NodeUsersMap = std::unordered_map>>; + auto mng = input->func_graph()->manager(); + MS_EXCEPTION_IF_NULL(mng); + const NodeUsersMap &users = mng->node_users(); + auto input_users = users.find(input); + if (input_users == users.end() || input_users->second.empty()) { + MS_EXCEPTION(ArgumentError) << "Input [" << i << "][" << input->DebugString(2) << "] of [" + << input->func_graph()->ToString() << "] has no users."; + } + + for (auto const &input_user : input_users->second) { + for (auto const &anf_node : node_list) { + if (anf_node != input_user.first) { + continue; + } + + std::vector dyn_input_sizes; + auto prim = AnfAlgo::GetCNodePrimitive(anf_node); + MS_EXCEPTION_IF_NULL(prim); + if (prim->GetAttr(kAttrDynInputSizes) != nullptr) { + dyn_input_sizes = GetValue>(prim->GetAttr(kAttrDynInputSizes)); + } + + if (dyn_input_sizes.empty()) { + input_index.push_back(std::make_pair(anf_node, std::make_pair(IntToSize(input_user.second - 1), 0))); + found = true; + break; + } else { + int used_as_idx = input_user.second - 1; + int accum_idx = 0; + size_t dyn_i = 0; + for (; dyn_i < dyn_input_sizes.size(); ++dyn_i) { + accum_idx += dyn_input_sizes[dyn_i]; + if (used_as_idx < accum_idx) { + input_index.push_back(std::make_pair( + anf_node, std::make_pair(dyn_i, IntToSize(used_as_idx - (accum_idx - dyn_input_sizes[dyn_i]))))); + break; + } + } + if (dyn_i != dyn_input_sizes.size()) { + found = true; + break; + } + } + } + if (found) { + break; + } + } + + if (!found) { + MS_EXCEPTION(ArgumentError) << "Input [" << i << "][" << input->DebugString(2) << "] of [" + << input->func_graph()->ToString() << "] found no related kernel info."; + } + } + return input_index; +} + +std::vector> GetOutputIndex(const std::vector 
&node_list, + const std::vector &input_list, + const std::vector &output_list) { + std::vector> output_index; + for (size_t i = 0; i < output_list.size(); ++i) { + auto const &output = output_list[i]; + MS_EXCEPTION_IF_NULL(output); + bool found = false; + auto pree_node = AnfAlgo::VisitKernel(output, 0); + + auto pos = std::find(std::begin(node_list), std::end(node_list), pree_node.first); + if (pos != std::end(node_list)) { + output_index.push_back(pree_node); + continue; + } + + auto ret = std::find(std::begin(input_list), std::end(input_list), pree_node.first); + if (ret != std::end(input_list)) { + output_index.push_back(std::make_pair(pree_node.first, 0)); + found = true; + } + + if (!found) { + MS_EXCEPTION(ArgumentError) << "Output [" << i << "][" << output->DebugString(2) << "] of [" + << output->func_graph()->ToString() << "] found no related kernel info."; + } + } + return output_index; +} + +void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector *node_list) { + MS_EXCEPTION_IF_NULL(node_list); + + MS_EXCEPTION_IF_NULL(func_graph); + + std::vector node_lists = TopoSort(func_graph->get_return()); + for (auto const &node : node_lists) { + if (!AnfAlgo::IsRealKernel(node) || !node->isa()) { + continue; + } + + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + + if (IsValueNode(cnode->input(kAnfPrimitiveIndex))) { + node_list->push_back(node); + } + } +} + +void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector *node_list, + std::vector *input_list, std::vector *output_list) { + MS_EXCEPTION_IF_NULL(node_list); + MS_EXCEPTION_IF_NULL(input_list); + MS_EXCEPTION_IF_NULL(output_list); + MS_EXCEPTION_IF_NULL(func_graph); + + GetValidKernelNodes(func_graph, node_list); + + auto parameters = func_graph->parameters(); + input_list->insert(input_list->begin(), parameters.begin(), parameters.end()); + + auto func_output = func_graph->output(); + MS_EXCEPTION_IF_NULL(func_output); + if (func_output->isa()) { + // multi output. 
+ auto cnode = func_output->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto input0 = cnode->input(kAnfPrimitiveIndex); + MS_EXCEPTION_IF_NULL(input0); + if (IsPrimitive(input0, prim::kPrimMakeTuple)) { + for (size_t input_idx = 1; input_idx < cnode->inputs().size(); ++input_idx) { + auto input_node = cnode->input(input_idx); + MS_EXCEPTION_IF_NULL(input_node); + output_list->push_back(AnfAlgo::VisitKernel(input_node, 0).first); + } + } else { + // single output. + output_list->push_back(AnfAlgo::VisitKernel(func_output, 0).first); + } + } else { + // single output. + output_list->push_back(AnfAlgo::VisitKernel(func_output, 0).first); + } +} + +bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json) { + MS_EXCEPTION_IF_NULL(anf_node); + MS_EXCEPTION_IF_NULL(node_json); + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (input_idx + 1 >= cnode->size()) { + MS_EXCEPTION(ArgumentError) << "input_idx [" << input_idx << "] is out of index of inputs of [" + << cnode->inputs().size() << "][" << cnode->DebugString() << "]"; + } + + auto input_node = cnode->input(input_idx + 1); + if (!IsValueNode(input_node)) { + return false; + } + + auto tensor = GetValueNode(input_node); + if (tensor == nullptr) { + return false; + } + + auto type_id = tensor->data_type(); + auto *data = tensor->data_c(); + MS_EXCEPTION_IF_NULL(data); + if (tensor->DataDim() > 1 || tensor->DataSize() != 1) { + // not const tensor. 
+ MS_LOG(WARNING) << "We take first value of tensor whose datasize != 1, [" << input_node->DebugString(2) << "]"; + } + + if (type_id == kFloat32->type_id()) { + float *val = static_cast(data); + MS_EXCEPTION_IF_NULL(val); + (*node_json)["value"] = val[0]; + MS_LOG(DEBUG) << "Value of tensor[" << cnode->DebugString() << "] is [float32][" << *val << "]."; + return true; + } else if (type_id == kFloat16->type_id()) { + float16 *val = static_cast(data); + MS_EXCEPTION_IF_NULL(val); + (*node_json)["value"] = static_cast(val[0]); + MS_LOG(INFO) << "Value of tensor[" << cnode->DebugString() << "] is [float16][" << *val << "]."; + return true; + } else if (type_id == kInt32->type_id()) { + int *val = static_cast(data); + MS_EXCEPTION_IF_NULL(val); + (*node_json)["value"] = val[0]; + MS_LOG(INFO) << "Value of tensor[" << cnode->DebugString() << "] is [int32][" << *val << "]."; + return true; + } + MS_LOG(ERROR) << "Unknown value type of tensor[" << cnode->DebugString() << "]"; + return false; +} + +void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector> *node_list) { + MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(node_list); + auto output = func_graph->output(); + MS_EXCEPTION_IF_NULL(output); + if (AnfAlgo::IsRealKernel(output)) { + // single output. + node_list->push_back(std::make_pair(output, 0)); + return; + } else if (IsPrimitiveCNode(output, prim::kPrimMakeTuple)) { + auto output_cnode = output->cast(); + MS_EXCEPTION_IF_NULL(output_cnode); + // multi output. 
+ auto &inputs = output_cnode->inputs(); + for (size_t i = 1; i < inputs.size(); ++i) { + auto in_with_idx = AnfAlgo::VisitKernel(inputs[i], 0); + node_list->push_back(in_with_idx); + } + return; + } + MS_EXCEPTION(ArgumentError) << "Unknown output type: " << output->DebugString(2) + << " of graph: " << func_graph->ToString(); +} + +bool IsWeightBoundary(const AnfNodePtr &node) { + if (node->isa()) { + return true; + } + if (node->isa() && AnfAlgo::IsParameterWeight(node->cast())) { + return true; + } + return false; +} + +void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params, size_t thread_num, + size_t total_compute_size) { + std::vector threads; + threads.reserve(thread_num); + size_t start = 0; + size_t once_compute_size = (total_compute_size + thread_num - 1) / thread_num; + while (start < total_compute_size) { + size_t end = (start + once_compute_size) > total_compute_size ? total_compute_size : (start + once_compute_size); + threads.emplace_back(std::thread(func, params, start, end)); + start += once_compute_size; + } + for (size_t i = 0; i < threads.size(); ++i) { + threads[i].join(); + } +} } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/common_utils.h b/mindspore/ccsrc/kernel/common_utils.h index 47fe96c4c9..e25421c57d 100644 --- a/mindspore/ccsrc/kernel/common_utils.h +++ b/mindspore/ccsrc/kernel/common_utils.h @@ -20,9 +20,12 @@ #include #include #include +#include #include #include #include +#include +#include #include "kernel/kernel.h" #include "kernel/oplib/opinfo.h" #include "kernel/kernel_build_info.h" @@ -69,19 +72,64 @@ class KernelMeta { std::unordered_map kernel_meta_map_; }; +struct SparseGradient { + float *value_; + int *indices_; + size_t indices_size_; +}; + +struct MultiThreadComputeParams { + float *var_; + float *accum_; + float *linear_; + float *m_; + float *m_t_; + float *v_; + float lr_; + float l1_; + float l2_; + float lr_power_; + float beta1_; + float 
beta2_; + float epsilon_; + SparseGradient sparse_grad_; + size_t var_first_dim_size_; + size_t var_outer_dim_size_; + bool use_nesterov_; +}; +using MultiThreadComputeFunc = std::function; + bool CheckCache(const std::string &kernel_name); KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor); KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor); TypeId DtypeToTypeId(const std::string &dtypes); -std::string Dtype2String(const std::string &dtypes); std::string Dtype2ShortType(const std::string &dtypes); std::string TypeId2String(TypeId type_id); size_t GetDtypeNbyte(const std::string &dtypes); bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr &op_info_ptr, Processor processor, std::vector> *const kernel_info_list); -bool IsAtomicNode(const CNodePtr &kernel_node); void SaveJsonInfo(const std::string &json_name, const std::string &info); std::string GetProcessor(const AnfNodePtr &anf_node); +bool IsSameShape(const std::vector &shape_a, const std::vector &shape_b); +int Sign(float x); +void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, + size_t outer_dim); +void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, + size_t outer_dim); +std::pair GetKernelInput(const AnfNodePtr &anf_node, size_t index); +std::vector>> GetInputIndex(const std::vector &node_list, + const std::vector &input_list); +std::vector> GetOutputIndex(const std::vector &node_list, + const std::vector &input_list, + const std::vector &output_list); +void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector *node_list, + std::vector *input_list, std::vector *output_list); +void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector *node_list); +bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json); +void 
GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector> *node_list); +bool IsWeightBoundary(const AnfNodePtr &node); +void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params, size_t thread_num, + size_t total_compute_size); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc index d0db0c7685..5b3194608e 100644 --- a/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc @@ -32,17 +32,17 @@ bool AddNCPUKernel::Launch(const std::vector &inputs, const std::vector &outputs) { auto output_addr = reinterpret_cast(outputs[0]->addr); + size_t offset = 0; for (size_t i = 0; i < output_shape_[0]; ++i) { for (size_t j = 0; j < output_shape_[1]; ++j) { for (size_t k = 0; k < output_shape_[2]; ++k) { for (size_t m = 0; m < output_shape_[3]; ++m) { - auto offset = CPUKernelUtils::CalcOffset(output_shape_, i, j, k, m); float sum = 0; for (size_t index = 0; index < input_num_; ++index) { auto input_addr = reinterpret_cast(inputs[index]->addr); sum += input_addr[offset]; } - output_addr[offset] = sum; + output_addr[offset++] = sum; } } } diff --git a/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.cc new file mode 100644 index 0000000000..abb0c65d27 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.cc @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "kernel/cpu/allgather_cpu_kernel.h" +#include "device/cpu/cpu_device_address.h" +#include "device/cpu/mpi/mpi_adapter.h" +#include "ir/primitive.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace kernel { +namespace { +constexpr auto kRanksGroup = "group"; +constexpr auto kAllGatherInputNum = 1; +} // namespace + +void AllGatherCPUKernel::InitKernel(const CNodePtr &kernel_node) { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != kAllGatherInputNum) { + MS_LOG(EXCEPTION) << "allgather input num:" << input_num; + } + + auto ranks_group = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kRanksGroup); + if (ranks_group != nullptr) { + ranks_group_ = GetValue>(ranks_group); + } else { + MS_LOG(EXCEPTION) << "Miss attribute " << kRanksGroup; + } +} + +bool AllGatherCPUKernel::Launch(const std::vector &inputs, + const std::vector & /*workspace*/, + const std::vector &outputs) { + auto input_addr = reinterpret_cast(inputs[0]->addr); + auto output_addr = reinterpret_cast(outputs[0]->addr); + auto input_data_num = inputs[0]->size / sizeof(float); + + return device::cpu::MPIAdapter::Instance().AllGather(input_addr, output_addr, ranks_group_, input_data_num); +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.h new file mode 100644 index 0000000000..94180fa89b --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.h @@ -0,0 +1,44 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_ +#include +#include +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class AllGatherCPUKernel : public CPUKernel { + public: + AllGatherCPUKernel() = default; + ~AllGatherCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + std::vector ranks_group_; +}; + +MS_REG_CPU_KERNEL(HostAllGather, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + AllGatherCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h index 0ce671f4f5..c0ca581974 100644 --- a/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h @@ -42,6 +42,16 @@ MS_REG_CPU_KERNEL(ApplyMomentum, .AddInputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), ApplyMomentumCPUKernel); +MS_REG_CPU_KERNEL(ApplyMomentum, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + 
.AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ApplyMomentumCPUKernel); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h index 16344d6817..aae7435c5c 100644 --- a/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h @@ -37,7 +37,7 @@ class ArgmaxCPUKernel : public CPUKernel { size_t batch_size_{0}; }; -MS_REG_CPU_KERNEL(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), +MS_REG_CPU_KERNEL(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32), ArgmaxCPUKernel); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/cpu_kernel.cc index c9d3770c6e..2be05038d6 100644 --- a/mindspore/ccsrc/kernel/cpu/cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/cpu_kernel.cc @@ -37,8 +37,8 @@ void CPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { } void CPUKernel::Init(const CNodePtr &kernel_node) { - InitInputOutputSize(kernel_node); InitKernel(kernel_node); + InitInputOutputSize(kernel_node); } void CPUKernelUtils::ExpandDimsTo4(std::vector *shape) { @@ -66,5 +66,15 @@ size_t CPUKernelUtils::GetElementNumOnAxis(const std::vector &shape, int } return result; } + +void CPUKernelUtils::GetElementNumEveryDim(const std::vector &shape, std::vector *element_num) { + size_t accumulation = 1; + element_num->emplace_back(1); + for (size_t i = shape.size() - 1; i > 0; --i) { + accumulation *= shape[i]; + element_num->emplace_back(accumulation); + } + std::reverse(element_num->begin(), element_num->end()); +} } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/cpu_kernel.h index 2538459336..0836529840 100644 --- a/mindspore/ccsrc/kernel/cpu/cpu_kernel.h +++ 
b/mindspore/ccsrc/kernel/cpu/cpu_kernel.h @@ -49,6 +49,7 @@ const char AXIS[] = "axis"; const char BEGIN[] = "begin"; const char END[] = "end"; const char SIZE[] = "size"; +const char USE_NESTEROV[] = "use_nesterov"; class CPUKernel : public kernel::KernelMod { public: @@ -78,6 +79,7 @@ class CPUKernelUtils { static void ExpandDimsTo4(std::vector *shape); static size_t CalcOffset(const std::vector &shape, size_t dim0, size_t dim1, size_t dim2, size_t dim3); static size_t GetElementNumOnAxis(const std::vector &shape, int axis); + static void GetElementNumEveryDim(const std::vector &shape, std::vector *element_num); }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.cc new file mode 100644 index 0000000000..837cb647e3 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.cc @@ -0,0 +1,77 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h" +#include "device/cpu/cpu_device_address.h" +#include "device/cpu/mpi/mpi_adapter.h" +#include "ir/primitive.h" + +namespace mindspore { +namespace kernel { +void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { + CheckParam(kernel_node); + split_num_ = AnfAlgo::GetNodeAttr(kernel_node, "split_num"); + MS_LOG(INFO) << "split_num: " << split_num_; + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (input_shape[0] % split_num_ != 0) { + MS_LOG(EXCEPTION) << "Input shape[0] is " << input_shape[0] << ", but it must be multiple of split_num."; + } +} + +bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector &inputs, + const std::vector & /*workspace*/, + const std::vector &outputs) { +#if defined(_WIN32) || defined(_WIN64) + auto start_time = std::chrono::steady_clock::now(); +#else + struct timeval start_time, end_time; + (void)gettimeofday(&start_time, nullptr); +#endif + auto input_addr = reinterpret_cast(inputs[0]->addr); + auto output_addr = reinterpret_cast(outputs[0]->addr); + size_t input_size = inputs[0]->size; + size_t output_size = outputs[0]->size; + MS_LOG(DEBUG) << "input addr: " << input_addr << "input size: " << input_size; + MS_LOG(DEBUG) << "output addr: " << output_addr << "output size: " << output_size; + memset_s(output_addr, output_size, 0, output_size); + const std::vector &rank_group = {0, 1, 2, 3, 4, 5, 6, 7}; + size_t input_split_lens = input_size / split_num_ / sizeof(float_t); + size_t output_split_lens = output_size / split_num_ / sizeof(float_t); + for (int i = 0; i < split_num_; i++) { + device::cpu::MPIAdapter::Instance().AllGather(input_addr + i * input_split_lens, + output_addr + i * output_split_lens, rank_group, input_split_lens); + } +#if defined(_WIN32) || defined(_WIN64) + auto end_time = std::chrono::steady_clock::now(); + std::chrono::duration> cost = end_time - start_time; + MS_LOG(INFO) 
<< "EmbeddingLookUpCommGradCPUKernel, used time: " << cost.count() << " us"; +#else + (void)gettimeofday(&end_time, nullptr); + uint64_t time = 1000000 * static_cast(end_time.tv_sec - start_time.tv_sec); + time += static_cast(end_time.tv_usec - start_time.tv_usec); + MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << time << " us"; +#endif + return true; +} + +void EmbeddingLookUpCommGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 1) { + MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCommGradCPUKernel needs 1."; + } +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h new file mode 100644 index 0000000000..7222bd9be1 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ +#include +#include +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class EmbeddingLookUpCommGradCPUKernel : public CPUKernel { + public: + EmbeddingLookUpCommGradCPUKernel() : split_num_(1) {} + ~EmbeddingLookUpCommGradCPUKernel() override{}; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + void CheckParam(const CNodePtr &kernel_node); + int split_num_; +}; + +MS_REG_CPU_KERNEL(EmbeddingLookupCommGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EmbeddingLookUpCommGradCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc new file mode 100644 index 0000000000..e91b5d8109 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc @@ -0,0 +1,208 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include "kernel/cpu/embedding_look_up_cpu_kernel.h" +#include "device/cpu/cpu_device_address.h" +#include "device/cpu/mpi/mpi_adapter.h" +#include "ir/primitive.h" + +namespace mindspore { +namespace kernel { +void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) { + CheckParam(kernel_node); + input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + input_lens_ = 1; + for (auto shape : input_shape_) { + input_lens_ = input_lens_ * shape; + } + indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + indices_lens_ = 1; + for (auto shape : indices_shape_) { + indices_lens_ = indices_lens_ * shape; + } + output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); + axis_ = 4 - input_shape_.size(); + reduce_scatter_flag_ = AnfAlgo::GetNodeAttr(kernel_node, "reduce_scatter_flag"); +#ifdef ENABLE_MPI + if (reduce_scatter_flag_) { + size_t gatherv2_out_lens = 1; + for (int i = 0; i < SizeToInt(input_shape_.size()); i++) { + if (i == 0) { + for (int j = 0; j < SizeToInt(indices_shape_.size()); j++) { + gatherv2_out_lens = gatherv2_out_lens * indices_shape_[j]; + } + } else { + gatherv2_out_lens = gatherv2_out_lens * input_shape_[i]; + } + } + gatherv2_out_lens_ = gatherv2_out_lens * sizeof(float); + gather_v2_out_ = malloc(gatherv2_out_lens_); + if (gather_v2_out_ == nullptr) { + MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel malloc failed, malloc lens: " << gatherv2_out_lens_; + } + auto ret = memset_s(gather_v2_out_, gatherv2_out_lens_, 0, gatherv2_out_lens_); + if (ret != 0) { + MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel memset gatherv2 out buff failed"; + } + split_num_ = AnfAlgo::GetNodeAttr(kernel_node, "split_num"); + } +#else + if (reduce_scatter_flag_) { + MS_LOG(EXCEPTION) << "Not Enable MPI, please build version with -M on when set reduce_scatter_flag true"; + } +#endif + offset_ = AnfAlgo::GetNodeAttr(kernel_node, "offset"); + CPUKernelUtils::ExpandDimsTo4(&input_shape_); + 
CPUKernelUtils::ExpandDimsTo4(&output_shape_); +} + +bool EmbeddingLookUpCPUKernel::Launch(const std::vector &inputs, + const std::vector & /*workspace*/, + const std::vector &outputs) { + auto output_addr = reinterpret_cast(outputs[0]->addr); + float *gather_out_addr = reduce_scatter_flag_ ? reinterpret_cast(gather_v2_out_) : output_addr; + size_t dim0 = input_shape_[0]; + size_t dim1 = input_shape_[1]; + size_t dim2 = input_shape_[2]; + if (axis_ == 3) { + for (size_t i = 0; i < dim0; ++i) { + for (size_t j = 0; j < dim1; ++j) { + for (size_t k = 0; k < dim2; ++k) { + LookUpTable(inputs, i, j, k, &gather_out_addr); + } + } + } + } else if (axis_ == 2) { + for (size_t i = 0; i < dim0; ++i) { + for (size_t j = 0; j < dim1; ++j) { + LookUpTable(inputs, i, j, 0, &gather_out_addr); + } + } + } else if (axis_ == 1) { + for (size_t i = 0; i < dim0; ++i) { + LookUpTable(inputs, i, 0, 0, &gather_out_addr); + } + } else if (axis_ == 0) { + LookUpTable(inputs, 0, 0, 0, &gather_out_addr); + } +#ifdef ENABLE_MPI + if (reduce_scatter_flag_) { + size_t one_split_lens = gatherv2_out_lens_ / split_num_ / sizeof(float); + size_t reduce_scatter_out_lens = one_split_lens / 8; + const std::vector &group = {0, 1, 2, 3, 4, 5, 6, 7}; + for (int i = 0; i < split_num_; i++) { + device::cpu::MPIAdapter::Instance().ReduceScatter(reinterpret_cast(gather_v2_out_) + i * one_split_lens, + output_addr + i * reduce_scatter_out_lens, group, + one_split_lens / 8, "sum"); + } + } +#endif + return true; +} + +void LookUpTable_task(const float *input_addr, float *output_addr, int *indices_addr, size_t indices_lens, size_t num, + size_t dim0, size_t dim1, size_t dim2, int offset, size_t axis, std::vector input_shape, + size_t input_lens) { + size_t lens = num * sizeof(float); + for (size_t i = 0; i < indices_lens; ++i) { + int indices = indices_addr[i] - offset; + if (indices >= 0) { + size_t index = IntToSize(indices); + if (index < input_shape[axis]) { + size_t pos = 0; + if (axis == 3) { + pos = 
CPUKernelUtils::CalcOffset(input_shape, dim0, dim1, dim2, index); + } else if (axis == 2) { + pos = CPUKernelUtils::CalcOffset(input_shape, dim0, dim1, index, 0); + } else if (axis == 1) { + pos = CPUKernelUtils::CalcOffset(input_shape, dim0, index, 0, 0); + } else if (axis == 0) { + pos = CPUKernelUtils::CalcOffset(input_shape, index, 0, 0, 0); + } + + if (pos + num <= input_lens) { + auto ret = memcpy_s(output_addr, lens, input_addr + pos, lens); + if (ret != EOK) { + MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed."; + } + } else { + auto ret = memset_s(output_addr, lens, 0, lens); + if (ret != EOK) { + MS_LOG(EXCEPTION) << "LookUpTable task memset failed."; + } + } + } else { + auto ret = memset_s(output_addr, lens, 0, lens); + if (ret != EOK) { + MS_LOG(EXCEPTION) << "LookUpTable task memset failed."; + } + } + } else { + auto ret = memset_s(output_addr, lens, 0, lens); + if (ret != EOK) { + MS_LOG(EXCEPTION) << "LookUpTable task memset failed."; + } + } + output_addr += num; + } +} + +void EmbeddingLookUpCPUKernel::LookUpTable(const std::vector &inputs, size_t dim0, size_t dim1, + size_t dim2, float **output_addr) { + auto input_addr = reinterpret_cast(inputs[0]->addr); + auto indices_addr = reinterpret_cast(inputs[1]->addr); + size_t num = CPUKernelUtils::GetElementNumOnAxis(input_shape_, axis_); + float *task_out_addr = *output_addr; + const size_t thread_num = 8; + std::thread threads[8]; + size_t task_proc_lens = (indices_lens_ + thread_num - 1) / thread_num; + size_t i; + size_t task_offset = 0; + MS_LOG(DEBUG) << "indices_lens_: " << indices_lens_ << " one task proc lens:" << task_proc_lens; + for (i = 0; i < thread_num; i++) { + if (task_offset >= indices_lens_) { + break; + } + MS_LOG(DEBUG) << "task_offset: " << task_offset << " task_proc_lenss:" << task_proc_lens; + threads[i] = + std::thread(LookUpTable_task, input_addr, task_out_addr + task_offset * num, indices_addr + task_offset, + task_proc_lens, num, dim0, dim1, dim2, offset_, axis_, 
input_shape_, input_lens_); + task_offset += task_proc_lens; + if (task_offset + task_proc_lens > indices_lens_) { + task_proc_lens = indices_lens_ - task_offset; + } + } + for (size_t j = 0; j < i; j++) { + threads[j].join(); + } + *output_addr += num * indices_lens_; +} + +void EmbeddingLookUpCPUKernel::CheckParam(const CNodePtr &kernel_node) { + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (input_shape.size() > 4) { + MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() + << ", but EmbeddingLookUpCPUKernel olny support 4d or lower."; + } + + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 2) { + MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCPUKernel needs 2."; + } +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.h new file mode 100644 index 0000000000..d839571caa --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.h @@ -0,0 +1,74 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ +#include +#include +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class EmbeddingLookUpCPUKernel : public CPUKernel { + public: + EmbeddingLookUpCPUKernel() { + axis_ = 0; + offset_ = 0; + split_num_ = 0; + input_lens_ = 0; + indices_lens_ = 0; + gatherv2_out_lens_ = 0; + reduce_scatter_flag_ = false; + gather_v2_out_ = nullptr; + } + ~EmbeddingLookUpCPUKernel() override { + if (gather_v2_out_ != nullptr) { + free(gather_v2_out_); + gather_v2_out_ = nullptr; + } + } + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + void LookUpTable(const std::vector &inputs, size_t dim0, size_t dim1, size_t dim2, + float **output_addr); + void CheckParam(const CNodePtr &kernel_node); + std::vector input_shape_; + std::vector indices_shape_; + std::vector output_shape_; + int axis_; + int offset_; + int split_num_; + size_t input_lens_; + size_t indices_lens_; + size_t gatherv2_out_lens_; + bool reduce_scatter_flag_; + + void *gather_v2_out_; +}; + +MS_REG_CPU_KERNEL( + EmbeddingLookup, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), + EmbeddingLookUpCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.cc index cb311043ac..9117a533c8 100644 --- a/mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.cc @@ -74,8 +74,8 @@ void GatherV2CPUKernel::CopyDataToOutput(const std::vector & size_t dim2, float **output_addr, size_t *buff_size) { auto 
input_addr = reinterpret_cast(inputs[0]->addr); auto indices_addr = reinterpret_cast(inputs[1]->addr); - - for (size_t i = 0; i < output_shape_[axis_]; ++i) { + size_t elem_num = inputs[1]->size / 4; + for (size_t i = 0; i < elem_num; ++i) { size_t index = IntToSize(indices_addr[i]); size_t pos = 0; if (axis_ == 3) { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc index dab165e017..0a343785f7 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc @@ -22,99 +22,120 @@ namespace mindspore { namespace kernel { void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) { +#ifdef PLATFORM_86 + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#endif MS_EXCEPTION_IF_NULL(kernel_node); + using tag = dnnl::memory::format_tag; + using dim = dnnl::memory::dims; std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + std::vector src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); + std::vector src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); bidirectional_ = AnfAlgo::GetNodeAttr(kernel_node, "bidirectional"); input_size_ = AnfAlgo::GetNodeAttr(kernel_node, "input_size"); hidden_size_ = AnfAlgo::GetNodeAttr(kernel_node, "hidden_size"); num_layers_ = AnfAlgo::GetNodeAttr(kernel_node, "num_layers"); + has_bias_ = AnfAlgo::GetNodeAttr(kernel_node, "has_bias"); batch_size_ = SizeToInt(src_shape[1]); seq_len_ = SizeToInt(src_shape[0]); num_directions_ = 1; if (bidirectional_) { num_directions_ = 2; } - int gate_size = 4 * hidden_size_; + if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) { + MS_LOG(EXCEPTION) << "error iteration shape!"; + } + if (num_layers_ <= 0) { + MS_LOG(EXCEPTION) << "layers must be greater than zero!"; + } + if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) { + MS_LOG(EXCEPTION) << "conv2d 
only support 3-D input!"; + } + const int gate_size = 4 * hidden_size_; for (int i = 0; i < num_layers_; ++i) { weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_); weight_h_size_ += gate_size * hidden_size_; } weight_size_ = weight_size_ * num_directions_; weight_h_size_ = weight_h_size_ * num_directions_; -} - -bool LstmCPUKernel::Launch(const std::vector &inputs, - const std::vector & /*workspace*/, - const std::vector &outputs) { - using dt = dnnl::memory::data_type; - using tag = dnnl::memory::format_tag; - using dim = dnnl::memory::dims; auto eng = MKLKernelEngine::Get().engine(); dnnl::stream s(eng); - auto formatted_md = [](dim dimensions, tag layout) { return dnnl::memory::desc{{dimensions}, dt::f32, layout}; }; dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional; if (bidirectional_) { direction = dnnl::rnn_direction::bidirectional_concat; } - dim src_dims = {seq_len_, batch_size_, input_size_}; dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; - dim weights_dims = {num_layers_, num_directions_, input_size_, 4, hidden_size_}; - dim weights_h_dims = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_}; - dim bias_dims = {num_layers_, num_directions_, 4, hidden_size_}; + weights_dims_ = {num_layers_, num_directions_, input_size_, 4, hidden_size_}; + weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_}; + bias_dims_ = {num_layers_, num_directions_, 4, hidden_size_}; dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_}; dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc); dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc); dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, 
tag::ldnc); - dnnl::memory::desc weights_desc = formatted_md(weights_dims, tag::ldigo); - dnnl::memory::desc weights_h_desc = formatted_md(weights_h_dims, tag::ldigo); - dnnl::memory::desc bias_desc = formatted_md(bias_dims, tag::ldgo); + dnnl::memory::desc bias_desc = formatted_md(bias_dims_, tag::ldgo); dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc); dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc); dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc); - dnnl::lstm_forward::desc desc = - dnnl::lstm_forward::desc(dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc, - weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc); - auto prim_desc = dnnl::lstm_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); - auto workspace_memory = dnnl::memory(prim_desc.workspace_desc(), eng); - auto src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng); - write_to_dnnl_memory(inputs[0]->addr, src_memory); - - auto src_h_memory = dnnl::memory(prim_desc.src_iter_desc(), eng); - auto src_c_memory = dnnl::memory(prim_desc.src_iter_c_desc(), eng); - write_to_dnnl_memory(inputs[1]->addr, src_h_memory); - write_to_dnnl_memory(inputs[2]->addr, src_c_memory); - - auto weights_memory = dnnl::memory(formatted_md(weights_dims, tag::ldigo), eng); - auto weights_h_memory = dnnl::memory(formatted_md(weights_h_dims, tag::ldigo), eng); - auto bias_memory = dnnl::memory(formatted_md(bias_dims, tag::ldgo), eng); - write_to_dnnl_memory(inputs[3]->addr, weights_memory); - write_to_dnnl_memory(reinterpret_cast(inputs[3]->addr) + weight_size_, weights_h_memory); - write_to_dnnl_memory(reinterpret_cast(inputs[3]->addr) + weight_size_ + weight_h_size_, bias_memory); + auto desc = std::make_shared(dnnl::prop_kind::forward_training, direction, src_desc, + src_h_desc, src_c_desc, formatted_md(weights_dims_, tag::any), + formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, + 
dst_h_desc, dst_c_desc); + prim_desc_ = dnnl::lstm_forward::primitive_desc(*desc, eng); + primitive_ = std::make_shared(prim_desc_); + AddArgument(DNNL_ARG_SRC_LAYER, src_desc); + AddArgument(DNNL_ARG_SRC_ITER, src_h_desc); + AddArgument(DNNL_ARG_SRC_ITER_C, src_c_desc); + AddArgument(DNNL_ARG_WEIGHTS_LAYER, prim_desc_.weights_layer_desc()); + AddArgument(DNNL_ARG_WEIGHTS_ITER, prim_desc_.weights_iter_desc()); + AddArgument(DNNL_ARG_BIAS, bias_desc); + AddArgument(DNNL_ARG_DST_LAYER, dst_desc); + AddArgument(DNNL_ARG_DST_ITER, dst_h_desc); + AddArgument(DNNL_ARG_DST_ITER_C, dst_c_desc); + AddArgument(DNNL_ARG_WORKSPACE, prim_desc_.workspace_desc()); +} - auto dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng); - auto dst_h_memory = dnnl::memory(prim_desc.dst_iter_desc(), eng); - auto dst_c_memory = dnnl::memory(prim_desc.dst_iter_c_desc(), eng); - dnnl::lstm_forward fw_layer(prim_desc); - workspace_memory.set_data_handle(outputs[3]->addr); - dst_memory.set_data_handle(outputs[0]->addr); - dst_h_memory.set_data_handle(outputs[1]->addr); - dst_c_memory.set_data_handle(outputs[2]->addr); - fw_layer.execute(s, {{DNNL_ARG_SRC_LAYER, src_memory}, - {DNNL_ARG_SRC_ITER, src_h_memory}, - {DNNL_ARG_SRC_ITER_C, src_c_memory}, - {DNNL_ARG_WEIGHTS_LAYER, weights_memory}, - {DNNL_ARG_WEIGHTS_ITER, weights_h_memory}, - {DNNL_ARG_BIAS, bias_memory}, - {DNNL_ARG_DST_LAYER, dst_memory}, - {DNNL_ARG_DST_ITER, dst_h_memory}, - {DNNL_ARG_DST_ITER_C, dst_c_memory}, - {DNNL_ARG_WORKSPACE, workspace_memory}}); +bool LstmCPUKernel::Launch(const std::vector &inputs, + const std::vector & /*workspace*/, + const std::vector &outputs) { + using dt = dnnl::memory::data_type; + using tag = dnnl::memory::format_tag; + auto eng = MKLKernelEngine::Get().engine(); + auto user_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng); + auto user_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng); + auto 
weights_memory = dnnl::memory(prim_desc_.weights_layer_desc(), eng); + auto weights_h_memory = dnnl::memory(prim_desc_.weights_iter_desc(), eng); + user_weights_memory.set_data_handle(inputs[3]->addr); + user_weights_h_memory.set_data_handle(reinterpret_cast(inputs[3]->addr) + weight_size_); + Reorder(&user_weights_memory, &weights_memory); + Reorder(&user_weights_h_memory, &weights_h_memory); + auto bias_memory = dnnl::memory(prim_desc_.bias_desc(), eng); + if (has_bias_) { + bias_memory.set_data_handle(reinterpret_cast(inputs[3]->addr) + weight_size_ + weight_h_size_); + } else { + auto ret = + memset_s(bias_memory.get_data_handle(), prim_desc_.bias_desc().get_size(), 0, prim_desc_.bias_desc().get_size()); + if (ret != 0) { + MS_LOG(EXCEPTION) << "bias memset error"; + } + } + // set handle + SetArgumentHandle(DNNL_ARG_SRC_LAYER, inputs[0]->addr); + SetArgumentHandle(DNNL_ARG_SRC_ITER, inputs[1]->addr); + SetArgumentHandle(DNNL_ARG_SRC_ITER_C, inputs[2]->addr); + SetArgumentHandle(DNNL_ARG_WEIGHTS_LAYER, weights_memory.get_data_handle()); + SetArgumentHandle(DNNL_ARG_WEIGHTS_ITER, weights_h_memory.get_data_handle()); + SetArgumentHandle(DNNL_ARG_BIAS, bias_memory.get_data_handle()); + SetArgumentHandle(DNNL_ARG_DST_LAYER, outputs[0]->addr); + SetArgumentHandle(DNNL_ARG_DST_ITER, outputs[1]->addr); + SetArgumentHandle(DNNL_ARG_DST_ITER_C, outputs[2]->addr); + SetArgumentHandle(DNNL_ARG_WORKSPACE, outputs[3]->addr); + ExecutePrimitive(); return true; } - } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h index 6cb9a1ff74..d42ff803f0 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h @@ -14,8 +14,14 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H -#define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H_ +#if defined(__x86_64__) || defined(__amd64__) || defined(_M_IX86) || defined(_M_X64) +#define PLATFORM_86 +#endif +#ifdef PLATFORM_86 +#include +#endif #include #include #include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" @@ -40,6 +46,11 @@ class LstmCPUKernel : public MKLCPUKernel { int seq_len_; int num_directions_; bool bidirectional_; + bool has_bias_; + dnnl::memory::dims weights_dims_; + dnnl::memory::dims weights_h_dims_; + dnnl::memory::dims bias_dims_; + dnnl::lstm_forward::primitive_desc prim_desc_; }; MS_REG_CPU_KERNEL(LSTM, diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc index df4744db6f..d7e7701d85 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc @@ -24,39 +24,41 @@ namespace mindspore { namespace kernel { - void LSTMGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); + using tag = dnnl::memory::format_tag; + using dim = dnnl::memory::dims; + auto eng = MKLKernelEngine::Get().engine(); std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + std::vector src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); + std::vector src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); bidirectional_ = AnfAlgo::GetNodeAttr(kernel_node, "bidirectional"); input_size_ = AnfAlgo::GetNodeAttr(kernel_node, "input_size"); hidden_size_ = AnfAlgo::GetNodeAttr(kernel_node, "hidden_size"); num_layers_ = AnfAlgo::GetNodeAttr(kernel_node, "num_layers"); + has_bias_ = AnfAlgo::GetNodeAttr(kernel_node, "has_bias"); batch_size_ = SizeToInt(src_shape[1]); seq_len_ = SizeToInt(src_shape[0]); num_directions_ = 1; if (bidirectional_) { 
num_directions_ = 2; } - int gate_size = 4 * hidden_size_; + if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) { + MS_LOG(EXCEPTION) << "error iteration shape!"; + } + if (num_layers_ <= 0) { + MS_LOG(EXCEPTION) << "layers must be greater than zero!"; + } + if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) { + MS_LOG(EXCEPTION) << "conv2d only support 3-D input!"; + } + const int gate_size = 4 * hidden_size_; for (int i = 0; i < num_layers_; ++i) { weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_); weight_h_size_ += gate_size * hidden_size_; } weight_size_ = weight_size_ * num_directions_; weight_h_size_ = weight_h_size_ * num_directions_; -} - -bool LSTMGradCPUKernel::Launch(const std::vector &inputs, - const std::vector &workspace /*workspace*/, - const std::vector &outputs) { - using tag = dnnl::memory::format_tag; - using dt = dnnl::memory::data_type; - using dim = dnnl::memory::dims; - auto eng = MKLKernelEngine::Get().engine(); - dnnl::stream s(eng); - auto formatted_md = [](dim dimensions, tag layout) { return dnnl::memory::desc{{dimensions}, dt::f32, layout}; }; - auto generic_md = [](dim dimensions) { return dnnl::memory::desc{{dimensions}, dt::f32, tag::any}; }; dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional; if (bidirectional_) { direction = dnnl::rnn_direction::bidirectional_concat; @@ -64,105 +66,130 @@ bool LSTMGradCPUKernel::Launch(const std::vector &inputs, dim src_dims = {seq_len_, batch_size_, input_size_}; dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; - dim weights_dims = {num_layers_, num_directions_, input_size_, 4, hidden_size_}; - dim weights_h_dims = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_}; - dim bias_dims = {num_layers_, num_directions_, 4, hidden_size_}; + weights_dims_ = {num_layers_, num_directions_, input_size_, 
4, hidden_size_}; + weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_}; + bias_dims_ = {num_layers_, num_directions_, 4, hidden_size_}; dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_}; dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; - dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc); dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc); dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc); - dnnl::memory::desc weights_desc = formatted_md(weights_dims, tag::ldigo); - dnnl::memory::desc weights_h_desc = formatted_md(weights_h_dims, tag::ldigo); - dnnl::memory::desc bias_desc = formatted_md(bias_dims, tag::ldgo); + dnnl::memory::desc bias_desc = formatted_md(bias_dims_, tag::ldgo); dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc); dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc); dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc); + auto forward_desc = std::make_shared( + dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc, + formatted_md(weights_dims_, tag::any), formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, dst_h_desc, + dst_c_desc); + auto prim_forward_desc = dnnl::lstm_forward::primitive_desc(*forward_desc, eng); + auto backward_desc = std::make_shared( + dnnl::prop_kind::backward, direction, src_desc, src_h_desc, src_c_desc, formatted_md(weights_dims_, tag::any), + formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, dst_h_desc, dst_c_desc, src_desc, src_h_desc, + src_c_desc, formatted_md(weights_dims_, tag::any), formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, + dst_h_desc, dst_c_desc); + prim_backward_desc_ = dnnl::lstm_backward::primitive_desc(*backward_desc, eng, prim_forward_desc); + primitive_ = std::make_shared(prim_backward_desc_); - 
dnnl::lstm_forward::desc forward_desc = - dnnl::lstm_forward::desc(dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc, - weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc); - auto prim_forward_desc = dnnl::lstm_forward::primitive_desc(forward_desc, eng); + AddArgument(DNNL_ARG_SRC_LAYER, src_desc); + AddArgument(DNNL_ARG_SRC_ITER, src_h_desc); + AddArgument(DNNL_ARG_SRC_ITER_C, src_c_desc); + AddArgument(DNNL_ARG_WEIGHTS_LAYER, prim_backward_desc_.weights_layer_desc()); + AddArgument(DNNL_ARG_WEIGHTS_ITER, prim_backward_desc_.weights_iter_desc()); + AddArgument(DNNL_ARG_BIAS, bias_desc); + AddArgument(DNNL_ARG_DST_LAYER, dst_desc); + AddArgument(DNNL_ARG_DST_ITER, dst_h_desc); + AddArgument(DNNL_ARG_DST_ITER_C, dst_c_desc); + AddArgument(DNNL_ARG_WORKSPACE, prim_forward_desc.workspace_desc()); + AddArgument(DNNL_ARG_DIFF_SRC_LAYER, src_desc); + AddArgument(DNNL_ARG_DIFF_SRC_ITER, src_h_desc); + AddArgument(DNNL_ARG_DIFF_SRC_ITER_C, src_c_desc); + AddArgument(DNNL_ARG_DIFF_WEIGHTS_LAYER, prim_backward_desc_.diff_weights_layer_desc()); + AddArgument(DNNL_ARG_DIFF_WEIGHTS_ITER, prim_backward_desc_.diff_weights_iter_desc()); + AddArgument(DNNL_ARG_DIFF_BIAS, bias_desc); + AddArgument(DNNL_ARG_DIFF_DST_LAYER, dst_desc); + AddArgument(DNNL_ARG_DIFF_DST_ITER, dst_h_desc); + AddArgument(DNNL_ARG_DIFF_DST_ITER_C, dst_c_desc); +} - dnnl::lstm_backward::desc backward_desc = dnnl::lstm_backward::desc( - dnnl::prop_kind::backward, direction, src_desc, src_h_desc, src_c_desc, generic_md(weights_dims), - generic_md(weights_h_dims), generic_md(bias_dims), dst_desc, dst_h_desc, dst_c_desc, src_desc, src_h_desc, - src_c_desc, weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc); - auto prim_backward_desc = dnnl::lstm_backward::primitive_desc(backward_desc, eng, prim_forward_desc); +bool LSTMGradCPUKernel::Launch(const std::vector &inputs, + const std::vector &workspace /*workspace*/, + const std::vector 
&outputs) { + using dt = dnnl::memory::data_type; + using tag = dnnl::memory::format_tag; + auto eng = MKLKernelEngine::Get().engine(); // construct fw memory - auto src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng); - write_to_dnnl_memory(inputs[0]->addr, src_memory); - - auto src_h_memory = dnnl::memory(prim_forward_desc.src_iter_desc(), eng); - auto src_c_memory = dnnl::memory(prim_forward_desc.src_iter_c_desc(), eng); - write_to_dnnl_memory(inputs[1]->addr, src_h_memory); - write_to_dnnl_memory(inputs[2]->addr, src_c_memory); - - auto user_weights_memory = dnnl::memory(formatted_md(weights_dims, tag::ldigo), eng); - auto user_weights_h_memory = dnnl::memory(formatted_md(weights_h_dims, tag::ldigo), eng); - auto user_bias_memory = dnnl::memory(formatted_md(bias_dims, tag::ldgo), eng); - write_to_dnnl_memory(inputs[3]->addr, user_weights_memory); - write_to_dnnl_memory(reinterpret_cast(inputs[3]->addr) + weight_size_, user_weights_h_memory); - write_to_dnnl_memory(reinterpret_cast(inputs[3]->addr) + weight_size_ + weight_h_size_, user_bias_memory); - auto weights_memory = dnnl::memory(prim_backward_desc.weights_layer_desc(), eng); - auto weights_h_memory = dnnl::memory(prim_backward_desc.weights_iter_desc(), eng); - auto bias_memory = dnnl::memory(prim_forward_desc.bias_desc(), eng); - dnnl::reorder(user_weights_memory, weights_memory).execute(s, user_weights_memory, weights_memory); - dnnl::reorder(user_weights_h_memory, weights_h_memory).execute(s, user_weights_h_memory, weights_h_memory); - dnnl::reorder(user_bias_memory, bias_memory).execute(s, user_bias_memory, bias_memory); - - auto dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng); - write_to_dnnl_memory(reinterpret_cast(inputs[4]->addr), dst_memory); - auto dst_h_memory = dnnl::memory(prim_backward_desc.dst_iter_desc(), eng); - write_to_dnnl_memory(reinterpret_cast(inputs[5]->addr), dst_h_memory); - auto dst_c_memory = dnnl::memory(prim_backward_desc.dst_iter_c_desc(), eng); 
- write_to_dnnl_memory(reinterpret_cast(inputs[6]->addr), dst_c_memory); - auto workspace_memory = dnnl::memory(prim_forward_desc.workspace_desc(), eng); - write_to_dnnl_memory(inputs[10]->addr, workspace_memory); - - // construct diff memory - auto diff_src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng); - auto diff_src_h_memory = dnnl::memory(prim_backward_desc.diff_src_iter_desc(), eng); - auto diff_src_c_memory = dnnl::memory(prim_backward_desc.diff_src_iter_c_desc(), eng); - - auto diff_weights_memory = dnnl::memory(prim_backward_desc.diff_weights_layer_desc(), eng); - auto diff_weights_h_memory = dnnl::memory(prim_backward_desc.diff_weights_iter_desc(), eng); - auto diff_bias_memory = dnnl::memory(prim_backward_desc.diff_bias_desc(), eng); - auto diff_dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng); - write_to_dnnl_memory(reinterpret_cast(inputs[7]->addr), diff_dst_memory); - auto diff_dst_h_memory = dnnl::memory(prim_backward_desc.diff_dst_iter_desc(), eng); - write_to_dnnl_memory(reinterpret_cast(inputs[8]->addr), diff_dst_h_memory); - auto diff_dst_c_memory = dnnl::memory(prim_backward_desc.diff_dst_iter_c_desc(), eng); - write_to_dnnl_memory(reinterpret_cast(inputs[9]->addr), diff_dst_c_memory); - - diff_src_memory.set_data_handle(outputs[0]->addr); - diff_src_h_memory.set_data_handle(outputs[1]->addr); - diff_src_c_memory.set_data_handle(outputs[2]->addr); - diff_weights_memory.set_data_handle(outputs[3]->addr); - diff_weights_h_memory.set_data_handle(reinterpret_cast(outputs[3]->addr) + weight_size_); - diff_bias_memory.set_data_handle(reinterpret_cast(outputs[3]->addr) + weight_size_ + weight_h_size_); - dnnl::lstm_backward bwd_layer(prim_backward_desc); - bwd_layer.execute(s, {{DNNL_ARG_SRC_LAYER, src_memory}, - {DNNL_ARG_SRC_ITER, src_h_memory}, - {DNNL_ARG_SRC_ITER_C, src_c_memory}, - {DNNL_ARG_WEIGHTS_LAYER, weights_memory}, - {DNNL_ARG_WEIGHTS_ITER, weights_h_memory}, - {DNNL_ARG_BIAS, bias_memory}, - 
{DNNL_ARG_DST_LAYER, dst_memory}, - {DNNL_ARG_DST_ITER, dst_h_memory}, - {DNNL_ARG_DST_ITER_C, dst_c_memory}, - {DNNL_ARG_DIFF_SRC_LAYER, diff_src_memory}, - {DNNL_ARG_DIFF_SRC_ITER, diff_src_h_memory}, - {DNNL_ARG_DIFF_SRC_ITER_C, diff_src_c_memory}, - {DNNL_ARG_DIFF_WEIGHTS_LAYER, diff_weights_memory}, - {DNNL_ARG_DIFF_WEIGHTS_ITER, diff_weights_h_memory}, - {DNNL_ARG_DIFF_BIAS, diff_bias_memory}, - {DNNL_ARG_DIFF_DST_LAYER, diff_dst_memory}, - {DNNL_ARG_DIFF_DST_ITER, diff_dst_h_memory}, - {DNNL_ARG_DIFF_DST_ITER_C, diff_dst_c_memory}, - {DNNL_ARG_WORKSPACE, workspace_memory}}); + auto user_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng); + auto user_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng); + auto weights_memory = dnnl::memory(prim_backward_desc_.weights_layer_desc(), eng); + auto weights_h_memory = dnnl::memory(prim_backward_desc_.weights_iter_desc(), eng); + auto bias_memory = dnnl::memory(prim_backward_desc_.bias_desc(), eng); + user_weights_memory.set_data_handle(inputs[3]->addr); + user_weights_h_memory.set_data_handle(reinterpret_cast(inputs[3]->addr) + weight_size_); + Reorder(&user_weights_memory, &weights_memory); + Reorder(&user_weights_h_memory, &weights_h_memory); + if (has_bias_) { + bias_memory.set_data_handle(reinterpret_cast(inputs[3]->addr) + weight_size_ + weight_h_size_); + } else { + if (memset_s(bias_memory.get_data_handle(), prim_backward_desc_.bias_desc().get_size(), 0, + prim_backward_desc_.bias_desc().get_size())) { + MS_LOG(EXCEPTION) << "bias memset error"; + } + } + // construct bw memory + auto diff_weights_memory = dnnl::memory(prim_backward_desc_.diff_weights_layer_desc(), eng); + auto diff_weights_h_memory = dnnl::memory(prim_backward_desc_.diff_weights_iter_desc(), eng); + auto diff_bias_memory = dnnl::memory(prim_backward_desc_.diff_bias_desc(), eng); + auto user_diff_weights_memory = 
dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng); + auto user_diff_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng); + user_diff_weights_memory.set_data_handle(outputs[3]->addr); + user_diff_weights_h_memory.set_data_handle(reinterpret_cast(outputs[3]->addr) + weight_size_); + if (memset_s(user_diff_weights_memory.get_data_handle(), user_diff_weights_memory.get_desc().get_size(), 0, + user_diff_weights_memory.get_desc().get_size())) { + MS_LOG(EXCEPTION) << "user weights grad memset error"; + } + if (memset_s(user_diff_weights_h_memory.get_data_handle(), user_diff_weights_h_memory.get_desc().get_size(), 0, + user_diff_weights_h_memory.get_desc().get_size())) { + MS_LOG(EXCEPTION) << "user weights iter grad memset error"; + } + if (has_bias_) { + diff_bias_memory.set_data_handle(reinterpret_cast(outputs[3]->addr) + weight_size_ + weight_h_size_); + } + if (memset_s(diff_bias_memory.get_data_handle(), prim_backward_desc_.diff_bias_desc().get_size(), 0, + prim_backward_desc_.diff_bias_desc().get_size())) { + MS_LOG(EXCEPTION) << "bias grad memset error"; + } + if (memset_s(diff_weights_memory.get_data_handle(), diff_weights_memory.get_desc().get_size(), 0, + diff_weights_memory.get_desc().get_size())) { + MS_LOG(EXCEPTION) << "weights grad memset error"; + } + if (memset_s(diff_weights_h_memory.get_data_handle(), diff_weights_h_memory.get_desc().get_size(), 0, + diff_weights_h_memory.get_desc().get_size())) { + MS_LOG(EXCEPTION) << "weights iter grad memset error"; + } + SetArgumentHandle(DNNL_ARG_SRC_LAYER, inputs[0]->addr); + SetArgumentHandle(DNNL_ARG_SRC_ITER, inputs[1]->addr); + SetArgumentHandle(DNNL_ARG_SRC_ITER_C, inputs[2]->addr); + SetArgumentHandle(DNNL_ARG_WEIGHTS_LAYER, weights_memory.get_data_handle()); + SetArgumentHandle(DNNL_ARG_WEIGHTS_ITER, weights_h_memory.get_data_handle()); + SetArgumentHandle(DNNL_ARG_BIAS, bias_memory.get_data_handle()); + 
SetArgumentHandle(DNNL_ARG_DST_LAYER, inputs[4]->addr); + SetArgumentHandle(DNNL_ARG_DST_ITER, inputs[5]->addr); + SetArgumentHandle(DNNL_ARG_DST_ITER_C, inputs[6]->addr); + SetArgumentHandle(DNNL_ARG_WORKSPACE, inputs[10]->addr); + SetArgumentHandle(DNNL_ARG_DIFF_SRC_LAYER, outputs[0]->addr); + SetArgumentHandle(DNNL_ARG_DIFF_SRC_ITER, outputs[1]->addr); + SetArgumentHandle(DNNL_ARG_DIFF_SRC_ITER_C, outputs[2]->addr); + SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS_LAYER, diff_weights_memory.get_data_handle()); + SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS_ITER, diff_weights_h_memory.get_data_handle()); + SetArgumentHandle(DNNL_ARG_DIFF_BIAS, diff_bias_memory.get_data_handle()); + SetArgumentHandle(DNNL_ARG_DIFF_DST_LAYER, inputs[7]->addr); + SetArgumentHandle(DNNL_ARG_DIFF_DST_ITER, inputs[8]->addr); + SetArgumentHandle(DNNL_ARG_DIFF_DST_ITER_C, inputs[9]->addr); + ExecutePrimitive(); + Reorder(&diff_weights_memory, &user_diff_weights_memory); + Reorder(&diff_weights_h_memory, &user_diff_weights_h_memory); return true; } } // namespace kernel diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h index 22ec1f62db..1f3fb824c0 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h @@ -41,6 +41,11 @@ class LSTMGradCPUKernel : public MKLCPUKernel { int seq_len_; int num_directions_; bool bidirectional_; + bool has_bias_; + dnnl::memory::dims weights_dims_; + dnnl::memory::dims weights_h_dims_; + dnnl::memory::dims bias_dims_; + dnnl::lstm_backward::primitive_desc prim_backward_desc_; }; MS_REG_CPU_KERNEL(LSTMGrad, @@ -63,5 +68,4 @@ MS_REG_CPU_KERNEL(LSTMGrad, LSTMGradCPUKernel); } // namespace kernel } // namespace mindspore - #endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc index 
17fca72698..a38470e3a3 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc @@ -98,11 +98,9 @@ void MKLCPUKernel::SetArgumentHandle(int arg_key, void *ptr) { } void MKLCPUKernel::ExecutePrimitive() { MKLKernelEngine::Get().Execute(primitive_, arguments_); } -void MKLCPUKernel::write_to_dnnl_memory(void *handle, const dnnl::memory &mem) { - MKLKernelEngine::Get().write_to_dnnl_memory(handle, mem); -} -void MKLCPUKernel::read_from_dnnl_memory(void *handle, const dnnl::memory &mem) { - MKLKernelEngine::Get().read_from_dnnl_memory(handle, mem); + +void MKLCPUKernel::Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem) { + MKLKernelEngine::Get().Reorder(src_mem, dst_mem); } } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h index a6b8d68627..10a860afff 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h @@ -39,10 +39,12 @@ class MKLCPUKernel : public CPUKernel { dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const; dnnl::memory::desc GetDefaultMemDesc(const std::vector &shape); void ExecutePrimitive(); - void write_to_dnnl_memory(void *handle, const dnnl::memory &mem); - void read_from_dnnl_memory(void *handle, const dnnl::memory &mem); std::unordered_map arguments_; std::shared_ptr primitive_{nullptr}; + inline dnnl::memory::desc formatted_md(const dnnl::memory::dims &dimensions, dnnl::memory::format_tag layout) { + return dnnl::memory::desc{{dimensions}, dnnl::memory::data_type::f32, layout}; + } + void Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem); }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc index f5270a4e9a..5ae9791b12 100644 --- 
a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc @@ -33,5 +33,8 @@ dnnl::memory MKLKernelEngine::CreateMemory(const dnnl::memory::desc &mem_desc, b return dnnl::memory(mem_desc, engine_, nullptr); } } +void MKLKernelEngine::Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem) { + dnnl::reorder(*src_mem, *dst_mem).execute(stream_, *src_mem, *dst_mem); +} } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h index b0eaaf405f..99e7ecdfe0 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h @@ -41,30 +41,7 @@ class MKLKernelEngine { void Execute(const std::shared_ptr &primitive, const std::unordered_map &arguments); - - inline void read_from_dnnl_memory(void *handle, const dnnl::memory &mem) { - dnnl::engine eng = mem.get_engine(); - size_t bytes = mem.get_desc().get_size(); - if (eng.get_kind() == dnnl::engine::kind::cpu) { - auto dst = reinterpret_cast(handle); - uint8_t *src = reinterpret_cast(mem.get_data_handle()); - for (size_t i = 0; i < bytes; ++i) { - dst[i] = src[i]; - } - } - } - // Read from handle, write to memory - inline void write_to_dnnl_memory(void *handle, const dnnl::memory &mem) { - dnnl::engine eng = mem.get_engine(); - size_t bytes = mem.get_desc().get_size(); - if (eng.get_kind() == dnnl::engine::kind::cpu) { - auto src = reinterpret_cast(handle); - uint8_t *dst = reinterpret_cast(mem.get_data_handle()); - for (size_t i = 0; i < bytes; ++i) { - dst[i] = src[i]; - } - } - } + void Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem); private: MKLKernelEngine() : engine_(dnnl::engine::kind::cpu, 0), stream_(engine_) {} diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc 
b/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc new file mode 100644 index 0000000000..05b1a79924 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc @@ -0,0 +1,99 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h" +#include +#include +#include +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" +#include "device/cpu/cpu_device_address.h" +#include "common/utils.h" + +namespace mindspore { +namespace kernel { +void SoftmaxCrossEntropyWithLogitsCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { + CPUKernel::InitInputOutputSize(kernel_node); + MS_EXCEPTION_IF_NULL(kernel_node); + size_t type_size = sizeof(float); + std::vector shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + size_t tensor_size = std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies()); + workspace_size_list_.emplace_back(tensor_size); +} + +void SoftmaxCrossEntropyWithLogitsCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::vector shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + dnnl::memory::dims mem_dims; + mem_dims.insert(mem_dims.end(), shape.begin(), shape.end()); + if (mem_dims.size() != 2) { + MS_LOG(EXCEPTION) << "SoftmaxCrossEntropyWithLogits kernel dims invalid " << 
mem_dims.size(); + } + batch_size_ = shape[0]; + class_num_ = shape[1]; + if (batch_size_ == 0 || class_num_ == 0) { + MS_LOG(EXCEPTION) << "invalid batch size or class num input!"; + } + dnnl::memory::desc mem_desc(mem_dims, dnnl::memory::data_type::f32, dnnl::memory::format_tag::nc); + + dnnl::softmax_forward::desc desc = dnnl::softmax_forward::desc(dnnl::prop_kind::forward_training, mem_desc, 1); + auto prim_desc = dnnl::softmax_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); + primitive_ = std::make_shared(prim_desc); + + AddArgument(DNNL_ARG_SRC, mem_desc); + AddArgument(DNNL_ARG_DST, mem_desc); +} + +void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *logits, const float *labels, + float *output1, float *output2) const { + float epsilon = 1e-6; + for (size_t i = 0; i < batch_size_; ++i) { + output1[i] = 0; + float loss = 0.0; + for (size_t j = 0; j < class_num_; ++j) { + float logit = logf(logits[i * class_num_ + j] <= 0.0 ? epsilon : logits[i * class_num_ + j]); + output2[i * class_num_ + j] = logits[i * class_num_ + j] - labels[i * class_num_ + j]; + loss += labels[i * class_num_ + j] * logit; + } + output1[i] = -loss; + } +} + +bool SoftmaxCrossEntropyWithLogitsCPUKernel::Launch(const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs) { + if (inputs.empty() || workspace.empty() || outputs.empty()) { + MS_LOG(EXCEPTION) << "error input output size!"; + } + size_t batch_float_size = batch_size_ * sizeof(float); + size_t batch_class_float_size = class_num_ * batch_float_size; + if (inputs[0]->size != workspace[0]->size || inputs[0]->size != batch_class_float_size || + inputs[1]->size != batch_class_float_size) { + MS_LOG(EXCEPTION) << "error input data size!"; + } + if (outputs[1]->size != batch_class_float_size || outputs[0]->size != batch_float_size) { + MS_LOG(EXCEPTION) << "error output data size!"; + } + SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); + 
SetArgumentHandle(DNNL_ARG_DST, workspace[0]->addr); + ExecutePrimitive(); + auto labels = reinterpret_cast(inputs[1]->addr); + auto logits = reinterpret_cast(workspace[0]->addr); + auto output1 = reinterpret_cast(outputs[0]->addr); + auto output2 = reinterpret_cast(outputs[1]->addr); + ForwardPostExecute(logits, labels, output1, output2); + return true; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h new file mode 100644 index 0000000000..f663508059 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_ + +#include +#include +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" + +namespace mindspore { +namespace kernel { +class SoftmaxCrossEntropyWithLogitsCPUKernel : public MKLCPUKernel { + public: + SoftmaxCrossEntropyWithLogitsCPUKernel() = default; + ~SoftmaxCrossEntropyWithLogitsCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + protected: + void InitInputOutputSize(const CNodePtr &kernel_node) override; + + private: + void ForwardPostExecute(const float *logits, const float *labels, float *output1, float *output2) const; + size_t class_num_{0}; + size_t batch_size_{0}; +}; +MS_REG_CPU_KERNEL(SoftmaxCrossEntropyWithLogits, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + SoftmaxCrossEntropyWithLogitsCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.cc new file mode 100644 index 0000000000..b12371c933 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.cc @@ -0,0 +1,161 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include "kernel/cpu/reduce_cpu_kernel.h" +#include "device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +const size_t kReduceTypeMax = 0; +const size_t kReduceTypeMean = 1; +const size_t kReduceTypeSum = 2; +const size_t kMaxDim = 100; +void ReduceCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); + if (kernel_name == "ReduceMax") { + reduce_type_ = kReduceTypeMax; + } else if (kernel_name == "ReduceMean") { + reduce_type_ = kReduceTypeMean; + } else if (kernel_name == "ReduceSum") { + reduce_type_ = kReduceTypeSum; + } else { + MS_LOG(EXCEPTION) << "Array reduce kernel type " << kernel_name << " is not supported."; + } + shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + auto axis_addr = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(AXIS); + if (axis_addr->isa()) { + auto attr_axis = AnfAlgo::GetNodeAttr>(kernel_node, AXIS); + if (attr_axis.size() > shape_.size()) { + MS_LOG(EXCEPTION) << "invalid axis size: " << axis_.size(); + } else if (attr_axis.empty()) { + axis_.push_back(shape_.size() - 1); + } else { + for (auto axis : attr_axis) { + if (IntToSize(axis) >= (shape_.size())) { + MS_LOG(EXCEPTION) << "axis value is oversize."; + } + axis < 0 ? 
axis_.push_back(axis + shape_.size()) : axis_.push_back(axis); + } + } + } else if (axis_addr->isa()) { + int axis = AnfAlgo::GetNodeAttr(kernel_node, AXIS); + + if (axis >= 0 && IntToSize(axis) >= shape_.size()) { + MS_LOG(EXCEPTION) << "axis value is oversize."; + } + axis < 0 ? axis_.push_back(axis + shape_.size()) : axis_.push_back(axis); + } else { + MS_LOG(EXCEPTION) << "Attribute axis type is invalid."; + } + for (size_t i = 0; i < shape_.size(); ++i) { + if (shape_[i] <= 0) { + MS_LOG(EXCEPTION) << "shape value is invalid."; + } + left_dims_ *= shape_[i]; + } + for (size_t i = 0; i < axis_.size(); ++i) { + stride_ *= shape_[axis_[i]]; + } + if (stride_ <= 0) { + MS_LOG(EXCEPTION) << "stride_ must greater than zero."; + } + left_dims_ = left_dims_ / stride_; +} +bool ReduceCPUKernel::Launch(const std::vector &inputs, + const std::vector & /*workspaces*/, + const std::vector &outputs) { + if (inputs.empty() || outputs.empty()) { + MS_LOG(EXCEPTION) << "input or output empty!"; + } + size_t out_float_size = left_dims_ * sizeof(float); + size_t in_float_size = stride_ * out_float_size; + if (inputs[0]->size != in_float_size || outputs[0]->size != out_float_size) { + MS_LOG(EXCEPTION) << "invalid input or output data size!"; + } + auto input = reinterpret_cast(inputs[0]->addr); + auto output = reinterpret_cast(outputs[0]->addr); + int size = inputs[0]->size / sizeof(float); + std::vector new_input(IntToSize(size), 0.0); + std::vector transpose_axis; + for (size_t i = 0; i < shape_.size(); ++i) { + bool insert = true; + for (size_t j = 0; j < axis_.size(); ++j) { + if (axis_[j] == i) { + insert = false; + break; + } + } + if (insert) { + transpose_axis.push_back(i); + } + } + (void)transpose_axis.insert(transpose_axis.end(), axis_.begin(), axis_.end()); + Transpose(size, input, shape_, transpose_axis, SizeToInt(shape_.size()), &new_input[0]); + if (reduce_type_ == kReduceTypeMax) { + for (size_t i = 0; i < left_dims_; ++i) { + float value = new_input[i * 
stride_]; + for (size_t k = 0; k < stride_; ++k) { + if (value < new_input[i * stride_ + k]) { + value = new_input[i * stride_ + k]; + } + } + output[i] = value; + } + } else { + for (size_t i = 0; i < left_dims_; ++i) { + float value = 0.0; + for (size_t k = 0; k < stride_; ++k) { + value += new_input[i * stride_ + k]; + } + if (reduce_type_ == kReduceTypeMean) { + output[i] = value / stride_; + } else { + output[i] = value; + } + } + } + return true; +} +void ReduceCPUKernel::Transpose(const int size, const float *input, const std::vector &input_shape, + const std::vector &input_axis, const int shape_size, float *output) { + int pos_array[kMaxDim]; + int size_offset[kMaxDim]; + size_offset[0] = size / SizeToInt(input_shape[0]); + for (int i = 1; i < shape_size; i++) { + size_offset[i] = size_offset[i - 1] / SizeToInt(input_shape[i]); + } + for (int position = 0; position < size; position += 1) { + int temp_position = position; + pos_array[0] = temp_position / size_offset[0]; + for (int i = 1; i < shape_size; i++) { + temp_position -= pos_array[i - 1] * size_offset[i - 1]; + pos_array[i] = temp_position / size_offset[i]; + } + int new_position = pos_array[SizeToInt(input_axis[shape_size - 1])]; + int new_position_size = 1; + for (int j = shape_size - 2; j >= 0; j--) { + new_position_size *= SizeToInt(input_shape[SizeToInt(input_axis[j + 1])]); + new_position += pos_array[SizeToInt(input_axis[j])] * new_position_size; + } + output[new_position] = input[position]; + } + return; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.h new file mode 100644 index 0000000000..27d28ba3bd --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.h @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_CPU_KERNEL_H_ +#include +#include +#include +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class ReduceCPUKernel : public CPUKernel { + public: + ReduceCPUKernel() = default; + ~ReduceCPUKernel() override = default; + void InitKernel(const CNodePtr &kernel_node) override; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + void Transpose(const int size, const float *input, const std::vector &input_shape, + const std::vector &input_axis, const int shape_size, float *output); + size_t reduce_type_; + std::vector axis_; + std::vector shape_; + size_t left_dims_ = 1; + size_t stride_ = 1; +}; +MS_REG_CPU_KERNEL(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ReduceCPUKernel); +MS_REG_CPU_KERNEL(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ReduceCPUKernel); +MS_REG_CPU_KERNEL(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ReduceCPUKernel); + +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.cc new file mode 100644 index 0000000000..fd8a74eb6b --- /dev/null +++ 
b/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.cc @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "kernel/cpu/reduce_scatter_cpu_kernel.h" +#include "device/cpu/cpu_device_address.h" +#include "device/cpu/mpi/mpi_adapter.h" +#include "ir/primitive.h" + +namespace mindspore { +namespace kernel { +namespace { +constexpr auto kRanksGroup = "group"; +} // namespace + +ReduceScatterCPUKernel::ReduceScatterCPUKernel() : op_type_(device::cpu::kOpTypeSum) {} + +void ReduceScatterCPUKernel::InitKernel(const CNodePtr &kernel_node) { + auto op = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("op"); + if (op != nullptr) { + op_type_ = GetValue(op); + } + + auto ranks_group = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kRanksGroup); + if (ranks_group != nullptr) { + ranks_group_ = GetValue>(ranks_group); + } else { + MS_LOG(EXCEPTION) << "Miss attribute " << kRanksGroup; + } +} + +bool ReduceScatterCPUKernel::Launch(const std::vector &inputs, + const std::vector & /*workspace*/, + const std::vector &outputs) { + auto input_addr = reinterpret_cast(inputs[0]->addr); + auto output_addr = reinterpret_cast(outputs[0]->addr); + auto output_data_num = outputs[0]->size / sizeof(float); + + return device::cpu::MPIAdapter::Instance().ReduceScatter(input_addr, output_addr, ranks_group_, output_data_num, + op_type_); +} +} // namespace kernel +} // namespace mindspore diff --git 
a/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.h new file mode 100644 index 0000000000..c3bfe571a4 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.h @@ -0,0 +1,45 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_ +#include +#include +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class ReduceScatterCPUKernel : public CPUKernel { + public: + ReduceScatterCPUKernel(); + ~ReduceScatterCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + std::string op_type_; + std::vector ranks_group_; +}; + +MS_REG_CPU_KERNEL(HostReduceScatter, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ReduceScatterCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc index b1565425e0..d2530430e9 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc 
+++ b/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc @@ -23,7 +23,6 @@ void SliceCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node); input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); - CPUKernelUtils::ExpandDimsTo4(&output_shape_); begin_ = AnfAlgo::GetNodeAttr>(kernel_node, BEGIN); for (size_t i = 0; i < begin_.size(); i++) { @@ -61,6 +60,15 @@ void SliceCPUKernel::InitKernel(const CNodePtr &kernel_node) { end_.emplace_back(begin_[i] + sizes[i]); } } + + ExpandAllMemberDims(); + CPUKernelUtils::GetElementNumEveryDim(input_shape_, &input_element_num_); + CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_); +} + +void SliceCPUKernel::ExpandAllMemberDims() { + CPUKernelUtils::ExpandDimsTo4(&output_shape_); + auto input_len = input_shape_.size(); if (input_len < 4) { for (size_t i = 0; i < 4 - input_len; ++i) { @@ -78,12 +86,40 @@ bool SliceCPUKernel::Launch(const std::vector &inputs, auto input_addr = reinterpret_cast(inputs[0]->addr); auto output_addr = reinterpret_cast(outputs[0]->addr); - for (int i = begin_[0]; i < end_[0]; i += strides_[0]) { - for (int j = begin_[1]; j < end_[1]; j += strides_[1]) { - for (int k = begin_[2]; k < end_[2]; k += strides_[2]) { + bool can_copy_memory[3] = {CanCopyMemoryOnAxis(0), CanCopyMemoryOnAxis(1), CanCopyMemoryOnAxis(2)}; + size_t in_start_offset[3] = {begin_[0] * input_element_num_[0], begin_[1] * input_element_num_[1], + begin_[2] * input_element_num_[2]}; + size_t in_step_size[3] = {strides_[0] * input_element_num_[0], strides_[1] * input_element_num_[1], + strides_[2] * input_element_num_[2]}; + + auto in_n_offset = in_start_offset[0]; + auto out_n_offset = 0; + for (int i = begin_[0]; i < end_[0]; + i += strides_[0], in_n_offset += in_step_size[0], out_n_offset += output_element_num_[0]) { + if (can_copy_memory[0]) { + CopyDataToOutput(inputs, in_n_offset, outputs, out_n_offset, 
input_element_num_[0]); + continue; + } + auto in_c_offset = in_start_offset[1]; + auto out_c_offset = 0; + for (int j = begin_[1]; j < end_[1]; + j += strides_[1], in_c_offset += in_step_size[1], out_c_offset += output_element_num_[1]) { + if (can_copy_memory[1]) { + CopyDataToOutput(inputs, in_n_offset + in_c_offset, outputs, out_n_offset + out_c_offset, + input_element_num_[1]); + continue; + } + auto in_h_offset = in_start_offset[2]; + auto out_h_offset = 0; + for (int k = begin_[2]; k < end_[2]; + k += strides_[2], in_h_offset += in_step_size[2], out_h_offset += output_element_num_[2]) { + if (can_copy_memory[2]) { + CopyDataToOutput(inputs, in_n_offset + in_c_offset + in_h_offset, outputs, + out_n_offset + out_c_offset + out_h_offset, input_element_num_[2]); + continue; + } for (int m = begin_[3]; m < end_[3]; m += strides_[3]) { - auto offset = CPUKernelUtils::CalcOffset(input_shape_, i, j, k, m); - *output_addr++ = input_addr[offset]; + *output_addr++ = input_addr[in_n_offset + in_c_offset + in_h_offset + m]; } } } @@ -92,7 +128,38 @@ bool SliceCPUKernel::Launch(const std::vector &inputs, return true; } -void SliceCPUKernel::CheckParam(const CNodePtr &kernel_node) { +bool SliceCPUKernel::CanCopyMemoryOnAxis(size_t dim) const { + for (size_t i = dim + 1; i < 4; ++i) { + if (begin_[i] != 0 || end_[i] != SizeToInt(input_shape_[i]) || strides_[i] != 1) { + return false; + } + } + return true; +} + +void SliceCPUKernel::CopyDataToOutput(const std::vector &inputs, size_t in_offset, + const std::vector &outputs, size_t out_offset, + size_t copy_num) const { + auto input_addr = reinterpret_cast(inputs[0]->addr); + auto in_buff_size = inputs[0]->size; + auto output_addr = reinterpret_cast(outputs[0]->addr); + auto out_buff_size = outputs[0]->size; + + if ((in_offset + copy_num) * sizeof(float) > in_buff_size) { + MS_LOG(EXCEPTION) << "input memory out of bounds."; + } + if ((out_offset + copy_num) * sizeof(float) > out_buff_size) { + MS_LOG(EXCEPTION) << "output 
memory out of bounds."; + } + + auto ret = memcpy_s(output_addr + out_offset, out_buff_size - out_offset * sizeof(float), input_addr + in_offset, + copy_num * sizeof(float)); + if (ret != EOK) { + MS_LOG(EXCEPTION) << "memcpy failed. ret:" << ret; + } +} + +void SliceCPUKernel::CheckParam(const CNodePtr &kernel_node) const { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 1) { MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but SliceCPUKernel needs 1 inputs."; diff --git a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h index 788c4f39ad..913c993d7a 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h @@ -33,12 +33,18 @@ class SliceCPUKernel : public CPUKernel { const std::vector &outputs) override; private: - void CheckParam(const CNodePtr &kernel_node); + void ExpandAllMemberDims(); + bool CanCopyMemoryOnAxis(size_t dim) const; + void CopyDataToOutput(const std::vector &inputs, size_t in_offset, + const std::vector &outputs, size_t out_offset, size_t copy_num) const; + void CheckParam(const CNodePtr &kernel_node) const; std::vector begin_; std::vector end_; std::vector strides_; std::vector input_shape_; + std::vector input_element_num_; std::vector output_shape_; + std::vector output_element_num_; }; MS_REG_CPU_KERNEL(Slice, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), diff --git a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc index 2a61a0259a..92eaffe8c6 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc @@ -21,13 +21,13 @@ namespace mindspore { namespace kernel { void SliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node); - output_dx_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); - input_dy_shape_ = 
AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); + input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); begin_ = AnfAlgo::GetNodeAttr>(kernel_node, BEGIN); for (size_t i = 0; i < begin_.size(); i++) { if (begin_[i] < 0) { - begin_[i] = begin_[i] + output_dx_shape_[i]; + begin_[i] = begin_[i] + output_shape_[i]; } } @@ -37,35 +37,43 @@ void SliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { if (strides != nullptr) { strides_ = AnfAlgo::GetNodeAttr>(kernel_node, STRIDES); end_ = AnfAlgo::GetNodeAttr>(kernel_node, END); - if (strides_.size() != end_.size() || strides_.size() != output_dx_shape_.size()) { + if (strides_.size() != end_.size() || strides_.size() != output_shape_.size()) { MS_LOG(EXCEPTION) << "stride|end|input size must be equal"; } for (size_t i = 0; i < strides_.size(); ++i) { if (strides_[i] < 0) { - strides_[i] = (strides_[i] + output_dx_shape_[i]) > 0 ? (strides_[i] + output_dx_shape_[i]) : 0; + strides_[i] = (strides_[i] + output_shape_[i]) > 0 ? (strides_[i] + output_shape_[i]) : 0; } if (end_[i] < 0) { - end_[i] = (end_[i] + output_dx_shape_[i]) > 0 ? (end_[i] + output_dx_shape_[i]) : 0; + end_[i] = (end_[i] + output_shape_[i]) > 0 ? (end_[i] + output_shape_[i]) : 0; } } } else { auto sizes = AnfAlgo::GetNodeAttr>(kernel_node, SIZE); - if (sizes.size() != output_dx_shape_.size() || begin_.size() != output_dx_shape_.size()) { + if (sizes.size() != output_shape_.size() || begin_.size() != output_shape_.size()) { MS_LOG(EXCEPTION) << "begin|size|input size must be equal"; } for (size_t i = 0; i < sizes.size(); ++i) { if (sizes[i] < 0) { - sizes[i] = (sizes[i] + output_dx_shape_[i]) > 0 ? (sizes[i] + output_dx_shape_[i]) : 0; + sizes[i] = (sizes[i] + output_shape_[i]) > 0 ? 
(sizes[i] + output_shape_[i]) : 0; } strides_.emplace_back(1); end_.emplace_back(begin_[i] + sizes[i]); } } - CPUKernelUtils::ExpandDimsTo4(&output_dx_shape_); - auto input_len = input_dy_shape_.size(); - if (input_len < 4) { - for (size_t i = 0; i < 4 - input_len; ++i) { - input_dy_shape_.insert(input_dy_shape_.begin(), 1); + + ExpandAllMemberDims(); + CPUKernelUtils::GetElementNumEveryDim(input_shape_, &input_element_num_); + CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_); +} + +void SliceGradCPUKernel::ExpandAllMemberDims() { + CPUKernelUtils::ExpandDimsTo4(&input_shape_); + + auto output_len = output_shape_.size(); + if (output_len < 4) { + for (size_t i = 0; i < 4 - output_len; ++i) { + output_shape_.insert(output_shape_.begin(), 1); begin_.insert(begin_.begin(), 0); strides_.insert(strides_.begin(), 1); end_.insert(end_.begin(), 1); @@ -76,22 +84,49 @@ void SliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { bool SliceGradCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { - auto input_dy_addr = reinterpret_cast(inputs[0]->addr); - auto output_dx_addr = reinterpret_cast(outputs[0]->addr); + auto input_addr = reinterpret_cast(inputs[0]->addr); + auto output_addr = reinterpret_cast(outputs[0]->addr); - auto out_size = sizeof(float) * output_dx_shape_[0] * output_dx_shape_[1] * output_dx_shape_[2] * output_dx_shape_[3]; - auto ret = memset_s(output_dx_addr, out_size, 0, out_size); + auto ret = memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size); if (ret != EOK) { - MS_LOG(ERROR) << "output buff memset fail."; + MS_LOG(ERROR) << "output buff memset fail. 
ret:" << ret; return false; } - for (int i = begin_[0]; i < end_[0]; i += strides_[0]) { - for (int j = begin_[1]; j < end_[1]; j += strides_[1]) { - for (int k = begin_[2]; k < end_[2]; k += strides_[2]) { + bool can_copy_memory[3] = {CanCopyMemoryOnAxis(0), CanCopyMemoryOnAxis(1), CanCopyMemoryOnAxis(2)}; + size_t out_start_offset[3] = {begin_[0] * output_element_num_[0], begin_[1] * output_element_num_[1], + begin_[2] * output_element_num_[2]}; + size_t out_step_size[3] = {strides_[0] * output_element_num_[0], strides_[1] * output_element_num_[1], + strides_[2] * output_element_num_[2]}; + + auto in_n_offset = 0; + auto out_n_offset = out_start_offset[0]; + for (int i = begin_[0]; i < end_[0]; + i += strides_[0], in_n_offset += input_element_num_[0], out_n_offset += out_step_size[0]) { + if (can_copy_memory[0]) { + CopyDataToOutput(inputs, in_n_offset, outputs, out_n_offset, input_element_num_[0]); + continue; + } + auto in_c_offset = 0; + auto out_c_offset = out_start_offset[1]; + for (int j = begin_[1]; j < end_[1]; + j += strides_[1], in_c_offset += input_element_num_[1], out_c_offset += out_step_size[1]) { + if (can_copy_memory[1]) { + CopyDataToOutput(inputs, in_n_offset + in_c_offset, outputs, out_n_offset + out_c_offset, + input_element_num_[1]); + continue; + } + auto in_h_offset = 0; + auto out_h_offset = out_start_offset[2]; + for (int k = begin_[2]; k < end_[2]; + k += strides_[2], in_h_offset += input_element_num_[2], out_h_offset += out_step_size[2]) { + if (can_copy_memory[2]) { + CopyDataToOutput(inputs, in_n_offset + in_c_offset + in_h_offset, outputs, + out_n_offset + out_c_offset + out_h_offset, input_element_num_[2]); + continue; + } for (int m = begin_[3]; m < end_[3]; m += strides_[3]) { - auto offset = CPUKernelUtils::CalcOffset(output_dx_shape_, i, j, k, m); - output_dx_addr[offset] = *input_dy_addr++; + output_addr[out_n_offset + out_c_offset + out_h_offset + m] = *input_addr++; } } } @@ -99,7 +134,38 @@ bool 
SliceGradCPUKernel::Launch(const std::vector &inputs, return true; } -void SliceGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { +bool SliceGradCPUKernel::CanCopyMemoryOnAxis(size_t dim) const { + for (size_t i = dim + 1; i < 4; ++i) { + if (begin_[i] != 0 || end_[i] != SizeToInt(output_shape_[i]) || strides_[i] != 1) { + return false; + } + } + return true; +} + +void SliceGradCPUKernel::CopyDataToOutput(const std::vector &inputs, size_t in_offset, + const std::vector &outputs, size_t out_offset, + size_t copy_num) const { + auto input_addr = reinterpret_cast(inputs[0]->addr); + auto in_buff_size = inputs[0]->size; + auto output_addr = reinterpret_cast(outputs[0]->addr); + auto out_buff_size = outputs[0]->size; + + if ((in_offset + copy_num) * sizeof(float) > in_buff_size) { + MS_LOG(EXCEPTION) << "input memory out of bounds."; + } + if ((out_offset + copy_num) * sizeof(float) > out_buff_size) { + MS_LOG(EXCEPTION) << "output memory out of bounds."; + } + + auto ret = memcpy_s(output_addr + out_offset, out_buff_size - out_offset * sizeof(float), input_addr + in_offset, + copy_num * sizeof(float)); + if (ret != EOK) { + MS_LOG(EXCEPTION) << "memcpy failed. 
ret:" << ret; + } +} + +void SliceGradCPUKernel::CheckParam(const CNodePtr &kernel_node) const { size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 1) { MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but SliceGradGpuKernel needs 1 output."; diff --git a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h index 847208e4bb..1e42c8ac68 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h @@ -33,12 +33,18 @@ class SliceGradCPUKernel : public CPUKernel { const std::vector &outputs) override; private: - void CheckParam(const CNodePtr &kernel_node); + void ExpandAllMemberDims(); + bool CanCopyMemoryOnAxis(size_t dim) const; + void CopyDataToOutput(const std::vector &inputs, size_t in_offset, + const std::vector &outputs, size_t out_offset, size_t copy_num) const; + void CheckParam(const CNodePtr &kernel_node) const; std::vector begin_; std::vector end_; std::vector strides_; - std::vector input_dy_shape_; - std::vector output_dx_shape_; + std::vector input_shape_; + std::vector input_element_num_; + std::vector output_shape_; + std::vector output_element_num_; }; MS_REG_CPU_KERNEL( diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc new file mode 100644 index 0000000000..5e2fc79576 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc @@ -0,0 +1,177 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "kernel/cpu/sparse_apply_adam_cpu_kernel.h" +#include "kernel/common_utils.h" +#include "device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +namespace { +constexpr size_t kSparseApplyAdamInputSize = 11; + +void ComputeAdam(MultiThreadComputeParams *input_params, size_t start, size_t end) { + MS_EXCEPTION_IF_NULL(input_params); + auto m = input_params->m_; + auto m_t = input_params->m_t_; + auto v = input_params->v_; + auto beta1 = input_params->beta1_; + auto beta2 = input_params->beta2_; + auto use_nesterov = input_params->use_nesterov_; + auto unique_sparse_grad = input_params->sparse_grad_; + auto var_first_dim_size = input_params->var_first_dim_size_; + auto var_outer_dim_size = input_params->var_outer_dim_size_; + for (size_t i = start; i < end; ++i) { + int index = unique_sparse_grad.indices_[i]; + if (index < 0 || IntToSize(index) >= var_first_dim_size) { + MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range after unique process"; + } + size_t start_index = var_outer_dim_size * index; + size_t end_index = start_index + var_outer_dim_size; + for (size_t j = start_index, k = var_outer_dim_size * i; j < end_index; ++j, ++k) { + auto summed_grad = unique_sparse_grad.value_[k]; + m[j] += (1 - beta1) * summed_grad; + v[j] += (1 - beta2) * summed_grad * summed_grad; + if (use_nesterov) { + m_t[j] = m[j] * beta1 + (1 - beta1) * summed_grad; + } + } + } +} + +void ComputeMomentum(MultiThreadComputeParams *input_params, size_t start, size_t end) { + MS_EXCEPTION_IF_NULL(input_params); + auto 
m = input_params->m_; + auto v = input_params->v_; + auto beta1 = input_params->beta1_; + auto beta2 = input_params->beta2_; + for (size_t i = start; i < end; ++i) { + m[i] *= beta1; + v[i] *= beta2; + } +} + +void ComputeWeight(MultiThreadComputeParams *input_params, size_t start, size_t end) { + MS_EXCEPTION_IF_NULL(input_params); + auto var = input_params->var_; + auto m = input_params->m_; + auto v = input_params->v_; + auto lr = input_params->lr_; + auto epsilon = input_params->epsilon_; + for (size_t i = start; i < end; ++i) { + var[i] -= lr * m[i] / (std::sqrt(v[i]) + epsilon); + } +} +} // namespace + +void SparseApplyAdamCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { + CPUKernel::InitInputOutputSize(kernel_node); + MS_EXCEPTION_IF_NULL(kernel_node); + workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); + workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); +} + +void SparseApplyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::vector var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + std::vector m_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + std::vector v_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + std::vector grad_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); + std::vector indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 10); + if (!IsSameShape(var_shape, m_shape)) { + MS_LOG(EXCEPTION) << "var and m should have the same shape"; + } + if (!IsSameShape(var_shape, v_shape)) { + MS_LOG(EXCEPTION) << "var and v should have the same shape"; + } + if (var_shape.empty()) { + MS_LOG(EXCEPTION) << "var must be at least 1D"; + } + var_first_dim_size_ = var_shape[0]; + for (size_t i = 1; i < var_shape.size(); ++i) { + if (var_shape[i] != grad_shape[i]) { + MS_LOG(EXCEPTION) << "The shape of var and grad must equal in dimension " << i; + } + var_outer_dim_size_ *= 
var_shape[i]; + } + if (indices_shape.size() != 1) { + MS_LOG(EXCEPTION) << "indices must be 1D"; + } + indices_size_ = indices_shape[0]; + if (grad_shape[0] != indices_size_) { + MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices"; + } + if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) { + use_nesterov_ = AnfAlgo::GetNodeAttr(kernel_node, "use_nesterov"); + } +} + +bool SparseApplyAdamCPUKernel::Launch(const std::vector &inputs, + const std::vector &workspace, + const std::vector & /*outputs*/) { + if (inputs.size() < kSparseApplyAdamInputSize) { + MS_LOG(EXCEPTION) << "Error input size!"; + } + + auto var = reinterpret_cast(inputs[0]->addr); + auto m = reinterpret_cast(inputs[1]->addr); + auto v = reinterpret_cast(inputs[2]->addr); + auto beta1_power = reinterpret_cast(inputs[3]->addr)[0]; + if (beta1_power == 1) { + MS_LOG(EXCEPTION) << "The beta1_power should not be 1"; + } + auto beta2_power = reinterpret_cast(inputs[4]->addr)[0]; + auto lr = reinterpret_cast(inputs[5]->addr)[0]; + auto beta1 = reinterpret_cast(inputs[6]->addr)[0]; + auto beta2 = reinterpret_cast(inputs[7]->addr)[0]; + auto epsilon = reinterpret_cast(inputs[8]->addr)[0]; + auto grad = reinterpret_cast(inputs[9]->addr); + auto indices = reinterpret_cast(inputs[10]->addr); + auto new_grad = reinterpret_cast(workspace[0]->addr); + auto new_indices = reinterpret_cast(workspace[1]->addr); + + SparseGradient unique_sparse_grad({new_grad, new_indices, indices_size_}); + ReduceSparseGradient(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_, + var_outer_dim_size_); + size_t total_dim_size = var_first_dim_size_ * var_outer_dim_size_; + lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power); + + MultiThreadComputeParams input_params; + input_params.m_ = m; + input_params.v_ = v; + input_params.beta1_ = beta1; + input_params.beta2_ = beta2; + const size_t kThreadNum = 16; + MultiThreadCompute(ComputeMomentum, &input_params, 
kThreadNum, total_dim_size); + + std::vector m_t(m, m + total_dim_size); + input_params.m_t_ = m_t.data(); + input_params.use_nesterov_ = use_nesterov_; + input_params.sparse_grad_ = unique_sparse_grad; + input_params.var_first_dim_size_ = var_first_dim_size_; + input_params.var_outer_dim_size_ = var_outer_dim_size_; + MultiThreadCompute(ComputeAdam, &input_params, kThreadNum, unique_sparse_grad.indices_size_); + + if (use_nesterov_) { + input_params.m_ = input_params.m_t_; + } + input_params.var_ = var; + input_params.lr_ = lr; + input_params.epsilon_ = epsilon; + MultiThreadCompute(ComputeWeight, &input_params, kThreadNum, total_dim_size); + return true; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.h new file mode 100644 index 0000000000..c2770d0ebd --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.h @@ -0,0 +1,63 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_ADAM_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_ADAM_CPU_KERNEL_H_ + +#include +#include +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class SparseApplyAdamCPUKernel : public CPUKernel { + public: + SparseApplyAdamCPUKernel() = default; + ~SparseApplyAdamCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + void InitInputOutputSize(const CNodePtr &kernel_node) override; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + size_t indices_size_{0}; + size_t var_first_dim_size_{0}; + size_t var_outer_dim_size_{1}; + bool use_nesterov_{false}; +}; + +MS_REG_CPU_KERNEL(SparseApplyAdam, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + SparseApplyAdamCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_ADAM_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc new file mode 100644 index 0000000000..005195ea33 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc @@ -0,0 +1,156 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "kernel/cpu/sparse_apply_ftrl_cpu_kernel.h" +#include "kernel/common_utils.h" +#include "device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +namespace { +constexpr size_t kSparseApplyFtrlInputSize = 5; + +void ComputeFtrl(MultiThreadComputeParams *input_params, size_t start, size_t end) { + MS_EXCEPTION_IF_NULL(input_params); + auto var = input_params->var_; + auto accum = input_params->accum_; + auto linear = input_params->linear_; + auto lr = input_params->lr_; + auto l1 = input_params->l1_; + auto l2 = input_params->l2_; + auto lr_power = input_params->lr_power_; + auto unique_sparse_grad = input_params->sparse_grad_; + auto var_first_dim_size = input_params->var_first_dim_size_; + auto var_outer_dim_size = input_params->var_outer_dim_size_; + for (size_t i = start; i < end; ++i) { + int index = unique_sparse_grad.indices_[i]; + if (index < 0 || IntToSize(index) >= var_first_dim_size) { + MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range after unique process"; + } + size_t start_index = var_outer_dim_size * index; + size_t end_index = start_index + var_outer_dim_size; + for (size_t j = start_index, k = var_outer_dim_size * i; j < end_index; ++j, ++k) { + auto summed_grad = unique_sparse_grad.value_[k]; + auto accum_new = accum[j] + summed_grad * summed_grad; + if (lr_power == -0.5) { + linear[j] += summed_grad - (std::sqrt(accum_new) - std::sqrt(accum[j])) / lr * var[j]; + } else { + linear[j] += summed_grad - (std::pow(accum_new, -lr_power) - std::pow(accum[j], -lr_power)) / lr * 
var[j]; + } + auto x = Sign(linear[j]) * l1 - linear[j]; + float y; + if (lr_power == -0.5) { + y = std::sqrt(accum_new) / lr + 2 * l2; + } else { + y = std::pow(accum_new, -lr_power) / lr + 2 * l2; + } + auto pre_shrink = x / y; + var[j] = std::fabs(linear[j]) > l1 ? pre_shrink : 0; + accum[j] = accum_new; + } + } +} +} // namespace + +void SparseApplyFtrlCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { + CPUKernel::InitInputOutputSize(kernel_node); + MS_EXCEPTION_IF_NULL(kernel_node); + workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); + workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); +} + +void SparseApplyFtrlCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::vector var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + std::vector accum_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + std::vector linear_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + std::vector grad_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + std::vector indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + if (!IsSameShape(var_shape, accum_shape)) { + MS_LOG(EXCEPTION) << "var and accum should have the same shape"; + } + if (!IsSameShape(var_shape, linear_shape)) { + MS_LOG(EXCEPTION) << "var and linear should have the same shape"; + } + if (var_shape.empty()) { + MS_LOG(EXCEPTION) << "var must be at least 1D"; + } + var_first_dim_size_ = var_shape[0]; + for (size_t i = 1; i < var_shape.size(); ++i) { + if (var_shape[i] != grad_shape[i]) { + MS_LOG(EXCEPTION) << "The shape of var and grad must equal in dimension " << i; + } + var_outer_dim_size_ *= var_shape[i]; + } + if (indices_shape.size() != 1) { + MS_LOG(EXCEPTION) << "indices must be a 1D vector"; + } + indices_size_ = indices_shape[0]; + if (grad_shape[0] != indices_size_) { + MS_LOG(EXCEPTION) << "The first dimension of grad shape must be 
equal to indices"; + } + lr_ = AnfAlgo::GetNodeAttr(kernel_node, "lr"); + if (lr_ <= 0) { + MS_LOG(EXCEPTION) << "lr should be a positive scalar"; + } + l1_ = AnfAlgo::GetNodeAttr(kernel_node, "l1"); + if (l1_ < 0) { + MS_LOG(EXCEPTION) << "l1 should be a non-negative scalar"; + } + l2_ = AnfAlgo::GetNodeAttr(kernel_node, "l2"); + if (l2_ < 0) { + MS_LOG(EXCEPTION) << "l2 should be a non-negative scalar"; + } + lr_power_ = AnfAlgo::GetNodeAttr(kernel_node, "lr_power"); + if (lr_power_ > 0) { + MS_LOG(EXCEPTION) << "lr_power should be a non-positive scalar"; + } +} + +bool SparseApplyFtrlCPUKernel::Launch(const std::vector &inputs, + const std::vector &workspace, + const std::vector & /*outputs*/) { + if (inputs.size() < kSparseApplyFtrlInputSize) { + MS_LOG(EXCEPTION) << "error input output size!"; + } + + auto var = reinterpret_cast(inputs[0]->addr); + auto accum = reinterpret_cast(inputs[1]->addr); + auto linear = reinterpret_cast(inputs[2]->addr); + auto grad = reinterpret_cast(inputs[3]->addr); + auto indices = reinterpret_cast(inputs[4]->addr); + auto new_grad = reinterpret_cast(workspace[0]->addr); + auto new_indices = reinterpret_cast(workspace[1]->addr); + SparseGradient unique_sparse_grad({new_grad, new_indices, indices_size_}); + ReduceSparseGradient(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_, + var_outer_dim_size_); + + MultiThreadComputeParams input_params; + input_params.var_ = var; + input_params.accum_ = accum; + input_params.linear_ = linear; + input_params.lr_ = lr_; + input_params.l1_ = l1_; + input_params.l2_ = l2_; + input_params.lr_power_ = lr_power_; + input_params.sparse_grad_ = unique_sparse_grad; + input_params.var_first_dim_size_ = var_first_dim_size_; + input_params.var_outer_dim_size_ = var_outer_dim_size_; + const size_t kThreadNum = 16; + MultiThreadCompute(ComputeFtrl, &input_params, kThreadNum, unique_sparse_grad.indices_size_); + return true; +} +} // namespace kernel +} // namespace 
mindspore diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.h new file mode 100644 index 0000000000..b4e5a48109 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.h @@ -0,0 +1,59 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_FTRL_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_FTRL_CPU_KERNEL_H_ + +#include +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class SparseApplyFtrlCPUKernel : public CPUKernel { + public: + SparseApplyFtrlCPUKernel() = default; + ~SparseApplyFtrlCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + void InitInputOutputSize(const CNodePtr &kernel_node) override; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + size_t indices_size_{0}; + size_t var_first_dim_size_{0}; + size_t var_outer_dim_size_{1}; + float lr_{0}; + float l1_{0}; + float l2_{0}; + float lr_power_{0}; +}; + +MS_REG_CPU_KERNEL(SparseApplyFtrl, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + 
.AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + SparseApplyFtrlCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_FTRL_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc new file mode 100644 index 0000000000..2460dc0f27 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc @@ -0,0 +1,147 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h" +#include "kernel/common_utils.h" +#include "device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +namespace { +constexpr size_t kSparseApplyLazyAdamInputSize = 11; + +void ComputeLazyAdam(MultiThreadComputeParams *input_params, size_t start, size_t end) { + MS_EXCEPTION_IF_NULL(input_params); + auto var = input_params->var_; + auto m = input_params->m_; + auto v = input_params->v_; + auto lr = input_params->lr_; + auto beta1 = input_params->beta1_; + auto beta2 = input_params->beta2_; + auto epsilon = input_params->epsilon_; + auto use_nesterov = input_params->use_nesterov_; + auto unique_sparse_grad = input_params->sparse_grad_; + auto var_first_dim_size = input_params->var_first_dim_size_; + auto var_outer_dim_size = input_params->var_outer_dim_size_; + for (size_t i = start; i < end; ++i) { + int index = unique_sparse_grad.indices_[i]; + if (index < 0 || IntToSize(index) >= var_first_dim_size) { + MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range"; + } + size_t start_index = var_outer_dim_size * index; + size_t end_index = start_index + var_outer_dim_size; + for (size_t j = start_index, k = var_outer_dim_size * i; j < end_index; ++j, ++k) { + auto summed_grad = unique_sparse_grad.value_[k]; + m[j] = beta1 * m[j] + (1 - beta1) * summed_grad; + v[j] = beta2 * v[j] + (1 - beta2) * summed_grad * summed_grad; + if (use_nesterov) { + var[j] -= lr * (m[j] * beta1 + (1 - beta1) * summed_grad) / (std::sqrt(v[j]) + epsilon); + } else { + var[j] -= lr * m[j] / (std::sqrt(v[j]) + epsilon); + } + } + } +} +} // namespace + +void SparseApplyLazyAdamCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { + CPUKernel::InitInputOutputSize(kernel_node); + MS_EXCEPTION_IF_NULL(kernel_node); + workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); + workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); +} + +void 
SparseApplyLazyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::vector var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + std::vector m_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + std::vector v_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + std::vector grad_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); + std::vector indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 10); + if (!IsSameShape(var_shape, m_shape)) { + MS_LOG(EXCEPTION) << "var and m should have the same shape"; + } + if (!IsSameShape(var_shape, v_shape)) { + MS_LOG(EXCEPTION) << "var and v should have the same shape"; + } + if (var_shape.empty()) { + MS_LOG(EXCEPTION) << "var must be at least 1D"; + } + var_first_dim_size_ = var_shape[0]; + for (size_t i = 1; i < var_shape.size(); ++i) { + if (var_shape[i] != grad_shape[i]) { + MS_LOG(EXCEPTION) << "The shape of var and grad must equal in dimension " << i; + } + var_outer_dim_size_ *= var_shape[i]; + } + if (indices_shape.size() != 1) { + MS_LOG(EXCEPTION) << "indices must be 1D"; + } + indices_size_ = indices_shape[0]; + if (grad_shape[0] != indices_size_) { + MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices"; + } + if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) { + use_nesterov_ = AnfAlgo::GetNodeAttr(kernel_node, "use_nesterov"); + } +} + +bool SparseApplyLazyAdamCPUKernel::Launch(const std::vector &inputs, + const std::vector &workspace, + const std::vector & /*outputs*/) { + if (inputs.size() < kSparseApplyLazyAdamInputSize) { + MS_LOG(EXCEPTION) << "Error input size!"; + } + + auto var = reinterpret_cast(inputs[0]->addr); + auto m = reinterpret_cast(inputs[1]->addr); + auto v = reinterpret_cast(inputs[2]->addr); + auto beta1_power = reinterpret_cast(inputs[3]->addr)[0]; + if (beta1_power == 1) { + MS_LOG(EXCEPTION) << "The beta1_power should not be 1"; + } + auto beta2_power 
= reinterpret_cast(inputs[4]->addr)[0]; + auto lr = reinterpret_cast(inputs[5]->addr)[0]; + auto beta1 = reinterpret_cast(inputs[6]->addr)[0]; + auto beta2 = reinterpret_cast(inputs[7]->addr)[0]; + auto epsilon = reinterpret_cast(inputs[8]->addr)[0]; + auto grad = reinterpret_cast(inputs[9]->addr); + auto indices = reinterpret_cast(inputs[10]->addr); + auto new_grad = reinterpret_cast(workspace[0]->addr); + auto new_indices = reinterpret_cast(workspace[1]->addr); + + SparseGradient unique_sparse_grad({new_grad, new_indices, indices_size_}); + ReduceSparseGradient(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_, + var_outer_dim_size_); + + lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power); + MultiThreadComputeParams input_params; + input_params.var_ = var; + input_params.m_ = m; + input_params.v_ = v; + input_params.lr_ = lr; + input_params.beta1_ = beta1; + input_params.beta2_ = beta2; + input_params.epsilon_ = epsilon; + input_params.use_nesterov_ = use_nesterov_; + input_params.sparse_grad_ = unique_sparse_grad; + input_params.var_first_dim_size_ = var_first_dim_size_; + input_params.var_outer_dim_size_ = var_outer_dim_size_; + const size_t kThreadNum = 16; + MultiThreadCompute(ComputeLazyAdam, &input_params, kThreadNum, unique_sparse_grad.indices_size_); + return true; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h new file mode 100644 index 0000000000..795568a64d --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h @@ -0,0 +1,63 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_LAZY_ADAM_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_LAZY_ADAM_CPU_KERNEL_H_ + +#include +#include +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class SparseApplyLazyAdamCPUKernel : public CPUKernel { + public: + SparseApplyLazyAdamCPUKernel() = default; + ~SparseApplyLazyAdamCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + void InitInputOutputSize(const CNodePtr &kernel_node) override; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + size_t indices_size_{0}; + size_t var_first_dim_size_{0}; + size_t var_outer_dim_size_{1}; + bool use_nesterov_{false}; +}; + +MS_REG_CPU_KERNEL(SparseApplyLazyAdam, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + SparseApplyLazyAdamCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_LAZY_ADAM_CPU_KERNEL_H_ diff --git 
a/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc new file mode 100644 index 0000000000..64cb65764f --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc @@ -0,0 +1,140 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h" +#include "kernel/common_utils.h" +#include "device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +namespace { +constexpr size_t kSparseApplyProximalAdagradInputSize = 7; + +void ComputeProximalAdagrad(MultiThreadComputeParams *input_params, size_t start, size_t end) { + MS_EXCEPTION_IF_NULL(input_params); + auto var = input_params->var_; + auto accum = input_params->accum_; + auto lr = input_params->lr_; + auto l1 = input_params->l1_; + auto l2 = input_params->l2_; + auto unique_sparse_grad = input_params->sparse_grad_; + auto var_first_dim_size = input_params->var_first_dim_size_; + auto var_outer_dim_size = input_params->var_outer_dim_size_; + for (size_t i = start; i < end; ++i) { + int index = unique_sparse_grad.indices_[i]; + if (index < 0 || IntToSize(index) >= var_first_dim_size) { + MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range after unique process"; + } + size_t start_index = var_outer_dim_size * index; + size_t end_index = 
start_index + var_outer_dim_size; + for (size_t j = start_index, k = var_outer_dim_size * i; j < end_index; ++j, ++k) { + auto summed_grad = unique_sparse_grad.value_[k]; + accum[j] += summed_grad * summed_grad; + auto learning_rate = lr * (1 / std::sqrt(accum[j])); + auto prox_v = var[j]; + prox_v -= summed_grad * learning_rate; + if (l1 > 0) { + var[j] = Sign(prox_v) * std::fmax(std::fabs(prox_v) - learning_rate * l1, static_cast(0.0)) / + (1 + l2 * learning_rate); + } else { + var[j] = prox_v / (1 + l2 * learning_rate); + } + } + } +} +} // namespace + +void SparseApplyProximalAdagradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { + CPUKernel::InitInputOutputSize(kernel_node); + MS_EXCEPTION_IF_NULL(kernel_node); + workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); + workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); +} + +void SparseApplyProximalAdagradCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::vector var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + std::vector accum_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + std::vector lr_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + std::vector l1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + std::vector l2_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + std::vector grad_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + std::vector indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + if (!IsSameShape(var_shape, accum_shape)) { + MS_LOG(EXCEPTION) << "var and accum should have the same shape"; + } + if (var_shape.empty()) { + MS_LOG(EXCEPTION) << "var must be at least 1D"; + } + var_first_dim_size_ = var_shape[0]; + for (size_t i = 1; i < var_shape.size(); ++i) { + if (var_shape[i] != grad_shape[i]) { + MS_LOG(EXCEPTION) << "The shape of var and grad must equal in dimension " << i; + } + 
var_outer_dim_size_ *= var_shape[i]; + } + if (indices_shape.size() != 1) { + MS_LOG(EXCEPTION) << "indices must be a 1D vector"; + } + indices_size_ = indices_shape[0]; + if (grad_shape[0] != indices_size_) { + MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices"; + } + if (!lr_shape.empty()) { + MS_LOG(EXCEPTION) << "lr is not a scalar"; + } + if (!l1_shape.empty()) { + MS_LOG(EXCEPTION) << "l1 is not a scalar"; + } + if (!l2_shape.empty()) { + MS_LOG(EXCEPTION) << "l2 is not a scalar"; + } +} + +bool SparseApplyProximalAdagradCPUKernel::Launch(const std::vector &inputs, + const std::vector &workspace, + const std::vector & /*outputs*/) { + if (inputs.size() < kSparseApplyProximalAdagradInputSize) { + MS_LOG(EXCEPTION) << "Wrong input size!"; + } + + auto var = reinterpret_cast(inputs[0]->addr); + auto accum = reinterpret_cast(inputs[1]->addr); + auto lr = reinterpret_cast(inputs[2]->addr)[0]; + auto l1 = reinterpret_cast(inputs[3]->addr)[0]; + auto l2 = reinterpret_cast(inputs[4]->addr)[0]; + auto grad = reinterpret_cast(inputs[5]->addr); + auto indices = reinterpret_cast(inputs[6]->addr); + auto new_grad = reinterpret_cast(workspace[0]->addr); + auto new_indices = reinterpret_cast(workspace[1]->addr); + SparseGradient unique_sparse_grad({new_grad, new_indices, indices_size_}); + ReduceSparseGradient(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_, + var_outer_dim_size_); + + MultiThreadComputeParams input_params; + input_params.var_ = var; + input_params.accum_ = accum; + input_params.lr_ = lr; + input_params.l1_ = l1; + input_params.l2_ = l2; + input_params.sparse_grad_ = unique_sparse_grad; + input_params.var_first_dim_size_ = var_first_dim_size_; + input_params.var_outer_dim_size_ = var_outer_dim_size_; + const size_t kThreadNum = 16; + MultiThreadCompute(ComputeProximalAdagrad, &input_params, kThreadNum, unique_sparse_grad.indices_size_); + return true; +} +} // namespace kernel +} // 
namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h new file mode 100644 index 0000000000..082809a9c2 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h @@ -0,0 +1,56 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_PROXIMAL_ADAGRAD_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_PROXIMAL_ADAGRAD_CPU_KERNEL_H_ + +#include +#include +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class SparseApplyProximalAdagradCPUKernel : public CPUKernel { + public: + SparseApplyProximalAdagradCPUKernel() = default; + ~SparseApplyProximalAdagradCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + void InitInputOutputSize(const CNodePtr &kernel_node) override; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + size_t indices_size_{0}; + size_t var_first_dim_size_{0}; + size_t var_outer_dim_size_{1}; +}; + +MS_REG_CPU_KERNEL(SparseApplyProximalAdagrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + 
.AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + SparseApplyProximalAdagradCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_PROXIMAL_ADAGRAD_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc new file mode 100644 index 0000000000..543f0e5cdd --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc @@ -0,0 +1,89 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "kernel/cpu/sub_cpu_kernel.h" +#include "device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +void SubCPUKernel::InitKernel(const CNodePtr &kernel_node) { + auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + if (shape.size() == 1) { + if (shape[0] != 1) { + MS_LOG(EXCEPTION) << "input 1 only support scalar"; + } + } else { + MS_LOG(EXCEPTION) << "input 1 only support scalar"; + } +} + +void sub_task(const int *in_addr, int *out_addr, size_t lens, int offset) { + for (size_t i = 0; i < lens; i++) { + out_addr[i] = in_addr[i] - offset; + } +} + +bool SubCPUKernel::Launch(const std::vector &inputs, + const std::vector & /*workspace*/, + const std::vector &outputs) { +#if defined(_WIN32) || defined(_WIN64) + auto start_time = std::chrono::steady_clock::now(); +#else + struct timeval start_time, end_time; + (void)gettimeofday(&start_time, nullptr); +#endif + auto input_addr = reinterpret_cast(inputs[0]->addr); + auto output_addr = reinterpret_cast(outputs[0]->addr); + offset_ = *reinterpret_cast(inputs[1]->addr); + MS_LOG(INFO) << "offset: " << offset_; + auto lens = inputs[0]->size / sizeof(int); + if (lens < 10000) { + for (size_t i = 0; i < lens; i++) { + output_addr[i] = input_addr[i] - offset_; + } + } else { + const size_t thread_num = 4; + std::thread threads[4]; + size_t process_lens = (lens + thread_num - 1) / thread_num; + size_t process_offset = 0; + for (size_t i = 0; i < thread_num; i++) { + threads[i] = + std::thread(sub_task, input_addr + process_offset, output_addr + process_offset, process_lens, offset_); + if (process_offset + process_lens > lens) { + process_lens = lens - process_offset; + process_offset = lens; + } else { + process_offset += process_lens; + } + } + for (size_t i = 0; i < thread_num; i++) { + threads[i].join(); + } + } +#if defined(_WIN32) || defined(_WIN64) + auto end_time = std::chrono::steady_clock::now(); + std::chrono::duration> cost = end_time - start_time; + 
MS_LOG(INFO) << "SubscaleCPUKernel, used time: " << cost.count() << " us"; +#else + (void)gettimeofday(&end_time, nullptr); + uint64_t time = 1000000 * static_cast(end_time.tv_sec - start_time.tv_sec); + time += static_cast(end_time.tv_usec - start_time.tv_usec); + MS_LOG(INFO) << "SubCPUKernel, used time: " << time << " us"; +#endif + return true; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.h new file mode 100644 index 0000000000..54b2c8951a --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.h @@ -0,0 +1,45 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SUB_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_SUB_CPU_KERNEL_H_ +#include +#include +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class SubCPUKernel : public CPUKernel { + public: + SubCPUKernel() : offset_(0) {} + ~SubCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + int offset_; +}; + +MS_REG_CPU_KERNEL( + Sub, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + SubCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_SUB_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/relu_grad_kernel.cc b/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.cc similarity index 56% rename from mindspore/ccsrc/kernel/gpu/nn/relu_grad_kernel.cc rename to mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.cc index 848007320f..24c8a9a730 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/relu_grad_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,17 +14,17 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/relu_grad_kernel.h" +#include "kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h" namespace mindspore { namespace kernel { -MS_REG_GPU_KERNEL_ONE( - ReluGrad, - KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - ReluGradGpuFwdKernel, float) -MS_REG_GPU_KERNEL_ONE( - ReluGrad, - KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), - ReluGradGpuFwdKernel, half) +MS_REG_GPU_KERNEL_TWO( + ArgMaxWithValue, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), + ArgmaxWithValueGpuKernel, float, int) +MS_REG_GPU_KERNEL_TWO( + ArgMaxWithValue, + KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat16), + ArgmaxWithValueGpuKernel, half, int) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.h b/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h similarity index 50% rename from mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.h rename to mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h index b5b52d0acf..fb7796b022 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h @@ -14,23 +14,20 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GRAD_KERNEL_H_ -#define MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GRAD_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_ARGMAXWITHVALUEGPUKERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_ARGMAXWITHVALUEGPUKERNEL_H_ -#include #include -#include #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/tanh_impl.cuh" - +#include "kernel/gpu/cuda_impl/argmaxwithvalue_impl.cuh" namespace mindspore { namespace kernel { -template -class TanhGradKernel : public GpuKernel { +template +class ArgmaxWithValueGpuKernel : public GpuKernel { public: - TanhGradKernel() : input_size_(0) {} - ~TanhGradKernel() override = default; + ArgmaxWithValueGpuKernel() : input_size_(0), output_size_(0), bound_(0), outerSize_(0), innerSize_(0) {} + ~ArgmaxWithValueGpuKernel() override = default; const std::vector &GetInputSizeList() const override { return input_size_list_; } const std::vector &GetOutputSizeList() const override { return output_size_list_; } @@ -38,21 +35,40 @@ class TanhGradKernel : public GpuKernel { bool Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) override { - auto y_addr = GetDeviceAddress(inputs, 0); - auto dy_addr = GetDeviceAddress(inputs, 1); - auto dx_addr = GetDeviceAddress(outputs, 0); - - TanhGrad(input_size_ / sizeof(T), y_addr, dy_addr, dx_addr, reinterpret_cast(stream_ptr)); + T *input = GetDeviceAddress(inputs, 0); + T *output = GetDeviceAddress(outputs, 1); + S *index = GetDeviceAddress(outputs, 0); + CalArgmaxWithValue(input_size_ / sizeof(T), input, bound_, outerSize_, innerSize_, index, output, + reinterpret_cast(stream_ptr)); return true; } - bool Init(const CNodePtr &kernel_node) override { - auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + bool Init(const CNodePtr &kernel_node) override { + std::vector shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto output_shape = 
AnfAlgo::GetOutputInferShape(kernel_node, 1); + int dims = shape.size(); + int axis = GetAttr(kernel_node, "axis"); + if (axis < 0) { + axis += dims; + } input_size_ = sizeof(T); - for (auto dim : input_shape) { - input_size_ *= dim; + for (auto x : shape) { + input_size_ *= x; + } + output_size_ = sizeof(S); + for (auto x : output_shape) { + output_size_ *= x; + } + bound_ = shape[axis]; + outerSize_ = 1; + for (int i = axis - 1; i >= 0; i--) { + outerSize_ *= shape[i]; } + innerSize_ = 1; + for (int i = axis + 1; i < dims; i++) { + innerSize_ *= shape[i]; + } InitSizeLists(); return true; } @@ -60,17 +76,21 @@ class TanhGradKernel : public GpuKernel { protected: void InitSizeLists() override { input_size_list_.push_back(input_size_); - input_size_list_.push_back(input_size_); - output_size_list_.push_back(input_size_); + output_size_list_.push_back(output_size_); + output_size_list_.push_back(output_size_ / sizeof(S) * sizeof(T)); } private: + size_t input_size_; + size_t output_size_; std::vector input_size_list_; std::vector output_size_list_; std::vector workspace_size_list_; - size_t input_size_; + int bound_; + int outerSize_; + int innerSize_; }; } // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GRAD_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_GPU_ARGMAXWITHVALUEGPUKERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.h index 224a3da8ad..e1f995d648 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.h @@ -81,7 +81,7 @@ class ArrayReduceGpuKernel : public GpuKernel { } bool Init(const CNodePtr &kernel_node) override { InitResource(); - data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); size_t input_num = 
AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 1) { MS_LOG(ERROR) << "Input number is " << input_num << ", but reduce op needs 1 inputs."; diff --git a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h index 81910b5091..7f71e548ad 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h @@ -27,7 +27,8 @@ namespace kernel { template class SliceGpuFwdKernel : public GpuKernel { public: - SliceGpuFwdKernel() : is_strided_slice_(false), input_size_(0), output_size_(0), workspace_size_(0) {} + SliceGpuFwdKernel() + : is_strided_slice_(false), is_null_input_(false), input_size_(0), output_size_(0), workspace_size_(0) {} ~SliceGpuFwdKernel() override = default; const std::vector &GetInputSizeList() const override { return input_size_list_; } const std::vector &GetOutputSizeList() const override { return output_size_list_; } @@ -35,6 +36,9 @@ class SliceGpuFwdKernel : public GpuKernel { bool Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) override { + if (is_null_input_) { + return true; + } T *input = GetDeviceAddress(inputs, 0); T *output = GetDeviceAddress(outputs, 0); if (is_strided_slice_) { @@ -79,7 +83,11 @@ class SliceGpuFwdKernel : public GpuKernel { if (size_[i] < 0) { size_[i] = (size_[i] + input_shape_[i]) > 0 ? 
(size_[i] + input_shape_[i]) : 0; } - if (size_[i] == 0) { + if (begin_[i] == size_[i] && is_strided_slice_) { + MS_LOG(WARNING) << "Output is null."; + is_null_input_ = true; + } + if (size_[i] == 0 && strides_[i] > 0) { size_[i] = begin_[i] + 1; } } @@ -143,6 +151,7 @@ class SliceGpuFwdKernel : public GpuKernel { std::vector workspace_size_list_; bool is_strided_slice_; + bool is_null_input_; size_t input_size_; size_t output_size_; size_t workspace_size_; diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cu new file mode 100644 index 0000000000..3ec63ee03a --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cu @@ -0,0 +1,56 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernel/gpu/cuda_impl/adam_impl.cuh" + +template +__device__ __forceinline__ T SqrtFunc(T input) { + return sqrt(input); +} + +template <> +__device__ __forceinline__ half SqrtFunc(half input) { + return hsqrt(input); +} + +template +__global__ void ApplyAdamKernel(const size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, + const T *learning_rate, const T *beta1, const T *beta2, const T *epsilon, T *variable, + T *m, T *v) { + const T one = static_cast(1.0); + const T new_learning_rate = learning_rate[0] * SqrtFunc(one - beta2_power[0]) / (one - beta1_power[0]); + + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { + m[i] += (gradient[i] - m[i]) * (one - beta1[0]); + v[i] += (gradient[i] * gradient[i] - v[i]) * (one - beta2[0]); + variable[i] -= new_learning_rate * m[i] / (SqrtFunc(v[i]) + epsilon[0]); + } +} + +template +void ApplyAdam(const size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, const T *learning_rate, + const T *beta1, const T *beta2, const T *epsilon, T *variable, T *m, T *v, cudaStream_t cuda_stream) { + ApplyAdamKernel<<>>( + size, gradient, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, variable, m, v); +} + +template void ApplyAdam(const size_t size, const float *gradient, const float *beta1_power, + const float *beta2_power, const float *learning_rate, const float *beta1, + const float *beta2, const float *epsilon, float *variable, float *m, float *v, + cudaStream_t cuda_stream); +template void ApplyAdam(const size_t size, const half *gradient, const half *beta1_power, const half *beta2_power, + const half *learning_rate, const half *beta1, const half *beta2, const half *epsilon, + half *variable, half *m, half *v, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cuh similarity index 59% rename from 
mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cuh rename to mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cuh index 71fc4be4dd..f48a113c26 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cuh +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cuh @@ -14,15 +14,12 @@ * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_TAN_H_ -#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_TAN_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ADAM_IMPL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ADAM_IMPL_H_ #include "device/gpu/cuda_common.h" +template +void ApplyAdam(const size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, const T *learning_rate, + const T *beta1, const T *beta2, const T *epsilon, T *variable, T *m, T *v, cudaStream_t cuda_stream); -template -void Tanh(const size_t size, const T* x_addr, T* y_addr, cudaStream_t cuda_stream); - -template -void TanhGrad(const size_t size, const T* y_addr, const T* dy_addr, T* dx_addr, cudaStream_t cuda_stream); - -#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_TAN_H_ +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ADAM_IMPL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cu new file mode 100644 index 0000000000..a0687a2768 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cu @@ -0,0 +1,58 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "argmaxwithvalue_impl.cuh" +#include "device/gpu/cuda_common.h" +#include "include/cuda_fp16.h" +template +__global__ void ArgmaxWithValue(size_t size, const T* input, const int bound, int outerSize, int innerSize, + S* index, T* output) { + for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim.x) { + for (int i = 0; i < outerSize; i++) { + int inputOutterOffset = i * innerSize * bound; + int outputOutterOffset = i * innerSize; + for (int j = 0; j < innerSize; j++) { + auto outputInnerOffset = outputOutterOffset + j; + S idx = 0; + T maxData = input[j + inputOutterOffset]; + for (S c = 0; c < bound; c++) { + int offset = j + c * innerSize; + auto inputData = input[inputOutterOffset + offset]; + idx = inputData > maxData ? c : idx; + maxData = inputData > maxData ? inputData : maxData; + } + output[outputInnerOffset] = maxData; + index[outputInnerOffset] = idx; + } + } + } + return; +} + +template +void CalArgmaxWithValue(size_t size, const T* input, const int bound_, const int outerSize_, const int innerSize_, + S* index, T* output, cudaStream_t cuda_stream) { + ArgmaxWithValue<<>>(size, input, bound_, outerSize_, innerSize_, + index, output); + return; +} + +template void CalArgmaxWithValue(size_t size, const float* input, const int bound_, const int outerSize_, + const int innerSize_, int* index, float* output, + cudaStream_t cuda_stream); +template void CalArgmaxWithValue(size_t size, const half* input, const int bound_, const int outerSize_, + const int innerSize_, int* index, half* output, + cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cuh new file mode 100644 index 0000000000..0d4f4b62a3 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cuh @@ -0,0 
+1,22 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ARGMAXWITHVALUE_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ARGMAXWITHVALUE_H_ +template +void CalArgmaxWithValue(size_t size, const T* input, const int bound_, const int outerSize_, const int innerSize_, + S* index, T* output, cudaStream_t cuda_stream); +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ARGMAXWITHVALUE_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu index ce8617283c..5aa087e7f5 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu @@ -110,7 +110,13 @@ void NoBroadcastGrad(const int &nums, enum BroadcastGradOpType op, const T *x1, template void NoBroadcastGrad(const int &nums, enum BroadcastGradOpType op, const float *x1, const float *x2, const float *dy, float *dx1, float *dx2, cudaStream_t stream); +template void NoBroadcastGrad(const int &nums, enum BroadcastGradOpType op, const int *x1, const int *x2, + const int *dy, int *dx1, int *dx2, cudaStream_t stream); template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1, const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3, enum BroadcastGradOpType op, const float 
*x1, const float *x2, const float *dy, float *dx1, float *dx2, cudaStream_t stream); +template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1, + const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3, + enum BroadcastGradOpType op, const int *x1, const int *x2, const int *dy, int *dx1, + int *dx2, cudaStream_t stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cu index 4953d45ff5..afa94fc56c 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cu @@ -64,6 +64,11 @@ struct SubFunc { __device__ __forceinline__ S operator()(const T &lhs, const T &rhs) { return (lhs - rhs); } }; +template +struct AddFunc { + __device__ __forceinline__ S operator()(const T &lhs, const T &rhs) { return (lhs + rhs); } +}; + template <> struct PowerFunc { // invalid branch @@ -118,6 +123,9 @@ __global__ void BroadcastKernel(const int l0, const int l1, const int l2, const case BROADCAST_TYPE_SUB: return BroadcastOperator>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1, output); + case BROADCAST_TYPE_ADD: + return BroadcastOperator>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1, + output); } } @@ -157,6 +165,8 @@ __global__ void NoBroadcastKernel(const int nums, enum BroadcastOpType op, const return NoBroadcastOperator>(nums, input0, input1, output); case BROADCAST_TYPE_SUB: return NoBroadcastOperator>(nums, input0, input1, output); + case BROADCAST_TYPE_ADD: + return NoBroadcastOperator>(nums, input0, input1, output); } } @@ -182,7 +192,10 @@ template void Broadcast(const int &l0, const int &l1, const int &l2, const int & const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3, enum BroadcastOpType op, const half *input0, const half *input1, half *output, cudaStream_t stream); - 
+template void Broadcast(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1, + const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3, + enum BroadcastOpType op, const int *input0, const int *input1, int *output, + cudaStream_t stream); template void NoBroadcast(const int &nums, enum BroadcastOpType op, const float *input0, const float *input1, bool *output, cudaStream_t stream); template void NoBroadcast(const int &nums, enum BroadcastOpType op, const float *input0, const float *input1, @@ -191,3 +204,5 @@ template void NoBroadcast(const int &nums, enum BroadcastOpType op, const half * bool *output, cudaStream_t stream); template void NoBroadcast(const int &nums, enum BroadcastOpType op, const half *input0, const half *input1, half *output, cudaStream_t stream); +template void NoBroadcast(const int &nums, enum BroadcastOpType op, const int *input0, const int *input1, + int *output, cudaStream_t stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cuh index 621e14401c..5f6992511d 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cuh +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cuh @@ -28,6 +28,7 @@ enum BroadcastOpType { BROADCAST_TYPE_REALDIV = 5, BROADCAST_TYPE_MUL = 6, BROADCAST_TYPE_SUB = 7, + BROADCAST_TYPE_ADD = 8, BROADCAST_TYPE_INVALID = 0xffffffff, }; diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cu index 940c64ea53..019d71d740 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cu +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cu @@ -19,10 +19,10 @@ #include "include/cuda_runtime.h" __global__ void DropoutForwardKernel(const float *input, float *mask, float *output, size_t num_count, - float drop_prob) { - float scale = 1.f / drop_prob; + float keep_prob) { + float scale = 1.f / keep_prob; 
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x) { - mask[i] = mask[i] > drop_prob; + mask[i] = mask[i] <= keep_prob; output[i] = scale * input[i] * mask[i]; } } @@ -34,8 +34,8 @@ void DropoutForward(const float *input, float *mask, float *output, size_t num_c } __global__ void DropoutBackwardKernel(const float *dy, const float *mask, float *dx, size_t num_count, - float drop_prob) { - float scale = 1.f / (1.f - drop_prob); + float keep_prob) { + float scale = 1.f / keep_prob; for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < num_count; i += blockDim.x * gridDim.x) { dx[i] = scale * dy[i] * mask[i]; } diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cuh index 9aa05d6a08..bd3de6524d 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cuh +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cuh @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_DROPOUT_H_ #include "device/gpu/cuda_common.h" -void DropoutForward(const float *input, float *mask, float *output, size_t num_count, float drop_prob, +void DropoutForward(const float *input, float *mask, float *output, size_t num_count, float keep_prob, cudaStream_t cuda_stream); -void DropoutBackward(const float *dy, const float *mask, float *dx, size_t num_count, float drop_prob, +void DropoutBackward(const float *dy, const float *mask, float *dx, size_t num_count, float keep_prob, cudaStream_t cuda_stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_DROPOUT_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_per_channel_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cu similarity index 73% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_per_channel_impl.cu rename to mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cu index b9aac9bdc3..75c5eacb25 100644 --- 
a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_per_channel_impl.cu +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cu @@ -19,7 +19,7 @@ #include #include #include -#include "fake_quant_per_channel_impl.cuh" +#include "fake_quant_perchannel_impl.cuh" #include "device/gpu/cuda_common.h" /** @@ -113,44 +113,6 @@ void CalFakeQuantizePerChannel(const float *input, float *output, const int tota input, output, total_size, channel_size, nudge_min, nudge_max, scale, symmetric); } -/** - * UpdateInputMinMaxPerChannel or UpdateInputMinMaxPerChannel With EMA. - * @param input_min - * @param input_max - * @param min - * @param max - * @return - */ -__global__ void UpdateInputMinMaxPerChannel(float *input_min, float *input_max, float *input, int channels, - int per_channel_nums, bool ema, float ema_decay) { - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < channels; i += blockDim.x * gridDim.x) { - thrust::pair sum = - thrust::minmax_element(thrust::device, input + i * per_channel_nums, input + per_channel_nums * (i + 1)); - if (ema) { - input_min[i] = ema_decay * sum.first[0] + (1 - ema_decay) * input_min[i]; - input_max[i] = ema_decay * sum.second[0] + (1 - ema_decay) * input_max[i]; - } else { - input_min[i] = sum.first[0]; - input_max[i] = sum.second[0]; - } - input_min[i] = input_min[i] > 0 ? 0 : input_min[i]; - input_max[i] = input_max[i] < 0 ? 
0 : input_max[i]; - } -} - -__global__ void UpdateInputMinMaxPerChannelWithEMA(float *input_min, float *input_max, float min, float max, - const float decay) { - *input_min = decay * (min) + (1 - decay) * (*input_min); - *input_max = decay * (max) + (1 - decay) * (*input_max); -} - -void CalMinMaxPerChannel(float *input, float *input_min, float *input_max, const int total_size, const int channel_size, - const float ema_decay, const bool ema, cudaStream_t cuda_stream) { - int per_channel_num = total_size / channel_size; - UpdateInputMinMaxPerChannel<<>>( - input_min, input_max, input, channel_size, per_channel_num, ema, ema_decay); -} - __global__ void FakeQuantizePerChannelGrad(const float *input, const float *gradient, float *output, const int total_size, const int channel_size, const float *nudge_min, const float *nudge_max) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_per_channel_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_per_channel_impl.cuh rename to mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cu similarity index 92% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_impl.cu rename to mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cu index f25727f2c3..11a25ba294 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_impl.cu +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cu @@ -18,10 +18,10 @@ #include #include #include "device/gpu/cuda_common.h" -#include "fake_quant_impl.cuh" +#include "fake_quant_perlayer_impl.cuh" __global__ void FakeQuantize(const float *input, float *output, const int size, const float *nudge_min, - const float *nudge_max, const float *scale, bool symmetric) { + const float *nudge_max, const float 
*scale) { float input_x = 0.f; int nudge_input = 0; @@ -35,7 +35,7 @@ __global__ void FakeQuantize(const float *input, float *output, const int size, input_x = nudge_max[0]; } // clamp shift - nudge_input = floor((input_x - nudge_min[0]) / scale[0] + 0.5f); + nudge_input = round((input_x - nudge_min[0]) / scale[0]); // quantize output[i] = nudge_input * scale[0] + nudge_min[0]; @@ -99,8 +99,7 @@ __global__ void UpdateInputMinMax(float *input_min, float *input_max, const floa void CalFakeQuantize(const float *input, float *output, const int size, const float *nudge_min, const float *nudge_max, const float *scale, bool symmetric, cudaStream_t cuda_stream) { - FakeQuantize<<>>(input, output, size, nudge_min, nudge_max, scale, - symmetric); + FakeQuantize<<>>(input, output, size, nudge_min, nudge_max, scale); return; } diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_impl.cuh rename to mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cu new file mode 100644 index 0000000000..ea6ffdbbdc --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cu @@ -0,0 +1,87 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/gpu/cuda_impl/ftrl_impl.cuh" + +template +__device__ __forceinline__ T PowFunc(T x, T y) { + return pow(x, y); +} + +template <> +__device__ __forceinline__ half PowFunc(half x, half y) { + return __float2half(pow(__half2float(x), __half2float(y))); +} + +template +__device__ __forceinline__ bool CompareFunc(T x, T y) { + return abs(x) > y; +} + +template <> +__device__ __forceinline__ bool CompareFunc(half x, half y) { + return abs(__half2float(x)) > __half2float(y); +} + +template +__device__ __forceinline__ T Sgn(T x) { + return static_cast(x != 0 ? (x > 0 ? 1 : -1) : 0); +} + +template <> +__device__ __forceinline__ half Sgn(half x) { + return __float2half(__half2float(x) != 0 ? (__half2float(x) > 0 ? 1 : -1) : 0); +} + +template +__global__ void ApplyFtrlKernel(const size_t size, const T *gradient, const T *learning_rate, + const T *l1_regularization, const T *l2_regularization, const T *learning_rate_power, + T *variable, T *accumulation, T *linear) { + const T two = static_cast(2.0); + const T learning_rate_power_val = -learning_rate_power[0]; + + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { + const T cur_accumulation = accumulation[i] + gradient[i] * gradient[i]; + const T accumulation_power = PowFunc(accumulation[i], learning_rate_power_val); + const T cur_accumulation_power = PowFunc(cur_accumulation, learning_rate_power_val); + const T sigma = (cur_accumulation_power - accumulation_power) / learning_rate[0]; + + linear[i] += gradient[i] - sigma * variable[i]; + variable[i] = CompareFunc(linear[i], l1_regularization[0]) + ? 
((l1_regularization[0] * Sgn(linear[i]) - linear[i]) / + (cur_accumulation_power / learning_rate[0] + two * l2_regularization[0])) + : static_cast(0); + accumulation[i] = cur_accumulation; + } +} + +template +void ApplyFtrl(const size_t size, const T *gradient, const T *learning_rate, const T *l1_regularization, + const T *l2_regularization, const T *learning_rate_power, T *variable, T *accumulation, T *linear, + cudaStream_t cuda_stream) { + ApplyFtrlKernel<<>>(size, gradient, learning_rate, l1_regularization, + l2_regularization, learning_rate_power, variable, + accumulation, linear); +} + +template void ApplyFtrl(const size_t size, const float *gradient, const float *learning_rate, + const float *l1_regularization, const float *l2_regularization, + const float *learning_rate_power, float *variable, float *accumulation, float *linear, + cudaStream_t cuda_stream); +template void ApplyFtrl(const size_t size, const half *gradient, const half *learning_rate, + const half *l1_regularization, const half *l2_regularization, + const half *learning_rate_power, half *variable, half *accumulation, half *linear, + cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cuh new file mode 100644 index 0000000000..ba4a8fa816 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cuh @@ -0,0 +1,26 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FTRL_IMPL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FTRL_IMPL_H_ + +#include "device/gpu/cuda_common.h" +template +void ApplyFtrl(const size_t size, const T *gradient, const T *learning_rate, const T *l1_regularization, + const T *l2_regularization, const T *learning_rate_power, T *variable, T *accumulation, T *linear, + cudaStream_t cuda_stream); + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FTRL_IMPL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cu index bb476179d5..e460caec9e 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cu +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cu @@ -14,32 +14,62 @@ * limitations under the License. */ - #include "kernel/gpu/cuda_impl/gelu_impl.cuh" #include "device/gpu/cuda_common.h" -template -__global__ void GeluKernel(size_t size, T* input_addr, T* output_addr) { +template +__global__ void GeluKernel(size_t size, T *input_addr, T *output_addr) { // formula: // gelu(x) = 0.5 * x * (1.0 + tanh(y)) // tanh(y) = 2 / (1 + exp(-2y)) - 1) // y = sqrt(2/pi) * (x + 0.044715 * x^3) - for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim.x) { + for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) { float x = input_addr[pos]; float tanh_res = tanh(0.7978845608 * (x + 0.044715 * x * x * x)); output_addr[pos] = 0.5 * x * (1.0 + tanh_res); } } -template -void Gelu(size_t size, T* input_addr, T* output_addr, cudaStream_t cuda_stream) { +template <> +__global__ void GeluKernel(size_t size, half *input_addr, half *output_addr) { + for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) { + half x = input_addr[pos]; + float tanh_res = 
tanh(__half2float(half(0.7978845608) * (x + half(0.044715) * x * x * x))); + output_addr[pos] = half(0.5) * x * (half(1.0) + __float2half(tanh_res)); + } +} + +template <> +__global__ void GeluKernel(size_t size, half2 *input_addr, half2 *output_addr) { + for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) { + half2 x = input_addr[pos]; + float2 tanh_param = __half22float2(half2(0.7978845608, 0.7978845608) * (x + half2(0.044715, 0.044715) * x * x * x)); + float2 tanh_res; + tanh_res.x = tanh(tanh_param.x); + tanh_res.y = tanh(tanh_param.y); + output_addr[pos] = half2(0.5, 0.5) * x * (half2(1.0, 1.0) + __float22half2_rn(tanh_res)); + } +} + +template +void Gelu(size_t size, T *input_addr, T *output_addr, cudaStream_t cuda_stream) { GeluKernel<<>>(size, input_addr, output_addr); return; } +template <> +void Gelu(size_t size, half *input_addr, half *output_addr, cudaStream_t cuda_stream) { + if (size % 2 == 0) { + GeluKernel<<>>( + size / 2, reinterpret_cast(input_addr), reinterpret_cast(output_addr)); + } else { + GeluKernel<<>>(size, input_addr, output_addr); + } + return; +} -template -__global__ void GeluGradKernel(size_t size, T* dy_addr, T* x_addr, T* dx_addr) { +template +__global__ void GeluGradKernel(size_t size, T *dy_addr, T *x_addr, T *dx_addr) { // formula: // dx = dy * y' // y' = 0.5 * (1 + tanh(tanh_para)) + @@ -48,18 +78,59 @@ __global__ void GeluGradKernel(size_t size, T* dy_addr, T* x_addr, T* dx_addr) { // mul_right = sqrt(2/pi) * (1 + 3 * 0.044715 * x^2)) for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim.x) { T x = x_addr[pos]; - T tanh_res = tanh(0.7978845608 * (x + 0.044715 * x * x * x)); - T mul_right = 0.7978845608 + 0.1070322244 * x * x; - T y_res = 0.5 * (1 + tanh_res) + 0.5 * x * (1 - tanh_res * tanh_res) * mul_right; + T tanh_res = tanh(0.7978845608 * (x + 0.044715 * x * x * x)); + T mul_right = 0.7978845608 + 0.1070322244 * x * x; + T y_res = 
0.5 * (1.0 + tanh_res) + 0.5 * x * (1.0 - tanh_res * tanh_res) * mul_right; + dx_addr[pos] = dy_addr[pos] * y_res; + } +} + +template +__global__ void GeluGradKernel(size_t size, half2 *dy_addr, half2 *x_addr, half2 *dx_addr) { + for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim.x) { + half2 x = x_addr[pos]; + float2 tanh_param = __half22float2(half2(0.7978845608, 0.7978845608) * (x + half2(0.044715, 0.044715) * x * x * x)); + float2 tanh_res; + tanh_res.x = tanh(tanh_param.x); + tanh_res.y = tanh(tanh_param.y); + half2 tanh_res_half = __float22half2_rn(tanh_res); + half2 mul_right = half2(0.7978845608, 0.7978845608) + half2(0.1070322244, 0.1070322244) * x * x; + half2 y_res = half2(0.5, 0.5) * (half2(1.0, 1.0) + tanh_res_half) + + half2(0.5, 0.5) * x * (half2(1.0, 1.0) - tanh_res_half * tanh_res_half) * mul_right; + dx_addr[pos] = dy_addr[pos] * y_res; + } +} + +template +__global__ void GeluGradKernel(size_t size, half *dy_addr, half *x_addr, half *dx_addr) { + for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim.x) { + half x = x_addr[pos]; + half tanh_param = half(0.7978845608) * (x + half(0.044715) * x * x * x); + half tanh_res = __float2half_rn(tanh(__half2float(tanh_param))); + half mul_right = half(0.7978845608) + half(0.1070322244) * x * x; + half y_res = half(0.5) * (half(1.0) + tanh_res) + half(0.5) * x * (half(1.0) - tanh_res * tanh_res) * mul_right; dx_addr[pos] = dy_addr[pos] * y_res; } } -template -void GeluGradKernel(size_t size, T* dy_addr, T* x_addr, T* dx_addr, cudaStream_t cuda_stream) { +template +void GeluGradKernel(size_t size, T *dy_addr, T *x_addr, T *dx_addr, cudaStream_t cuda_stream) { GeluGradKernel<<>>(size, dy_addr, x_addr, dx_addr); } +template <> +void GeluGradKernel(size_t size, half *dy_addr, half *x_addr, half *dx_addr, cudaStream_t cuda_stream) { + if (size % 2 == 0) { + GeluGradKernel<<>>( + size / 2, reinterpret_cast(dy_addr), 
reinterpret_cast(x_addr), + reinterpret_cast(dx_addr)); + } else { + GeluGradKernel<<>>(size, dy_addr, x_addr, dx_addr); + } + return; +} -template void Gelu(size_t size, float* input_addr, float* output_addr, cudaStream_t cuda_stream); -template void GeluGradKernel(size_t size, float* dy_addr, float* x_addr, float* dx_addr, cudaStream_t cuda_stream); +template void Gelu(size_t size, float *input_addr, float *output_addr, cudaStream_t cuda_stream); +template void Gelu(size_t size, half *input_addr, half *output_addr, cudaStream_t cuda_stream); +template void GeluGradKernel(size_t size, float *dy_addr, float *x_addr, float *dx_addr, cudaStream_t cuda_stream); +template void GeluGradKernel(size_t size, half *dy_addr, half *x_addr, half *dx_addr, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu index f8377fd721..e887b98eca 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu @@ -18,10 +18,21 @@ #include #include #include "kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh" +#include "kernel/gpu/cuda_impl/layer_norm_impl.cuh" constexpr int NUM_PER_THREAD_REDUCE = 4; constexpr int WARP_SIZE = 32; +template +inline __device__ T my_pow(T a, double b) { + return pow(a, static_cast(b)); +} + +template <> +inline __device__ half my_pow(half a, double b) { + return __float2half(pow(__half2float(a), static_cast(b))); +} + template inline __device__ void GammaAndBetaThreadReduce(const int& col, const int& row_dim, const int& col_dim, const T& epsilon, const T* dy, const T* x, const T* mean, const T* var, @@ -35,7 +46,7 @@ inline __device__ void GammaAndBetaThreadReduce(const int& col, const int& row_d } int pos = row * col_dim + col; - dg[0] += dy[pos] * pow(var[row] + epsilon, -0.5) * (x[pos] - mean[row]); + dg[0] += dy[pos] * my_pow(var[row] + epsilon, -0.5) * (x[pos] - mean[row]); 
db[0] += dy[pos]; } } @@ -58,26 +69,26 @@ inline __device__ void GammaAndBetaBlockReduce(const int& col, const int& row_di // load data to share memory // thread(0, 32, 64, 96, ...) keep the data - extern __shared__ T share_mem[]; + DynamicSharedMem share_mem; if (threadIdx.x % WARP_SIZE == 0) { int offset = threadIdx.x / WARP_SIZE * 2; - share_mem[offset] = dg[0]; - share_mem[offset + 1] = db[0]; + share_mem.addr()[offset] = dg[0]; + share_mem.addr()[offset + 1] = db[0]; } __syncthreads(); for (int stride = blockDim.x / WARP_SIZE / 2; stride > 0; stride >>= 1) { if (threadIdx.x < stride) { int offset = (threadIdx.x + stride) * 2; - share_mem[threadIdx.x * 2] += share_mem[offset]; - share_mem[threadIdx.x * 2 + 1] += share_mem[offset + 1]; + share_mem.addr()[threadIdx.x * 2] += share_mem.addr()[offset]; + share_mem.addr()[threadIdx.x * 2 + 1] += share_mem.addr()[offset + 1]; } } __syncthreads(); if (threadIdx.x == 0) { - dg_addr[col] = share_mem[0]; - db_addr[col] = share_mem[1]; + dg_addr[col] = share_mem.addr()[0]; + db_addr[col] = share_mem.addr()[1]; } } @@ -114,13 +125,37 @@ inline __device__ void InputThreadReduce(const int& row, const int& col_dim, con T v1 = dy[pos] * gamma[gamma_offset]; T v2 = x[pos] - mean[row]; - sum1[0] += -0.5 * v1 * v2 * pow(var[row] + epsilon, -1.5); + sum1[0] += -0.5 * v1 * v2 * my_pow(var[row] + epsilon, -1.5); sum2[0] += v1; sum3[0] += -2.0 * v2; } } } +template <> +inline __device__ void InputThreadReduce(const int& row, const int& col_dim, const int& param_dim, const half& epsilon, + half* sum1, half* sum2, half* sum3, const half* dy, const half* x, + const half* mean, const half* var, const half* gamma) { + int loop_num = (col_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE; + for (int i = threadIdx.x; i < loop_num; i += blockDim.x) { + for (int j = 0; j < NUM_PER_THREAD_REDUCE; j++) { + int col = NUM_PER_THREAD_REDUCE * i + j; + if (col >= col_dim) { + return; + } + + int pos = row * col_dim + col; + int gamma_offset 
= pos % param_dim; + half v1 = dy[pos] * gamma[gamma_offset]; + half v2 = x[pos] - mean[row]; + + sum1[0] += __float2half(-0.5) * v1 * v2 * my_pow(var[row] + epsilon, -1.5); + sum2[0] += v1; + sum3[0] += __float2half(-2.0) * v2; + } + } +} + template inline __device__ void InputWarpReduce(T* sum1, T* sum2, T* sum3) { for (int delta = (WARP_SIZE >> 1); delta > 0; delta >>= 1) { @@ -166,12 +201,28 @@ inline __device__ void InputProp(const int& row, const int& col_dim, const int& int gamma_offset = pos % param_dim; T v1 = dy[pos] * gamma[gamma_offset]; T v2 = x[pos] - mean[row]; - T v3 = pow(var[row] + epsilon, -0.5); + T v3 = my_pow(var[row] + epsilon, -0.5); dx[pos] = v1 * v3 + share_mem[0] * (2.0 / col_dim) * v2 + (-1.0 * v3 * share_mem[1] + (1.0 / col_dim) * share_mem[0] * share_mem[2]) * (1.0 / col_dim); } } +template <> +inline __device__ void InputProp(const int& row, const int& col_dim, const int& param_dim, const half& epsilon, + const half* dy, const half* x, const half* mean, const half* var, const half* gamma, + half* dx, const half* share_mem) { + for (int col = threadIdx.x; col < col_dim; col += blockDim.x) { + int pos = (row * col_dim + col); + int gamma_offset = pos % param_dim; + half v1 = dy[pos] * gamma[gamma_offset]; + half v2 = x[pos] - mean[row]; + half v3 = my_pow(var[row] + epsilon, -0.5); + dx[pos] = v1 * v3 + share_mem[0] * __float2half(2.0 / col_dim) * v2 + + (__float2half(-1.0) * v3 * share_mem[1] + __float2half(1.0 / col_dim) * share_mem[0] * share_mem[2])\ + * __float2half(1.0 / col_dim); + } +} + template __global__ void InputPropKernel(const int row_dim, const int col_dim, const int param_dim, const T epsilon, const T* dy, const T* x, const T* mean, const T* var, const T* gamma, T* dx) { @@ -179,27 +230,30 @@ __global__ void InputPropKernel(const int row_dim, const int col_dim, const int T sum1 = 0; T sum2 = 0; T sum3 = 0; - extern __shared__ T share_mem[]; + DynamicSharedMem share_mem; InputThreadReduce(row, col_dim, param_dim, 
epsilon, &sum1, &sum2, &sum3, dy, x, mean, var, gamma); InputWarpReduce(&sum1, &sum2, &sum3); - InputBlockReduce(col_dim, &sum1, &sum2, &sum3, share_mem); - InputProp(row, col_dim, param_dim, epsilon, dy, x, mean, var, gamma, dx, share_mem); + InputBlockReduce(col_dim, &sum1, &sum2, &sum3, share_mem.addr()); + InputProp(row, col_dim, param_dim, epsilon, dy, x, mean, var, gamma, dx, share_mem.addr()); } } template void LayerNormGrad(const int& row_dim, const int& col_dim, const int& param_dim, const T& epsilon, const T* dy, const T* x, const T* mean, const T* var, const T* gamma, T* dx, T* dg, T* db, cudaStream_t stream) { - int share_mem = + int share_mem_size = ((col_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE + WARP_SIZE - 1) / WARP_SIZE * 3 * sizeof(T); - InputPropKernel<<>>(row_dim, col_dim, param_dim, epsilon, dy, x, mean, var, gamma, - dx); + InputPropKernel<<>>(row_dim, col_dim, param_dim, epsilon, dy, x, mean, var, + gamma, dx); - share_mem = + share_mem_size = ((row_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE + WARP_SIZE - 1) / WARP_SIZE * 2 * sizeof(T); - GammaAndBetaPropKernel<<>>(row_dim, col_dim, epsilon, dy, x, mean, var, dg, db); + GammaAndBetaPropKernel<<>>(row_dim, col_dim, epsilon, dy, x, mean, var, dg, db); } template void LayerNormGrad(const int& row_dim, const int& col_dim, const int& param_dim, const float& epsilon, const float* dy, const float* x, const float* mean, const float* var, const float* gamma, float* dx, float* dg, float* db, cudaStream_t stream); +template void LayerNormGrad(const int& row_dim, const int& col_dim, const int& param_dim, const half& epsilon, + const half* dy, const half* x, const half* mean, const half* var, const half* gamma, + half* dx, half* dg, half* db, cudaStream_t stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu index db33673744..cfb60f0ba6 100644 --- 
a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu @@ -23,7 +23,7 @@ constexpr int NUM_PER_THREAD_REDUCE = 4; constexpr int WARP_SIZE = 32; template -inline __device__ void MeanAndVarAccumulation(T* mean, T* var, T* num, const T& val) { +inline __device__ void MeanAndVarAccumulation(T *mean, T *var, T *num, const T &val) { // Welford Algorithm: // \mu_k = \mu_{k-1} + (x_k - \mu_{k-1})/k // \sigma_k^2 = \sigma_{k-1}^2 + (x_k - \mu_{k-1}) * (x_k - \mu_k) @@ -34,8 +34,9 @@ inline __device__ void MeanAndVarAccumulation(T* mean, T* var, T* num, const T& } template -inline __device__ void MeanAndVarMerge(T* m1, T* v1, T* n1, const T& m2, const T& v2, const T& n2) { - if (n2 == 0) { +inline __device__ void MeanAndVarMerge(T *m1, T *v1, T *n1, const T &m2, const T &v2, const T &n2) { + T zero = 0; + if (n2 == zero) { return; } @@ -46,7 +47,7 @@ inline __device__ void MeanAndVarMerge(T* m1, T* v1, T* n1, const T& m2, const T } template -inline __device__ void ThreadReduce(const int& col_dim, const T* block_addr, T* mean, T* var, T* num) { +inline __device__ void ThreadReduce(const int &col_dim, const T *block_addr, T *mean, T *var, T *num) { int loop_num = (col_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE; for (int i = threadIdx.x; i < loop_num; i += blockDim.x) { for (int j = 0; j < NUM_PER_THREAD_REDUCE; j++) { @@ -60,7 +61,7 @@ inline __device__ void ThreadReduce(const int& col_dim, const T* block_addr, T* } template -inline __device__ void WarpReduce(T* mean, T* var, T* num) { +inline __device__ void WarpReduce(T *mean, T *var, T *num) { for (int delta = (WARP_SIZE >> 1); delta > 0; delta >>= 1) { T mean_other = __shfl_down_sync(0xffffffff, mean[0], delta); T var_other = __shfl_down_sync(0xffffffff, var[0], delta); @@ -70,8 +71,8 @@ inline __device__ void WarpReduce(T* mean, T* var, T* num) { } template -inline __device__ void BlockReduce(const int& col_dim, T* mean, T* var, T* num, T* 
mean_addr, T* var_addr, - T* share_mem) { +inline __device__ void BlockReduce(const int &col_dim, T *mean, T *var, T *num, T *mean_addr, T *var_addr, + T *share_mem) { if (threadIdx.x >= col_dim) { return; } @@ -96,15 +97,15 @@ inline __device__ void BlockReduce(const int& col_dim, T* mean, T* var, T* num, __syncthreads(); if (threadIdx.x == 0) { - mean_addr[blockIdx.x] = share_mem[0]; // todo: blockDim.x < row + mean_addr[blockIdx.x] = share_mem[0]; share_mem[1] /= col_dim; var_addr[blockIdx.x] = share_mem[1]; } } template -inline __device__ void LayerNorm(const int& row, const int& col_dim, const int& param_dim, const T* x, - const T* share_mem, const T* gamma, const T* beta, const T epsilon, T* y) { +inline __device__ void LayerNorm(const int &row, const int &col_dim, const int ¶m_dim, const T *x, + const T *share_mem, const T *gamma, const T *beta, const T epsilon, T *y) { for (int col = threadIdx.x; col < col_dim; col += blockDim.x) { int pos = row * col_dim + col; int i = pos % param_dim; @@ -112,37 +113,51 @@ inline __device__ void LayerNorm(const int& row, const int& col_dim, const int& } } +template <> +inline __device__ void LayerNorm(const int &row, const int &col_dim, const int ¶m_dim, const half *x, + const half *share_mem, const half *gamma, const half *beta, const half epsilon, + half *y) { + for (int col = threadIdx.x; col < col_dim; col += blockDim.x) { + int pos = row * col_dim + col; + int i = pos % param_dim; + y[pos] = (x[pos] - share_mem[0]) / hsqrt(share_mem[1] + epsilon) * gamma[i] + beta[i]; + } +} + template -__global__ void LayerNormKernel(const int row_dim, const int col_dim, const int param_dim, const T epsilon, const T* x, - const T* gamma, const T* beta, T* y, T* mean_addr, T* var_addr) { +__global__ void LayerNormKernel(const int row_dim, const int col_dim, const int param_dim, const T epsilon, const T *x, + const T *gamma, const T *beta, T *y, T *mean_addr, T *var_addr) { for (auto row = blockIdx.x; row < row_dim; row += gridDim.x) 
{ T mean = 0; T var = 0; T num = 0; - const T* block_addr = x + row * col_dim; - extern __shared__ T share_mem[]; + const T *block_addr = x + row * col_dim; + DynamicSharedMem share_mem; ThreadReduce(col_dim, block_addr, &mean, &var, &num); WarpReduce(&mean, &var, &num); - BlockReduce(col_dim, &mean, &var, &num, mean_addr, var_addr, share_mem); + BlockReduce(col_dim, &mean, &var, &num, mean_addr, var_addr, share_mem.addr()); __syncthreads(); - LayerNorm(row, col_dim, param_dim, x, share_mem, gamma, beta, epsilon, y); + LayerNorm(row, col_dim, param_dim, x, share_mem.addr(), gamma, beta, epsilon, y); } } template -void LayerNorm(const int& row_dim, const int& col_dim, const int& param_dim, const T& epsilon, const T* x, - const T* gamma, const T* beta, T* y, T* mean, T* var, cudaStream_t stream) { +void LayerNorm(const int &row_dim, const int &col_dim, const int ¶m_dim, const T &epsilon, const T *x, + const T *gamma, const T *beta, T *y, T *mean, T *var, cudaStream_t stream) { const dim3 block(row_dim); const dim3 thread(256); // keep the mean/var/num after warp reduce - int share_mem = + int share_mem_size = ((col_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE + WARP_SIZE - 1) / WARP_SIZE * 3 * sizeof(T); - LayerNormKernel<<>>(row_dim, col_dim, param_dim, epsilon, x, gamma, beta, y, mean, - var); + LayerNormKernel<<>>(row_dim, col_dim, param_dim, epsilon, x, gamma, beta, y, + mean, var); } -template void LayerNorm(const int& row_dim, const int& col_dim, const int& param_dim, const float& epsilon, - const float* x, const float* gamma, const float* beta, float* y, float* mean, float* var, +template void LayerNorm(const int &row_dim, const int &col_dim, const int ¶m_dim, const float &epsilon, + const float *x, const float *gamma, const float *beta, float *y, float *mean, float *var, + cudaStream_t stream); +template void LayerNorm(const int &row_dim, const int &col_dim, const int ¶m_dim, const half &epsilon, + const half *x, const half *gamma, const half 
*beta, half *y, half *mean, half *var, cudaStream_t stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh index 4832b08746..c06a698384 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh @@ -19,6 +19,23 @@ #include "device/gpu/cuda_common.h" +template +struct DynamicSharedMem; +template<> +struct DynamicSharedMem { + __device__ float *addr() { + extern __shared__ float addr_float[]; + return addr_float; + } +}; +template<> +struct DynamicSharedMem { + __device__ half *addr() { + extern __shared__ half addr_half[]; + return addr_half; + } +}; + template void LayerNorm(const int& outer, const int& inner, const int& param_dim, const T& epsilon, const T* x, const T* gamma, const T* beta, T* y, T* mean, T* var, cudaStream_t stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cu new file mode 100644 index 0000000000..27b2cb0232 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cu @@ -0,0 +1,87 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include "minmax_update_impl.cuh" +#include "device/gpu/cuda_common.h" + +__global__ void UpdateInputMinMaxPerLayerWithEMA(const float *input_min, const float *input_max, float *output_min, + float *output_max, const float min, const float max, + const float decay) { + output_min[0] = decay * (min) + (1 - decay) * (input_min[0]); + output_min[0] = input_min[0] > 0 ? 0 : input_min[0]; + output_max[0] = decay * (max) + (1 - decay) * (input_max[0]); + output_max[0] = input_max[0] < 0 ? 0 : input_max[0]; + return; +} + +__global__ void UpdateInputMinMaxPerLayer(float *output_min, float *output_max, const float min, const float max) { + output_min[0] = min > 0 ? 0 : min; + output_max[0] = max < 0 ? 0 : max; + return; +} + +__global__ void UpdateInputMinMaxPerChannel(float *input, float *input_min, float *input_max, float *output_min, + float *output_max, int channels, int per_channel_nums, bool ema, + float ema_decay) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < channels; i += blockDim.x * gridDim.x) { + thrust::pair sum = + thrust::minmax_element(thrust::device, input + i * per_channel_nums, input + per_channel_nums * (i + 1)); + if (ema) { + output_min[i] = ema_decay * sum.first[0] + (1 - ema_decay) * input_min[i]; + output_max[i] = ema_decay * sum.second[0] + (1 - ema_decay) * input_max[i]; + } else { + output_min[i] = sum.first[0]; + output_max[i] = sum.second[0]; + } + output_min[i] = input_min[i] > 0 ? 0 : input_min[i]; + output_max[i] = input_max[i] < 0 ? 
0 : input_max[i]; + } + return; +} + +void CalMinMaxPerChannel(float *input, float *input_min, float *input_max, float *output_min, float *output_max, + const int total_num, const int channel_num, const float ema_decay, const bool ema, + cudaStream_t cuda_stream) { + int per_channel_num = total_num / channel_num; + UpdateInputMinMaxPerChannel<<>>( + input, input_min, input_max, output_min, output_max, channel_num, per_channel_num, ema, ema_decay); + return; +} + +void CalMinMaxPerLayer(float *input, float *input_min, float *input_max, float *output_min, float *output_max, + const int total_num, const float ema_decay, const bool ema, cudaStream_t cuda_stream) { + float minel = 0.f; + float maxel = 0.f; + auto policy = thrust::cuda::par.on(cuda_stream); + thrust::pair, thrust::device_ptr> tuple; + tuple = + thrust::minmax_element(policy, thrust::device_pointer_cast(input), thrust::device_pointer_cast(input) + total_num); + minel = tuple.first[0]; + maxel = tuple.second[0]; + + if (ema) { + UpdateInputMinMaxPerLayerWithEMA<<<1, 1, 0, cuda_stream>>>(input_min, input_max, output_min, output_max, minel, + maxel, ema_decay); + } else { + UpdateInputMinMaxPerLayer<<<1, 1, 0, cuda_stream>>>(output_min, output_max, minel, maxel); + } + return; +} diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cuh new file mode 100644 index 0000000000..5e9becab38 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cuh @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_MIN_MAX_UPDATE_IMPL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_MIN_MAX_UPDATE_IMPL_H_ + +#include "device/gpu/cuda_common.h" + +void CalMinMaxPerChannel(float *input, float *input_min, float *input_max, float *output_min, float *output_max, + const int total_num, const int channel_num, const float ema_decay, const bool ema, + cudaStream_t cuda_stream); + +void CalMinMaxPerLayer(float *input, float *input_min, float *input_max, float *output_min, float *output_max, + const int size, const float ema_decay, const bool ema, cudaStream_t cuda_stream); + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_MIN_MAX_UPDATE_IMPL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cu index ae24a8dec9..5a1c9eb687 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cu +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cu @@ -15,25 +15,38 @@ */ #include "momentum_impl.cuh" -template -__global__ void MomentumUpdateVariableKernel(const size_t size, T *variable, T *accumulation, const T *learning_rate, - const T *gradient, const T *momentum) { +template +__global__ void MomentumUpdateVariableKernel(const size_t size, T *variable, T *accumulation, const S *learning_rate, + const T *gradient, const S *momentum) { for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) { accumulation[i] = momentum[0] * accumulation[i] + gradient[i]; variable[i] -= learning_rate[0] * accumulation[i]; } return; } 
-template -void MomentumUpdateVariable(const size_t size, T *variable, T *accumulation, const T *learning_rate, const T *gradient, - const T *momentum, cudaStream_t cuda_stream) { +template <> +__global__ void MomentumUpdateVariableKernel(const size_t size, half *variable, half *accumulation, + const float *learning_rate, const half *gradient, + const float *momentum) { + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) { + accumulation[i] = __float2half(momentum[0]) * accumulation[i] + gradient[i]; + variable[i] -= __float2half(learning_rate[0]) * accumulation[i]; + } + return; +} +template +void MomentumUpdateVariable(const size_t size, T *variable, T *accumulation, const S *learning_rate, const T *gradient, + const S *momentum, cudaStream_t cuda_stream) { MomentumUpdateVariableKernel<<>>(size, variable, accumulation, learning_rate, gradient, momentum); return; } -template void MomentumUpdateVariable(const size_t size, float *variable, float *accumulation, - const float *learning_rate, const float *gradient, const float *momentum, - cudaStream_t cuda_stream); -template void MomentumUpdateVariable(const size_t size, half *variable, half *accumulation, - const half *learning_rate, const half *gradient, const half *momentum, - cudaStream_t cuda_stream); +template void MomentumUpdateVariable(const size_t size, float *variable, float *accumulation, + const float *learning_rate, const float *gradient, + const float *momentum, cudaStream_t cuda_stream); +template void MomentumUpdateVariable(const size_t size, half *variable, half *accumulation, + const half *learning_rate, const half *gradient, + const half *momentum, cudaStream_t cuda_stream); +template void MomentumUpdateVariable(const size_t size, half *variable, half *accumulation, + const float *learning_rate, const half *gradient, + const float *momentum, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cuh 
b/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cuh index 2993e04ff3..5405f5ef1d 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cuh +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cuh @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_MOMENTUMIMPL_H_ #include "device/gpu/cuda_common.h" -template -void MomentumUpdateVariable(const size_t size, T *variable, T *accumulation, const T *learning_rate, const T *gradient, - const T *momentum, cudaStream_t cuda_stream); +template +void MomentumUpdateVariable(const size_t size, T *variable, T *accumulation, const S *learning_rate, const T *gradient, + const S *momentum, cudaStream_t cuda_stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_MOMENTUMIMPL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cu index 31a4d97dff..913aaa3b8d 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cu +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cu @@ -19,17 +19,17 @@ #include "device/gpu/cuda_common.h" template -__global__ void RmsPropKernel(const T* learning_rate, const T* decay, const T* momentum, const T* epsilon, T* variable, +__global__ void RmsPropKernel(const T* learning_rate, const T decay, const T momentum, const T epsilon, T* variable, T* mean_square, T*moment, T* gradients, const size_t size) { for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) { - mean_square[i] = decay[0] * mean_square[i] + (1.0 - decay[0]) * gradients[i] * gradients[i]; - moment[i] = momentum[0] * moment[i] + learning_rate[0] * rsqrt(mean_square[i] + epsilon[0]) * gradients[i]; + mean_square[i] = decay * mean_square[i] + (1.0 - decay) * gradients[i] * gradients[i]; + moment[i] = momentum * moment[i] + learning_rate[0] * rsqrt(mean_square[i] + epsilon) * gradients[i]; variable[i] -= moment[i]; } } template -void RmsProp(const T* learning_rate, const T* decay, const T* momentum, 
const T* epsilon, +void RmsProp(const T* learning_rate, const T decay, const T momentum, const T epsilon, T* variable, T* mean_square, T* moment, T* gradients, const size_t size, cudaStream_t cuda_stream) { RmsPropKernel<<>>(learning_rate, decay, momentum, epsilon, variable, mean_square, moment, gradients, size); @@ -58,7 +58,7 @@ void RmsPropCenter(const T* learning_rate, const T* decay, const T* momentum, co } template -void RmsProp(const float* learning_rate, const float* decay, const float* momentum, const float* epsilon, +void RmsProp(const float* learning_rate, const float decay, const float momentum, const float epsilon, float* variable, float* mean_square, float* moment, float* gradients, const size_t size, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cuh index 62d7e19ba2..b5802dbb67 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cuh +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cuh @@ -19,7 +19,7 @@ #include "device/gpu/cuda_common.h" template -void RmsProp(const T* learning_rate, const T* decay, const T* momentum, const T* epsilon, T* variable, T* mean_square, +void RmsProp(const T* learning_rate, const T decay, const T momentum, const T epsilon, T* variable, T* mean_square, T* moment, T* gradients, const size_t size, cudaStream_t cuda_stream); template diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu new file mode 100644 index 0000000000..a0082b84c8 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh" + +template +__global__ void SigmoidCrossEntropyWithLogitsGradKernel(const size_t size, const T *logits, const S *labels, + T *outputs) { + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { + if (logits[i] >= 0) { + outputs[i] = 1. / (1. + exp(-logits[i])) - labels[i]; + } else { + const T exp_val = exp(logits[i]); + outputs[i] = exp_val / (1. + exp_val) - labels[i]; + } + } +} + +template +void SigmoidCrossEntropyWithLogitsGrad(const size_t size, const T *logits, const S *labels, T *outputs, + cudaStream_t cuda_stream) { + SigmoidCrossEntropyWithLogitsGradKernel<<>>(size, logits, labels, + outputs); +} + +template void SigmoidCrossEntropyWithLogitsGrad(const size_t size, const float *logits, + const float *labels, float *outputs, + cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh new file mode 100644 index 0000000000..2cd4922d25 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh @@ -0,0 +1,25 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_IMPL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_IMPL_H_ + +#include "device/gpu/cuda_common.h" +template +void SigmoidCrossEntropyWithLogitsGrad(const size_t size, const T *logits, const S *labels, T *outputs, + cudaStream_t cuda_stream); + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_IMPL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu new file mode 100644 index 0000000000..3766f367db --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu @@ -0,0 +1,34 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh" + +template +__global__ void SigmoidCrossEntropyWithLogitsKernel(const size_t size, const T *logits, const S *labels, T *outputs) { + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { + const T reverse_factor = static_cast(logits[i] >= 0); + outputs[i] = log1p(exp(logits[i] - 2 * reverse_factor * logits[i])) - logits[i] * (labels[i] - reverse_factor); + } +} + +template +void SigmoidCrossEntropyWithLogits(const size_t size, const T *logits, const S *labels, T *outputs, + cudaStream_t cuda_stream) { + SigmoidCrossEntropyWithLogitsKernel<<>>(size, logits, labels, outputs); +} + +template void SigmoidCrossEntropyWithLogits(const size_t size, const float *logits, const float *labels, + float *outputs, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh new file mode 100644 index 0000000000..575605bde0 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh @@ -0,0 +1,25 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_IMPL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_IMPL_H_ + +#include "device/gpu/cuda_common.h" +template +void SigmoidCrossEntropyWithLogits(const size_t size, const T *logits, const S *labels, T *outputs, + cudaStream_t cuda_stream); + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_IMPL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cu deleted file mode 100644 index 5471ffb5d9..0000000000 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cu +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "kernel/gpu/cuda_impl/tanh_impl.cuh" -#include - -template -__global__ void TanhKernel(const size_t size, const T* x_addr, T* y_addr) { - for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) { - y_addr[pos] = tanh(x_addr[pos]); - } -} - -template -__global__ void TanhGradKernel(const size_t size, const T* y_addr, const T* dy_addr, T* dx_addr) { - for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) { - dx_addr[pos] = dy_addr[pos] * (1 - y_addr[pos] * y_addr[pos]); - } -} - -template -void Tanh(const size_t size, const T* x_addr, T* y_addr, cudaStream_t cuda_stream) { - TanhKernel<<>>(size, x_addr, y_addr); -} - -template -void TanhGrad(const size_t size, const T* y_addr, const T* dy_addr, T* dx_addr, cudaStream_t cuda_stream) { - TanhGradKernel<<>>(size, y_addr, dy_addr, dx_addr); -} - -template void Tanh(const size_t size, const float* x_addr, float* y_addr, cudaStream_t cuda_stream); -template void TanhGrad(const size_t size, const float* y_addr, const float* dy_addr, - float* dx_addr, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc b/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc index d416d7df67..13ca191b0b 100644 --- a/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc @@ -96,7 +96,8 @@ bool DatasetIteratorKernel::Launch(const std::vector &, const std::v } for (size_t i = 0; i < output_size_list_.size(); i++) { - CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpyAsync(outputs[i]->addr, addr, output_size_list_[i], cudaMemcpyDeviceToDevice, + void *output_addr = GetDeviceAddress(outputs, i); + CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpyAsync(output_addr, addr, output_size_list_[i], cudaMemcpyDeviceToDevice, reinterpret_cast(stream)), "Cuda Memcpy Failed"); addr = reinterpret_cast(addr) + output_size_list_[i]; diff --git 
a/mindspore/ccsrc/kernel/gpu/gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/gpu_kernel.h index 9f8090451f..c935798f06 100644 --- a/mindspore/ccsrc/kernel/gpu/gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/gpu_kernel.h @@ -22,6 +22,7 @@ #include #include #include "kernel/kernel.h" +#include "kernel/gpu/kernel_constants.h" #include "device/gpu/gpu_device_manager.h" #include "device/gpu/gpu_common.h" #include "session/anf_runtime_algorithm.h" @@ -63,6 +64,9 @@ class GpuKernel : public KernelMod { } // expand Nd Shape to 4d (N in [0,4]) void ShapeNdTo4d(const std::vector &src, std::vector *dst) { + if (src.size() > 4) { + MS_EXCEPTION(ValueError) << src.size() << "-D data is not supported!"; + } dst->push_back(src.size() < 4 ? 1 : SizeToInt(src[src.size() - 4])); dst->push_back(src.size() < 3 ? 1 : SizeToInt(src[src.size() - 3])); dst->push_back(src.size() < 2 ? 1 : SizeToInt(src[src.size() - 2])); @@ -79,6 +83,22 @@ class GpuKernel : public KernelMod { "must match the corresponding dimension of outC or must be equal to 1."; } } + + // choose the suitable datatype for cudnn/cublas + inline cudnnDataType_t GetCudnnDataType(const std::string &Type) { + auto type = kCudnnDtypeMap.find(Type); + if (type == kCudnnDtypeMap.end()) { + MS_EXCEPTION(TypeError) << Type << " is not supported."; + } + return type->second; + } + inline cudaDataType_t GetCudaDataType(const std::string &Type) { + auto type = kCudaDtypeMap.find(Type); + if (type == kCudaDtypeMap.end()) { + MS_EXCEPTION(TypeError) << Type << " is not supported."; + } + return type->second; + } }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.h index 0b27602761..1498da777f 100644 --- a/mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.h @@ -60,7 +60,7 @@ class AddNGpuFwdKernel : public GpuKernel { } bool Init(const CNodePtr &kernel_node) override { 
InitResource(); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); num_input_ = GetAttr(kernel_node, "n"); if (IntToSize(num_input_) != input_num) { diff --git a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h index 5d197e3cde..5a664db2e1 100644 --- a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h @@ -67,7 +67,7 @@ class BiasAddGpuKernel : public GpuKernel { } bool Init(const CNodePtr &kernel_node) override { InitResource(); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto num_dims = x_shape.size(); is_null_input_ = CHECK_NULL_INPUT(x_shape); diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc index 15beef39d0..e299946780 100644 --- a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc @@ -47,6 +47,10 @@ MS_REG_GPU_KERNEL_TWO( MS_REG_GPU_KERNEL_TWO( Sub, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), BroadcastOpGpuKernel, float, float) +MS_REG_GPU_KERNEL_TWO( + TensorAdd, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + BroadcastOpGpuKernel, float, float) // fp16 MS_REG_GPU_KERNEL_TWO( @@ -77,5 +81,20 @@ MS_REG_GPU_KERNEL_TWO( MS_REG_GPU_KERNEL_TWO( Sub, 
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), BroadcastOpGpuKernel, half, half) +MS_REG_GPU_KERNEL_TWO( + TensorAdd, + KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + BroadcastOpGpuKernel, half, half) + +// int32 +MS_REG_GPU_KERNEL_TWO( + TensorAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + BroadcastOpGpuKernel, int, int) +MS_REG_GPU_KERNEL_TWO( + Minimum, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + BroadcastOpGpuKernel, int, int) +MS_REG_GPU_KERNEL_TWO( + Maximum, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + BroadcastOpGpuKernel, int, int) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.h index c652d9aae4..be7d3a19d4 100644 --- a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.h @@ -68,14 +68,14 @@ class BroadcastOpGpuKernel : public GpuKernel { output_shape_[i] = shape3[i]; output_num_ *= shape3[i]; } - int offset = shape3.size() - shape1.size(); + int lhs_offset = shape3.size() - shape1.size(); for (size_t j = 0; j < shape1.size(); j++) { - lhs_shape_[j + offset] = shape1[j]; + lhs_shape_[j + lhs_offset] = shape1[j]; input1_num_ *= shape1[j]; } - offset = shape3.size() - shape2.size(); + int rhs_offset = shape3.size() - shape2.size(); for (size_t k = 0; k < shape2.size(); k++) { - rhs_shape_[k + offset] = shape2[k]; + rhs_shape_[k + rhs_offset] = shape2[k]; input2_num_ *= shape2[k]; } @@ -98,7 +98,7 @@ class BroadcastOpGpuKernel : public GpuKernel { static std::map kBroadcastTypeMap = { {"Greater", BROADCAST_TYPE_GREATER}, {"Less", 
BROADCAST_TYPE_LESS}, {"Maximum", BROADCAST_TYPE_MAXIMUM}, {"Minimum", BROADCAST_TYPE_MINIMUM}, {"Pow", BROADCAST_TYPE_POWER}, {"RealDiv", BROADCAST_TYPE_REALDIV}, - {"Mul", BROADCAST_TYPE_MUL}, {"Sub", BROADCAST_TYPE_SUB}, + {"Mul", BROADCAST_TYPE_MUL}, {"Sub", BROADCAST_TYPE_SUB}, {"TensorAdd", BROADCAST_TYPE_ADD}, }; auto iter = kBroadcastTypeMap.find(kernel_name); diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc index edc51d4ffd..85598cf940 100644 --- a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc @@ -34,5 +34,21 @@ MS_REG_GPU_KERNEL_ONE(MaximumGrad, .AddOutputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), BroadcastOpGradGpuKernel, float) +MS_REG_GPU_KERNEL_ONE(MinimumGrad, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + BroadcastOpGradGpuKernel, int) +MS_REG_GPU_KERNEL_ONE(MaximumGrad, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + BroadcastOpGradGpuKernel, int) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.h index 3e1f91b5b7..f1eb5fecf9 100644 --- a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.h @@ -74,14 +74,14 @@ class BroadcastOpGradGpuKernel : public GpuKernel { dy_shape_[i] = shape3[i]; output_num_ *= shape3[i]; } - int offset = shape3.size() - shape1.size(); + int x1_offset = shape3.size() - shape1.size(); for (size_t i = 0; i < shape1.size(); i++) { - x1_shape_[i + offset] = 
shape1[i]; + x1_shape_[i + x1_offset] = shape1[i]; input1_num_ *= shape1[i]; } - offset = shape3.size() - shape2.size(); + int x2_offset = shape3.size() - shape2.size(); for (size_t i = 0; i < shape2.size(); i++) { - x2_shape_[i + offset] = shape2[i]; + x2_shape_[i + x2_offset] = shape2[i]; input2_num_ *= shape2[i]; } diff --git a/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h index 59153c7041..3ee3493ed6 100644 --- a/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h @@ -82,9 +82,9 @@ class MatMulGpuKernel : public GpuKernel { } bool Init(const CNodePtr &kernel_node) override { handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCublasHandle(); - dtype_a_ = kCudaDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; - dtype_b_ = kCudaDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 1))]; - dtype_c_ = kCudaDtypeMap[TypeIdLabel(AnfAlgo::GetOutputDeviceDataType(kernel_node, 0))]; + dtype_a_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); + dtype_b_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 1))); + dtype_c_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetOutputDeviceDataType(kernel_node, 0))); auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); is_null_input_ = CHECK_NULL_INPUT(output_shape); if (is_null_input_) { diff --git a/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h deleted file mode 100644 index 67c6a34f3f..0000000000 --- a/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h +++ /dev/null @@ -1,171 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef MINDSPORE_CCSRC_KERNEL_GPU_TENSORADD_GPU_KERNEL_H_ -#define MINDSPORE_CCSRC_KERNEL_GPU_TENSORADD_GPU_KERNEL_H_ - -#include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -namespace mindspore { -namespace kernel { -template -class TensorAddGpuFwdKernel : public GpuKernel { - public: - TensorAddGpuFwdKernel() - : cudnn_handle_(nullptr), - inputA_descriptor_(nullptr), - inputB_descriptor_(nullptr), - opTensor_descriptor_(nullptr), - cudnn_data_type_(CUDNN_DATA_FLOAT), - input_size_(0), - output_size_(0), - workspace_size_(0), - is_null_input_(false) {} - ~TensorAddGpuFwdKernel() override { DestroyResource(); } - - const std::vector &GetInputSizeList() const override { return input_size_list_; } - const std::vector &GetOutputSizeList() const override { return output_size_list_; } - const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } - - bool Launch(const std::vector &inputs, const std::vector &, - const std::vector &outputs, void *) { - if (is_null_input_) { - return true; - } - T *input_addr = GetDeviceAddress(inputs, 0); - T *input_addr2 = GetDeviceAddress(inputs, 1); - T *output_addr = GetDeviceAddress(outputs, 0); - const float alpha = 1; - const float beta = 0; - // A + B = C. [ C = op(alpha1[0] * A, alpha2[0] * B) + beta[0] * C ] - // InputA must match the corresponding dimension of the destination tensor outC, and each dimension of the inputB - // must match the corresponding dimension of outC or must be equal to 1. 
- if (inputs[0]->size > inputs[1]->size) { - CHECK_CUDNN_RET_WITH_EXCEPT( - cudnnOpTensor(cudnn_handle_, opTensor_descriptor_, &alpha, inputA_descriptor_, input_addr, &alpha, - inputB_descriptor_, input_addr2, &beta, inputA_descriptor_, output_addr), - "cudnnOpTensor Add failed"); - } else { - CHECK_CUDNN_RET_WITH_EXCEPT( - cudnnOpTensor(cudnn_handle_, opTensor_descriptor_, &alpha, inputB_descriptor_, input_addr2, &alpha, - inputA_descriptor_, input_addr, &beta, inputB_descriptor_, output_addr), - "cudnnOpTensor Add failed"); - } - return true; - } - bool Init(const CNodePtr &kernel_node) { - InitResource(); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; - if (cudnn_data_type_ == CUDNN_DATA_INT32) { - cudnn_data_type_ = CUDNN_DATA_FLOAT; - } - size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); - if (input_num != 2) { - MS_LOG(ERROR) << "Input number is " << input_num << ", but cudnnAddTensor needs 2 inputs."; - return false; - } - size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); - if (output_num != 1) { - MS_LOG(ERROR) << "Output number is " << output_num << ", but cudnnAddTensor needs 1 output."; - return false; - } - auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto input_shapeB = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); - is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(input_shapeB); - if (is_null_input_) { - MS_LOG(WARNING) << "TensorAddGpuFwdKernel input is null"; - InitSizeLists(); - return true; - } - std::vector shapeA; - std::vector shapeB; - std::vector shapeOut; - ShapeNdTo4d(input_shape, &shapeA); - ShapeNdTo4d(input_shapeB, &shapeB); - ShapeNdTo4d(output_shape, &shapeOut); - CheckBroadcast4TensorOp(shapeA, shapeB, shapeOut); - CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(inputA_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_, - shapeA[0], 
shapeA[1], shapeA[2], shapeA[3]), - "cudnnSetTensor4dDescriptor failed"); - CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(inputB_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_, - shapeB[0], shapeB[1], shapeB[2], shapeB[3]), - "cudnnSetTensor4dDescriptor failed"); - - CHECK_CUDNN_RET_WITH_EXCEPT( - cudnnSetOpTensorDescriptor(opTensor_descriptor_, CUDNN_OP_TENSOR_ADD, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN), - "cudnnSetOpTensorDescriptor failed"); - - InitSizeLists(); - return true; - } - - protected: - void InitResource() { - cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle(); - CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&inputA_descriptor_), "cudnnCreateTensorDescriptor failed"); - CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&inputB_descriptor_), "cudnnCreateTensorDescriptor failed"); - CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateOpTensorDescriptor(&opTensor_descriptor_), - "cudnnCreateOpTensorDescriptor failed"); - } - void InitSizeLists() { - if (!is_null_input_) { - CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(inputA_descriptor_, &input_size_), - "cudnnGetTensorSizeInBytes failed"); - input_size_list_.push_back(input_size_); - CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(inputB_descriptor_, &output_size_), - "cudnnGetTensorSizeInBytes failed"); - } - input_size_list_.push_back(output_size_); - - if (output_size_ > input_size_) { - output_size_list_.push_back(output_size_); - } else { - output_size_list_.push_back(input_size_); - } - workspace_size_list_.push_back(workspace_size_); - - return; - } - - private: - void DestroyResource() noexcept { - CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(inputA_descriptor_), "cudnnDestroyTensorDescriptor failed"); - CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(inputB_descriptor_), "cudnnDestroyTensorDescriptor failed"); - CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyOpTensorDescriptor(opTensor_descriptor_), - 
"cudnnDestroyOpTensorDescriptor failed"); - } - cudnnHandle_t cudnn_handle_; - cudnnTensorDescriptor_t inputA_descriptor_; - cudnnTensorDescriptor_t inputB_descriptor_; - cudnnOpTensorDescriptor_t opTensor_descriptor_; - cudnnDataType_t cudnn_data_type_; - - std::vector input_size_list_; - std::vector output_size_list_; - std::vector workspace_size_list_; - - size_t input_size_; - size_t output_size_; - size_t workspace_size_; - bool is_null_input_; -}; -} // namespace kernel -} // namespace mindspore - -#endif // MINDSPORE_CCSRC_KERNEL_GPU_TENSORADD_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.cc new file mode 100644 index 0000000000..5e80cccd75 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.cc @@ -0,0 +1,36 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernel/gpu/nn/activation_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ActivationGpuFwdKernel, float) +MS_REG_GPU_KERNEL_ONE(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + ActivationGpuFwdKernel, half) + +MS_REG_GPU_KERNEL_ONE(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ActivationGpuFwdKernel, float) +MS_REG_GPU_KERNEL_ONE(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + ActivationGpuFwdKernel, half) + +MS_REG_GPU_KERNEL_ONE(Sigmoid, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ActivationGpuFwdKernel, float) +MS_REG_GPU_KERNEL_ONE(Sigmoid, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + ActivationGpuFwdKernel, half) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/relu_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.h similarity index 79% rename from mindspore/ccsrc/kernel/gpu/nn/relu_gpu_kernel.h rename to mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.h index 4cebc45831..bf6cfa7b23 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/relu_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.h @@ -18,6 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_RELU_GPU_KERNEL_H_ #include +#include +#include #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" #include "kernel/gpu/kernel_constants.h" @@ -25,9 +27,9 @@ namespace mindspore { namespace kernel { template -class ReLUGpuFwdKernel : public GpuKernel { +class ActivationGpuFwdKernel : public GpuKernel { public: - ReLUGpuFwdKernel() + ActivationGpuFwdKernel() : cudnn_handle_(nullptr), activation_desc_(nullptr), mode_(CUDNN_ACTIVATION_RELU), @@ -37,7 +39,7 @@ class ReLUGpuFwdKernel : 
public GpuKernel { input_size_(0), output_size_(0), workspace_size_(0) {} - ~ReLUGpuFwdKernel() override { DestroyResource(); } + ~ActivationGpuFwdKernel() override { DestroyResource(); } const std::vector &GetInputSizeList() const override { return input_size_list_; } const std::vector &GetOutputSizeList() const override { return output_size_list_; } const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } @@ -54,33 +56,39 @@ class ReLUGpuFwdKernel : public GpuKernel { const float beta = 0; CHECK_CUDNN_RET_WITH_EXCEPT(cudnnActivationForward(cudnn_handle_, activation_desc_, &alpha, data_descriptor_, input, &beta, data_descriptor_, output), - "ReLUGpuFwdKernel failed"); + "cudnnActivationForward failed"); return true; } bool Init(const CNodePtr &kernel_node) override { + auto node_name = AnfAlgo::GetCNodeName(kernel_node); + auto iter = kernel_map.find(node_name); + if (iter == kernel_map.end()) { + MS_LOG(EXCEPTION) << "Kernel: " << node_name << " not support."; + } + mode_ = iter->second; + InitResource(); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 1) { - MS_LOG(ERROR) << "Argument number is " << input_num << ", but ReLUGpuFwdKernel needs 1."; + MS_LOG(ERROR) << "Argument number is " << input_num << ", but ActivationGpuFwdKernel needs 1."; return false; } auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); is_null_input_ = CHECK_NULL_INPUT(input_shape); if (is_null_input_) { - MS_LOG(WARNING) << "ReLUGpuFwdKernel input is null."; + MS_LOG(WARNING) << "ActivationGpuFwdKernel input is null."; InitSizeLists(); return true; } - mode_ = CUDNN_ACTIVATION_RELU; std::vector shape; ShapeNdTo4d(input_shape, &shape); CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetActivationDescriptor(activation_desc_, 
mode_, CUDNN_NOT_PROPAGATE_NAN, 0.0), - "SetActivationDescriptor failed"); + "cudnnSetActivationDescriptor failed"); CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(data_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_, shape[0], shape[1], shape[2], shape[3]), - "SetTensor4dDescriptor failed"); + "cudnnSetTensor4dDescriptor failed"); InitSizeLists(); return true; } @@ -110,6 +118,11 @@ class ReLUGpuFwdKernel : public GpuKernel { CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(data_descriptor_), "cudnnDestroyTensorDescriptor failed"); } + std::map kernel_map = {{"ReLU", CUDNN_ACTIVATION_RELU}, + {"Tanh", CUDNN_ACTIVATION_TANH}, + {"ELU", CUDNN_ACTIVATION_ELU}, + {"Sigmoid", CUDNN_ACTIVATION_SIGMOID}}; + cudnnHandle_t cudnn_handle_; cudnnActivationDescriptor_t activation_desc_; cudnnActivationMode_t mode_; diff --git a/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.cc similarity index 53% rename from mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.cc rename to mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.cc index 69716e9165..35d11f8b47 100644 --- a/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.cc @@ -14,20 +14,35 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/tensoradd_gpu_kernel.h" +#include "kernel/gpu/nn/activation_grad_kernel.h" namespace mindspore { namespace kernel { MS_REG_GPU_KERNEL_ONE( - TensorAdd, + ReluGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - TensorAddGpuFwdKernel, float) + ActivationGradGpuKernel, float) MS_REG_GPU_KERNEL_ONE( - TensorAdd, + ReluGrad, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), - TensorAddGpuFwdKernel, half) + ActivationGradGpuKernel, half) + +MS_REG_GPU_KERNEL_ONE( + TanhGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ActivationGradGpuKernel, float) MS_REG_GPU_KERNEL_ONE( - TensorAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), - TensorAddGpuFwdKernel, int) + TanhGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + ActivationGradGpuKernel, half) + +MS_REG_GPU_KERNEL_ONE( + SigmoidGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ActivationGradGpuKernel, float) +MS_REG_GPU_KERNEL_ONE( + SigmoidGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + ActivationGradGpuKernel, half) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/relu_grad_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.h similarity index 77% rename from mindspore/ccsrc/kernel/gpu/nn/relu_grad_kernel.h rename to mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.h index ccc037f6e7..38e34eb752 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/relu_grad_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.h @@ -18,6 +18,8 @@ #define 
MINDSPORE_CCSRC_KERNEL_GPU_NN_RELU_GRAD_KERNEL_H_ #include +#include +#include #include "kernel/gpu/gpu_kernel.h" #include "kernel/gpu/gpu_kernel_factory.h" #include "kernel/gpu/kernel_constants.h" @@ -25,9 +27,9 @@ namespace mindspore { namespace kernel { template -class ReluGradGpuFwdKernel : public GpuKernel { +class ActivationGradGpuKernel : public GpuKernel { public: - ReluGradGpuFwdKernel() + ActivationGradGpuKernel() : cudnn_handle_(nullptr), activation_desc_(nullptr), mode_(CUDNN_ACTIVATION_RELU), @@ -35,7 +37,7 @@ class ReluGradGpuFwdKernel : public GpuKernel { is_null_input_(false), cudnn_data_type_(CUDNN_DATA_FLOAT), input_size_(0) {} - ~ReluGradGpuFwdKernel() override { DestroyResource(); } + ~ActivationGradGpuKernel() override { DestroyResource(); } const std::vector &GetInputSizeList() const override { return input_size_list_; } const std::vector &GetOutputSizeList() const override { return output_size_list_; } const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } @@ -45,8 +47,15 @@ class ReluGradGpuFwdKernel : public GpuKernel { if (is_null_input_) { return true; } - T *y = GetDeviceAddress(inputs, 1); - T *dy = GetDeviceAddress(inputs, 0); + T *dy = nullptr; + T *y = nullptr; + if (mode_ == CUDNN_ACTIVATION_RELU || mode_ == CUDNN_ACTIVATION_ELU) { + dy = GetDeviceAddress(inputs, 0); + y = GetDeviceAddress(inputs, 1); + } else { + y = GetDeviceAddress(inputs, 0); + dy = GetDeviceAddress(inputs, 1); + } T *dx = GetDeviceAddress(outputs, 0); const float alpha = 1; @@ -59,18 +68,24 @@ class ReluGradGpuFwdKernel : public GpuKernel { return true; } bool Init(const CNodePtr &kernel_node) override { + auto node_name = AnfAlgo::GetCNodeName(kernel_node); + auto iter = kernel_map.find(node_name); + if (iter == kernel_map.end()) { + MS_LOG(EXCEPTION) << "Kernel: " << node_name << " not support."; + } + mode_ = iter->second; + InitResource(); - cudnn_data_type_ = 
kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 2) { - MS_LOG(ERROR) << "Argument number is " << input_num << ", but ReluGradGpuFwdKernel needs 2."; + MS_LOG(ERROR) << "Argument number is " << input_num << ", but ActivationGradGpuKernel needs 2."; return false; } auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); - mode_ = CUDNN_ACTIVATION_RELU; is_null_input_ = CHECK_NULL_INPUT(input_shape); if (is_null_input_) { - MS_LOG(WARNING) << "ReluGradGpuFwdKernel input is null."; + MS_LOG(WARNING) << "ActivationGradGpuKernel input is null."; InitSizeLists(); return true; } @@ -110,6 +125,10 @@ class ReluGradGpuFwdKernel : public GpuKernel { CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(data_descriptor_), "cudnnDestroyTensorDescriptor failed"); } + std::map kernel_map = {{"ReluGrad", CUDNN_ACTIVATION_RELU}, + {"TanhGrad", CUDNN_ACTIVATION_TANH}, + {"ELUGrad", CUDNN_ACTIVATION_ELU}, + {"SigmoidGrad", CUDNN_ACTIVATION_SIGMOID}}; cudnnHandle_t cudnn_handle_; cudnnActivationDescriptor_t activation_desc_; cudnnActivationMode_t mode_; diff --git a/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.cc new file mode 100644 index 0000000000..049a5cc280 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.cc @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/gpu/nn/adam_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(Adam, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + AdamGpuKernel, float) +MS_REG_GPU_KERNEL_ONE(Adam, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + AdamGpuKernel, half) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.h new file mode 100644 index 0000000000..93c6381ab3 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.h @@ -0,0 +1,142 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 
2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_ADAM_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_NN_ADAM_GPU_KERNEL_H_ + +#include +#include "kernel/gpu/gpu_kernel.h" +#include "kernel/gpu/gpu_kernel_factory.h" +#include "kernel/gpu/cuda_impl/adam_impl.cuh" +namespace mindspore { +namespace kernel { +template +class AdamGpuKernel : public GpuKernel { + public: + AdamGpuKernel() + : variable_size_(0), + m_size_(0), + v_size_(0), + beta1_power_size_(0), + beta2_power_size_(0), + learning_rate_size_(0), + beta1_size_(0), + beta2_size_(0), + epsilon_size_(0), + gradient_size_(0) {} + + ~AdamGpuKernel() override = default; + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, const std::vector &, + void *stream_ptr) override { + T *variable = GetDeviceAddress(inputs, 0); + T *m = GetDeviceAddress(inputs, 1); + T *v = GetDeviceAddress(inputs, 2); + T *beta1_power = GetDeviceAddress(inputs, 3); + T *beta2_power = GetDeviceAddress(inputs, 4); + T *learning_rate = GetDeviceAddress(inputs, 5); + T *beta1 = GetDeviceAddress(inputs, 6); + T *beta2 = GetDeviceAddress(inputs, 7); + T *epsilon = GetDeviceAddress(inputs, 8); + T *gradient = GetDeviceAddress(inputs, 9); + ApplyAdam(inputs[0]->size / 
sizeof(T), gradient, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, + variable, m, v, reinterpret_cast(stream_ptr)); + return true; + } + + bool Init(const CNodePtr &kernel_node) override { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 10) { + MS_LOG(ERROR) << "Input number is " << input_num << ", but adam needs 10 inputs."; + return false; + } + + variable_size_ = sizeof(T); + m_size_ = sizeof(T); + v_size_ = sizeof(T); + beta1_power_size_ = sizeof(T); + beta2_power_size_ = sizeof(T); + learning_rate_size_ = sizeof(T); + beta1_size_ = sizeof(T); + beta2_size_ = sizeof(T); + epsilon_size_ = sizeof(T); + gradient_size_ = sizeof(T); + + auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < variable_shape.size(); i++) { + variable_size_ *= variable_shape[i]; + } + + auto m_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + for (size_t i = 0; i < m_shape.size(); i++) { + m_size_ *= m_shape[i]; + } + + auto v_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + for (size_t i = 0; i < v_shape.size(); i++) { + v_size_ *= v_shape[i]; + } + + auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); + for (size_t i = 0; i < gradient_shape.size(); i++) { + gradient_size_ *= gradient_shape[i]; + } + + InitSizeLists(); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(variable_size_); + input_size_list_.push_back(m_size_); + input_size_list_.push_back(v_size_); + input_size_list_.push_back(beta1_power_size_); + input_size_list_.push_back(beta2_power_size_); + input_size_list_.push_back(learning_rate_size_); + input_size_list_.push_back(beta1_size_); + input_size_list_.push_back(beta2_size_); + input_size_list_.push_back(epsilon_size_); + input_size_list_.push_back(gradient_size_); + output_size_list_.push_back(0); + output_size_list_.push_back(0); + output_size_list_.push_back(0); + } + + 
private: + size_t variable_size_; + size_t m_size_; + size_t v_size_; + size_t beta1_power_size_; + size_t beta2_power_size_; + size_t learning_rate_size_; + size_t beta1_size_; + size_t beta2_size_; + size_t epsilon_size_; + size_t gradient_size_; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_ADAM_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h b/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h index c93a050649..9b4f18d24c 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h @@ -68,7 +68,7 @@ class BiasAddGradGpuKernel : public GpuKernel { } bool Init(const CNodePtr &kernel_node) override { InitResource(); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto num_dims = dy_shape.size(); if (num_dims < 2) { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h index 7bb6aa2a6d..f51cbfef33 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h @@ -191,7 +191,7 @@ class Conv2dGpuFwdKernel : public GpuKernel { CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(input_desc_), "cudnnDestroyTensorDescriptor failed"); } bool CheckParam(const CNodePtr &kernel_node) { - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 2) { MS_LOG(ERROR) << 
"Input number is " << input_num << ", but conv2d needs 2 inputs."; diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h index b126b542dd..0d7be25772 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h @@ -98,7 +98,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { if (!CheckParam(kernel_node)) { return false; } - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(in_shape); diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h index f7f371067f..a33ea5b4da 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h @@ -98,7 +98,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { if (!CheckParam(kernel_node)) { return false; } - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto filter_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); is_null_input_ = CHECK_NULL_INPUT(dy_shape); diff --git a/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.cc index 0d2a6be9c8..b84dc628e0 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.cc 
@@ -23,7 +23,7 @@ DropoutGpuFwdKernel::DropoutGpuFwdKernel() : cudnn_handle_(nullptr), is_null_input_(false), num_count_(0), - drop_prob_(0.0), + keep_prob_(0.0), states_init_(false), mask_generator_(nullptr) {} @@ -54,7 +54,7 @@ bool DropoutGpuFwdKernel::Init(const CNodePtr &kernel_node) { for (size_t x : input_shape) { num_count_ *= x; } - drop_prob_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("drop_prob")); + keep_prob_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("keep_prob")); InitSizeLists(); return true; @@ -68,14 +68,12 @@ void DropoutGpuFwdKernel::DestroyResource() noexcept {} void DropoutGpuFwdKernel::InitSizeLists() { size_t input_size = num_count_ * sizeof(float); - size_t workspace_size = 0; input_size_list_.push_back(input_size); output_size_list_.push_back(input_size); // output size: the same with input size output_size_list_.push_back(input_size); // mask size: the same with input size - workspace_size_list_.push_back(workspace_size); } -bool DropoutGpuFwdKernel::Launch(const std::vector &inputs, const std::vector &workspace, +bool DropoutGpuFwdKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) { if (is_null_input_) { return true; @@ -92,7 +90,7 @@ bool DropoutGpuFwdKernel::Launch(const std::vector &inputs, const st } curandGenerateUniform(mask_generator_, mask, num_count_); - DropoutForward(input, mask, output, num_count_, drop_prob_, reinterpret_cast(stream_ptr)); + DropoutForward(input, mask, output, num_count_, keep_prob_, reinterpret_cast(stream_ptr)); return true; } diff --git a/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.h index accff17429..81eb78c880 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.h @@ -52,7 +52,7 @@ class DropoutGpuFwdKernel : public GpuKernel { cudnnHandle_t cudnn_handle_; bool is_null_input_; size_t 
num_count_; - float drop_prob_; + float keep_prob_; bool states_init_; curandGenerator_t mask_generator_; std::vector input_size_list_; diff --git a/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.cc index 44f603f02d..2194805e92 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.cc @@ -20,7 +20,7 @@ namespace mindspore { namespace kernel { DropoutGradGpuFwdKernel::DropoutGradGpuFwdKernel() - : cudnn_handle_(nullptr), is_null_input_(false), num_count_(0), drop_prob_(0.0) {} + : cudnn_handle_(nullptr), is_null_input_(false), num_count_(0), keep_prob_(0.0) {} DropoutGradGpuFwdKernel::~DropoutGradGpuFwdKernel() { DestroyResource(); } @@ -50,7 +50,7 @@ bool DropoutGradGpuFwdKernel::Init(const CNodePtr &kernel_node) { for (size_t x : input_shape) { num_count_ *= x; } - drop_prob_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("drop_prob")); + keep_prob_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("keep_prob")); InitSizeLists(); return true; @@ -66,15 +66,13 @@ void DropoutGradGpuFwdKernel::InitSizeLists() { size_t dy_size = num_count_ * sizeof(float); size_t mask_size = dy_size; size_t dx_size = dy_size; - size_t workspace_size = 0; input_size_list_.push_back(dy_size); input_size_list_.push_back(mask_size); output_size_list_.push_back(dx_size); - workspace_size_list_.push_back(workspace_size); } -bool DropoutGradGpuFwdKernel::Launch(const std::vector &inputs, const std::vector &workspace, +bool DropoutGradGpuFwdKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) { if (is_null_input_) { return true; @@ -84,7 +82,7 @@ bool DropoutGradGpuFwdKernel::Launch(const std::vector &inputs, cons auto *mask = reinterpret_cast(inputs[1]->addr); auto *dx = reinterpret_cast(outputs[0]->addr); - DropoutBackward(dy, mask, dx, num_count_, drop_prob_, 
reinterpret_cast(stream_ptr)); + DropoutBackward(dy, mask, dx, num_count_, keep_prob_, reinterpret_cast(stream_ptr)); return true; } diff --git a/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.h index 79d4117b58..4991b9dad5 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.h @@ -45,7 +45,7 @@ class DropoutGradGpuFwdKernel : public GpuKernel { cudnnHandle_t cudnn_handle_; bool is_null_input_; size_t num_count_; - float drop_prob_; + float keep_prob_; std::vector input_size_list_; std::vector output_size_list_; std::vector workspace_size_list_; diff --git a/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.cc new file mode 100644 index 0000000000..4d30130931 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.cc @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernel/gpu/nn/ftrl_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(ApplyFtrl, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + FtrlGpuKernel, float) +MS_REG_GPU_KERNEL_ONE(ApplyFtrl, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + FtrlGpuKernel, half) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.h new file mode 100644 index 0000000000..9e2153965b --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.h @@ -0,0 +1,130 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_FTRL_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_NN_FTRL_GPU_KERNEL_H_ + +#include +#include "kernel/gpu/gpu_kernel.h" +#include "kernel/gpu/gpu_kernel_factory.h" +#include "kernel/gpu/cuda_impl/ftrl_impl.cuh" +namespace mindspore { +namespace kernel { +template +class FtrlGpuKernel : public GpuKernel { + public: + FtrlGpuKernel() + : variable_size_(0), + accumulation_size_(0), + linear_size_(0), + gradient_size_(0), + learning_rate_size_(0), + l1_regularization_size_(0), + l2_regularization_size_(0), + learning_rate_power_size_(0) {} + + ~FtrlGpuKernel() override = default; + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, const std::vector &, + void *stream_ptr) override { + T *variable = GetDeviceAddress(inputs, 0); + T *accumulation = GetDeviceAddress(inputs, 1); + T *linear = GetDeviceAddress(inputs, 2); + T *gradient = GetDeviceAddress(inputs, 3); + T *learning_rate = GetDeviceAddress(inputs, 4); + T *l1_regularization = GetDeviceAddress(inputs, 5); + T *l2_regularization = GetDeviceAddress(inputs, 6); + T *learning_rate_power = GetDeviceAddress(inputs, 7); + ApplyFtrl(inputs[0]->size / sizeof(T), gradient, learning_rate, l1_regularization, l2_regularization, + learning_rate_power, variable, accumulation, linear, reinterpret_cast(stream_ptr)); + return true; + } + + bool Init(const CNodePtr &kernel_node) override { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 8) { + MS_LOG(ERROR) << "Input number is " << input_num << ", but ftrl needs 8 inputs."; + return false; + } + + variable_size_ = sizeof(T); + accumulation_size_ = sizeof(T); + linear_size_ = sizeof(T); + gradient_size_ = sizeof(T); + 
learning_rate_size_ = sizeof(T); + l1_regularization_size_ = sizeof(T); + l2_regularization_size_ = sizeof(T); + learning_rate_power_size_ = sizeof(T); + + auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < variable_shape.size(); i++) { + variable_size_ *= variable_shape[i]; + } + + auto accumulation_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + for (size_t i = 0; i < accumulation_shape.size(); i++) { + accumulation_size_ *= accumulation_shape[i]; + } + + auto linear_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + for (size_t i = 0; i < linear_shape.size(); i++) { + linear_size_ *= linear_shape[i]; + } + + auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + for (size_t i = 0; i < gradient_shape.size(); i++) { + gradient_size_ *= gradient_shape[i]; + } + + InitSizeLists(); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(variable_size_); + input_size_list_.push_back(accumulation_size_); + input_size_list_.push_back(linear_size_); + input_size_list_.push_back(gradient_size_); + input_size_list_.push_back(learning_rate_size_); + input_size_list_.push_back(l1_regularization_size_); + input_size_list_.push_back(l2_regularization_size_); + input_size_list_.push_back(learning_rate_power_size_); + output_size_list_.push_back(0); + } + + private: + size_t variable_size_; + size_t accumulation_size_; + size_t linear_size_; + size_t gradient_size_; + size_t learning_rate_size_; + size_t l1_regularization_size_; + size_t l2_regularization_size_; + size_t learning_rate_power_size_; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_FTRL_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h 
b/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h index c08b341e78..b0a898209b 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h @@ -82,7 +82,7 @@ class FusedBatchNormGpuKernel : public GpuKernel { } bool Init(const CNodePtr &kernel_node) override { InitResource(); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 5) { MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", FusedBatchNormGpuKernel should be 5"; diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h index 153b0286b3..712354b17c 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h @@ -75,7 +75,7 @@ class FusedBatchNormGradGpuKernel : public GpuKernel { } bool Init(const CNodePtr &kernel_node) override { InitResource(); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 5) { MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", FusedBatchNormGradGpuKernel should be 5"; diff --git a/mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.cc index 2b6c53aa28..32d91be80a 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.cc @@ -25,5 +25,12 @@ MS_REG_GPU_KERNEL_ONE(GeluGrad, .AddInputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), 
GeLUGpuGradKernel, float) +MS_REG_GPU_KERNEL_ONE(GeluGrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + GeLUGpuGradKernel, half) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.cc index 604dee04c4..ca54ff68ad 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.cc @@ -20,5 +20,7 @@ namespace mindspore { namespace kernel { MS_REG_GPU_KERNEL_ONE(Gelu, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), GeluGpuKernel, float) +MS_REG_GPU_KERNEL_ONE(Gelu, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + GeluGpuKernel, half) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc index e67b745ab3..19e4dc17a6 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc @@ -27,5 +27,14 @@ MS_REG_GPU_KERNEL_ONE(LayerNorm, .AddOutputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), LayerNormGpuKernel, float) +MS_REG_GPU_KERNEL_ONE(LayerNorm, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + LayerNormGpuKernel, half) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc index e268161349..7991d42499 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc @@ -29,5 
+29,16 @@ MS_REG_GPU_KERNEL_ONE(LayerNormGrad, .AddOutputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), LayerNormGradGpuKernel, float) +MS_REG_GPU_KERNEL_ONE(LayerNormGrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + LayerNormGradGpuKernel, half) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h index 01247f0ed6..42eda96b02 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h @@ -89,7 +89,7 @@ class LstmGpuKernel : public GpuKernel { } bool Init(const CNodePtr &kernel_node) override { InitResource(); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); seq_len_ = SizeToInt(input_shape[0]); batch_size_ = SizeToInt(input_shape[1]); diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h index 5591b0c817..6eeefa262c 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h @@ -105,7 +105,7 @@ class LstmGradDataGpuKernel : public GpuKernel { } bool Init(const CNodePtr &kernel_node) override { InitResource(); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 
0); seq_len_ = SizeToInt(input_shape[0]); batch_size_ = SizeToInt(input_shape[1]); diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h index dd6aae9a00..a1a4852c84 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h @@ -84,7 +84,7 @@ class LstmGradWeightGpuKernel : public GpuKernel { } bool Init(const CNodePtr &kernel_node) override { InitResource(); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); seq_len_ = SizeToInt(input_shape[0]); batch_size_ = SizeToInt(input_shape[1]); diff --git a/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.cc index 4a77f7342b..e8b2b17706 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.cc @@ -18,7 +18,7 @@ namespace mindspore { namespace kernel { -MS_REG_GPU_KERNEL_ONE(ApplyMomentum, +MS_REG_GPU_KERNEL_TWO(ApplyMomentum, KernelAttr() .AddInputAttr(kNumberTypeFloat32) .AddInputAttr(kNumberTypeFloat32) @@ -26,8 +26,8 @@ MS_REG_GPU_KERNEL_ONE(ApplyMomentum, .AddInputAttr(kNumberTypeFloat32) .AddInputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), - MomentumGpuKernel, float) -MS_REG_GPU_KERNEL_ONE(ApplyMomentum, + MomentumGpuKernel, float, float) +MS_REG_GPU_KERNEL_TWO(ApplyMomentum, KernelAttr() .AddInputAttr(kNumberTypeFloat16) .AddInputAttr(kNumberTypeFloat16) @@ -35,6 +35,15 @@ MS_REG_GPU_KERNEL_ONE(ApplyMomentum, .AddInputAttr(kNumberTypeFloat16) .AddInputAttr(kNumberTypeFloat16) .AddOutputAttr(kNumberTypeFloat16), - MomentumGpuKernel, half) + MomentumGpuKernel, half, half) 
+MS_REG_GPU_KERNEL_TWO(ApplyMomentum, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat16), + MomentumGpuKernel, half, float) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.h index 8452c177db..5abfb9e97b 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.h @@ -23,7 +23,7 @@ #include "kernel/gpu/cuda_impl/momentum_impl.cuh" namespace mindspore { namespace kernel { -template +template class MomentumGpuKernel : public GpuKernel { public: MomentumGpuKernel() @@ -37,9 +37,9 @@ class MomentumGpuKernel : public GpuKernel { void *stream_ptr) override { T *variable = GetDeviceAddress(inputs, 0); T *accumulation = GetDeviceAddress(inputs, 1); - T *learning_rate = GetDeviceAddress(inputs, 2); + S *learning_rate = GetDeviceAddress(inputs, 2); T *gradient = GetDeviceAddress(inputs, 3); - T *momentum = GetDeviceAddress(inputs, 4); + S *momentum = GetDeviceAddress(inputs, 4); MomentumUpdateVariable(inputs[0]->size / sizeof(T), variable, accumulation, learning_rate, gradient, momentum, reinterpret_cast(stream_ptr)); return true; @@ -53,9 +53,9 @@ class MomentumGpuKernel : public GpuKernel { variable_size_ = sizeof(T); accumulation_size_ = sizeof(T); - learning_rate_size_ = sizeof(T); + learning_rate_size_ = sizeof(S); gradient_size_ = sizeof(T); - momentum_size_ = sizeof(T); + momentum_size_ = sizeof(S); auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); for (size_t i = 0; i < variable_shape.size(); i++) { diff --git a/mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.h index faff453775..0dda1e8998 100644 --- 
a/mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.h @@ -88,7 +88,7 @@ class PoolingGpuFwdKernel : public GpuKernel { if (!CheckParam(kernel_node)) { return false; } - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); is_null_input_ = CHECK_NULL_INPUT(input_shape); if (is_null_input_) { diff --git a/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.cc index 57bd231129..c3d4a44943 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.cc @@ -24,27 +24,27 @@ MS_REG_GPU_KERNEL_ONE(MaxPoolGrad, .AddInputAttr(kNumberTypeFloat32) .AddInputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), - PoolingGradGpuFwdKernel, float) + PoolingGradGpuKernel, float) MS_REG_GPU_KERNEL_ONE(MaxPoolGrad, KernelAttr() .AddInputAttr(kNumberTypeFloat16) .AddInputAttr(kNumberTypeFloat16) .AddInputAttr(kNumberTypeFloat16) .AddOutputAttr(kNumberTypeFloat16), - PoolingGradGpuFwdKernel, half) + PoolingGradGpuKernel, half) MS_REG_GPU_KERNEL_ONE(AvgPoolGradGpu, KernelAttr() .AddInputAttr(kNumberTypeFloat32) .AddInputAttr(kNumberTypeFloat32) .AddInputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), - PoolingGradGpuFwdKernel, float) + PoolingGradGpuKernel, float) MS_REG_GPU_KERNEL_ONE(AvgPoolGradGpu, KernelAttr() .AddInputAttr(kNumberTypeFloat16) .AddInputAttr(kNumberTypeFloat16) .AddInputAttr(kNumberTypeFloat16) .AddOutputAttr(kNumberTypeFloat16), - PoolingGradGpuFwdKernel, half) + PoolingGradGpuKernel, half) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.h 
b/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.h index df3454c581..e8f1ebc1af 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.h @@ -28,9 +28,9 @@ namespace mindspore { namespace kernel { template -class PoolingGradGpuFwdKernel : public GpuKernel { +class PoolingGradGpuKernel : public GpuKernel { public: - PoolingGradGpuFwdKernel() + PoolingGradGpuKernel() : cudnn_handle_(nullptr), pooling_descriptor_(nullptr), y_descriptor_(nullptr), @@ -55,7 +55,7 @@ class PoolingGradGpuFwdKernel : public GpuKernel { padded_size_(0), workspace_size_(0), use_pad_(true) {} - ~PoolingGradGpuFwdKernel() override { DestroyResource(); } + ~PoolingGradGpuKernel() override { DestroyResource(); } const std::vector &GetInputSizeList() const override { return input_size_list_; } const std::vector &GetOutputSizeList() const override { return output_size_list_; } @@ -108,7 +108,7 @@ class PoolingGradGpuFwdKernel : public GpuKernel { auto input_mask = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(input_mask); if (is_null_input_) { - MS_LOG(WARNING) << "PoolingGradGpuFwdKernel input is null."; + MS_LOG(WARNING) << "PoolingGradGpuKernel input is null."; InitSizeLists(); return true; } @@ -196,7 +196,7 @@ class PoolingGradGpuFwdKernel : public GpuKernel { bool CheckParam(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 3) { - MS_LOG(ERROR) << "Input number is " << input_num << ", but PoolingGradGpuFwdKernel needs 3 inputs."; + MS_LOG(ERROR) << "Input number is " << input_num << ", but PoolingGradGpuKernel needs 3 inputs."; return false; } return true; @@ -239,7 +239,7 @@ class PoolingGradGpuFwdKernel : public GpuKernel { void SetPoolingMode(const CNodePtr &kernel_node) { pad_mode_ = GetAttr(kernel_node, "padding"); stride_ = GetAttr>(kernel_node, "strides"); - cudnn_data_type_ 
= kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); mode_ = AnfAlgo::GetCNodeName(kernel_node); if (mode_ == "AvgPoolGradGpu") { pooling_mode_ = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; diff --git a/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.cc index 707aa77647..032e8eeec4 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.cc @@ -25,9 +25,6 @@ MS_REG_GPU_KERNEL_ONE(ApplyRMSProp, .AddInputAttr(kNumberTypeFloat32) .AddInputAttr(kNumberTypeFloat32) .AddInputAttr(kNumberTypeFloat32) - .AddInputAttr(kNumberTypeFloat32) - .AddInputAttr(kNumberTypeFloat32) - .AddInputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), RMSPropGpuKernel, float) diff --git a/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.h index 7eaedfba52..9e148b690d 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.h @@ -27,7 +27,7 @@ namespace kernel { template class RMSPropGpuKernel : public GpuKernel { public: - RMSPropGpuKernel() : size_(1), use_center_(false) {} + RMSPropGpuKernel() : size_(1), use_center_(false), decay_(0.0), momentum_(0.9), epsilon_(1e-12) {} ~RMSPropGpuKernel() override = default; const std::vector &GetInputSizeList() const override { return input_size_list_; } @@ -40,13 +40,10 @@ class RMSPropGpuKernel : public GpuKernel { T *variable = GetDeviceAddress(inputs, 0); T *mean_square = GetDeviceAddress(inputs, 1); T *moment = GetDeviceAddress(inputs, 2); - T *gradients = GetDeviceAddress(inputs, 3); - T *learning_rate = GetDeviceAddress(inputs, 4); - T *decay = GetDeviceAddress(inputs, 5); - T *momentum = GetDeviceAddress(inputs, 6); - T *epsilon = GetDeviceAddress(inputs, 7); + T *learning_rate = 
GetDeviceAddress(inputs, 3); + T *gradients = GetDeviceAddress(inputs, 4); - RmsProp(learning_rate, decay, momentum, epsilon, variable, mean_square, moment, gradients, size_, + RmsProp(learning_rate, decay_, momentum_, epsilon_, variable, mean_square, moment, gradients, size_, reinterpret_cast(stream)); } else { T *variable = GetDeviceAddress(inputs, 0); @@ -70,6 +67,11 @@ class RMSPropGpuKernel : public GpuKernel { use_center_ = true; } + if (node_name == "ApplyRMSProp") { + decay_ = GetAttr(kernel_node, "rho"); + momentum_ = GetAttr(kernel_node, "momentum"); + epsilon_ = GetAttr(kernel_node, "epsilon"); + } auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); for (auto &dim : input_shape) { size_ *= dim; @@ -81,24 +83,33 @@ class RMSPropGpuKernel : public GpuKernel { protected: void InitSizeLists() override { size_t input_size = size_ * sizeof(T); - input_size_list_.push_back(input_size); - if (use_center_) { + if (!use_center_) { + input_size_list_.push_back(input_size); input_size_list_.push_back(input_size); + input_size_list_.push_back(input_size); + input_size_list_.push_back(sizeof(T)); + input_size_list_.push_back(input_size); + output_size_list_.push_back(input_size); + } else { + input_size_list_.push_back(input_size); + input_size_list_.push_back(input_size); + input_size_list_.push_back(input_size); + input_size_list_.push_back(input_size); + input_size_list_.push_back(input_size); + input_size_list_.push_back(sizeof(T)); + input_size_list_.push_back(sizeof(T)); + input_size_list_.push_back(sizeof(T)); + input_size_list_.push_back(sizeof(T)); + output_size_list_.push_back(input_size); } - - input_size_list_.push_back(input_size); - input_size_list_.push_back(input_size); - input_size_list_.push_back(input_size); - input_size_list_.push_back(sizeof(T)); - input_size_list_.push_back(sizeof(T)); - input_size_list_.push_back(sizeof(T)); - input_size_list_.push_back(sizeof(T)); - output_size_list_.push_back(0); } private: size_t size_; bool 
use_center_; + float decay_; + float momentum_; + float epsilon_; std::vector input_size_list_; std::vector output_size_list_; diff --git a/mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc similarity index 81% rename from mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.cc rename to mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc index 97176680d0..1e650811fd 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "kernel/gpu/nn/tanh_grad_kernel.h" +#include "kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h" namespace mindspore { namespace kernel { -MS_REG_GPU_KERNEL_ONE( - TanhGrad, +MS_REG_GPU_KERNEL_TWO( + SigmoidCrossEntropyWithLogits, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - TanhGradKernel, float) + SigmoidCrossEntropyWithLogitsGpuKernel, float, float) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h new file mode 100644 index 0000000000..8d0efe90b4 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h @@ -0,0 +1,97 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_NN_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GPU_KERNEL_H_ + +#include +#include "kernel/gpu/gpu_kernel.h" +#include "kernel/gpu/gpu_kernel_factory.h" +#include "kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh" + +namespace mindspore { +namespace kernel { +template +class SigmoidCrossEntropyWithLogitsGpuKernel : public GpuKernel { + public: + SigmoidCrossEntropyWithLogitsGpuKernel() : logits_size_(0), labels_size_(0), outputs_size_(0) {} + + ~SigmoidCrossEntropyWithLogitsGpuKernel() override = default; + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + T *logits_addr = GetDeviceAddress(inputs, 0); + S *labels_addr = GetDeviceAddress(inputs, 1); + T *outputs_addr = GetDeviceAddress(outputs, 0); + + SigmoidCrossEntropyWithLogits(inputs[0]->size / sizeof(T), logits_addr, labels_addr, outputs_addr, + reinterpret_cast(stream_ptr)); + return true; + } + + bool Init(const CNodePtr &kernel_node) override { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 2) { + MS_LOG(ERROR) << "Input number is " << input_num << ", but SigmoidCrossEntropyWithLogits needs 2 
inputs."; + return false; + } + logits_size_ = sizeof(T); + labels_size_ = sizeof(S); + outputs_size_ = sizeof(T); + + auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < logits_shape.size(); i++) { + logits_size_ *= logits_shape[i]; + } + + auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + for (size_t i = 0; i < labels_shape.size(); i++) { + labels_size_ *= labels_shape[i]; + } + + auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < output_shape.size(); i++) { + outputs_size_ *= output_shape[i]; + } + + InitSizeLists(); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(logits_size_); + input_size_list_.push_back(labels_size_); + output_size_list_.push_back(outputs_size_); + } + + private: + size_t logits_size_; + size_t labels_size_; + size_t outputs_size_; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc new file mode 100644 index 0000000000..dabc4df850 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(SigmoidCrossEntropyWithLogitsGrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + SigmoidCrossEntropyWithLogitsGradGpuKernel, float, float) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h new file mode 100644 index 0000000000..01f416f6b7 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h @@ -0,0 +1,96 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_NN_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_GPU_KERNEL_H_ + +#include +#include "kernel/gpu/gpu_kernel.h" +#include "kernel/gpu/gpu_kernel_factory.h" +#include "kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh" + +namespace mindspore { +namespace kernel { +template +class SigmoidCrossEntropyWithLogitsGradGpuKernel : public GpuKernel { + public: + SigmoidCrossEntropyWithLogitsGradGpuKernel() : logits_size_(0), labels_size_(0), outputs_size_(0) {} + ~SigmoidCrossEntropyWithLogitsGradGpuKernel() override = default; + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + T *logits_addr = GetDeviceAddress(inputs, 0); + S *labels_addr = GetDeviceAddress(inputs, 1); + T *outputs_addr = GetDeviceAddress(outputs, 0); + + SigmoidCrossEntropyWithLogitsGrad(inputs[0]->size / sizeof(T), logits_addr, labels_addr, outputs_addr, + reinterpret_cast(stream_ptr)); + return true; + } + + bool Init(const CNodePtr &kernel_node) override { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 3) { + MS_LOG(ERROR) << "Input number is " << input_num << ", but SigmoidCrossEntropyWithLogitsGrad needs 3 inputs."; + return false; + } + logits_size_ = sizeof(T); + labels_size_ = sizeof(S); + outputs_size_ = sizeof(T); + + auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < logits_shape.size(); i++) { + logits_size_ *= logits_shape[i]; + } + + auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + for (size_t i = 0; i < 
labels_shape.size(); i++) { + labels_size_ *= labels_shape[i]; + } + + auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < output_shape.size(); i++) { + outputs_size_ *= output_shape[i]; + } + + InitSizeLists(); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(logits_size_); + input_size_list_.push_back(labels_size_); + output_size_list_.push_back(outputs_size_); + } + + private: + size_t logits_size_; + size_t labels_size_; + size_t outputs_size_; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h index 6840f0a1eb..8256174bcb 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h @@ -87,7 +87,7 @@ class SoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel { << ", but SoftmaxCrossEntropyWithLogitsGpuKernel needs 2 output."; return false; } - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); InferInputOutputSize(kernel_node); CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(logits_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_, diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.h index 060bc57d56..9d5a2a24e1 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.h @@ -95,7 +95,7 @@ class SoftmaxGpuKernel : public 
GpuKernel { bool Init(const CNodePtr &kernel_node) override { InitResource(); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 1) { MS_LOG(ERROR) << "Input number is " << input_num << ", but softmax needs 1 input."; diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.h index 003b55c0ed..d73503d5a5 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.h @@ -98,7 +98,7 @@ class SoftmaxGradGpuKernel : public GpuKernel { bool Init(const CNodePtr &kernel_node) override { InitResource(); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 2) { MS_LOG(ERROR) << "Input number is " << input_num << ", but softmax grad needs 2 input."; diff --git a/mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h index 0749172cc6..6950f0e308 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h @@ -89,7 +89,7 @@ class SparseSoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel { return false; } is_grad_ = GetAttr(kernel_node, "is_grad"); - cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); 
InferInputOutputSize(kernel_node); CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(logits_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_, diff --git a/mindspore/ccsrc/kernel/gpu/nn/tanh_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/tanh_gpu_kernel.h deleted file mode 100644 index 7060ad1792..0000000000 --- a/mindspore/ccsrc/kernel/gpu/nn/tanh_gpu_kernel.h +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GPU_KERNEL_H_ -#define MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GPU_KERNEL_H_ - -#include -#include -#include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/tanh_impl.cuh" - -namespace mindspore { -namespace kernel { -template -class TanhGpuKernel : public GpuKernel { - public: - TanhGpuKernel() : input_size_(0) {} - ~TanhGpuKernel() override = default; - - const std::vector &GetInputSizeList() const override { return input_size_list_; } - const std::vector &GetOutputSizeList() const override { return output_size_list_; } - const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } - - bool Launch(const std::vector &inputs, const std::vector &, - const std::vector &outputs, void *stream_ptr) override { - auto x_addr = GetDeviceAddress(inputs, 0); - auto y_addr = GetDeviceAddress(outputs, 0); - - Tanh(input_size_ / sizeof(T), x_addr, y_addr, reinterpret_cast(stream_ptr)); - return true; - } - bool Init(const CNodePtr &kernel_node) override { - auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - - input_size_ = sizeof(T); - for (auto dim : input_shape) { - input_size_ *= dim; - } - - InitSizeLists(); - return true; - } - - protected: - void InitSizeLists() override { - input_size_list_.push_back(input_size_); - input_size_list_.push_back(input_size_); - output_size_list_.push_back(input_size_); - } - - private: - std::vector input_size_list_; - std::vector output_size_list_; - std::vector workspace_size_list_; - size_t input_size_; -}; -} // namespace kernel -} // namespace mindspore - -#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_LSTM_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h index 5d2dee3ec7..b898f34689 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h +++ 
b/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h @@ -39,12 +39,10 @@ class BatchNormFold2GpuKernel : public GpuKernel { ~BatchNormFold2GpuKernel() override { DestroyResource(); } const std::vector &GetInputSizeList() const override { return input_size_list_; } - const std::vector &GetOutputSizeList() const override { return output_size_list_; } - const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } - bool Launch(const std::vector &inputs, const std::vector &workspace, + bool Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) override { if (is_null_input_) { return true; @@ -111,10 +109,7 @@ class BatchNormFold2GpuKernel : public GpuKernel { input_size_list_.push_back(weight_size); // running_std input_size_list_.push_back(weight_size); // running_mean input_size_list_.push_back(sizeof(int32_t)); // global_step - output_size_list_.push_back(input_size); - - workspace_size_list_.push_back(sizeof(int32_t)); } private: diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h index 28a4cf6cd6..e0bafdb96a 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h @@ -39,9 +39,7 @@ class BatchNormFold2GradGpuKernel : public GpuKernel { ~BatchNormFold2GradGpuKernel() override { DestroyResource(); } const std::vector &GetInputSizeList() const override { return input_size_list_; } - const std::vector &GetOutputSizeList() const override { return output_size_list_; } - const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } bool Launch(const std::vector &inputs, const std::vector &workspace, diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.h index a90e9b47d7..6cd001fd2e 100644 
--- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.h @@ -47,9 +47,7 @@ class BatchNormFoldGpuKernel : public GpuKernel { ~BatchNormFoldGpuKernel() override { DestroyResource(); } const std::vector &GetInputSizeList() const override { return input_size_list_; } - const std::vector &GetOutputSizeList() const override { return output_size_list_; } - const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } bool Launch(const std::vector &inputs, const std::vector &workspace, @@ -141,7 +139,7 @@ class BatchNormFoldGpuKernel : public GpuKernel { input_size_ = sizeof(T) * batch_ * channel_ * height_ * width_; output_size_ = sizeof(T) * channel_; - cudnnDataType_t cudnnDataType = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; + cudnnDataType_t cudnnDataType = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); CHECK_CUDNN_RET_WITH_EXCEPT( cudnnSetTensor4dDescriptor(x_desc_, CUDNN_TENSOR_NCHW, cudnnDataType, batch_, channel_, height_, width_), "Set x desc failed"); diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h index 8cbe5b6927..7a3ed7ef91 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h @@ -46,9 +46,8 @@ class BatchNormFoldGradGpuKernel : public GpuKernel { const std::vector &GetOutputSizeList() const override { return output_size_list_; } const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } - bool Launch(const std::vector &inputs, const std::vector &workspace, + bool Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) override { - (void)workspace; // 'd_batch_mean', 'd_batch_std', 'x', 'batch_mean', 'batch_std', 
'current_step' T *d_batch_mean = GetDeviceAddress(inputs, 0); T *d_batch_std = GetDeviceAddress(inputs, 1); @@ -139,11 +138,8 @@ class BatchNormFoldGradGpuKernel : public GpuKernel { input_size_list_.push_back(channel_size_); input_size_list_.push_back(channel_size_); input_size_list_.push_back(sizeof(int)); - // 'dx' output_size_list_.push_back(input_size_); - - workspace_size_list_.push_back(workspace_size_); } private: diff --git a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.h index 38a9532ef5..29aeabb03a 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.h @@ -33,7 +33,8 @@ class CorrectionMulGpuKernel : public GpuKernel { const std::vector &GetInputSizeList() const override { return input_size_list_; } const std::vector &GetOutputSizeList() const override { return output_size_list_; } const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } - bool Launch(const std::vector &inputs, const std::vector &workspace, + + bool Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) override { auto *weight = GetDeviceAddress(inputs, 0); auto *gamma = GetDeviceAddress(inputs, 1); @@ -74,10 +75,9 @@ class CorrectionMulGpuKernel : public GpuKernel { input_size_list_.push_back(input_size); // weight input_size_list_.push_back(weight_size); // gamma input_size_list_.push_back(weight_size); // running_std - size_t workspace_size = 0; output_size_list_.push_back(input_size); - workspace_size_list_.push_back(workspace_size); } + void InitResource() override {} private: diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_per_channel_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.cc similarity index 52% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_per_channel_gpu_kernel.cc rename to 
mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.cc index 083bf7f011..ffed550fbb 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_per_channel_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "kernel/gpu/quant/fake_quant_per_channel_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/fake_quant_per_channel_impl.cuh" +#include "kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.h" +#include "kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh" #include #include #include @@ -25,21 +25,15 @@ namespace mindspore { namespace kernel { FakeQuantPerChannelGpuKernel::FakeQuantPerChannelGpuKernel() : input_size_(0), - min_size_(0), - max_size_(0), - output_size_(0), - workspace_size_(0), + num_channels_(0), num_bits_(0), - quant_min_(0), - quant_max_(0), - quant_delay_(0), - ema_(false), - ema_decay_(0), - global_step_(0), training_(false), - channel_out_(0), + symmetric_(false), narrow_range_(false), - symmetric_(false) {} + quant_delay_(0), + quant_min_(0), + quant_max_(0), + global_step_(0) {} const std::vector &FakeQuantPerChannelGpuKernel::GetInputSizeList() const { return input_size_list_; } @@ -60,90 +54,56 @@ bool FakeQuantPerChannelGpuKernel::Init(const CNodePtr &kernel_node) { return false; } + // get attribute num_bits_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits")); - ema_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ema")); - ema_decay_ = 1.0 - GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ema_decay")); + training_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("training")); + symmetric_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("symmetric")); + narrow_range_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("narrow_range")); + quant_delay_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")); if (num_bits_ <= 2 || num_bits_ 
>= 16) { MS_LOG(EXCEPTION) << "Attr \'num_bits\' " << num_bits_ << "is out of range, expected between 2 and 16."; return false; } - quant_delay_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")); if (quant_delay_ < 0) { MS_LOG(EXCEPTION) << "Attr \'quant_delay\' " << num_bits_ << " is less then 0, require larger than 0."; return false; } - training_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("training")); - - symmetric_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("symmetric")); - if (symmetric_) { - quant_min_ = 0 - (1 << (num_bits_ - 1)); - quant_max_ = (1 << (num_bits_ - 1)) - 1; - } else { - quant_min_ = 0; - quant_max_ = (1 << num_bits_) - 1; - } - - narrow_range_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("narrow_range")); + // quant min and max value + quant_min_ = 0; + quant_max_ = (1 << num_bits_) - 1; if (narrow_range_) { quant_min_++; } // shape info for gpu auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - channel_out_ = SizeToInt(input_shape[0]); - min_size_ = sizeof(float) * channel_out_; - max_size_ = sizeof(float) * channel_out_; + num_channels_ = SizeToInt(input_shape[0]); input_size_ = sizeof(float); for (size_t i = 0; i < input_shape.size(); i++) { input_size_ *= input_shape[i]; } - output_size_ = input_size_; - InitSizeLists(); return true; } void FakeQuantPerChannelGpuKernel::InitSizeLists() { - input_size_list_.push_back(input_size_); // input in tensor - input_size_list_.push_back(min_size_); // min one scalar - input_size_list_.push_back(max_size_); // max on scalar - output_size_list_.push_back(output_size_); // output in tensor - workspace_size_list_.push_back(sizeof(float) * channel_out_); // scale in channel - workspace_size_list_.push_back(sizeof(float) * channel_out_); // min in channel - workspace_size_list_.push_back(sizeof(float) * channel_out_); // max in channel -} - -void 
FakeQuantPerChannelGpuKernel::CalFakeQuantizeForTraining(float *input, float *output, float *input_min, - float *input_max, float *d_nudge_min, float *d_nudge_max, - float *d_scale, void *stream_ptr) { - // calculate the input min and max according by the parameter ema and ema_decay. - CalMinMaxPerChannel(input, input_min, input_max, input_size_ / sizeof(float), channel_out_, ema_decay_, ema_, - reinterpret_cast(stream_ptr)); - // control flow for quant_delay - if (global_step_ >= quant_delay_) { - // real launch - CalNudgePerChannel(input_min, input_max, quant_min_, quant_max_, d_nudge_min, d_nudge_max, d_scale, channel_out_, - reinterpret_cast(stream_ptr)); - CalFakeQuantizePerChannel(input, output, input_size_ / sizeof(float), channel_out_, d_nudge_min, d_nudge_max, - d_scale, symmetric_, reinterpret_cast(stream_ptr)); - } else { - CHECK_CUDA_RET_WITH_ERROR( - cudaMemcpyAsync(output, input, input_size_, cudaMemcpyDeviceToDevice, reinterpret_cast(stream_ptr)), - "Copy gpu memory failed."); - } - global_step_++; + input_size_list_.push_back(input_size_); // input in tensor + input_size_list_.push_back(sizeof(float) * num_channels_); // min one scalar + input_size_list_.push_back(sizeof(float) * num_channels_); // max on scalar + output_size_list_.push_back(input_size_); // output in tensor + workspace_size_list_.push_back(sizeof(float) * num_channels_); // scale in channel + workspace_size_list_.push_back(sizeof(float) * num_channels_); // min in channel + workspace_size_list_.push_back(sizeof(float) * num_channels_); // max in channel } -void FakeQuantPerChannelGpuKernel::CalFakeQuantizeForInfer(float *input, float *output, float *input_min, - float *input_max, float *d_nudge_min, float *d_nudge_max, - float *d_scale, void *stream_ptr) { - // real launch - CalNudgePerChannel(input_min, input_max, quant_min_, quant_max_, d_nudge_min, d_nudge_max, d_scale, channel_out_, +void FakeQuantPerChannelGpuKernel::CalFakeQuantize(float *input, float *output, float 
*input_min, float *input_max, + float *nudge_min, float *nudge_max, float *scale, void *stream_ptr) { + CalNudgePerChannel(input_min, input_max, quant_min_, quant_max_, nudge_min, nudge_max, scale, num_channels_, reinterpret_cast(stream_ptr)); - CalFakeQuantizePerChannel(input, output, input_size_ / sizeof(float), channel_out_, d_nudge_min, d_nudge_max, d_scale, + CalFakeQuantizePerChannel(input, output, input_size_ / sizeof(float), num_channels_, nudge_min, nudge_max, scale, symmetric_, reinterpret_cast(stream_ptr)); } @@ -155,9 +115,9 @@ bool FakeQuantPerChannelGpuKernel::Launch(const std::vector &inputs, float *input = GetDeviceAddress(inputs, 0); float *input_min = GetDeviceAddress(inputs, 1); float *input_max = GetDeviceAddress(inputs, 2); - float *d_scale = GetDeviceAddress(workspace, 0); - float *d_nudge_min = GetDeviceAddress(workspace, 1); - float *d_nudge_max = GetDeviceAddress(workspace, 2); + float *scale = GetDeviceAddress(workspace, 0); + float *nudge_min = GetDeviceAddress(workspace, 1); + float *nudge_max = GetDeviceAddress(workspace, 2); if (input == nullptr) { MS_LOG(EXCEPTION) << "FakeQuantPerChannelGpuKernel input is null."; @@ -167,14 +127,21 @@ bool FakeQuantPerChannelGpuKernel::Launch(const std::vector &inputs, } if (training_) { - CalFakeQuantizeForTraining(input, output, input_min, input_max, d_nudge_min, d_nudge_max, d_scale, stream_ptr); + if (global_step_ >= quant_delay_) { + CalFakeQuantize(input, output, input_min, input_max, nudge_min, nudge_max, scale, stream_ptr); + } else { + CHECK_CUDA_RET_WITH_ERROR(cudaMemcpyAsync(output, input, input_size_, cudaMemcpyDeviceToDevice, + reinterpret_cast(stream_ptr)), + "Copy gpu memory failed."); + } + global_step_++; } else { - CalFakeQuantizeForInfer(input, output, input_min, input_max, d_nudge_min, d_nudge_max, d_scale, stream_ptr); + CalFakeQuantize(input, output, input_min, input_max, nudge_min, nudge_max, scale, stream_ptr); } return true; } -MS_REG_GPU_KERNEL(FakeQuantWithMinMaxPerChannel, 
FakeQuantPerChannelGpuKernel) +MS_REG_GPU_KERNEL(FakeQuantPerChannel, FakeQuantPerChannelGpuKernel) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_per_channel_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.h similarity index 75% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_per_channel_gpu_kernel.h rename to mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.h index bea1a7421f..122fe96af3 100755 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_per_channel_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.h @@ -39,31 +39,23 @@ class FakeQuantPerChannelGpuKernel : public GpuKernel { void InitSizeLists() override; private: - void CalFakeQuantizeForTraining(float *input, float *output, float *input_min, float *input_max, float *d_nudge_min, - float *d_nudge_max, float *d_scale, void *stream_ptr); - void CalFakeQuantizeForInfer(float *input, float *output, float *input_min, float *input_max, float *d_nudge_min, - float *d_nudge_max, float *d_scale, void *stream_ptr); + void CalFakeQuantize(float *input, float *output, float *input_min, float *input_max, float *nudge_min, + float *nudge_max, float *scale, void *stream_ptr); size_t input_size_; - size_t min_size_; - size_t max_size_; - size_t output_size_; - size_t workspace_size_; std::vector input_size_list_; std::vector output_size_list_; std::vector workspace_size_list_; + int num_channels_; int num_bits_; + bool training_; + bool symmetric_; + bool narrow_range_; + int quant_delay_; float quant_min_; float quant_max_; - int quant_delay_; - bool ema_; - float ema_decay_; int global_step_; - bool training_; - int channel_out_; - bool narrow_range_; - bool symmetric_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_per_channel_grad_gpu_kernel.cc 
b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc similarity index 73% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_per_channel_grad_gpu_kernel.cc rename to mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc index 88c976285c..a57516eb2c 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_per_channel_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc @@ -14,21 +14,17 @@ * limitations under the License. */ -#include "kernel/gpu/quant/fake_quant_per_channel_grad_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/fake_quant_per_channel_impl.cuh" +#include "kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h" +#include "kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh" namespace mindspore { namespace kernel { FakeQuantPerChannelGradGpuKernel::FakeQuantPerChannelGradGpuKernel() : input_size_(0), - min_size_(0), - max_size_(0), - output_size_(0), - workspace_size_(0), num_bits_(0), quant_min_(0), quant_max_(0), - channel_out_(0), + num_channels_(0), quant_delay_(0), global_step_(0), narrow_range_(false), @@ -64,42 +60,34 @@ bool FakeQuantPerChannelGradGpuKernel::Init(const CNodePtr &kernel_node) { } symmetric_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("symmetric")); - if (symmetric_) { - quant_min_ = 0 - (1 << (num_bits_ - 1)); - quant_max_ = (1 << (num_bits_ - 1)) - 1; - } else { - quant_min_ = 0; - quant_max_ = (1 << num_bits_) - 1; - } - narrow_range_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("narrow_range")); + + // quant min and max value + quant_min_ = 0; + quant_max_ = (1 << num_bits_) - 1; if (narrow_range_) { quant_min_++; } auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - channel_out_ = SizeToInt(input_shape[0]); - min_size_ = sizeof(float) * channel_out_; - max_size_ = sizeof(float) * channel_out_; + num_channels_ = SizeToInt(input_shape[0]); input_size_ = sizeof(float); for (size_t i = 
0; i < input_shape.size(); i++) { input_size_ *= input_shape[i]; } - output_size_ = input_size_; - InitSizeLists(); return true; } void FakeQuantPerChannelGradGpuKernel::InitSizeLists() { - input_size_list_.push_back(input_size_); // gradient - input_size_list_.push_back(input_size_); // input - input_size_list_.push_back(min_size_); // min - input_size_list_.push_back(max_size_); // max - output_size_list_.push_back(output_size_); - workspace_size_list_.push_back(sizeof(float) * channel_out_); // scale in channel - workspace_size_list_.push_back(sizeof(float) * channel_out_); // min in channel - workspace_size_list_.push_back(sizeof(float) * channel_out_); // max in channel + input_size_list_.push_back(input_size_); // gradient + input_size_list_.push_back(input_size_); // input + input_size_list_.push_back(sizeof(float) * num_channels_); // min + input_size_list_.push_back(sizeof(float) * num_channels_); // max + output_size_list_.push_back(input_size_); // output + workspace_size_list_.push_back(sizeof(float) * num_channels_); // scale in channel + workspace_size_list_.push_back(sizeof(float) * num_channels_); // min in channel + workspace_size_list_.push_back(sizeof(float) * num_channels_); // max in channel } bool FakeQuantPerChannelGradGpuKernel::Launch(const std::vector &inputs, @@ -111,9 +99,9 @@ bool FakeQuantPerChannelGradGpuKernel::Launch(const std::vector &inp float *input = GetDeviceAddress(inputs, 1); float *input_min = GetDeviceAddress(inputs, 2); float *input_max = GetDeviceAddress(inputs, 3); - float *d_scale = GetDeviceAddress(workspace, 0); - float *d_nudge_min = GetDeviceAddress(workspace, 1); - float *d_nudge_max = GetDeviceAddress(workspace, 2); + float *scale = GetDeviceAddress(workspace, 0); + float *nudge_min = GetDeviceAddress(workspace, 1); + float *nudge_max = GetDeviceAddress(workspace, 2); if (gradient == nullptr) { MS_LOG(EXCEPTION) << "FakeQuantPerChannelGradGpuKernel gradient is null"; @@ -130,9 +118,9 @@ bool 
FakeQuantPerChannelGradGpuKernel::Launch(const std::vector &inp int total_size = input_size_ / sizeof(float); if (global_step_ >= quant_delay_) { - CalNudgePerChannel(input_min, input_max, quant_min_, quant_max_, d_nudge_min, d_nudge_max, d_scale, channel_out_, + CalNudgePerChannel(input_min, input_max, quant_min_, quant_max_, nudge_min, nudge_max, scale, num_channels_, reinterpret_cast(stream_ptr)); - CalFakeQuantizePerChannelGrad(input, gradient, output, total_size, channel_out_, d_nudge_min, d_nudge_max, + CalFakeQuantizePerChannelGrad(input, gradient, output, total_size, num_channels_, nudge_min, nudge_max, reinterpret_cast(stream_ptr)); } else { CHECK_CUDA_RET_WITH_ERROR(cudaMemcpyAsync(output, gradient, input_size_, cudaMemcpyDeviceToDevice, @@ -143,6 +131,6 @@ bool FakeQuantPerChannelGradGpuKernel::Launch(const std::vector &inp return true; } -MS_REG_GPU_KERNEL(FakeQuantWithMinMaxPerChannelGrad, FakeQuantPerChannelGradGpuKernel) +MS_REG_GPU_KERNEL(FakeQuantPerChannelGrad, FakeQuantPerChannelGradGpuKernel) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_per_channel_grad_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h similarity index 91% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_per_channel_grad_gpu_kernel.h rename to mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h index fe760d85d2..d863a2c99f 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_per_channel_grad_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h @@ -40,10 +40,6 @@ class FakeQuantPerChannelGradGpuKernel : public GpuKernel { private: size_t input_size_; - size_t min_size_; - size_t max_size_; - size_t output_size_; - size_t workspace_size_; std::vector input_size_list_; std::vector output_size_list_; std::vector workspace_size_list_; @@ -51,7 +47,7 @@ class FakeQuantPerChannelGradGpuKernel : public GpuKernel { int 
num_bits_; float quant_min_; float quant_max_; - int channel_out_; + int num_channels_; int quant_delay_; int global_step_; bool narrow_range_; diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.cc similarity index 50% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_gpu_kernel.cc rename to mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.cc index ade7c32da0..845fb5b923 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "kernel/gpu/quant/fake_quant_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/fake_quant_impl.cuh" +#include "kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.h" +#include "kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh" #include #include #include @@ -23,31 +23,25 @@ namespace mindspore { namespace kernel { -FakeQuantGpuKernel::FakeQuantGpuKernel() +FakeQuantPerLayerGpuKernel::FakeQuantPerLayerGpuKernel() : input_size_(0), - min_size_(0), - max_size_(0), - output_size_(0), - workspace_size_(0), - num_bits_(0), quant_min_(0), quant_max_(0), - quant_num_(0), - quant_delay_(0), - ema_(false), - ema_decay_(0), + quant_num_(1), global_step_(0), + num_bits_(0), + quant_delay_(0), training_(false), narrow_range_(false), symmetric_(false) {} -const std::vector &FakeQuantGpuKernel::GetInputSizeList() const { return input_size_list_; } +const std::vector &FakeQuantPerLayerGpuKernel::GetInputSizeList() const { return input_size_list_; } -const std::vector &FakeQuantGpuKernel::GetOutputSizeList() const { return output_size_list_; } +const std::vector &FakeQuantPerLayerGpuKernel::GetOutputSizeList() const { return output_size_list_; } -const std::vector &FakeQuantGpuKernel::GetWorkspaceSizeList() const { return workspace_size_list_; } +const std::vector 
&FakeQuantPerLayerGpuKernel::GetWorkspaceSizeList() const { return workspace_size_list_; } -bool FakeQuantGpuKernel::Init(const CNodePtr &kernel_node) { +bool FakeQuantPerLayerGpuKernel::Init(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 3) { MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but FakeQuant GpuKernel OP needs 3 output."; @@ -59,95 +53,73 @@ bool FakeQuantGpuKernel::Init(const CNodePtr &kernel_node) { } num_bits_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits")); - ema_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ema")); - ema_decay_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ema_decay")); + quant_delay_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")); training_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("training")); + symmetric_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("symmetric")); + narrow_range_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("narrow_range")); if (num_bits_ <= 2 || num_bits_ >= 16) { MS_LOG(EXCEPTION) << "Attr \'num_bits\' " << num_bits_ << " is out of range, expected between 2 and 16."; } - quant_delay_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("quant_delay")); if (quant_delay_ < 0) { MS_LOG(EXCEPTION) << "Attr \'quant_delay\' " << num_bits_ << "is less then 0, require larger than 0."; } - symmetric_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("symmetric")); - if (symmetric_) { - quant_min_ = 0 - (1 << (num_bits_ - 1)); - quant_max_ = (1 << (num_bits_ - 1)) - 1; - } else { - quant_min_ = 0; - quant_max_ = (1 << num_bits_) - 1; - } - - narrow_range_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("narrow_range")); + // quant min and max value + quant_min_ = 0; + quant_max_ = (1 << num_bits_) - 1; if (narrow_range_) { quant_min_++; } - if (quant_num_ == 0) { - quant_num_ = 1; - } + 
// init size auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); for (size_t i = 0; i < input_shape.size(); ++i) { quant_num_ *= SizeToInt(input_shape[i]); } - input_size_ = sizeof(float); - min_size_ = sizeof(float); - max_size_ = sizeof(float); for (size_t i = 0; i < input_shape.size(); i++) { input_size_ *= input_shape[i]; } - output_size_ = input_size_; InitSizeLists(); return true; } -void FakeQuantGpuKernel::InitSizeLists() { - input_size_list_.push_back(input_size_); // input - input_size_list_.push_back(min_size_); // min - input_size_list_.push_back(max_size_); // max - output_size_list_.push_back(output_size_); - workspace_size_list_.push_back(workspace_size_); +void FakeQuantPerLayerGpuKernel::InitSizeLists() { + input_size_list_.push_back(input_size_); // x + input_size_list_.push_back(sizeof(float)); // min + input_size_list_.push_back(sizeof(float)); // max + output_size_list_.push_back(input_size_); // y + workspace_size_list_.push_back(sizeof(float)); // scale + workspace_size_list_.push_back(sizeof(float)); // nudge_min + workspace_size_list_.push_back(sizeof(float)); // nudge_max } -bool FakeQuantGpuKernel::Launch(const std::vector &inputs, const std::vector &workspace, - const std::vector &outputs, void *stream_ptr) { +bool FakeQuantPerLayerGpuKernel::Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) { float *output = GetDeviceAddress(outputs, 0); float *input = GetDeviceAddress(inputs, 0); float *input_min = GetDeviceAddress(inputs, 1); float *input_max = GetDeviceAddress(inputs, 2); + float *scale = GetDeviceAddress(workspace, 0); + float *nudge_min = GetDeviceAddress(workspace, 1); + float *nudge_max = GetDeviceAddress(workspace, 2); if (input == nullptr) { - MS_LOG(EXCEPTION) << "FakeQuantGpuKernel input x is null."; - } - if (input_min == nullptr) { - MS_LOG(EXCEPTION) << "FakeQuantGpuKernel input min is null."; + MS_LOG(EXCEPTION) << 
"FakeQuantPerLayerGpuKernel input x is null."; } - if (input_max == nullptr) { - MS_LOG(EXCEPTION) << "FakeQuantGpuKernel input max is null."; + if (input_min == nullptr || input_max == nullptr) { + MS_LOG(EXCEPTION) << "FakeQuantPerLayerGpuKernel input min or input max is null."; } - // Allocate space for device copies - int size = sizeof(float); - float *d_scale = nullptr; - float *d_nudge_min = nullptr; - float *d_nudge_max = nullptr; - CHECK_CUDA_RET_WITH_ERROR(cudaMalloc(reinterpret_cast(&d_scale), size), "Malloc gpu memory failed"); - CHECK_CUDA_RET_WITH_ERROR(cudaMalloc(reinterpret_cast(&d_nudge_min), size), "Malloc gpu memory failed"); - CHECK_CUDA_RET_WITH_ERROR(cudaMalloc(reinterpret_cast(&d_nudge_max), size), "Malloc gpu memory failed"); - if (training_) { - // calculate the input min and max according by the parameter ema and ema_decay. - CalMinMax(input, input_min, input_max, quant_num_, ema_decay_, ema_, reinterpret_cast(stream_ptr)); // control flow for quant_delay if (global_step_ >= quant_delay_) { // real launch - CalNudge(input_min, input_max, quant_min_, quant_max_, d_nudge_min, d_nudge_max, d_scale, + CalNudge(input_min, input_max, quant_min_, quant_max_, nudge_min, nudge_max, scale, reinterpret_cast(stream_ptr)); - CalFakeQuantize(input, output, quant_num_, d_nudge_min, d_nudge_max, d_scale, symmetric_, + CalFakeQuantize(input, output, quant_num_, nudge_min, nudge_max, scale, symmetric_, reinterpret_cast(stream_ptr)); } else { CHECK_CUDA_RET_WITH_ERROR(cudaMemcpyAsync(output, input, input_size_, cudaMemcpyDeviceToDevice, @@ -157,20 +129,15 @@ bool FakeQuantGpuKernel::Launch(const std::vector &inputs, const std global_step_++; } else { // real launch - CalNudge(input_min, input_max, quant_min_, quant_max_, d_nudge_min, d_nudge_max, d_scale, + CalNudge(input_min, input_max, quant_min_, quant_max_, nudge_min, nudge_max, scale, reinterpret_cast(stream_ptr)); - CalFakeQuantize(input, output, quant_num_, d_nudge_min, d_nudge_max, d_scale, 
symmetric_, + CalFakeQuantize(input, output, quant_num_, nudge_min, nudge_max, scale, symmetric_, reinterpret_cast(stream_ptr)); } - // Cleanup - CHECK_CUDA_RET_WITH_ERROR(cudaFree(d_scale), "Free gpu memory failed"); - CHECK_CUDA_RET_WITH_ERROR(cudaFree(d_nudge_min), "Free gpu memory failed"); - CHECK_CUDA_RET_WITH_ERROR(cudaFree(d_nudge_max), "Free gpu memory failed"); - return true; } -MS_REG_GPU_KERNEL(FakeQuantWithMinMax, FakeQuantGpuKernel) +MS_REG_GPU_KERNEL(FakeQuantPerLayer, FakeQuantPerLayerGpuKernel) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.h similarity index 77% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_gpu_kernel.h rename to mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.h index 5a594c615f..38810e06df 100755 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_GPUKERNEL_H_ -#define MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_GPUKERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PERLAYER_GPUKERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PERLAYER_GPUKERNEL_H_ #include #include "kernel/gpu/gpu_kernel.h" @@ -23,10 +23,10 @@ namespace mindspore { namespace kernel { -class FakeQuantGpuKernel : public GpuKernel { +class FakeQuantPerLayerGpuKernel : public GpuKernel { public: - FakeQuantGpuKernel(); - ~FakeQuantGpuKernel() = default; + FakeQuantPerLayerGpuKernel(); + ~FakeQuantPerLayerGpuKernel() = default; const std::vector &GetInputSizeList() const override; const std::vector &GetOutputSizeList() const override; @@ -40,22 +40,16 @@ class FakeQuantGpuKernel : public GpuKernel { private: size_t input_size_; - size_t min_size_; - size_t max_size_; - size_t output_size_; - size_t workspace_size_; std::vector input_size_list_; std::vector output_size_list_; std::vector workspace_size_list_; - int num_bits_; float quant_min_; float quant_max_; int quant_num_; - int quant_delay_; - bool ema_; - float ema_decay_; int global_step_; + int num_bits_; + int quant_delay_; bool training_; bool narrow_range_; bool symmetric_; @@ -63,4 +57,4 @@ class FakeQuantGpuKernel : public GpuKernel { } // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_GPUKERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PERLAYER_GPUKERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_grad_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc similarity index 51% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_grad_gpu_kernel.cc rename to mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc index 7b7e3f1737..9c6584e239 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc @@ -14,31 +14,30 @@ * 
limitations under the License. */ -#include "kernel/gpu/quant/fake_quant_grad_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/fake_quant_impl.cuh" +#include "kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h" +#include "kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh" namespace mindspore { namespace kernel { -FakeQuantGradGpuKernel::FakeQuantGradGpuKernel() +FakeQuantPerLayerGradGpuKernel::FakeQuantPerLayerGradGpuKernel() : input_size_(0), - min_size_(0), - max_size_(0), - output_size_(0), workspace_size_(0), num_bits_(0), quant_min_(0), quant_max_(0), - quant_size_(0), + quant_num_(1), quant_delay_(0), - global_step_(0) {} + global_step_(0), + narrow_range_(false), + symmetric_(false) {} -const std::vector &FakeQuantGradGpuKernel::GetInputSizeList() const { return input_size_list_; } +const std::vector &FakeQuantPerLayerGradGpuKernel::GetInputSizeList() const { return input_size_list_; } -const std::vector &FakeQuantGradGpuKernel::GetOutputSizeList() const { return output_size_list_; } +const std::vector &FakeQuantPerLayerGradGpuKernel::GetOutputSizeList() const { return output_size_list_; } -const std::vector &FakeQuantGradGpuKernel::GetWorkspaceSizeList() const { return workspace_size_list_; } +const std::vector &FakeQuantPerLayerGradGpuKernel::GetWorkspaceSizeList() const { return workspace_size_list_; } -bool FakeQuantGradGpuKernel::Init(const CNodePtr &kernel_node) { +bool FakeQuantPerLayerGradGpuKernel::Init(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 4) { MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but FakeQuantGrad GpuKernel OP needs 4 output."; @@ -59,78 +58,67 @@ bool FakeQuantGradGpuKernel::Init(const CNodePtr &kernel_node) { MS_LOG(EXCEPTION) << "Attr \'quant_delay_\' " << quant_delay_ << " is less then 0, require larger than 0."; } + symmetric_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("symmetric")); + narrow_range_ = 
GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("narrow_range")); + + // quant min and max value quant_min_ = 0; quant_max_ = (1 << num_bits_) - 1; - - if (quant_size_ == 0) { - quant_size_ = 1; + if (narrow_range_) { + quant_min_++; } + + // init size auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); for (size_t i = 0; i < input_shape.size(); ++i) { - quant_size_ *= SizeToInt(input_shape[i]); + quant_num_ *= SizeToInt(input_shape[i]); } - input_size_ = sizeof(float); - min_size_ = sizeof(float); - max_size_ = sizeof(float); for (size_t i = 0; i < input_shape.size(); i++) { input_size_ *= input_shape[i]; } - output_size_ = input_size_; - InitSizeLists(); return true; } -void FakeQuantGradGpuKernel::InitSizeLists() { - input_size_list_.push_back(input_size_); // gradient - input_size_list_.push_back(input_size_); // input - input_size_list_.push_back(min_size_); // min - input_size_list_.push_back(max_size_); // max - output_size_list_.push_back(output_size_); - workspace_size_list_.push_back(workspace_size_); +void FakeQuantPerLayerGradGpuKernel::InitSizeLists() { + input_size_list_.push_back(input_size_); // gradient + input_size_list_.push_back(input_size_); // input + input_size_list_.push_back(sizeof(float)); // min + input_size_list_.push_back(sizeof(float)); // max + output_size_list_.push_back(input_size_); // output + workspace_size_list_.push_back(sizeof(float)); // scale + workspace_size_list_.push_back(sizeof(float)); // nudge_min + workspace_size_list_.push_back(sizeof(float)); // nudge_max } -bool FakeQuantGradGpuKernel::Launch(const std::vector &inputs, const std::vector &workspace, - const std::vector &outputs, void *stream_ptr) { +bool FakeQuantPerLayerGradGpuKernel::Launch(const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) { float *output = GetDeviceAddress(outputs, 0); float *gradient = GetDeviceAddress(inputs, 0); float *input = GetDeviceAddress(inputs, 1); 
float *input_min = GetDeviceAddress(inputs, 2); float *input_max = GetDeviceAddress(inputs, 3); + float *scale = GetDeviceAddress(workspace, 0); + float *nudge_min = GetDeviceAddress(workspace, 1); + float *nudge_max = GetDeviceAddress(workspace, 2); if (gradient == nullptr) { - MS_LOG(EXCEPTION) << "FakeQuantGradGpuKernel gradient is null"; + MS_LOG(EXCEPTION) << "FakeQuantPerLayerGradGpuKernel gradient is null"; } if (input == nullptr) { - MS_LOG(EXCEPTION) << "FakeQuantGradGpuKernel input is null."; - } - if (input_min == nullptr) { - MS_LOG(EXCEPTION) << "FakeQuantGradGpuKernel input min is null."; + MS_LOG(EXCEPTION) << "FakeQuantPerLayerGradGpuKernel input is null."; } - if (input_max == nullptr) { - MS_LOG(EXCEPTION) << "FakeQuantGradGpuKernel input max is null."; + if (input_min == nullptr || input_max == nullptr) { + MS_LOG(EXCEPTION) << "FakeQuantPerLayerGradGpuKernel input min or max is null."; } if (global_step_ >= quant_delay_) { - float *d_scale = nullptr; - float *d_nudge_min = nullptr; - float *d_nudge_max = nullptr; - int size = sizeof(float); - // Allocate space for device copies - CHECK_CUDA_RET_WITH_ERROR(cudaMalloc(reinterpret_cast(&d_scale), size), "Malloc gpu memory failed"); - CHECK_CUDA_RET_WITH_ERROR(cudaMalloc(reinterpret_cast(&d_nudge_min), size), "Malloc gpu memory failed"); - CHECK_CUDA_RET_WITH_ERROR(cudaMalloc(reinterpret_cast(&d_nudge_max), size), "Malloc gpu memory failed"); - - CalNudge(input_min, input_max, quant_min_, quant_max_, d_nudge_min, d_nudge_max, d_scale, + CalNudge(input_min, input_max, quant_min_, quant_max_, nudge_min, nudge_max, scale, reinterpret_cast(stream_ptr)); - CalFakeQuantizeGrad(input, gradient, output, quant_size_, d_nudge_min, d_nudge_max, + CalFakeQuantizeGrad(input, gradient, output, quant_num_, nudge_min, nudge_max, reinterpret_cast(stream_ptr)); - - // Cleanup - CHECK_CUDA_RET_WITH_ERROR(cudaFree(d_scale), "Free gpu memory failed"); - CHECK_CUDA_RET_WITH_ERROR(cudaFree(d_nudge_min), "Free gpu memory 
failed"); - CHECK_CUDA_RET_WITH_ERROR(cudaFree(d_nudge_max), "Free gpu memory failed"); } else { CHECK_CUDA_RET_WITH_ERROR(cudaMemcpyAsync(output, gradient, input_size_, cudaMemcpyDeviceToDevice, reinterpret_cast(stream_ptr)), @@ -140,6 +128,6 @@ bool FakeQuantGradGpuKernel::Launch(const std::vector &inputs, const return true; } -MS_REG_GPU_KERNEL(FakeQuantWithMinMaxGrad, FakeQuantGradGpuKernel) +MS_REG_GPU_KERNEL(FakeQuantPerLayerGrad, FakeQuantPerLayerGradGpuKernel) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_grad_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h similarity index 76% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_grad_gpu_kernel.h rename to mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h index 04c505d2bd..ae2ea5bfac 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_grad_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_GRAD_GPUKERNEL_H_ -#define MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_GRAD_GPUKERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PERLAYER_GRAD_GPUKERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PERLAYER_GRAD_GPUKERNEL_H_ #include #include "kernel/gpu/gpu_kernel.h" @@ -23,10 +23,10 @@ namespace mindspore { namespace kernel { -class FakeQuantGradGpuKernel : public GpuKernel { +class FakeQuantPerLayerGradGpuKernel : public GpuKernel { public: - FakeQuantGradGpuKernel(); - ~FakeQuantGradGpuKernel() = default; + FakeQuantPerLayerGradGpuKernel(); + ~FakeQuantPerLayerGradGpuKernel() = default; const std::vector &GetInputSizeList() const override; const std::vector &GetOutputSizeList() const override; @@ -40,9 +40,6 @@ class FakeQuantGradGpuKernel : public GpuKernel { private: size_t input_size_; - size_t min_size_; - size_t max_size_; - size_t output_size_; size_t workspace_size_; std::vector input_size_list_; std::vector output_size_list_; @@ -51,11 +48,13 @@ class FakeQuantGradGpuKernel : public GpuKernel { int num_bits_; float quant_min_; float quant_max_; - int quant_size_; + int quant_num_; int quant_delay_; int global_step_; + bool narrow_range_; + bool symmetric_; }; } // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_GRAD_GPUKERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PERLAYER_GRAD_GPUKERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.cc new file mode 100644 index 0000000000..a8ce72148b --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.cc @@ -0,0 +1,96 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h" +#include "kernel/gpu/cuda_impl/minmax_update_impl.cuh" +#include +#include +#include +#include + +namespace mindspore { +namespace kernel { +MinMaxUpdatePerChannelGpuKernel::MinMaxUpdatePerChannelGpuKernel() + : input_size_(0), quant_num_(1), ema_(false), ema_decay_(0), num_channels_(0) {} + +const std::vector &MinMaxUpdatePerChannelGpuKernel::GetInputSizeList() const { return input_size_list_; } + +const std::vector &MinMaxUpdatePerChannelGpuKernel::GetOutputSizeList() const { return output_size_list_; } + +const std::vector &MinMaxUpdatePerChannelGpuKernel::GetWorkspaceSizeList() const { + return workspace_size_list_; +} + +bool MinMaxUpdatePerChannelGpuKernel::Init(const CNodePtr &kernel_node) { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 3) { + MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but FakeQuant GpuKernel OP needs 3 output."; + } + + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + if (output_num != 2) { + MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but FakeQuant GpuKernel OP needs 1 output."; + } + + ema_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ema")); + ema_decay_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ema_decay")); + + // init size + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + num_channels_ = SizeToInt(input_shape[0]); + for (size_t i = 0; i < input_shape.size(); ++i) { + quant_num_ *= 
SizeToInt(input_shape[i]); + } + input_size_ = sizeof(float); + for (size_t i = 0; i < input_shape.size(); i++) { + input_size_ *= input_shape[i]; + } + InitSizeLists(); + return true; +} + +void MinMaxUpdatePerChannelGpuKernel::InitSizeLists() { + input_size_list_.push_back(input_size_); // input + input_size_list_.push_back(sizeof(float) * num_channels_); // min + input_size_list_.push_back(sizeof(float) * num_channels_); // max + output_size_list_.push_back(sizeof(float) * num_channels_); // output min + output_size_list_.push_back(sizeof(float) * num_channels_); // output max +} + +bool MinMaxUpdatePerChannelGpuKernel::Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) { + float *output_min = GetDeviceAddress(outputs, 0); + float *output_max = GetDeviceAddress(outputs, 1); + float *input = GetDeviceAddress(inputs, 0); + float *input_min = GetDeviceAddress(inputs, 1); + float *input_max = GetDeviceAddress(inputs, 2); + + if (input == nullptr) { + MS_LOG(EXCEPTION) << "MinMaxUpdatePerChannelGpuKernel input x is null."; + } + if (input_min == nullptr || input_max == nullptr) { + MS_LOG(EXCEPTION) << "MinMaxUpdatePerChannelGpuKernel input min or input max is null."; + } + + // calculate the input min and max according by the parameter ema and ema_decay. 
+ CalMinMaxPerChannel(input, input_min, input_max, output_min, output_max, input_size_ / sizeof(float), num_channels_, + ema_decay_, ema_, reinterpret_cast(stream_ptr)); + return true; +} + +MS_REG_GPU_KERNEL(MinMaxUpdatePerChannel, MinMaxUpdatePerChannelGpuKernel) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h new file mode 100644 index 0000000000..563a583ca1 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h @@ -0,0 +1,55 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERCHANNEL_GPUKERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERCHANNEL_GPUKERNEL_H_ + +#include +#include "kernel/gpu/gpu_kernel.h" +#include "kernel/gpu/gpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class MinMaxUpdatePerChannelGpuKernel : public GpuKernel { + public: + MinMaxUpdatePerChannelGpuKernel(); + ~MinMaxUpdatePerChannelGpuKernel() = default; + + const std::vector &GetInputSizeList() const override; + const std::vector &GetOutputSizeList() const override; + const std::vector &GetWorkspaceSizeList() const override; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override; + bool Init(const CNodePtr &kernel) override; + + protected: + void InitSizeLists() override; + + private: + size_t input_size_; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + + int quant_num_; + bool ema_; + float ema_decay_; + int num_channels_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERCHANNEL_GPUKERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.cc new file mode 100644 index 0000000000..3659665b23 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.cc @@ -0,0 +1,93 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h" +#include "kernel/gpu/cuda_impl/minmax_update_impl.cuh" +#include +#include +#include +#include + +namespace mindspore { +namespace kernel { +MinMaxUpdatePerLayerGpuKernel::MinMaxUpdatePerLayerGpuKernel() + : input_size_(0), quant_num_(1), ema_(false), ema_decay_(0) {} + +const std::vector &MinMaxUpdatePerLayerGpuKernel::GetInputSizeList() const { return input_size_list_; } + +const std::vector &MinMaxUpdatePerLayerGpuKernel::GetOutputSizeList() const { return output_size_list_; } + +const std::vector &MinMaxUpdatePerLayerGpuKernel::GetWorkspaceSizeList() const { return workspace_size_list_; } + +bool MinMaxUpdatePerLayerGpuKernel::Init(const CNodePtr &kernel_node) { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 3) { + MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but FakeQuant GpuKernel OP needs 3 output."; + } + + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + if (output_num != 2) { + MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but FakeQuant GpuKernel OP needs 1 output."; + } + + ema_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ema")); + ema_decay_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ema_decay")); + + // init size + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < input_shape.size(); ++i) { + quant_num_ *= SizeToInt(input_shape[i]); + } + input_size_ = sizeof(float); + for (size_t i = 0; i < 
input_shape.size(); i++) { + input_size_ *= input_shape[i]; + } + InitSizeLists(); + return true; +} + +void MinMaxUpdatePerLayerGpuKernel::InitSizeLists() { + input_size_list_.push_back(input_size_); // input + input_size_list_.push_back(sizeof(float)); // input min + input_size_list_.push_back(sizeof(float)); // input max + output_size_list_.push_back(sizeof(float)); // output min + output_size_list_.push_back(sizeof(float)); // output max +} + +bool MinMaxUpdatePerLayerGpuKernel::Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) { + float *output_min = GetDeviceAddress(outputs, 0); + float *output_max = GetDeviceAddress(outputs, 1); + float *input = GetDeviceAddress(inputs, 0); + float *input_min = GetDeviceAddress(inputs, 1); + float *input_max = GetDeviceAddress(inputs, 2); + + if (input == nullptr) { + MS_LOG(EXCEPTION) << "MinMaxUpdatePerLayerGpuKernel input x is null."; + } + if (input_min == nullptr || input_max == nullptr) { + MS_LOG(EXCEPTION) << "MinMaxUpdatePerLayerGpuKernel input min or input max is null."; + } + + CalMinMaxPerLayer(input, input_min, input_max, output_min, output_max, quant_num_, ema_decay_, ema_, + reinterpret_cast(stream_ptr)); + + return true; +} + +MS_REG_GPU_KERNEL(MinMaxUpdatePerLayer, MinMaxUpdatePerLayerGpuKernel) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h new file mode 100644 index 0000000000..a237b6dc26 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERLAYER_GPUKERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERLAYER_GPUKERNEL_H_ + +#include +#include "kernel/gpu/gpu_kernel.h" +#include "kernel/gpu/gpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class MinMaxUpdatePerLayerGpuKernel : public GpuKernel { + public: + MinMaxUpdatePerLayerGpuKernel(); + ~MinMaxUpdatePerLayerGpuKernel() = default; + + const std::vector &GetInputSizeList() const override; + const std::vector &GetOutputSizeList() const override; + const std::vector &GetWorkspaceSizeList() const override; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override; + bool Init(const CNodePtr &kernel) override; + + protected: + void InitSizeLists() override; + + private: + size_t input_size_; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + + int quant_num_; + bool ema_; + float ema_decay_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERLAYER_GPUKERNEL_H_ diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc b/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc index 493998c168..87fb8d743d 100644 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc +++ b/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc @@ -129,7 +129,7 @@ std::vector HcclKernel::GenTask(const std::vector &inpu const std::vector &workspace, const std::vector &outputs, uint32_t stream_id) { if (inputs.empty() || 
outputs.empty()) { - MS_LOG(EXCEPTION) << "inputs or outputs is empty"; + MS_LOG(EXCEPTION) << "Inputs or outputs is empty"; } stream_id_ = stream_id; std::string hccl_type = AnfAlgo::GetCNodeName(anf_node_); diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.cc b/mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.cc index f0a0dda258..601d5cf1ea 100755 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.cc +++ b/mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.cc @@ -23,6 +23,8 @@ namespace mindspore { namespace kernel { void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { + const std::vector kHcclSupportTypes = {kNumberTypeInt8, kNumberTypeInt32, kNumberTypeFloat16, + kNumberTypeFloat32, kNumberTypeInt16}; MS_EXCEPTION_IF_NULL(kernel_info_list); MS_EXCEPTION_IF_NULL(kernel_node); std::string op_name = AnfAlgo::GetCNodeName(kernel_node); @@ -30,27 +32,27 @@ void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector inputs_format{}; - std::vector inputs_type{}; - for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { - inputs_format.emplace_back(AnfAlgo::GetPrevNodeOutputFormat(kernel_node, input_index)); - inputs_type.push_back(AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, input_index)); - } - - std::vector outputs_format; - std::vector outputs_type; - for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) { - outputs_format.emplace_back(AnfAlgo::GetPrevNodeOutputFormat(kernel_node, output_index)); - outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index)); + for (const auto &type : kHcclSupportTypes) { + std::vector inputs_format{}; + std::vector inputs_type{}; + for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { + inputs_format.emplace_back(AnfAlgo::GetPrevNodeOutputFormat(kernel_node, input_index)); + inputs_type.push_back(type); + } + 
std::vector outputs_format; + std::vector outputs_type; + for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) { + outputs_format.emplace_back(AnfAlgo::GetPrevNodeOutputFormat(kernel_node, output_index)); + outputs_type.push_back(type); + } + auto builder = KernelBuildInfo::KernelBuildInfoBuilder(); + builder.SetInputsFormat(inputs_format); + builder.SetInputsDeviceType(inputs_type); + builder.SetOutputsFormat(outputs_format); + builder.SetOutputsDeviceType(outputs_type); + builder.SetKernelType(HCCL_KERNEL); + kernel_info_list->push_back(builder.Build()); } - auto builder = KernelBuildInfo::KernelBuildInfoBuilder(); - builder.SetInputsFormat(inputs_format); - builder.SetInputsDeviceType(inputs_type); - builder.SetOutputsFormat(outputs_format); - builder.SetOutputsDeviceType(outputs_type); - builder.SetKernelType(HCCL_KERNEL); - kernel_info_list->push_back(builder.Build()); } } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.cc b/mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.cc index dba692606c..9dbe708ef9 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.cc +++ b/mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.cc @@ -32,7 +32,12 @@ bool HcomAllBroadCastKernel::Launch(const std::vector &inputs, if (context_ptr->enable_task_sink()) { return true; } + if (inputs.empty() || hccl_data_type_list_.empty()) { + MS_LOG(ERROR) << "BroadCast param is empty"; + return false; + } const char *tag = "Hccl-BroadCast"; + MS_EXCEPTION_IF_NULL(inputs[0]); hcclResult_t ret = hcom_broadcast(tag, inputs[0]->addr, hccl_count_, hccl_data_type_list_[0], root_id_, nullptr, stream_ptr); if (ret != HCCL_SUCCESS) { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_gather.cc b/mindspore/ccsrc/kernel/hccl/hcom_all_gather.cc index 67cd1001e3..6494f7fd12 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_gather.cc +++ b/mindspore/ccsrc/kernel/hccl/hcom_all_gather.cc @@ -31,6 +31,10 
@@ bool HcomAllGatherKernel::Launch(const std::vector &inputs, const st if (context_ptr->enable_task_sink()) { return true; } + if (inputs.empty() || hccl_data_type_list_.empty()) { + MS_LOG(ERROR) << "AllGather param is empty"; + return false; + } const char *tag = "Hccl-AllGather"; hcclResult_t ret = hcom_all_gather(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], nullptr, stream_ptr); diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce.cc b/mindspore/ccsrc/kernel/hccl/hcom_all_reduce.cc index 2bf9823e5d..35a058e766 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce.cc +++ b/mindspore/ccsrc/kernel/hccl/hcom_all_reduce.cc @@ -31,6 +31,10 @@ bool HcomAllReduceKernel::Launch(const std::vector &inputs, const st if (context_ptr->enable_task_sink()) { return true; } + if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) { + MS_LOG(ERROR) << "AllReduce param is empty"; + return false; + } const char *tag = "Hccl-AllReduce"; hcclResult_t ret = hcom_all_reduce(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], op_type_, nullptr, stream_ptr); diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.cc b/mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.cc index 05217108d9..dea516885d 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.cc +++ b/mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.cc @@ -32,6 +32,10 @@ bool HcomAllReduceScatterKernel::Launch(const std::vector &inputs, if (context_ptr->enable_task_sink()) { return true; } + if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) { + MS_LOG(ERROR) << "ReduceScatter param is empty"; + return false; + } const char *tag = "Hccl-ReduceScatter"; hcclResult_t ret = hcom_reduce_scatter(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], op_type_, nullptr, stream_ptr); diff --git a/mindspore/ccsrc/kernel/hccl/hcom_util.cc b/mindspore/ccsrc/kernel/hccl/hcom_util.cc index 
f2d35878d8..61a4d43eb5 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_util.cc +++ b/mindspore/ccsrc/kernel/hccl/hcom_util.cc @@ -66,6 +66,7 @@ bool HcomUtil::GetHcomDataType(const AnfNodePtr &anf_node, vector &shape, size_t *size) { + MS_EXCEPTION_IF_NULL(size); int tmp_size = 1; uint32_t type_size = 4; for (size_t i = 0; i < shape.size(); i++) { @@ -84,6 +85,7 @@ bool HcomUtil::GetHcclOpSize(const hcclDataType_t &data_type, const vector #include "mindspore/ccsrc/kernel/kernel.h" #include "kernel/kernel.h" -#include "kernel/akg/akgkernelbuild.h" +#include "kernel/akg/akg_kernel_build.h" #include "nlohmann/json.hpp" #include "securec/include/securec.h" #include "pipeline/parse/python_adapter.h" diff --git a/mindspore/ccsrc/kernel/kernel.h b/mindspore/ccsrc/kernel/kernel.h index 271f6f20fa..7bccce49c3 100644 --- a/mindspore/ccsrc/kernel/kernel.h +++ b/mindspore/ccsrc/kernel/kernel.h @@ -27,11 +27,11 @@ #include "utils/log_adapter.h" namespace mindspore { -enum KernelType : int { UNKNOWN_KERNEL_TYPE = 0, AUTO_DIFF_KERNEL, AICPU_KERNEL, RT_KERNEL, HCCL_KERNEL, TBE_KERNEL }; +enum KernelType : int { UNKNOWN_KERNEL_TYPE = 0, AKG_KERNEL, AICPU_KERNEL, RT_KERNEL, HCCL_KERNEL, TBE_KERNEL }; namespace kernel { -enum Axis { +enum Axis : int { N = 0, C, H, @@ -45,6 +45,7 @@ enum FusionType { COMMREDUCE, SEGMENT, OPAQUE, + DYNAMIC, UNKNOWN_FUSION_TYPE = -1, }; enum OpPattern { diff --git a/mindspore/ccsrc/kernel/kernel_build_info.cc b/mindspore/ccsrc/kernel/kernel_build_info.cc index ce7164a0d1..c912a0c199 100644 --- a/mindspore/ccsrc/kernel/kernel_build_info.cc +++ b/mindspore/ccsrc/kernel/kernel_build_info.cc @@ -105,7 +105,12 @@ bool KernelBuildInfo::operator==(const KernelBuildInfo &other) const { return false; } if (inputs_format_ != other.inputs_format_ || outputs_format_ != other.outputs_format_) { - return false; + if (op_pattern_ != kFormatAgnosticPattern) { + return false; + } else { + MS_LOG(INFO) << "this kernel build info:" << this->ToString() + << ", other kernel 
build info: " << other.ToString(); + } } return !(inputs_device_type_ != other.inputs_device_type_ || outputs_device_type_ != other.outputs_device_type_); } @@ -167,5 +172,20 @@ void KernelBuildInfo::KernelBuildInfoBuilder::SetOpPattern(OpPattern pattern) { MS_EXCEPTION_IF_NULL(kernel_build_info_); kernel_build_info_->op_pattern_ = pattern; } +void KernelBuildInfo::KernelBuildInfoBuilder::SetInputFormat(const std::string &format, size_t index) { + MS_EXCEPTION_IF_NULL(kernel_build_info_); + if (index >= kernel_build_info_->inputs_format_.size()) { + MS_LOG(EXCEPTION) << "index outof range!"; + } + kernel_build_info_->inputs_format_[index] = format; +} + +void KernelBuildInfo::KernelBuildInfoBuilder::SetOutputFormat(const std::string &format, size_t index) { + MS_EXCEPTION_IF_NULL(kernel_build_info_); + if (index >= kernel_build_info_->outputs_format_.size()) { + MS_LOG(EXCEPTION) << "index outof range!"; + } + kernel_build_info_->outputs_format_[index] = format; +} } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/kernel_build_info.h b/mindspore/ccsrc/kernel/kernel_build_info.h index d17b41a6fc..ca1083fd68 100644 --- a/mindspore/ccsrc/kernel/kernel_build_info.h +++ b/mindspore/ccsrc/kernel/kernel_build_info.h @@ -31,7 +31,7 @@ class KernelBuildInfo { class KernelBuildInfoBuilder; KernelBuildInfo() { - kernel_type_ = AUTO_DIFF_KERNEL; + kernel_type_ = TBE_KERNEL; fusion_type_ = OPAQUE; processor_ = AICORE; op_pattern_ = kCommonPattern; @@ -131,6 +131,10 @@ class KernelBuildInfo::KernelBuildInfoBuilder { void SetOpPattern(OpPattern pattern); + void SetInputFormat(const std::string &format, size_t index); + + void SetOutputFormat(const std::string &format, size_t index); + std::shared_ptr Build(); private: diff --git a/mindspore/ccsrc/kernel/kernel_fusion.cc b/mindspore/ccsrc/kernel/kernel_fusion.cc index 4e1ad97e23..be79eca15a 100644 --- a/mindspore/ccsrc/kernel/kernel_fusion.cc +++ b/mindspore/ccsrc/kernel/kernel_fusion.cc @@ -102,7 
+102,8 @@ std::map KernelFusion(const std::vector while (!build_manger->IsAllTaskFinish()) { int task_id = -1; char *task_result = nullptr; - auto ret = build_manger->WaitOne(&task_id, &task_result); + char *pre_build_result = nullptr; + auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result); if (!ret) { MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id; } diff --git a/mindspore/ccsrc/kernel/kernel_query.cc b/mindspore/ccsrc/kernel/kernel_query.cc index 8d3ee64591..5eda847917 100755 --- a/mindspore/ccsrc/kernel/kernel_query.cc +++ b/mindspore/ccsrc/kernel/kernel_query.cc @@ -20,7 +20,8 @@ #include "kernel/aicpu/aicpu_kernel_metadata.h" #include "kernel/rts/rt_kernel_info.h" #include "kernel/hccl/hccl_kernel_metadata.h" -#include "kernel/tbe/tbe_kernel_select.h" +#include "kernel/tbe/tbe_kernel_select/tbe_kernel_select.h" +#include "kernel/akg/akg_kernel_metadata.h" #include "session/anf_runtime_algorithm.h" namespace mindspore { @@ -31,7 +32,7 @@ void FilterInvalidKernelInfo(const CNodePtr &kernel_node, MS_EXCEPTION_IF_NULL(kernel_info_list); std::vector> filtered_list; (void)std::copy_if(kernel_info_list->begin(), kernel_info_list->end(), std::back_inserter(filtered_list), - [&](const std::shared_ptr &kernel_build_info) { + [&kernel_node](const std::shared_ptr &kernel_build_info) { return AnfAlgo::GetOutputTensorNum(kernel_node) == kernel_build_info->GetOutputNum() && AnfAlgo::GetInputTensorNum(kernel_node) == kernel_build_info->GetInputNum(); }); @@ -39,28 +40,40 @@ void FilterInvalidKernelInfo(const CNodePtr &kernel_node, kernel_info_list->clear(); (void)std::copy(filtered_list.begin(), filtered_list.end(), std::back_inserter(*kernel_info_list)); } else { - MS_LOG(WARNING) << "All kernel Info list does not match any kernel info "; + MS_LOG(INFO) << "All kernel Info list does not match any kernel info "; for (size_t index = 0; index < kernel_info_list->size(); ++index) { - 
MS_EXCEPTION_IF_NULL(kernel_info_list->at(index)); - MS_LOG(WARNING) << "kernel [ " << index << " ] :" << kernel_info_list->at(index)->ToString(); + std::ostringstream buffer; + auto kernel_info = kernel_info_list->at(index); + MS_EXCEPTION_IF_NULL(kernel_info); + if (AnfAlgo::GetOutputTensorNum(kernel_node) != kernel_info->GetOutputNum()) { + buffer << "Kernel node's output size [" << AnfAlgo::GetOutputTensorNum(kernel_node) << "]" + << " cannot match the kernel's output size [" << kernel_info->GetOutputNum() << "]"; + } else { + buffer << "Kernel node's output size [" << AnfAlgo::GetInputTensorNum(kernel_node) << "]" + << " cannot match the kernel's output size [" << kernel_info->GetInputNum() << "]"; + } + MS_LOG(INFO) << "kernel [ " << index << " ] :" << kernel_info->ToString() << buffer.str(); } kernel_info_list->clear(); - MS_LOG(WARNING) << "node" << kernel_node->DebugString() << "'s output size : [" - << AnfAlgo::GetOutputTensorNum(kernel_node) << "]" - << "input size : [" << AnfAlgo::GetInputTensorNum(kernel_node) << "] cannot match any kernelInfo !"; + MS_LOG(INFO) << "node" << kernel_node->DebugString() << "'s output size : [" + << AnfAlgo::GetOutputTensorNum(kernel_node) << "]" + << "input size : [" << AnfAlgo::GetInputTensorNum(kernel_node) << "] cannot match any kernelInfo !"; } } } // namespace -void KernelQuery(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { + +void KernelQueryAll(const CNodePtr &kernel_node, + std::vector> *kernel_info_list) { MS_EXCEPTION_IF_NULL(kernel_node); MS_EXCEPTION_IF_NULL(kernel_info_list); + TbeMetadataInfo(kernel_node, kernel_info_list); - FilterInvalidKernelInfo(kernel_node, kernel_info_list); + if (kernel_info_list->empty()) { AicpuMetadataInfo(kernel_node, kernel_info_list); if (!kernel_info_list->empty()) { - MS_LOG(WARNING) << "The node [" << kernel_node->DebugString() - << "] cannot find valid TBE kernel info, try to get aicpu kernel info"; + MS_LOG(INFO) << "The node [" << 
kernel_node->DebugString() + << "] cannot find valid TBE kernel info, try to get aicpu kernel info"; AnfAlgo::SetNodeAttr(kAttrIsAICPUKernel, MakeValue(true), kernel_node); } } @@ -75,6 +88,28 @@ void KernelQuery(const CNodePtr &kernel_node, std::vectorempty()) { MS_LOG(EXCEPTION) << "Op " << kernel_node->DebugString() << "kernel query fail!"; } +} + +void KernelQuery(const CNodePtr &kernel_node, std::vector> *kernel_info_list, + KernelType kernel_type) { + MS_EXCEPTION_IF_NULL(kernel_node); + MS_EXCEPTION_IF_NULL(kernel_info_list); + + std::string op_name = AnfAlgo::GetCNodeName(kernel_node); + + switch (kernel_type) { + case KernelType::AKG_KERNEL: + AkgMetadataInfo(kernel_node, kernel_info_list); + break; + default: + KernelQueryAll(kernel_node, kernel_info_list); + break; + } + + if (kernel_info_list->empty()) { + MS_EXCEPTION(NotExistsError) << "Op[" << kernel_node->DebugString() << "] kernel query fail!"; + } + // check output FilterInvalidKernelInfo(kernel_node, kernel_info_list); } @@ -106,7 +141,6 @@ bool IsSupportedByAICore(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr auto cnode = kernel_node->cast(); MS_EXCEPTION_IF_NULL(cnode); TbeMetadataInfo(cnode, &kernel_info_list); - FilterInvalidKernelInfo(cnode, &kernel_info_list); return std::any_of(kernel_info_list.begin(), kernel_info_list.end(), [&select_kernel_build_info](const kernel::KernelBuildInfoPtr item) { MS_EXCEPTION_IF_NULL(item); diff --git a/mindspore/ccsrc/kernel/kernel_query.h b/mindspore/ccsrc/kernel/kernel_query.h index fe8696a919..257b0cf073 100644 --- a/mindspore/ccsrc/kernel/kernel_query.h +++ b/mindspore/ccsrc/kernel/kernel_query.h @@ -25,7 +25,8 @@ namespace mindspore { namespace kernel { -void KernelQuery(const CNodePtr &kernel_node, std::vector> *kernel_info_list); +void KernelQuery(const CNodePtr &kernel_node, std::vector> *kernel_info_list, + KernelType kernel_type = KernelType::UNKNOWN_KERNEL_TYPE); void AICPUQuery(const CNodePtr &kernel_node, std::vector> 
*kernel_info_list); bool IsSupportedByAICPU(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr &select_kernel_build_info); bool IsSupportedByAICore(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr &select_kernel_build_info); diff --git a/mindspore/ccsrc/kernel/oplib/opinfo.h b/mindspore/ccsrc/kernel/oplib/opinfo.h index 8d7b543ea6..f224a97efc 100644 --- a/mindspore/ccsrc/kernel/oplib/opinfo.h +++ b/mindspore/ccsrc/kernel/oplib/opinfo.h @@ -90,17 +90,36 @@ class OpIOInfo { class OpInfo { public: OpInfo() = default; + OpInfo(const OpInfo &opinfo) { + op_name_ = opinfo.op_name(); + imply_type_ = opinfo.imply_type(); + + impl_path_ = opinfo.impl_path(); + fusion_type_ = opinfo.fusion_type(); + async_flag_ = opinfo.async_flag_; + binfile_name_ = opinfo.binfile_name_; + compute_cost_ = opinfo.compute_cost_; + kernel_name_ = opinfo.kernel_name(); + partial_flag_ = opinfo.partial_flag_; + dynamic_format_ = opinfo.dynamic_format_; + op_pattern_ = opinfo.op_pattern(); + for (auto attr : opinfo.attrs_ptr()) { + attrs_ptr_.push_back(std::make_shared(*attr)); + } + for (auto input : opinfo.inputs_ptr()) { + inputs_ptr_.push_back(std::make_shared(*input)); + } + for (auto output : opinfo.outputs_ptr()) { + outputs_ptr_.push_back(std::make_shared(*output)); + } + ref_infos_ = opinfo.ref_infos(); + } ~OpInfo() = default; std::string op_name() const { return op_name_; } OpImplyType imply_type() const { return imply_type_; } std::string impl_path() const { return impl_path_; } std::string fusion_type() const { return fusion_type_; } - bool async_flag() const { return async_flag_; } - std::string binfile_name() const { return binfile_name_; } - int compute_cost() const { return compute_cost_; } std::string kernel_name() const { return kernel_name_; } - bool partial_flag() const { return partial_flag_; } - bool dynamic_format() const { return dynamic_format_; } OpPattern op_pattern() const { return op_pattern_; } std::vector> attrs_ptr() const { return attrs_ptr_; } 
std::vector> inputs_ptr() const { return inputs_ptr_; } @@ -116,16 +135,15 @@ class OpInfo { void set_compute_cost(const int compute_cost) { compute_cost_ = compute_cost; } void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; } void set_partial_flag(const bool partial_flag) { partial_flag_ = partial_flag; } - void set_dynamic_format(const bool dynamic_format) { dynamic_format_ = dynamic_format; } void set_op_pattern(const OpPattern op_pattern) { op_pattern_ = op_pattern; } void add_attrs_ptr(const std::shared_ptr &attr) { attrs_ptr_.push_back(attr); } void add_inputs_ptr(const std::shared_ptr &input) { inputs_ptr_.push_back(input); } void add_outputs_ptr(const std::shared_ptr &output) { outputs_ptr_.push_back(output); } - void set_inputs_ptr(const std::vector> &inputs) { inputs_ptr_ = inputs; } - void set_outputs_ptr(const std::vector> &outputs) { outputs_ptr_ = outputs; } bool is_ref() const { return !ref_infos_.empty(); } bool has_ref_index(size_t out_index) const { return ref_infos_.find(out_index) != ref_infos_.end(); } void add_ref_pair(size_t out_index, size_t in_index) { (void)ref_infos_.emplace(out_index, in_index); } + void ClearInputs() { (void)inputs_ptr_.clear(); } + void ClearOutputs() { (void)outputs_ptr_.clear(); } private: std::string op_name_; diff --git a/mindspore/ccsrc/kernel/oplib/oplib.cc b/mindspore/ccsrc/kernel/oplib/oplib.cc index b1bff36518..35bc407026 100644 --- a/mindspore/ccsrc/kernel/oplib/oplib.cc +++ b/mindspore/ccsrc/kernel/oplib/oplib.cc @@ -35,7 +35,7 @@ constexpr auto kKernelName = "kernel_name"; constexpr auto kPartialFlag = "partial_flag"; constexpr auto kReshapeType = "reshape_type"; constexpr auto kOpPattern = "op_pattern"; -constexpr auto kDynamicFormat = "dynamic_format"; +constexpr auto kDynamicFormat = "dynamicFormat"; constexpr auto kFormatAgnostic = "formatAgnostic"; constexpr auto kBroadcast = "broadcast"; constexpr auto kReduce = "reduce"; @@ -100,22 +100,28 @@ bool OpLib::RegOp(const 
std::string &json_string, const std::string &impl_path) void OpLib::DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr &op_info) { const std::map kOpPatternMap = {{kFormatAgnostic, kFormatAgnosticPattern}, - {kFormatAgnostic, kBroadcastPattern}, + {kBroadcast, kBroadcastPattern}, {kReduce, kReducePattern}, {kDynamicFormat, kDynamicFormatPattern}}; + MS_EXCEPTION_IF_NULL(op_info); op_info->set_async_flag(obj.at(kAsyncFlag)); op_info->set_binfile_name(obj.at(kBinfileName)); op_info->set_compute_cost(obj.at(kComputeCost)); op_info->set_kernel_name(obj.at(kKernelName)); op_info->set_partial_flag(obj.at(kPartialFlag)); + if (obj.find(kOpPattern) != obj.end()) { - if (kOpPatternMap.find(obj.at(kOpPattern)) != kOpPatternMap.end()) { - op_info->set_op_pattern(obj.at(kOpPattern)); + std::string op_pattern = obj.at(kOpPattern); + auto find_iter = kOpPatternMap.find(op_pattern); + if (find_iter == kOpPatternMap.end()) { + if (!op_pattern.empty()) { + MS_LOG(WARNING) << "Op pattern set value error: " << op_pattern; + } + op_info->set_op_pattern(kCommonPattern); + } else { + op_info->set_op_pattern(find_iter->second); } } - if (obj.find(kDynamicFormat) != obj.end()) { - op_info->set_dynamic_format(obj.at(kDynamicFormat)); - } } bool OpLib::DecodeOpInfo(const nlohmann::json &obj, const mindspore::kernel::OpImplyType imply_type, @@ -194,6 +200,7 @@ bool OpLib::DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type, bool OpLib::DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr &op_io, size_t index) { + MS_EXCEPTION_IF_NULL(op_io); bool ret = true; try { std::vector dtype; @@ -213,6 +220,7 @@ bool OpLib::DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::sha bool OpLib::DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type, const std::shared_ptr &op_info, const nlohmann::json &dtype_format) { + MS_EXCEPTION_IF_NULL(op_info); bool ret = true; try { std::shared_ptr 
op_io = std::make_shared(); @@ -264,8 +272,7 @@ std::shared_ptr OpLib::FindOp(const std::string &op_name, OpImplyType im auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); bool is_gpu = (context->device_target() == kGPUDevice); - if ((is_gpu && (imply_type == kTBE || imply_type == kAICPU)) || - (!is_gpu && (imply_type != kTBE && imply_type != kAICPU))) { + if (is_gpu && (imply_type == kTBE || imply_type == kAICPU)) { MS_LOG(ERROR) << "FindOp failed: opname: " << op_name << ", imply_type: " << ImplTypeToStr(imply_type) << ", current op num: " << op_info_.size(); return nullptr; diff --git a/mindspore/ccsrc/kernel/oplib/oplib.h b/mindspore/ccsrc/kernel/oplib/oplib.h index 3d4dcad908..47183455a2 100644 --- a/mindspore/ccsrc/kernel/oplib/oplib.h +++ b/mindspore/ccsrc/kernel/oplib/oplib.h @@ -29,7 +29,12 @@ class OpLib { OpLib() = default; virtual ~OpLib() = default; bool RegOp(const std::string &json_string, const std::string &impl_path); + static void RegOpInfo(std::shared_ptr opinfo) { + op_info_.emplace_back(opinfo); + return; + } static std::shared_ptr FindOp(const std::string &op_name, OpImplyType imply_type); + static const std::vector> &GetAllOpsInfo() { return op_info_; } protected: static std::vector> op_info_; diff --git a/mindspore/ccsrc/kernel/oplib/oploader.h b/mindspore/ccsrc/kernel/oplib/oploader.h new file mode 100644 index 0000000000..dd4c37e80b --- /dev/null +++ b/mindspore/ccsrc/kernel/oplib/oploader.h @@ -0,0 +1,43 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_OPLOADER_H +#define MINDSPORE_OPLOADER_H + +#include +#include "kernel/oplib/oplib.h" + +namespace mindspore { +namespace kernel { +class OpInfoLoaderPy { + public: + OpInfoLoaderPy() = default; + + ~OpInfoLoaderPy() = default; + + size_t GetAllOpsInfo() { + auto ops = OpLib::GetAllOpsInfo(); + auto op_infos = new std::vector(); + for (auto op_info : ops) { + auto new_op_info = new OpInfo(*op_info); + op_infos->emplace_back(new_op_info); + } + return (size_t)op_infos; + } +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_OPLOADER_H diff --git a/mindspore/ccsrc/kernel/rts/label_switch.cc b/mindspore/ccsrc/kernel/rts/label_switch.cc index 168e1f4844..d84407a930 100644 --- a/mindspore/ccsrc/kernel/rts/label_switch.cc +++ b/mindspore/ccsrc/kernel/rts/label_switch.cc @@ -67,9 +67,7 @@ std::vector LabelSwitchKernel::GenTask(const std::vector task_info_list; cond_ = inputs[0]->addr; - // todo: need update ge task info define - auto task_info_ptr = std::make_shared(stream_id, 0); - // auto task_info_ptr = std::make_shared(stream_id, label_size_, label_list_, cond_); + auto task_info_ptr = std::make_shared(stream_id, label_size_, label_list_, cond_); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); return task_info_list; @@ -77,7 +75,6 @@ std::vector LabelSwitchKernel::GenTask(const std::vector> LabelSwitchDesc::GetKernelInfo() { std::vector> label_switch_build_info{}; - vector input_format{kOpFormat_DEFAULT, kOpFormat_DEFAULT}; vector input_type{kNumberTypeUInt32, kNumberTypeBool}; if (input_format.size() != input_type.size()) { diff --git a/mindspore/ccsrc/kernel/rts/recv.cc b/mindspore/ccsrc/kernel/rts/recv.cc index b68380dac8..c195fd1c92 100644 --- a/mindspore/ccsrc/kernel/rts/recv.cc +++ b/mindspore/ccsrc/kernel/rts/recv.cc @@ -37,6 +37,9 @@ bool RecvKernel::Init(const AnfNodePtr 
&anf_node) { MS_EXCEPTION_IF_NULL(anf_node); auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); MS_EXCEPTION_IF_NULL(primitive); + if (!AnfAlgo::HasNodeAttr(kAttrEventId, anf_node->cast())) { + MS_LOG(EXCEPTION) << "RecvKernel has no attr kAttrEventId"; + } event_id_ = GetValue(primitive->GetAttr(kAttrEventId)); MS_LOG(INFO) << "recv op event_id_:" << event_id_; return true; diff --git a/mindspore/ccsrc/kernel/rts/send.cc b/mindspore/ccsrc/kernel/rts/send.cc index ebcb53069e..ccdd43ebb6 100644 --- a/mindspore/ccsrc/kernel/rts/send.cc +++ b/mindspore/ccsrc/kernel/rts/send.cc @@ -34,6 +34,9 @@ bool SendKernel::Init(const AnfNodePtr &anf_node) { MS_EXCEPTION_IF_NULL(anf_node); auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); MS_EXCEPTION_IF_NULL(primitive); + if (!AnfAlgo::HasNodeAttr(kAttrEventId, anf_node->cast())) { + MS_LOG(EXCEPTION) << "SendKernel has no attr kAttrEventId"; + } event_id_ = GetValue(primitive->GetAttr(kAttrEventId)); MS_LOG(INFO) << "send op event id:" << event_id_; return true; diff --git a/mindspore/ccsrc/kernel/rts/stream_active.cc b/mindspore/ccsrc/kernel/rts/stream_active.cc index 3666dd670f..4f0895a0be 100644 --- a/mindspore/ccsrc/kernel/rts/stream_active.cc +++ b/mindspore/ccsrc/kernel/rts/stream_active.cc @@ -36,6 +36,9 @@ bool StreamActiveKernel::Init(const AnfNodePtr &anf_node) { MS_LOG(INFO) << "stream active op init start"; auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); MS_EXCEPTION_IF_NULL(primitive); + if (!AnfAlgo::HasNodeAttr(kAttrActiveStreamList, anf_node->cast())) { + MS_LOG(EXCEPTION) << "StreamActiveKernel has no attr kAttrActiveStreamList"; + } active_streams_index_ = GetValue>(primitive->GetAttr(kAttrActiveStreamList)); return true; } diff --git a/mindspore/ccsrc/kernel/rts/stream_switch.cc b/mindspore/ccsrc/kernel/rts/stream_switch.cc index 9dfb3e8de0..bab6b04366 100644 --- a/mindspore/ccsrc/kernel/rts/stream_switch.cc +++ b/mindspore/ccsrc/kernel/rts/stream_switch.cc @@ -42,8 +42,17 @@ bool 
StreamSwitchKernel::Init(const AnfNodePtr &anf_node) { MS_LOG(INFO) << "stream switch op init start"; auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); MS_EXCEPTION_IF_NULL(primitive); + if (!AnfAlgo::HasNodeAttr(kAttrSwitchCondition, anf_node->cast())) { + MS_LOG(EXCEPTION) << "StreamSwitchKernel has no attr kAttrSwitchCondition"; + } cond_ = tagRtCondition(GetValue(primitive->GetAttr(kAttrSwitchCondition))); + if (!AnfAlgo::HasNodeAttr(kAttrTrueBranchStream, anf_node->cast())) { + MS_LOG(EXCEPTION) << "StreamSwitchKernel has no attr kAttrTrueBranchStream"; + } true_stream_index_ = GetValue(primitive->GetAttr(kAttrTrueBranchStream)); + if (!AnfAlgo::HasNodeAttr(kAttrDataType, anf_node->cast())) { + MS_LOG(EXCEPTION) << "StreamSwitchKernel has no attr kAttrDataType"; + } data_type_ = tagRtSwitchDataType(GetValue(primitive->GetAttr(kAttrDataType))); MS_LOG(INFO) << "cond_:" << static_cast(cond_) << ", true_stream_index_:" << true_stream_index_ << ", data_type_:" << static_cast(data_type_); @@ -54,7 +63,7 @@ bool StreamSwitchKernel::Launch(const std::vector &inputs, const std const std::vector &outputs, void *stream_ptr) { MS_LOG(INFO) << "stream switch op launch start"; if (inputs.size() != 2) { - MS_LOG(ERROR) << "Stream switch inputs size is " << inputs.size() << ", only support 2"; + MS_LOG(EXCEPTION) << "Stream switch inputs size is " << inputs.size() << ", only support 2"; } void *loop_cnt = inputs[0]->addr; @@ -73,7 +82,7 @@ std::vector StreamSwitchKernel::GenTask(const std::vector tbe_func_adapter_map = { {"softmax", "softmax_v2"}, {"log_softmax", "log_softmax_v2"}, + {"apply_momentum", "apply_momentum_d"}, + {"apply_ftrl", "apply_ftrl_d"}, {"re_lu6", "relu6"}, {"re_lu6_grad", "relu6_grad"}, {"re_lu", "relu"}, @@ -51,10 +53,12 @@ static std::map tbe_func_adapter_map = { {"scatter_nd", "scatter_nd_d"}, {"tile", "tile_d"}, {"gather_v2", "gather_v2_d"}, + {"sparse_gather_v2", "gather_v2_d"}, {"batch_mat_mul", "batch_matmul"}, {"b_n_training_reduce", 
"bn_training_reduce"}, {"b_n_training_update", "bn_training_update"}, {"b_n_training_update_v2", "bn_training_update_v2"}, + {"b_n_training_update_v3", "bn_training_update_v3"}, {"b_n_training_reduce_grad", "bn_training_reduce_grad"}, {"b_n_training_update_grad", "bn_training_update_grad"}, {"b_n_infer", "bn_infer"}, @@ -66,17 +70,27 @@ static std::map tbe_func_adapter_map = { {"strided_slice", "strided_slice_d"}, {"strided_slice_grad", "strided_slice_grad_d"}, {"sparse_apply_ftrl", "sparse_apply_ftrl_d"}, + {"apply_ada_max", "apply_ada_max_d"}, + {"apply_adadelta", "apply_adadelta_d"}, + {"apply_adagrad", "apply_adagrad_d"}, + {"apply_adagrad_v2", "apply_adagradv2_d"}, + {"sparse_apply_adagrad", "sparse_apply_adagrad_d"}, + {"apply_proximal_adagrad", "apply_proximal_adagrad_d"}, + {"sparse_apply_proximal_adagrad", "sparse_apply_proximal_adagrad_d"}, {"transpose", "transpose_d"}, {"fill", "fill_d"}, {"unsorted_segment_sum", "unsorted_segment_sum_d"}, {"concat", "concat_d"}, {"slice", "slice_d"}, {"reduce_sum", "reduce_sum_d"}, + {"inplace_add", "inplace_add_d"}, + {"inplace_sub", "inplace_sub_d"}, {"one_hot", "one_hot_d"}, {"sum", "reduce_sum_d"}, {"lamb_next_mv_with_decay", "lamb_next_m_v_with_decay"}, {"lamb_next_mv", "lamb_next_m_v"}, {"split", "split_d"}, + {"split_v", "split_v_d"}, {"resize_nearest_neighbor", "resize_nearest_neighbor_v2_d"}, {"resize_nearest_neighbor_grad", "resize_nearest_neighbor_v2_grad_d"}, {"pad", "pad_d"}, @@ -88,7 +102,7 @@ static std::map tbe_func_adapter_map = { {"batch_to_space_nd", "batch_to_space_nd_d"}, {"resize_bilinear", "resize_bilinear_v2_d"}, {"resize_bilinear_grad", "resize_bilinear_v2_grad"}, - {"adam", "apply_adam"}, + {"adam", "apply_adam_d"}, {"r_oi_align", "roi_align"}, {"r_oi_align_grad", "roi_align_grad"}, {"i_ou", "iou"}, @@ -97,6 +111,9 @@ static std::map tbe_func_adapter_map = { {"n_ms_with_mask", "nms_with_mask"}, {"square_sum_all", "square_sum_all"}, {"cum_sum", "cumsum_d"}, + {"range", "range_d"}, + 
{"lin_space", "lin_space_d"}, + {"inv_grad", "inv_grad"}, {"apply_rms_prop", "apply_rms_prop_d"}, {"cum_prod", "cumprod_d"}, {"reduce_all", "reduce_all_d"}, @@ -104,7 +121,13 @@ static std::map tbe_func_adapter_map = { {"unsorted_segment_min", "unsorted_segment_min_d"}, {"reduce_prod", "reduce_prod_d"}, {"a_cos", "acos"}, - {"a_cos_grad", "acos_grad"}}; + {"a_cos_grad", "acos_grad"}, + {"histogram_fixed_width", "histogram_fixed_width_d"}, + {"broadcast_to", "broadcast_to_d"}, + {"inplace_update", "inplace_update_d"}, + {"matrix_diag", "matrix_diag_d"}, + {"matrix_diag_part", "matrix_diag_part_d"}, + {"matrix_set_diag", "matrix_set_diag_d"}}; void TbeAdapter::NormalizeFuncName(std::string *func_name) { if (func_name == nullptr) { @@ -138,7 +161,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) { *func_name = name_tmp; auto iter = tbe_func_adapter_map.find(*func_name); if (iter != tbe_func_adapter_map.end()) { - MS_LOG(INFO) << "map actual op from me " << func_name << "to tbe op" << iter->second; + MS_LOG(INFO) << "map actual op from me " << *func_name << " to tbe op" << iter->second; *func_name = iter->second; } } @@ -176,6 +199,18 @@ void TbeAdapter::InputOrderPass(const std::string &op_name, std::vectorpush_back(inputs_list[i]); } + } else if (op_name == "ApplyCenteredRMSProp") { + // Parameter order of ApplyCenteredRMSProp's TBE implementation is different from python API, so map + // TBE parameter to correspond python API parameter by latter's index using hardcode + inputs_json->push_back(inputs_list[0]); + inputs_json->push_back(inputs_list[1]); + inputs_json->push_back(inputs_list[2]); + inputs_json->push_back(inputs_list[3]); + inputs_json->push_back(inputs_list[5]); + inputs_json->push_back(inputs_list[6]); + inputs_json->push_back(inputs_list[7]); + inputs_json->push_back(inputs_list[8]); + inputs_json->push_back(inputs_list[4]); } else { inputs_json->push_back(inputs_list[1]); inputs_json->push_back(inputs_list[0]); @@ -316,10 +351,10 @@ 
static int TypeStrToDstType(const std::string &type_str) { ret = 4; } else if (type_str == "UInt64") { ret = 10; - } else if (type_str == "Bool_") { + } else if (type_str == "Bool") { ret = 12; } else { - MS_EXCEPTION(ArgumentError) << "type str is invailed: " << type_str; + MS_LOG(INFO) << "Error type str is invailed: " << type_str; } return ret; } diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.h b/mindspore/ccsrc/kernel/tbe/tbe_adapter.h index 0208d6c6a6..51c4cfd777 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.h +++ b/mindspore/ccsrc/kernel/tbe/tbe_adapter.h @@ -27,7 +27,7 @@ // the TBE back-end operator implementation difference namespace mindspore { namespace kernel { -enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED }; +enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE }; namespace tbe { using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector> &op_info_attrs, nlohmann::json *attrs_json); diff --git a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc b/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc index 1159bd888d..90c5557253 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc @@ -45,13 +45,13 @@ const std::map type_id_str_maps = { {TypeId::kNumberTypeInt64, "int64"}, {TypeId::kNumberTypeUInt, "uint"}, {TypeId::kNumberTypeUInt8, "uint8"}, {TypeId::kNumberTypeUInt16, "uint16"}, {TypeId::kNumberTypeUInt32, "uint32"}, {TypeId::kNumberTypeUInt64, "uint64"}, - {TypeId::kNumberTypeBool, "bool"}, + {TypeId::kNumberTypeBool, "int8"}, }; const std::map type_str_maps = { {"Float32", "float32"}, {"Float16", "float16"}, {"Int8", "int8"}, {"Int16", "int16"}, {"UInt16", "uint16"}, {"UInt8", "uint8"}, {"Int32", "int32"}, {"UInt32", "uint32"}, - {"Int64", "int64"}, {"UInt64", "uint64"}, {"Bool_", "int8"}, {"Float64", "float64"}, + {"Int64", "int64"}, {"UInt64", "uint64"}, {"Bool", "int8"}, 
{"Float64", "float64"}, }; const std::unordered_map type_nbyte_maps = { @@ -63,7 +63,7 @@ const std::unordered_map type_nbyte_maps = { const std::unordered_map fusion_type_maps = { {"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE}, - {"SEGMENT", FusionType::SEGMENT}, {"OPAQUE", FusionType::OPAQUE}, + {"SEGMENT", FusionType::SEGMENT}, {"DYNAMIC", FusionType::DYNAMIC}, {"OPAQUE", FusionType::OPAQUE}, }; TypeId DtypeToTypeId(const std::string &dtypes) { @@ -74,18 +74,10 @@ TypeId DtypeToTypeId(const std::string &dtypes) { return iter->second; } -std::string DtypeToString(const std::string &dtypes) { - auto iter = type_str_maps.find(dtypes); - if (iter == type_str_maps.end()) { - MS_LOG(EXCEPTION) << "Illegal input dtype: " << dtypes; - } - return iter->second; -} - std::string TypeIdToString(TypeId type_id) { auto iter = type_id_str_maps.find(type_id); if (iter == type_id_str_maps.end()) { - MS_LOG(EXCEPTION) << "Illegal input dtype." << TypeIdLabel(type_id); + MS_LOG(EXCEPTION) << "Illegal input dtype: " << TypeIdLabel(type_id); } return iter->second; } @@ -101,7 +93,7 @@ size_t GetDtypeNbyte(const std::string &dtypes) { FusionType GetFusionType(const std::string &pattern) { auto iter = fusion_type_maps.find(pattern); if (iter == fusion_type_maps.end()) { - MS_LOG(DEBUG) << "Illegal fusion pattern: " << pattern; + MS_LOG(INFO) << "Illegal fusion pattern: " << pattern; return UNKNOWN_FUSION_TYPE; } return iter->second; @@ -115,7 +107,7 @@ std::string GetProcessor(const AnfNodePtr &anf_node) { device = kProcessorAiCore; break; default: - MS_LOG(DEBUG) << "Unknown processor type." << anf_node->fullname_with_scope(); + MS_LOG(INFO) << "Unknown processor type." 
<< anf_node->fullname_with_scope(); break; } return device; diff --git a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.h b/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.h index 9b9b3770df..2c8d3008b9 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.h +++ b/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.h @@ -28,8 +28,6 @@ namespace tbe { constexpr auto kProcessorAiCore = "aicore"; TypeId DtypeToTypeId(const std::string &dtypes); -std::string DtypeToString(const std::string &dtypes); - std::string TypeIdToString(TypeId type_id); size_t GetDtypeNbyte(const std::string &dtypes); diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc index bd5b0d6323..76df819043 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc @@ -15,15 +15,12 @@ */ #include "kernel/tbe/tbe_kernel_build.h" - #include #include #include -#include - #include "operator/ops.h" +#include "parallel/ops_info/ops_utils.h" #include "session/anf_runtime_algorithm.h" -#include "kernel/tbe/tbe_kernel_mod.h" #include "kernel/tbe/tbe_adapter.h" #include "kernel/tbe/tbe_python_funcs.h" #include "kernel/tbe/tbe_convert_utils.h" @@ -37,6 +34,43 @@ constexpr auto kFusionOpList = "op_list"; constexpr auto kFusionKernelNamePrfix = "te_fusion"; constexpr auto kOptional = "optional_"; constexpr auto kOpFormat_FRACTAL_Z = "FRACTAL_Z"; +constexpr auto kPlatform = "platform"; +constexpr auto kPlatTBE = "TBE"; +constexpr auto kGenModel = "gen_model"; +constexpr auto kSingle = "single"; +constexpr auto kImplPath = "impl_path"; +constexpr auto kJInputs = "inputs"; +constexpr auto kJOutputs = "outputs"; +constexpr auto kJAttrs = "attrs"; +constexpr auto kJKernelName = "kernel_name"; +constexpr auto kJOpInfo = "op_info"; +constexpr auto kJDtype = "dtype"; +constexpr auto kJtype = "type"; +constexpr auto kJName = "name"; +constexpr auto kJOriShape = "ori_shape"; +constexpr auto kJOriFormat = "ori_format"; 
+constexpr auto kJShape = "shape"; +constexpr auto kJFormat = "format"; +constexpr auto kJValid = "valid"; +constexpr auto kJParamType = "param_type"; +constexpr auto kParamDynamic = "dynamic"; +constexpr auto kParamRequred = "required"; +constexpr auto kJDataType = "data_type"; +constexpr auto kJOutputIndex = "output_index"; +constexpr auto kJOutputDesc = "output_desc"; +constexpr auto kJInputDesc = "input_desc"; +constexpr auto kVTypeInt = "int"; +constexpr auto kVTypeStr = "str"; +constexpr auto kVTypeBool = "bool"; +constexpr auto kVTypeFloat = "float"; +constexpr auto kVTypeListInt = "listInt"; +constexpr auto kVTypeInt32 = "Int32"; +constexpr auto kVTypeListUInt64 = "listUInt64"; +constexpr auto kVTypeListFloat = "listFloat"; +constexpr auto kVTypeListListInt = "listListInt"; +constexpr auto kJValue = "value"; +constexpr auto kJDynIndex = "dyn_index"; +constexpr auto kJFuncName = "func_name"; std::string NormalizeFullScopeName(const string &full_scope_name) { // exp:Default/ReLU-op0 -->Default_ReLU_op0 @@ -46,51 +80,51 @@ std::string NormalizeFullScopeName(const string &full_scope_name) { return normal_ret; } -bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const shared_ptr &anf_node, +bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr &anf_node, nlohmann::json *kernel_json) { MS_EXCEPTION_IF_NULL(anf_node); MS_EXCEPTION_IF_NULL(kernel_json); std::string op_name = AnfAlgo::GetCNodeName(anf_node); auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kTBE); MS_EXCEPTION_IF_NULL(op_info_ptr); - (*kernel_json)["platform"] = "TBE"; - (*kernel_json)["gen_model"] = "single"; - (*kernel_json)["impl_path"] = op_info_ptr->impl_path(); + (*kernel_json)[kPlatform] = kPlatTBE; + (*kernel_json)[kGenModel] = kSingle; + (*kernel_json)[kImplPath] = op_info_ptr->impl_path(); nlohmann::json op_info_json; if (op_info_ptr->impl_path().empty()) { tbe::TbeAdapter::NormalizeFuncName(&op_name); } else { op_name = op_info_ptr->kernel_name(); 
} - op_info_json["name"] = op_name; + op_info_json[kJName] = op_name; // generate inputs json nlohmann::json inputs_json; if (!GenTbeInputsJson(anf_node, op_info_ptr, &inputs_json)) { MS_LOG(ERROR) << "Anf Node [" << op_name << "] generate inputs json failed"; return false; } - op_info_json["inputs"] = inputs_json; + op_info_json[kJInputs] = inputs_json; // generate outputs json nlohmann::json outputs_json; if (!GenTbeOutputsJson(anf_node, op_info_ptr, &outputs_json)) { MS_LOG(ERROR) << "Anf Node [" << op_name << "] generate outputs json failed"; return false; } - op_info_json["outputs"] = outputs_json; + op_info_json[kJOutputs] = outputs_json; // generate attrs json nlohmann::json attrs_json; (void)GenTbeAttrJson(anf_node, op_info_ptr, &attrs_json); - op_info_json["attrs"] = attrs_json; + op_info_json[kJAttrs] = attrs_json; std::string json_str = op_info_json.dump(); size_t hash_id = std::hash()(json_str); json_name_ = op_name + "_" + std::to_string(hash_id); json_info_ = json_str; if (creater_type_ == PREBUILD) { - op_info_json["kernel_name"] = NormalizeFullScopeName(anf_node->fullname_with_scope()); + op_info_json[kJKernelName] = NormalizeFullScopeName(anf_node->fullname_with_scope()); } else { - op_info_json["kernel_name"] = json_name_; + op_info_json[kJKernelName] = json_name_; } - (*kernel_json)["op_info"] = op_info_json; + (*kernel_json)[kJOpInfo] = op_info_json; if (creater_type_ == SINGLE_BUILD) { TbeUtils::SaveJsonInfo(json_name_, json_info_); } @@ -101,9 +135,10 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const shared_ptr &anf_node, size_t real_input_index, bool value, - const shared_ptr &input_ptr, const string &op_input_name, - size_t input_i, vector *input_list) { +bool TbeKernelJsonCreator::GenInputDescJson(const std::shared_ptr &anf_node, size_t real_input_index, + bool value, const std::shared_ptr &input_ptr, + const string &op_input_name, size_t input_i, + std::vector *input_list) { MS_EXCEPTION_IF_NULL(anf_node); 
MS_EXCEPTION_IF_NULL(input_ptr); MS_EXCEPTION_IF_NULL(input_list); @@ -111,51 +146,30 @@ bool TbeKernelJsonCreator::GenInputDescJson(const shared_ptr &anf_node, if (input_ptr->name() == "input_indices" && op_name == kTopKOpName) { TbeAdapter::GenTopKV2IndicesTensorInfo(anf_node, real_input_index, input_list, creater_type_); } else { - // dtype : float16 - auto tensor_dtype = - std::make_shared(TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, real_input_index))); - MS_EXCEPTION_IF_NULL(tensor_dtype); - std::string dtype = tensor_dtype->element()->ToString(); - dtype = tbe::DtypeToString(dtype); - - // format - std::string format = AnfAlgo::GetInputFormat(anf_node, real_input_index); - if (format == kOpFormat_DEFAULT) { - format = kOpFormat_NCHW; - } else if (format == kOpFormat_FRAC_Z) { - format = kOpFormat_FRACTAL_Z; - } - - nlohmann::json input_desc_json; - input_desc_json["dtype"] = dtype; - input_desc_json["name"] = op_input_name + std::to_string(input_i); + auto dtype = GetDeviceInputType(anf_node, real_input_index); + auto format = GetDeviceInputFormat(anf_node, real_input_index); + auto shape = GetDeviceInputShape(anf_node, real_input_index); auto ori_shape = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, real_input_index); if (ori_shape.empty()) { ori_shape.emplace_back(1); } - input_desc_json["ori_shape"] = ori_shape; - input_desc_json["ori_format"] = kOpFormat_NCHW; - auto shape = AnfAlgo::GetInputDeviceShape(anf_node, real_input_index); - if (shape.empty()) { - shape.emplace_back(1); - } - if (creater_type_ == OP_SELECT_FORMAT || creater_type_ == CHECK_SUPPORTED) { - input_desc_json["shape"] = ori_shape; - input_desc_json["format"] = kOpFormat_NCHW; - } else { - input_desc_json["shape"] = shape; - input_desc_json["format"] = format; - } - input_desc_json["valid"] = value; - input_desc_json["param_type"] = input_ptr->param_type(); + nlohmann::json input_desc_json; + input_desc_json[kJDtype] = dtype; + input_desc_json[kJName] = op_input_name + 
std::to_string(input_i); + input_desc_json[kJOriShape] = ori_shape; + input_desc_json[kJOriFormat] = kOpFormat_NCHW; + input_desc_json[kJShape] = shape; + input_desc_json[kJFormat] = format; + input_desc_json[kJValid] = value; + input_desc_json[kJParamType] = input_ptr->param_type(); input_list->emplace_back(input_desc_json); } return true; } -bool TbeKernelJsonCreator::GenInputList(const shared_ptr &anf_node, size_t input_tensor_num, - const shared_ptr &input_ptr, size_t *real_input_index, - string *op_input_name, vector *input_list) { +bool TbeKernelJsonCreator::GenInputList(const std::shared_ptr &anf_node, size_t input_tensor_num, + const std::shared_ptr &input_ptr, size_t *real_input_index, + string *op_input_name, std::vector *input_list) { MS_EXCEPTION_IF_NULL(anf_node); MS_EXCEPTION_IF_NULL(input_ptr); MS_EXCEPTION_IF_NULL(real_input_index); @@ -170,8 +184,8 @@ bool TbeKernelJsonCreator::GenInputList(const shared_ptr &anf_node, siz if (input_ptr->param_type() == "optional") { *op_input_name = input_ptr->name() + "_optional_"; nlohmann::json input_desc_json; - input_desc_json["valid"] = false; - input_desc_json["name"] = *op_input_name + std::to_string(*real_input_index); + input_desc_json[kJValid] = false; + input_desc_json[kJName] = *op_input_name + std::to_string(*real_input_index); input_list->emplace_back(input_desc_json); continue; } @@ -200,7 +214,7 @@ bool TbeKernelJsonCreator::GenInputList(const shared_ptr &anf_node, siz return true; } -bool GetInputNameAndRealNum(const std::shared_ptr &anf_node, const shared_ptr &input_ptr, +bool GetInputNameAndRealNum(const std::shared_ptr &anf_node, const std::shared_ptr &input_ptr, size_t *dyn_input_index, size_t *input_num, std::string *op_input_name) { MS_EXCEPTION_IF_NULL(anf_node); MS_EXCEPTION_IF_NULL(input_ptr); @@ -214,7 +228,7 @@ bool GetInputNameAndRealNum(const std::shared_ptr &anf_node, const shar dyn_input_sizes = GetValue>(primitive->GetAttr(kAttrDynInputSizes)); } - if (input_ptr->param_type() == 
"dynamic") { + if (input_ptr->param_type() == kParamDynamic) { if (*dyn_input_index >= dyn_input_sizes.size()) { MS_LOG(ERROR) << "dyn input index" << *dyn_input_index << "is over dyn input num" << dyn_input_sizes.size(); return false; @@ -280,9 +294,9 @@ bool TbeKernelJsonCreator::GenTbeOutputsJson(const std::shared_ptr &anf return GenOutputDescJson(anf_node, outputs_ptr, outputs_json); } -bool TbeKernelJsonCreator::GenOutputDescJson(const shared_ptr &anf_node, - const vector> &outputs_ptr, - nlohmann::json *outputs_json) { +bool TbeKernelJsonCreator::GenOutputDescJson( + const std::shared_ptr &anf_node, + const std::vector> &outputs_ptr, nlohmann::json *outputs_json) { MS_EXCEPTION_IF_NULL(outputs_json); size_t output_idx = 0; auto op_name = AnfAlgo::GetCNodeName(anf_node); @@ -290,9 +304,9 @@ bool TbeKernelJsonCreator::GenOutputDescJson(const shared_ptrparam_type() == "required") { + if (output_ptr->param_type() == kParamRequred) { output_obj_num = 1; - } else if (output_ptr->param_type() == "dynamic") { + } else if (output_ptr->param_type() == kParamDynamic) { if (outputs_ptr.size() > 1) { MS_LOG(ERROR) << "Dynamic output is unsupported multi output!"; return false; @@ -303,8 +317,8 @@ bool TbeKernelJsonCreator::GenOutputDescJson(const shared_ptrname() << " is optional, output is none."; std::vector output_list; nlohmann::json output_obj; - output_obj["name"] = output_ptr->name(); - output_obj["valid"] = false; + output_obj[kJName] = output_ptr->name(); + output_obj[kJValid] = false; output_list.emplace_back(output_obj); (*outputs_json).push_back(output_list); continue; @@ -319,46 +333,28 @@ bool TbeKernelJsonCreator::GenOutputDescJson(const shared_ptr &anf_node, const size_t &output_obj_num, - const shared_ptr &output_ptr, size_t *output_idx, - vector *output_list) { +void TbeKernelJsonCreator::GenOutputList(const std::shared_ptr &anf_node, const size_t &output_obj_num, + const std::shared_ptr &output_ptr, size_t *output_idx, + std::vector *output_list) { 
MS_EXCEPTION_IF_NULL(output_idx); MS_EXCEPTION_IF_NULL(output_list); for (size_t i = 0; i < output_obj_num; i++) { - nlohmann::json output_obj; - auto type_ptr = std::make_shared(TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, *output_idx))); - std::string dtype = type_ptr->element()->ToString(); - dtype = tbe::DtypeToString(dtype); - std::string format = AnfAlgo::GetOutputFormat(anf_node, *output_idx); - if (format == kOpFormat_DEFAULT) { - format = kOpFormat_NCHW; - } else if (format == kOpFormat_FRAC_Z) { - format = kOpFormat_FRACTAL_Z; - } - std::vector ori_shape; - if (AnfAlgo::GetOutputInferShape(anf_node, *output_idx).empty()) { + auto dtype = GetDeviceOutputType(anf_node, *output_idx); + auto format = GetDeviceOutputFormat(anf_node, *output_idx); + auto shape = GetDeviceOutputShape(anf_node, *output_idx); + std::vector ori_shape = AnfAlgo::GetOutputInferShape(anf_node, *output_idx); + if (ori_shape.empty()) { ori_shape.emplace_back(1); - } else { - ori_shape = AnfAlgo::GetOutputInferShape(anf_node, *output_idx); } - output_obj["dtype"] = dtype; - auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, *output_idx); - if (shape.empty()) { - shape.emplace_back(1); - } - if (creater_type_ == OP_SELECT_FORMAT || creater_type_ == CHECK_SUPPORTED) { - output_obj["shape"] = ori_shape; - output_obj["format"] = kOpFormat_NCHW; - } else { - output_obj["shape"] = shape; - output_obj["format"] = format; - } - output_obj["ori_shape"] = ori_shape; - output_obj["ori_format"] = kOpFormat_NCHW; - output_obj["name"] = output_ptr->name(); - output_obj["valid"] = true; - output_obj["param_type"] = output_ptr->param_type(); - + nlohmann::json output_obj; + output_obj[kJDtype] = dtype; + output_obj[kJShape] = shape; + output_obj[kJFormat] = format; + output_obj[kJOriShape] = ori_shape; + output_obj[kJOriFormat] = kOpFormat_NCHW; + output_obj[kJName] = output_ptr->name(); + output_obj[kJValid] = true; + output_obj[kJParamType] = output_ptr->param_type(); 
output_list->emplace_back(output_obj); (*output_idx)++; } @@ -379,24 +375,24 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr &anf_no for (const auto &attr_ptr : attrs_ptr) { std::string attr_name = attr_ptr->name(); nlohmann::json attr_obj; - attr_obj["name"] = attr_name; - if (op_name == "LayerNorm" && attr_obj["name"] == "epsilon" && creater_type_ == OP_SELECT_FORMAT) { + attr_obj[kJName] = attr_name; + if (op_name == parallel::LAYER_NORM && attr_obj[kJName] == "epsilon" && creater_type_ == OP_SELECT_FORMAT) { continue; } if (primitive->GetAttr(attr_name) != nullptr) { auto value = primitive->GetAttr(attr_name); std::string type = attr_ptr->type(); ParseAttrValue(type, value, &attr_obj); - attr_obj["valid"] = true; + attr_obj[kJValid] = true; } else { if (op_info->impl_path().empty()) { - attr_obj["valid"] = false; + attr_obj[kJValid] = false; } else { - if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD) { + if (attr_ptr->param_type() == kParamRequred && creater_type_ == SINGLE_BUILD) { MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name << " is required, but not set."; } else { - attr_obj["valid"] = false; + attr_obj[kJValid] = false; } } } @@ -409,53 +405,134 @@ void TbeKernelJsonCreator::ParseAttrValue(const std::string &type, const mindspo nlohmann::json *attr_obj) { MS_EXCEPTION_IF_NULL(value); MS_EXCEPTION_IF_NULL(attr_obj); - if (type == "int") { + if (type == kVTypeInt) { auto attr_value = GetValue(value); - (*attr_obj)["value"] = attr_value; - } else if (type == "str") { + (*attr_obj)[kJValue] = attr_value; + } else if (type == kVTypeStr) { auto attr_value = GetValue(value); if (attr_value == kOpFormat_FRAC_Z) { attr_value = kOpFormat_FRACTAL_Z; } - (*attr_obj)["value"] = attr_value; - } else if (type == "bool") { + (*attr_obj)[kJValue] = attr_value; + } else if (type == kVTypeBool) { auto attr_value = GetValue(value); - (*attr_obj)["value"] = attr_value; - } else if (type == 
"float") { + (*attr_obj)[kJValue] = attr_value; + } else if (type == kVTypeFloat) { auto attr_value = GetValue(value); - (*attr_obj)["value"] = attr_value; - } else if (type == "listInt") { + (*attr_obj)[kJValue] = attr_value; + } else if (type == kVTypeListInt) { std::vector attr_value; auto value_type = value->type(); MS_EXCEPTION_IF_NULL(value_type); auto value_type_str = value_type->ToString(); - if (value_type_str == "Int32") { + if (value_type_str == kVTypeInt32) { int data = GetValue(value); attr_value.push_back(data); } else { attr_value = GetValue>(value); } - (*attr_obj)["value"] = attr_value; - } else if (type == "listFloat") { + (*attr_obj)[kJValue] = attr_value; + } else if (type == kVTypeListFloat) { std::vector attr_value; auto value_type = value->type(); MS_EXCEPTION_IF_NULL(value_type); auto value_type_str = value_type->ToString(); - if (value_type_str == "float") { + if (value_type_str == kVTypeFloat) { auto data = GetValue(value); attr_value.push_back(data); } else { attr_value = GetValue>(value); } - (*attr_obj)["value"] = attr_value; - } else if (type == "listListInt") { + (*attr_obj)[kJValue] = attr_value; + } else if (type == kVTypeListUInt64) { + auto attr_value = GetValue>(value); + (*attr_obj)[kJValue] = attr_value; + } else if (type == kVTypeListListInt) { auto attr_value = GetValue>>(value); - (*attr_obj)["value"] = attr_value; + (*attr_obj)[kJValue] = attr_value; } else { MS_LOG(EXCEPTION) << "type: " << type << "not support"; } } +std::vector TbeKernelJsonCreator::GetDeviceInputShape(const AnfNodePtr &anf_node, size_t real_index) const { + MS_EXCEPTION_IF_NULL(anf_node); + std::vector shape; + if (creater_type_ == OP_SELECT_FORMAT || creater_type_ == CHECK_SUPPORTED) { + shape = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, real_index); + } else { + shape = AnfAlgo::GetInputDeviceShape(anf_node, real_index); + } + if (shape.empty()) { + shape.emplace_back(1); + } + return shape; +} + +std::string 
TbeKernelJsonCreator::GetDeviceInputType(const AnfNodePtr &anf_node, size_t real_index) const { + MS_EXCEPTION_IF_NULL(anf_node); + TypeId type_id; + if (creater_type_ == OP_SELECT_FORMAT) { + type_id = AnfAlgo::GetPrevNodeOutputInferDataType(anf_node, real_index); + } else { + type_id = AnfAlgo::GetInputDeviceDataType(anf_node, real_index); + } + return tbe::TypeIdToString(type_id); +} + +std::string TbeKernelJsonCreator::GetDeviceInputFormat(const AnfNodePtr &anf_node, size_t real_index) const { + MS_EXCEPTION_IF_NULL(anf_node); + std::string format = kOpFormat_NCHW; + if (creater_type_ != OP_SELECT_FORMAT && creater_type_ != CHECK_SUPPORTED) { + format = AnfAlgo::GetInputFormat(anf_node, real_index); + if (format == kOpFormat_FRAC_Z) { + format = kOpFormat_FRACTAL_Z; + } else if (format == kOpFormat_DEFAULT) { + format = kOpFormat_NCHW; + } + } + return format; +} + +std::vector TbeKernelJsonCreator::GetDeviceOutputShape(const AnfNodePtr &anf_node, size_t real_index) const { + MS_EXCEPTION_IF_NULL(anf_node); + std::vector shape; + if (creater_type_ == OP_SELECT_FORMAT || creater_type_ == CHECK_SUPPORTED) { + shape = AnfAlgo::GetOutputInferShape(anf_node, real_index); + } else { + shape = AnfAlgo::GetOutputDeviceShape(anf_node, real_index); + } + if (shape.empty()) { + shape.emplace_back(1); + } + return shape; +} + +std::string TbeKernelJsonCreator::GetDeviceOutputType(const AnfNodePtr &anf_node, size_t real_index) const { + MS_EXCEPTION_IF_NULL(anf_node); + TypeId type_id; + if (creater_type_ == OP_SELECT_FORMAT) { + type_id = AnfAlgo::GetOutputInferDataType(anf_node, real_index); + } else { + type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, real_index); + } + return tbe::TypeIdToString(type_id); +} + +std::string TbeKernelJsonCreator::GetDeviceOutputFormat(const AnfNodePtr &anf_node, size_t real_index) const { + MS_EXCEPTION_IF_NULL(anf_node); + std::string format = kOpFormat_NCHW; + if (creater_type_ != OP_SELECT_FORMAT && creater_type_ != 
CHECK_SUPPORTED) { + format = AnfAlgo::GetOutputFormat(anf_node, real_index); + if (format == kOpFormat_FRAC_Z) { + format = kOpFormat_FRACTAL_Z; + } else if (format == kOpFormat_DEFAULT) { + format = kOpFormat_NCHW; + } + } + return format; +} + bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector *input_size_list, std::vector *output_size_list) { if (input_size_list == nullptr || output_size_list == nullptr) { @@ -464,35 +541,35 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vectorclear(); output_size_list->clear(); - for (size_t i = 0; i < kernel_json["op_info"]["inputs"].size(); i++) { - for (size_t m = 0; m < kernel_json["op_info"]["inputs"][i].size(); m++) { + for (size_t i = 0; i < kernel_json[kJOpInfo][kJInputs].size(); i++) { + for (size_t m = 0; m < kernel_json[kJOpInfo][kJInputs][i].size(); m++) { size_t size_i = 1; - if (kernel_json["op_info"]["inputs"][i][m]["valid"] == false) { - std::string input_name = kernel_json["op_info"]["inputs"][i][m]["name"]; + if (kernel_json[kJOpInfo][kJInputs][i][m][kJValid] == false) { + std::string input_name = kernel_json[kJOpInfo][kJInputs][i][m][kJName]; MS_LOG(INFO) << "Input name:" << input_name << "is optional, valid is false."; continue; } - for (const auto &j : kernel_json["op_info"]["inputs"][i][m]["shape"]) { + for (const auto &j : kernel_json[kJOpInfo][kJInputs][i][m][kJShape]) { size_i *= static_cast(j); } - std::string dtype = kernel_json["op_info"]["inputs"][i][m]["dtype"]; + std::string dtype = kernel_json[kJOpInfo][kJInputs][i][m][kJDtype]; size_t nbyte = tbe::GetDtypeNbyte(dtype); size_i *= nbyte; input_size_list->push_back(size_i); } } - for (size_t i = 0; i < kernel_json["op_info"]["outputs"].size(); i++) { - for (size_t m = 0; m < kernel_json["op_info"]["outputs"][i].size(); m++) { + for (size_t i = 0; i < kernel_json[kJOpInfo][kJOutputs].size(); i++) { + for (size_t m = 0; m < kernel_json[kJOpInfo][kJOutputs][i].size(); m++) { size_t size_i = 1; - 
if (kernel_json["op_info"]["outputs"][i][m]["valid"] == false) { - std::string output_name = kernel_json["op_info"]["outputs"][i][m]["name"]; + if (kernel_json[kJOpInfo][kJOutputs][i][m][kJValid] == false) { + std::string output_name = kernel_json[kJOpInfo][kJOutputs][i][m][kJName]; MS_LOG(INFO) << "Output name:" << output_name << " is optional, valid is false."; continue; } - for (const auto &j : kernel_json["op_info"]["outputs"][i][m]["shape"]) { + for (const auto &j : kernel_json[kJOpInfo][kJOutputs][i][m][kJShape]) { size_i *= static_cast(j); } - std::string dtype = kernel_json["op_info"]["outputs"][i][m]["dtype"]; + std::string dtype = kernel_json[kJOpInfo][kJOutputs][i][m][kJDtype]; size_t nbyte = tbe::GetDtypeNbyte(dtype); size_i *= nbyte; output_size_list->push_back(size_i); @@ -501,9 +578,9 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector &input_nodes, - const vector &compute_nodes, nlohmann::json *fusion_str, - std::string *fusion_kernel) { +bool TbeKernelBuild::GenFusionScopeJson(const std::vector &input_nodes, + const std::vector &compute_nodes, + nlohmann::json *fusion_str, std::string *fusion_kernel) { MS_EXCEPTION_IF_NULL(fusion_str); MS_EXCEPTION_IF_NULL(fusion_kernel); // get input layer info @@ -513,7 +590,7 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector &inp return false; } // gen fusion scopre_op jsom - vector compute_list; + std::vector compute_list; (*fusion_kernel) = kFusionKernelNamePrfix; // index: fusion build option input record, next one from 0 static size_t index = 0; @@ -526,7 +603,7 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector &inp } index = 0; // gen data input json - vector data_list; + std::vector data_list; for (const auto &layer : input_layers) { for (const auto &data_input : layer) { nlohmann::json data_str; @@ -549,51 +626,51 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr &anf_ if (node_out_idx > 0) { output_desc_name = output_desc_name + "_" + 
std::to_string(node_out_idx); } - (*output_desc)["name"] = NormalizeFullScopeName(output_desc_name); + (*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name); auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx); - (*output_desc)["data_type"] = tbe::TypeIdToString(type_id); + (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id); auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, node_out_idx); if (ori_shape.empty()) { ori_shape.emplace_back(1); } - (*output_desc)["ori_shape"] = ori_shape; + (*output_desc)[kJOriShape] = ori_shape; auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx); if (shape.empty()) { shape.emplace_back(1); } - (*output_desc)["shape"] = shape; + (*output_desc)[kJShape] = shape; auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx); if (format == kOpFormat_DEFAULT) { format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND; } - (*output_desc)["format"] = format; - (*output_desc)["ori_format"] = kOpFormat_NCHW; - (*output_desc)["output_index"] = desc_output_idx; + (*output_desc)[kJFormat] = format; + (*output_desc)[kJOriFormat] = kOpFormat_NCHW; + (*output_desc)[kJOutputIndex] = desc_output_idx; if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) { std::vector spec_shape = {}; spec_shape.emplace_back(shape[0]); spec_shape.emplace_back(shape[1]); spec_shape.emplace_back(shape[2] * shape[3]); spec_shape.emplace_back(shape[4]); - (*output_desc)["shape"] = spec_shape; - } else if (fusion_data_type == kFusionReLUGradV2 && (*output_desc)["data_type"] == "uint8") { + (*output_desc)[kJShape] = spec_shape; + } else if (fusion_data_type == kFusionReLUGradV2) { std::vector spec_shape = {}; spec_shape.emplace_back(shape[0]); spec_shape.emplace_back(shape[1]); spec_shape.emplace_back(shape[2] * shape[3]); spec_shape.emplace_back(16); - (*output_desc)["shape"] = spec_shape; - (*output_desc)["data_type"] = "bool"; + (*output_desc)[kJShape] = spec_shape; + 
(*output_desc)[kJDataType] = kVTypeBool; } } -void TbeKernelBuild::GenReusedOutputDesc(const shared_ptr &anf_node, size_t index, +void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr &anf_node, size_t index, size_t output_index, nlohmann::json *output_desc) { std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index); - (*output_desc)["name"] = NormalizeFullScopeName(output_desc_name); - (*output_desc)["output_index"] = output_index; + (*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name); + (*output_desc)[kJOutputIndex] = output_index; std::vector shape; - (*output_desc)["shape"] = shape; + (*output_desc)[kJShape] = shape; } bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name, @@ -618,6 +695,8 @@ bool TbeKernelBuild::GetInputLayers(const std::vector &in const std::vector &compute_nodes, std::vector> *input_layers, std::map *spec_data_input) { + MS_EXCEPTION_IF_NULL(input_layers); + MS_EXCEPTION_IF_NULL(spec_data_input); auto result = std::find_if(compute_nodes.begin(), compute_nodes.end(), [](const auto &it) { auto op_name = AnfAlgo::GetCNodeName(it); return op_name == kConv2DBackpropInputOpName; @@ -673,10 +752,10 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptrfullname_with_scope() << " index:" << real_idx; - // "output_desc" + // kJOutputDesc nlohmann::json output_desc; GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type); output_desc_list.push_back(output_desc); - (*data_str)["name"] = NormalizeFullScopeName(real_node->fullname_with_scope()); + (*data_str)[kJName] = NormalizeFullScopeName(real_node->fullname_with_scope()); } - (*data_str)["output_desc"] = output_desc_list; - (*data_str)["type"] = "Data"; + (*data_str)[kJOutputDesc] = output_desc_list; + (*data_str)[kJtype] = "Data"; return true; } @@ -726,6 +805,7 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) { } size_t TbeKernelBuild::GetOptionalInput(const 
mindspore::CNodePtr &cnode, bool is_dynamic_input) { + MS_EXCEPTION_IF_NULL(cnode); if (is_dynamic_input) { return 0; } @@ -740,8 +820,8 @@ size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool i } std::string TbeKernelBuild::GetRealOpType(const std::string &origin_type) { - static std::map buffer_fussion_op_map = {{"DepthwiseConv2dNative", "DepthwiseConv2D"}, - {"TensorAdd", "Add"}}; + static std::map buffer_fussion_op_map = { + {parallel::DEPTHWISE_CONV2D_NATIVE, parallel::DEPTHWISE_CONV2D}, {parallel::TENSOR_ADD, parallel::ADD}}; string result = origin_type; auto iter = buffer_fussion_op_map.find(origin_type); if (iter != buffer_fussion_op_map.end()) { @@ -767,7 +847,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, GenDescJson(real_node, real_idx, real_idx, &input_desc); if (is_dynamic_input) { MS_LOG(INFO) << "node has dynamic input."; - input_desc["dyn_index"] = (i - 1); + input_desc[kJDynIndex] = (i - 1); } input_desc_list_tmp.emplace_back(input_desc); } @@ -776,7 +856,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, MS_LOG(INFO) << "node has optional input."; for (size_t i = 0; i < optional_num; ++i) { nlohmann::json optional_input_desc; - optional_input_desc["name"] = std::string(kOptional) + std::to_string(*index); + optional_input_desc[kJName] = std::string(kOptional) + std::to_string(*index); (*index)++; (*layer_iter)->emplace_back(nullptr); input_desc_list_tmp.emplace_back(optional_input_desc); @@ -802,6 +882,7 @@ std::vector TbeKernelBuild::GetDescOutputIndex(const std::vector &o bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode, std::vector *output_desc_list) { + MS_EXCEPTION_IF_NULL(output_desc_list); auto output_size = AnfAlgo::GetOutputTensorNum(cnode); if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) { auto output_used_nums = AnfAlgo::GetNodeAttr>(cnode, kAttrOutputUsedNum); @@ -844,22 +925,22 @@ bool 
TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_n // gen input desc std::vector input_desc_list; (void)GenFusionComputeInputJson(cnode, layer_iter, &input_desc_list, index); - (*compute_op_str)["input_desc"] = input_desc_list; + (*compute_op_str)[kJInputDesc] = input_desc_list; // gen output desc std::vector output_desc_list; if (!GenFusionComputeOutputJson(cnode, &output_desc_list)) { MS_LOG(INFO) << "Fusion Error: gen fusion output desc faild, node full name: " << cnode->fullname_with_scope(); return false; } - (*compute_op_str)["output_desc"] = output_desc_list; + (*compute_op_str)[kJOutputDesc] = output_desc_list; // gen others auto origin_type = AnfAlgo::GetCNodeName(cnode); // replace special op type for buffer fusion op auto type = GetRealOpType(origin_type); - (*compute_op_str)["type"] = type; + (*compute_op_str)[kJtype] = type; tbe::TbeAdapter::NormalizeFuncName(&type); - (*compute_op_str)["func_name"] = type; - (*compute_op_str)["name"] = NormalizeFullScopeName(cnode->fullname_with_scope()); + (*compute_op_str)[kJFuncName] = type; + (*compute_op_str)[kJName] = NormalizeFullScopeName(cnode->fullname_with_scope()); (void)(*fusion_kernel_name).append("_"); (void)(*fusion_kernel_name).append(type); return true; @@ -867,16 +948,17 @@ bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_n size_t TbeKernelBuild::GetIOSizeImpl(const nlohmann::json &desc) { size_t ret = 1; - for (const auto &shape_item : desc["shape"]) { + for (const auto &shape_item : desc[kJShape]) { ret *= static_cast(shape_item); } - std::string data_type = desc["data_type"]; + std::string data_type = desc[kJDataType]; size_t nbyte = tbe::GetDtypeNbyte(data_type); ret *= nbyte; return ret; } -bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vector &output_nodes, +bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, + const std::vector &output_nodes, std::vector *input_size_list, std::vector 
*output_size_list) { MS_EXCEPTION_IF_NULL(input_size_list); MS_EXCEPTION_IF_NULL(output_size_list); @@ -884,15 +966,15 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto output_size_list->clear(); for (const auto &op : fusion_op_list) { - if (op["type"] == "Data") { - const auto &data_output_desc = op["output_desc"]; + if (op[kJtype] == "Data") { + const auto &data_output_desc = op[kJOutputDesc]; for (const auto &data_output : data_output_desc) { - if (data_output["shape"] == "NULL") { + if (data_output[kJShape] == "NULL") { break; } auto ret = GetIOSizeImpl(data_output); input_size_list->push_back(ret); - MS_LOG(INFO) << "Fusion info: scope input name: " << op["name"] << ", size: " << ret; + MS_LOG(INFO) << "Fusion info: scope input name: " << op[kJName] << ", size: " << ret; } } } @@ -904,13 +986,13 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope()); MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx; for (const auto &op : fusion_op_list) { - if (op["name"] == normal_name) { - auto op_output_desces = op["output_desc"]; + if (op[kJName] == normal_name) { + auto op_output_desces = op[kJOutputDesc]; if (output_node != real_node) { // tuple_get item MS_LOG(INFO) << "output is a tuple getitem node"; auto output_desc = op_output_desces[real_idx]; - if (output_desc["shape"].empty()) { + if (output_desc[kJShape].empty()) { MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. 
real_index " << real_idx; return false; } @@ -919,7 +1001,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto MS_LOG(INFO) << "Fusion info: scope output index: " << real_idx << ", size: " << ret; } else { for (const auto &output_desc : op_output_desces) { - if (output_desc["shape"].empty()) { + if (output_desc[kJShape].empty()) { MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output"; continue; } diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h index 2ddab34d49..eef02efa87 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h @@ -93,7 +93,7 @@ class TbeKernelJsonCreator { nlohmann::json *outputs_json); bool GenTbeAttrJson(const std::shared_ptr &anf_node, const std::shared_ptr &op_info, nlohmann::json *attrs_json); - void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj); + static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj); bool GenInputDescJson(const std::shared_ptr &anf_node, size_t real_input_index, bool value, const std::shared_ptr &input_ptr, const string &op_input_name, size_t input_i, std::vector *input_list); @@ -105,6 +105,13 @@ class TbeKernelJsonCreator { void GenOutputList(const std::shared_ptr &anf_node, const size_t &output_obj_num, const std::shared_ptr &output_ptr, size_t *output_idx, std::vector *output_list); + std::vector GetDeviceInputShape(const AnfNodePtr &anf_node, size_t real_index) const; + std::string GetDeviceInputType(const AnfNodePtr &anf_node, size_t real_index) const; + std::string GetDeviceInputFormat(const AnfNodePtr &anf_node, size_t real_index) const; + std::vector GetDeviceOutputShape(const AnfNodePtr &anf_node, size_t real_index) const; + std::string GetDeviceOutputType(const AnfNodePtr &anf_node, size_t real_index) const; + std::string GetDeviceOutputFormat(const 
AnfNodePtr &anf_node, size_t real_index) const; + kCreaterType creater_type_; std::string json_name_; std::string json_info_; diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.cc index 577af45d59..79e5e0e109 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.cc @@ -42,6 +42,41 @@ constexpr auto kStartCompileOp = "start_compile_op"; constexpr auto kWaitOne = "wait_one"; constexpr auto kResetTaskInfo = "reset_task_info"; +bool TbeOpParallelPreBuild(const std::vector &anf_nodes) { + auto build_manger = std::make_shared(); + MS_EXCEPTION_IF_NULL(build_manger); + for (const auto &anf_node : anf_nodes) { + // gen kernel json + MS_EXCEPTION_IF_NULL(anf_node); + nlohmann::json kernel_json; + TbeKernelJsonCreator creator(OP_PRE_COMPILE); + if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) { + MS_LOG(ERROR) << "GenTbeSingleKernelJson failed"; + return false; + } + kernel_json["compile_type"] = "pre_build"; + // op build + auto task_id = build_manger->StartCompileOp(kernel_json); + build_manger->SavePreTaskInfo(task_id, anf_node); + } + while (!build_manger->IsAllPreTaskFinish()) { + int task_id = -1; + char *task_result = nullptr; + char *pre_build_result = nullptr; + auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result); + if (!ret) { + MS_EXCEPTION(ArgumentError) << "Pre Build Failed. 
wait one ret:" << ret << ", task id:" << task_id; + } + + if ((task_result != nullptr) && (strcmp(task_result, "Success") != 0)) { + MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result; + } + + build_manger->PreTaskFinishProcess(task_id, pre_build_result); + } + return true; +} + bool TbeOpParallelBuild(std::vector anf_nodes) { auto build_manger = std::make_shared(); MS_EXCEPTION_IF_NULL(build_manger); @@ -82,7 +117,8 @@ bool TbeOpParallelBuild(std::vector anf_nodes) { while (!build_manger->IsAllTaskFinish()) { int task_id = -1; char *task_result = nullptr; - auto ret = build_manger->WaitOne(&task_id, &task_result); + char *pre_build_result = nullptr; + auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result); if (!ret) { MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id; } @@ -116,7 +152,7 @@ int32_t ParallelBuildManager::StartCompileOp(const nlohmann::json &kernel_json) return task_id; } -bool ParallelBuildManager::WaitOne(int *task_id, char **task_result) const { +bool ParallelBuildManager::WaitOne(int *task_id, char **task_result, char **pre_build_result) const { MS_LOG(INFO) << "wait task start."; MS_EXCEPTION_IF_NULL(task_id); MS_EXCEPTION_IF_NULL(task_result); @@ -128,10 +164,15 @@ bool ParallelBuildManager::WaitOne(int *task_id, char **task_result) const { MS_EXCEPTION(ArgumentError) << "Failed to call function wait_one"; return false; } - (void)PyArg_ParseTuple(pRes, "is", task_id, task_result); + (void)PyArg_ParseTuple(pRes, "iss", task_id, task_result, pre_build_result); return true; } +void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) { + MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id; + pre_task_map_[task_id] = anf_node; +} + void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node, const std::string &json_name, const std::vector 
&input_size_list, const std::vector &output_size_list, int32_t scope_id) { @@ -150,11 +191,42 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod task_map_[task_id] = task_info; } +bool ParallelBuildManager::IsAllPreTaskFinish() const { + MS_LOG(INFO) << "wait pre build process task_num: " << pre_task_map_.size(); + return pre_task_map_.empty(); +} + bool ParallelBuildManager::IsAllTaskFinish() const { MS_LOG(INFO) << "wait process task_num: " << task_map_.size(); return task_map_.empty(); } +void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) { + auto task_iter = pre_task_map_.find(task_id); + if (task_iter == pre_task_map_.end()) { + MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id; + } + auto node = task_iter->second; + auto builder = + std::make_shared(AnfAlgo::GetSelectKernelBuildInfo(node)); + std::string start_flag = "fusion_pattern_start"; + std::string end_flag = "fusion_pattern_end"; + int start = pre_build_result.find(start_flag); + int end = pre_build_result.find(end_flag); + if (start != -1 && end != -1 && end >= start) { + std::string result = pre_build_result.substr(start + start_flag.size(), end - start - start_flag.size()); + if (result == "") { + (void)pre_task_map_.erase(task_iter); + return; + } + transform(result.begin(), result.end(), result.begin(), ::toupper); + FusionType fusion_type = tbe::GetFusionType(result); + builder->SetFusionType(fusion_type); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get()); + } + (void)pre_task_map_.erase(task_iter); +} + std::pair ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) { auto task_iter = task_map_.find(task_id); if (task_iter == task_map_.end()) { @@ -167,7 +239,7 @@ std::pair ParallelBuildManager::TaskFinishProcess(int32_t if (set_kernel_mod) { MS_EXCEPTION(ArgumentError) << "build kernel name:" << task_iter->second.json_name << " failed."; } else { - 
MS_LOG(DEBUG) << "fusion build kernel name:" << task_iter->second.json_name << "failed."; + MS_LOG(INFO) << "fusion build kernel name:" << task_iter->second.json_name << "failed."; auto ret = std::make_pair(task_iter->second.scope_id, nullptr); (void)task_map_.erase(task_iter); return ret; @@ -177,7 +249,7 @@ std::pair ParallelBuildManager::TaskFinishProcess(int32_t task_iter->second.output_size_list, kernel_pack); MS_EXCEPTION_IF_NULL(kernel_mod); if (set_kernel_mod) { - AnfAlgo ::SetKernelMod(kernel_mod, task_iter->second.node); + AnfAlgo::SetKernelMod(kernel_mod, task_iter->second.node); } auto ret = std::make_pair(task_iter->second.scope_id, kernel_mod); (void)task_map_.erase(task_iter); @@ -202,7 +274,7 @@ bool ParallelBuildManager::GenSameOpKernelMod() const { bool ret = SearchInCache(task_info.json_name, task_info.processor, task_info.input_size_list, task_info.output_size_list, task_info.node); if (!ret) { - MS_LOG(DEBUG) << "can't find " << task_info.json_name << " in cache."; + MS_LOG(INFO) << "can't find " << task_info.json_name << " in cache."; return false; } } diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h b/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h index 776aa0b1fc..c900baf036 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h @@ -26,6 +26,7 @@ #include namespace mindspore { namespace kernel { +bool TbeOpParallelPreBuild(const std::vector &anf_nodes); bool TbeOpParallelBuild(std::vector anf_nodes); struct KernelBuildTaskInfo { @@ -42,6 +43,7 @@ class ParallelBuildManager { ParallelBuildManager(); ~ParallelBuildManager(); int32_t StartCompileOp(const nlohmann::json &kernel_json) const; + void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node); void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name, const std::vector &input_size_list, const std::vector &output_size_list, int32_t scope_id = 0); @@ 
-52,8 +54,10 @@ class ParallelBuildManager { const std::vector &input_size_list, const std::vector &output_size_list, AnfNode *node) const; - bool WaitOne(int *task_id, char **task_result) const; + bool WaitOne(int *task_id, char **task_result, char **pre_build_result) const; + bool IsAllPreTaskFinish() const; bool IsAllTaskFinish() const; + void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result); std::pair TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true); KernelModPtr GenKernelMod(const string &json_name, const string &processor, const std::vector &input_size_list, const std::vector &output_size_list, @@ -62,6 +66,7 @@ class ParallelBuildManager { private: PyObject *tbe_parallel_compiler_; + std::map pre_task_map_; std::map task_map_; std::vector same_op_list_; }; diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc deleted file mode 100644 index aedb0b3eaf..0000000000 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc +++ /dev/null @@ -1,664 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "kernel/tbe/tbe_kernel_select.h" - -#include -#include -#include -#include - -#include "session/anf_runtime_algorithm.h" -#include "kernel/oplib/oplib.h" -#include "kernel/tbe/tbe_kernel_build.h" -#include "nlohmann/json.hpp" -#include "common/utils.h" -#include "utils/context/ms_context.h" -#include "kernel/tbe/tbe_python_funcs.h" -#include "pre_activate/common/helper.h" -#include "kernel/tbe/tbe_convert_utils.h" - -namespace mindspore { -namespace kernel { -constexpr auto kName = "name"; -constexpr auto kDtype = "dtype"; -constexpr auto kFormat = "format"; -constexpr auto kPrefixInput = "input"; -constexpr auto kPrefixOutput = "output"; -const std::map DYNAMIC_FORMAT_MAP = {{"NCHW", "DefaultFormat"}, - {"NHWC", "DefaultFormat"}, - {"ND", "DefaultFormat"}, - {"FRACTAL_Z", "FracZ"}, - {"NDHWC", "DefaultFormat"}}; -static const std::vector CHECK_SUPPORTED_OPTYPE{ - "MatMul", "BatchMatMul", "TopK", "InTopK", "Pack", "GatherNd", "UnsortedSegmentMinD", "UnsortedSegmentProdD", "Cast"}; - -bool CheckSupported(const AnfNodePtr &anf_node, const KernelBuildInfoPtr &select_kernel_build_info) { - MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(select_kernel_build_info); - - std::string op_name = AnfAlgo::GetCNodeName(anf_node); - auto iter = std::find(CHECK_SUPPORTED_OPTYPE.begin(), CHECK_SUPPORTED_OPTYPE.end(), op_name); - if (iter == CHECK_SUPPORTED_OPTYPE.end()) { - MS_LOG(DEBUG) << "Op " << op_name << "this op does not need to check op supported."; - return true; - } - - // replace kernel_info with current kernel info - auto ori_select_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(anf_node); - AnfAlgo::SetSelectKernelBuildInfo(select_kernel_build_info, anf_node.get()); - - nlohmann::json kernel_json; - TbeKernelJsonCreator creator(CHECK_SUPPORTED); - bool ret = creator.GenTbeSingleKernelJson(anf_node, &kernel_json); - if (!ret) { - MS_LOG(DEBUG) << "GenTbeSingleKernelJson failed"; - AnfAlgo::SetSelectKernelBuildInfo(ori_select_kernel_info, 
anf_node.get()); - return false; - } - - ret = TbePythonFuncs::CheckSupported(kernel_json); - AnfAlgo::SetSelectKernelBuildInfo(ori_select_kernel_info, anf_node.get()); - return ret; -} - -bool CheckJsonItemValidity(const nlohmann::json &json_obj, const std::string &key_name, - const std::vector &keys) { - if (!json_obj[key_name].is_object()) { - MS_LOG(DEBUG) << key_name << "is not an object!"; - return false; - } - for (auto key : keys) { - if (json_obj[key_name].find(key) == json_obj[key_name].end()) { - MS_LOG(DEBUG) << "Key" << key << "of " << key_name << " is not found!"; - return false; - } - } - return true; -} - -std::vector SplitStr(const std::string &string, const std::string &sep) { - std::vector result; - size_t start = 0; - size_t index = string.find(sep, start); - std::string substr; - while (index != std::string::npos) { - if (string.size() > start) { - substr = string.substr(start, index - start); - } - (void)substr.erase(0, substr.find_first_not_of(' ')); - (void)substr.erase(substr.find_last_not_of(' ') + 1); - auto iter = DYNAMIC_FORMAT_MAP.find(substr); - if (iter != DYNAMIC_FORMAT_MAP.end()) { - substr = iter->second; - } - result.push_back(substr); - start = index + sep.size(); - index = string.find(sep, start); - } - - if (string.size() > start) { - substr = string.substr(start); - } - (void)substr.erase(0, substr.find_first_not_of(' ')); - (void)substr.erase(substr.find_last_not_of(' ') + 1); - auto iter = DYNAMIC_FORMAT_MAP.find(substr); - if (iter != DYNAMIC_FORMAT_MAP.end()) { - substr = iter->second; - } - result.push_back(substr); - return result; -} - -void ConvertFormatDtype(const std::string &format, const std::string &dtype, const std::shared_ptr &io_info) { - MS_EXCEPTION_IF_NULL(io_info); - std::vector format_vec = SplitStr(format, ","); - std::vector dtype_vec = SplitStr(dtype, ","); - io_info->set_formats(format_vec); - io_info->set_dtypes(dtype_vec); -} - -bool ParseDynamicFormatJson(const std::string &jsonStr, std::vector> 
*const inputs, - std::vector> *const outputs) { - nlohmann::json json_obj = nlohmann::json::parse(jsonStr); - if (!json_obj.is_object()) { - MS_LOG(DEBUG) << "JsonStr is not an object, the jsonStr is:" << jsonStr; - return false; - } - std::vector keys = {kName, kDtype, kFormat}; - for (const auto &item : json_obj.items()) { - std::string key_name; - key_name = item.key(); - if (key_name.empty()) { - MS_LOG(DEBUG) << "Key name is empty!"; - return false; - } - if (!CheckJsonItemValidity(json_obj, key_name, keys)) { - return false; - } - if (key_name.compare(0, strlen(kPrefixInput), kPrefixInput) == 0) { - std::shared_ptr input = std::make_shared(); - MS_EXCEPTION_IF_NULL(input); - input->set_name(json_obj[key_name].at(kName)); - ConvertFormatDtype(json_obj[key_name].at(kFormat), json_obj[key_name].at(kDtype), input); - inputs->emplace_back(input); - } else if (key_name.compare(0, strlen(kPrefixOutput), kPrefixOutput) == 0) { - std::shared_ptr output = std::make_shared(); - MS_EXCEPTION_IF_NULL(output); - output->set_name(json_obj[key_name].at(kName)); - ConvertFormatDtype(json_obj[key_name].at(kFormat), json_obj[key_name].at(kDtype), output); - outputs->emplace_back(output); - } else { - MS_LOG(DEBUG) << "Key name:" << key_name << " is undefined!"; - return false; - } - } - return true; -} - -std::string OpSelectFormat(const std::shared_ptr &anf_node) { - nlohmann::json kernel_json; - std::string res_json_str; - TbeKernelJsonCreator creator(OP_SELECT_FORMAT); - bool ret = creator.GenTbeSingleKernelJson(anf_node, &kernel_json); - if (!ret) { - MS_LOG(DEBUG) << "GenTbeSingleKernelJson failed"; - return res_json_str; - } - res_json_str = TbePythonFuncs::OpSelectFormat(kernel_json); - MS_LOG(INFO) << "Dynamic select foramt response result:" << res_json_str; - return res_json_str; -} - -void SetTidyInputsInfo(const std::shared_ptr &anf_node, - const std::shared_ptr &builder, - const std::vector> &inputs) { - std::vector inputs_type; - std::vector inputs_format; - 
std::vector dyn_input_sizes; - size_t dyn_input_idx = 0; - size_t kernel_info_index = 0; - size_t real_input_num = AnfAlgo::GetInputTensorNum(anf_node); - auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); - MS_EXCEPTION_IF_NULL(primitive); - if (primitive->GetAttr("dyn_input_sizes") != nullptr) { - dyn_input_sizes = GetValue>(primitive->GetAttr("dyn_input_sizes")); - } - for (size_t i = 0; i < inputs.size(); i++) { - MS_EXCEPTION_IF_NULL(inputs[i]); - std::string param_type = inputs[i]->param_type(); - if (i >= real_input_num) { - MS_LOG(INFO) << "Input index: " << i << " is out of real_input_num:" << real_input_num; - continue; - } - auto type_id = AnfAlgo::GetPrevNodeOutputInferDataType(anf_node, i); - auto format = kOpFormat_DEFAULT; - if (param_type == "dynamic") { - if (!dyn_input_sizes.empty()) { - for (int t = 0; t < dyn_input_sizes[dyn_input_idx]; t++) { - kernel_info_index++; - inputs_type.emplace_back(type_id); - inputs_format.emplace_back(format); - } - dyn_input_idx++; - } - } else if (param_type == "required") { - kernel_info_index++; - inputs_type.emplace_back(type_id); - inputs_format.emplace_back(format); - } else { - if (kernel_info_index < real_input_num) { - MS_LOG(INFO) << "Input type is optional, input index is :" << kernel_info_index; - kernel_info_index++; - inputs_type.emplace_back(type_id); - inputs_format.emplace_back(format); - } - } - } - builder->SetInputsDeviceType(inputs_type); - builder->SetInputsFormat(inputs_format); -} - -void SetTidyOutputsInfo(const std::shared_ptr &anf_node, - const std::shared_ptr &builder, - const std::vector> &outputs) { - std::vector outputs_type; - std::vector outputs_format; - auto real_output_num = AnfAlgo::GetOutputTensorNum(anf_node); - size_t output_idx = 0; - for (const auto &output : outputs) { - MS_EXCEPTION_IF_NULL(output); - if (output_idx >= real_output_num) { - continue; - } - size_t output_num = 0; - if (output->param_type() == "dynamic") { - if (outputs.size() > 1) { - 
MS_EXCEPTION(ArgumentError) << "Dynamic output is unsupported multi output!"; - } - output_num = real_output_num; - } else if (output->param_type() == "required") { - output_num = 1; - } else { - if (output_idx < real_output_num) { - MS_LOG(INFO) << "Set output kernel builder info, output type is optional, output index is :" << output_idx; - output_num = 1; - } - } - for (size_t i = 0; i < output_num; i++) { - auto type_id = AnfAlgo::GetOutputInferDataType(anf_node, output_idx); - outputs_type.emplace_back(type_id); - outputs_format.emplace_back(kOpFormat_DEFAULT); - output_idx++; - } - } - builder->SetOutputsDeviceType(outputs_type); - builder->SetOutputsFormat(outputs_format); -} - -void GenTidyKernelBuildInfo(const std::shared_ptr &anf_node, - const std::vector> &inputs, - const std::vector> &outputs) { - auto builder_tmp = std::make_shared(); - builder_tmp->SetKernelType(TBE_KERNEL); - SetTidyInputsInfo(anf_node, builder_tmp, inputs); - SetTidyOutputsInfo(anf_node, builder_tmp, outputs); - AnfAlgo::SetSelectKernelBuildInfo(builder_tmp->Build(), anf_node.get()); -} - -void ReplaceByDynamicFormatDtype(const CNodePtr &kernel_node, const std::shared_ptr &op_info_ptr, - const std::shared_ptr &op_info_new_ptr) { - std::vector> inputs_static = op_info_ptr->inputs_ptr(); - std::vector> outputs_static = op_info_ptr->outputs_ptr(); - std::vector> inputs_dyn; - std::vector> outputs_dyn; - if ((op_info_ptr->imply_type() == kTBE) && (!mindspore::opt::IsNopNode(kernel_node->cast()))) { - // 1. 
create tidy kernelBuildInfo in order to generate json for calling op_select_format - auto anf_node = kernel_node->cast>(); - auto kernel_build_info_ptr = AnfAlgo::GetSelectKernelBuildInfo(anf_node); - GenTidyKernelBuildInfo(kernel_node, inputs_static, outputs_static); - - // 2.get dynamic format from op_impl - std::string res_json_str; - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - if (context_ptr->execution_mode() != kPynativeMode) { - res_json_str = OpSelectFormat(kernel_node); - } - if (!res_json_str.empty()) { - (void)ParseDynamicFormatJson(res_json_str, &inputs_dyn, &outputs_dyn); - } - if (inputs_static.size() != inputs_dyn.size()) { - inputs_dyn.clear(); - } - if (outputs_static.size() != outputs_dyn.size()) { - outputs_dyn.clear(); - } - - // 3. resume kernel node's SelectKernelBuildInfo - // As it has been replaced by GenTidyKernelBuildInfo in order to call python func - AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_ptr, anf_node.get()); - } - // 4.replace by dynamic format and dtype - if (inputs_dyn.empty() && outputs_dyn.empty()) { - MS_LOG(INFO) << "Dynamic select format response is empty, use static register info."; - op_info_new_ptr->set_inputs_ptr(inputs_static); - op_info_new_ptr->set_outputs_ptr(outputs_static); - } else { - MS_LOG(INFO) << "Dynamic select format response successful, use dynamic format."; - for (size_t i = 0; i < inputs_static.size(); i++) { - inputs_dyn[i]->set_param_type(inputs_static[i]->param_type()); - inputs_dyn[i]->set_reshape_type(inputs_static[i]->reshape_type()); - } - for (size_t j = 0; j < outputs_static.size(); j++) { - outputs_dyn[j]->set_param_type(outputs_static[j]->param_type()); - outputs_dyn[j]->set_reshape_type(outputs_static[j]->reshape_type()); - } - op_info_new_ptr->set_inputs_ptr(inputs_dyn); - op_info_new_ptr->set_outputs_ptr(outputs_dyn); - } - - // 5.copy other opinfo to new op_info_new - op_info_new_ptr->set_op_name(op_info_ptr->op_name()); - 
op_info_new_ptr->set_imply_type(op_info_ptr->imply_type()); - op_info_new_ptr->set_fusion_type(op_info_ptr->fusion_type()); -} - -bool StringToAxisVector(const std::string &reshape_type_str, std::vector *reshape_type_vec) { - for (const auto &c : reshape_type_str) { - switch (c) { - case 'N': - reshape_type_vec->push_back(kernel::N); - break; - case 'C': - reshape_type_vec->push_back(kernel::C); - break; - case 'H': - reshape_type_vec->push_back(kernel::H); - break; - case 'W': - reshape_type_vec->push_back(kernel::W); - break; - default: - MS_LOG(ERROR) << "Unknown axis " << c << "in reshape type."; - return false; - } - } - return true; -} - -bool SetKernelBuilderInputInfo(const std::vector> &inputs, size_t real_input_num, - size_t builder_idex, const std::vector &dyn_input_sizes, - const std::shared_ptr &builder) { - MS_EXCEPTION_IF_NULL(builder); - - std::vector inputs_device_type; - std::vector inputs_format; - size_t dyn_input_idx = 0; - size_t kernel_info_index = 0; - MS_EXCEPTION_IF_NULL(inputs[0]); - size_t kernel_info_cnt = inputs[0]->dtypes().size(); - - std::vector> reshape_types; - for (const auto &input : inputs) { - MS_EXCEPTION_IF_NULL(input); - std::string param_type = input->param_type(); - std::vector dtypes = input->dtypes(); - std::vector formats = input->formats(); - if (dtypes.size() != kernel_info_cnt || formats.size() != kernel_info_cnt) { - MS_LOG(ERROR) << "Set input kernel builder info, dtyps size != formats size."; - return false; - } - - std::vector reshape_type; - if (!StringToAxisVector(input->reshape_type(), &reshape_type)) { - return false; - } - - if (param_type == "dynamic") { - if (dyn_input_sizes.empty()) { - MS_LOG(ERROR) << "Set input kernel builder info, dyn_input_sizes's size is 0 when param_type is dynamic"; - return false; - } - - for (int t = 0; t < dyn_input_sizes[dyn_input_idx]; t++) { - kernel_info_index++; - auto type_id = tbe::DtypeToTypeId(dtypes[builder_idex]); - inputs_device_type.push_back(type_id); - 
inputs_format.push_back(formats[builder_idex]); - reshape_types.push_back(reshape_type); - } - dyn_input_idx++; - } else if (param_type == "required") { - kernel_info_index++; - auto type_id = tbe::DtypeToTypeId(dtypes[builder_idex]); - inputs_device_type.push_back(type_id); - inputs_format.push_back(formats[builder_idex]); - reshape_types.push_back(reshape_type); - } else { - if (kernel_info_index < real_input_num) { - MS_LOG(INFO) << "Set input kernel builder info, input type is optional, input index is " << kernel_info_index; - kernel_info_index++; - auto type_id = tbe::DtypeToTypeId(dtypes[builder_idex]); - inputs_device_type.push_back(type_id); - inputs_format.push_back(formats[builder_idex]); - reshape_types.push_back(reshape_type); - } - } - } - - builder->SetInputReshapeType(reshape_types); - builder->SetInputsDeviceType(inputs_device_type); - builder->SetInputsFormat(inputs_format); - return true; -} - -bool SetKernelBuilderOutputInfo(const std::vector> &outputs, size_t builder_idex, - const size_t &real_output_num, - const std::shared_ptr &builder) { - // not now but in the next we need to support dynamic output case - MS_EXCEPTION_IF_NULL(builder); - - size_t output_idx = 0; - std::vector outputs_device_type; - std::vector outputs_format; - MS_EXCEPTION_IF_NULL(outputs[0]); - size_t kernel_info_cnt = outputs[0]->dtypes().size(); - - std::vector> reshape_types; - for (const auto &output : outputs) { - MS_EXCEPTION_IF_NULL(output); - if (output_idx >= real_output_num) { - MS_LOG(WARNING) << "real_output_num: " << real_output_num << ", output_idx: " << output_idx << "is out of limit!"; - continue; - } - std::vector reshape_type; - if (!StringToAxisVector(output->reshape_type(), &reshape_type)) { - return false; - } - - size_t output_num = 0; - if (output->param_type() == "dynamic") { - if (outputs.size() > 1) { - MS_LOG(EXCEPTION) << "Dynamic output is unsupported multi output!"; - } - output_num = real_output_num; - } else if (output->param_type() == 
"required") { - output_num = 1; - } else { - if (output_idx < real_output_num) { - MS_LOG(INFO) << "Set output kernel builder info, output type is optional, output index is " << output_idx; - output_num = 1; - } - } - - for (size_t i = 0; i < output_num; i++) { - std::vector dtypes = output->dtypes(); - std::vector formats = output->formats(); - if (dtypes.size() != kernel_info_cnt || formats.size() != kernel_info_cnt) { - MS_LOG(ERROR) << "Set output kernel builder info, dtyps size != formats size."; - return false; - } - auto type_id = tbe::DtypeToTypeId(dtypes[builder_idex]); - outputs_device_type.push_back(type_id); - outputs_format.push_back(formats[builder_idex]); - reshape_types.push_back(reshape_type); - output_idx++; - } - } - - builder->SetOutputReshapeType(reshape_types); - builder->SetOutputsFormat(outputs_format); - builder->SetOutputsDeviceType(outputs_device_type); - return true; -} - -void SetKernelBuildCommonInfo(const std::shared_ptr &builder, - Processor processor, const std::shared_ptr &op_info_ptr) { - MS_EXCEPTION_IF_NULL(builder); - MS_EXCEPTION_IF_NULL(op_info_ptr); - - builder->SetProcessor(processor); - std::string fusion_type = op_info_ptr->fusion_type(); - if (tbe::GetFusionType(fusion_type) != UNKNOWN_FUSION_TYPE) { - builder->SetFusionType(tbe::GetFusionType(fusion_type)); - } - builder->SetOpPattern(op_info_ptr->op_pattern()); - builder->SetKernelType(TBE_KERNEL); -} - -bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr &op_info_ptr, - std::vector> *const kernel_info_list) { - MS_EXCEPTION_IF_NULL(kernel_node); - MS_EXCEPTION_IF_NULL(kernel_info_list); - size_t real_input_num = AnfAlgo::GetInputTensorNum(kernel_node); - size_t real_output_num = AnfAlgo::GetOutputTensorNum(kernel_node); - std::vector> inputs = op_info_ptr->inputs_ptr(); - std::vector> outputs = op_info_ptr->outputs_ptr(); - std::vector dyn_input_sizes; - auto primitive = AnfAlgo::GetCNodePrimitive(kernel_node); - MS_EXCEPTION_IF_NULL(primitive); - if 
(primitive->GetAttr("dyn_input_sizes") != nullptr) { - dyn_input_sizes = GetValue>(primitive->GetAttr("dyn_input_sizes")); - } - if (!inputs.empty()) { - MS_EXCEPTION_IF_NULL(inputs[0]); - size_t kernel_info_cnt = inputs[0]->dtypes().size(); - for (size_t j = 0; j < kernel_info_cnt; j++) { - auto builder = std::make_shared(); - MS_EXCEPTION_IF_NULL(builder); - SetKernelBuildCommonInfo(builder, Processor::AICORE, op_info_ptr); - - if (!SetKernelBuilderInputInfo(inputs, real_input_num, j, dyn_input_sizes, builder)) { - MS_LOG(ERROR) << "Parse kernel metadata, set inputs kernel builder info failed."; - return false; - } - - if (!outputs.empty()) { - if (!SetKernelBuilderOutputInfo(outputs, j, real_output_num, builder)) { - MS_LOG(ERROR) << "Parse kernel metadata, set outputs kernel builder info failed."; - return false; - } - } - - kernel_info_list->push_back(builder->Build()); - } - } else if (!outputs.empty()) { - MS_EXCEPTION_IF_NULL(outputs[0]); - size_t kernel_info_cnt = outputs[0]->dtypes().size(); - for (size_t j = 0; j < kernel_info_cnt; j++) { - auto builder = std::make_shared(); - MS_EXCEPTION_IF_NULL(builder); - SetKernelBuildCommonInfo(builder, Processor::AICORE, op_info_ptr); - - if (!SetKernelBuilderOutputInfo(outputs, j, real_output_num, builder)) { - MS_LOG(ERROR) << "Parse kernel metadata, set outputs kernel builder info failed."; - return false; - } - - kernel_info_list->push_back(builder->Build()); - } - } - return true; -} - -bool IsShapeMatchFormat(const std::vector &shape, const std::string &format) { - // if format is default, it remarkes support all format - if (kOpFormatList.find(format) == kOpFormatList.end()) { - MS_LOG(EXCEPTION) << "Got the unknown format " << format; - } - if (format == kOpFormat_DEFAULT) { - return true; - } - if (format == kOpFormat_NDHWC && shape.size() != kShape5dDims) { - return false; - } - // if shape size is 0, the shape will be a scalar - if (shape.empty()) { - return true; - } - if (shape.size() > kShape4dDims) 
{ - return false; - } - if (format == kOpFormat_FRAC_NZ && shape.size() < 2) { - return false; - } - return true; -} - -bool IsValidKernelInfo(const std::shared_ptr &kernel_node, const kernel::KernelBuildInfo &kernel_build_info) { - MS_EXCEPTION_IF_NULL(kernel_node); - auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); - const size_t kCAxis = 1; - for (size_t index = 0; index < kernel_build_info.GetOutputNum(); ++index) { - auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, index); - if (kernel_build_info.GetOutputFormat(index) == kOpFormat_FRACTAL_Z_C04) { - if (output_shape.size() != kShape4dDims || output_shape[kCAxis] > 4) { - return false; - } - return false; - } - if (!IsShapeMatchFormat(output_shape, kernel_build_info.GetOutputFormat(index))) { - return false; - } - if (kernel_name == "ReduceMean") { - auto keep_dims = AnfAlgo::GetNodeAttr(kernel_node, kAttrKeepDims); - if (!keep_dims && kernel_build_info.GetOutputFormat(index) != kOpFormat_DEFAULT) { - return false; - } - } - } - for (size_t index = 0; index < kernel_build_info.GetInputNum(); ++index) { - auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, index); - if (!IsShapeMatchFormat(input_shape, kernel_build_info.GetInputFormat(index))) { - return false; - } - if (kernel_build_info.GetInputFormat(index) == kOpFormat_FRACTAL_Z_C04) { - if (input_shape.size() != kShape4dDims || input_shape[kCAxis] > 4) { - return false; - } - return false; - } - if (kernel_name == "ReduceMean") { - auto keep_dims = AnfAlgo::GetNodeAttr(kernel_node, kAttrKeepDims); - if (!keep_dims && kernel_build_info.GetInputFormat(index) != kOpFormat_DEFAULT) { - return false; - } - } - } - if (AnfAlgo::GetCNodeName(kernel_node) == prim::kPrimCast->name()) { - return AnfAlgo::GetOutputInferDataType(kernel_node, 0) == kernel_build_info.GetOutputDeviceType(0) && - AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0) == kernel_build_info.GetInputDeviceType(0); - } - return true; -} - -void 
TbeMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { - MS_EXCEPTION_IF_NULL(kernel_node); - MS_EXCEPTION_IF_NULL(kernel_info_list); - std::vector> parse_info_list; - - std::string op_name = AnfAlgo::GetCNodeName(kernel_node); - auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kTBE); - if (op_info_ptr == nullptr) { - return; - } - // dynamic get op format and dtype and replace opinfo - auto op_info_new_ptr = std::make_shared(); - ReplaceByDynamicFormatDtype(kernel_node, op_info_ptr, op_info_new_ptr); - - if (!ParseMetadata(kernel_node, op_info_new_ptr, &parse_info_list)) { - MS_LOG(INFO) << "Tbe parsed metadata of op[" << op_name << "] failed."; - return; - } - - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - for (const auto &parse_info : parse_info_list) { - if (IsValidKernelInfo(kernel_node, *(parse_info))) { - if (CheckSupported(kernel_node, parse_info)) { - kernel_info_list->push_back(parse_info); - } else { - MS_LOG(INFO) << "CheckSupported Failed for TBE op" << op_name << " kernel info."; - } - } - if (kernel_info_list->empty()) { - MS_LOG(DEBUG) << "Tbe dose not have op [" << op_name << "]."; - } - } -} -} // namespace kernel -} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/common_utils.h b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/common_utils.h new file mode 100644 index 0000000000..c07197610e --- /dev/null +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/common_utils.h @@ -0,0 +1,30 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_TBE_KERNEL_SELECT_COMMON_UTILS_H_ +#define MINDSPORE_CCSRC_KERNEL_TBE_KERNEL_SELECT_COMMON_UTILS_H_ +#include +#include +namespace mindspore { +namespace kernel { +struct SupportFormat { + std::vector> input_format; + std::vector> output_format; +}; +using SupportFormatItem = std::vector; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_TBE_COMMON_UTILS_H_ diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc new file mode 100644 index 0000000000..9d28af3f3f --- /dev/null +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc @@ -0,0 +1,319 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h" +#include "utils/utils.h" +#include "session/anf_runtime_algorithm.h" +#include "kernel/tbe/tbe_kernel_select/common_utils.h" + +namespace mindspore { +namespace kernel { +constexpr char kDynInputKey[] = "dyn_input_sizes"; +constexpr size_t kInputIndex_0 = 0; +constexpr size_t kChannelN = 0; +constexpr size_t kChannelC = 1; +constexpr size_t kAlignmented16 = 16; +// 1. all shape no scalar and same +// 2. part scalar : no_scalar (shape size > xxx && alig xxx) +// 3. all no_scalar and not same (broad cast xxx dim) +bool TbeKernelBroadCastSelecter::GetShapeInfo(SupportFormat *support_format) { + MS_EXCEPTION_IF_NULL(support_format); + input_num_ = 0; + output_num_ = 0; + input_shapes_.clear(); + output_shapes_.clear(); + if (AnfAlgo::HasNodeAttr(kDynInputKey, cnode_ptr_)) { + MS_LOG(INFO) << "This broadcast node has dynamic input."; + auto dynamic_size_vec = AnfAlgo::GetNodeAttr>(cnode_ptr_, kDynInputKey); + if (dynamic_size_vec.empty() || dynamic_size_vec[0] < 2) { + MS_LOG(EXCEPTION) << "dynamic attr set error, please check."; + } + auto dynamic_input_shape0_ = AnfAlgo::GetPrevNodeOutputInferShape(cnode_ptr_, kInputIndex_0); + PadScalarShape(&dynamic_input_shape0_); + input_shapes_.emplace_back(dynamic_input_shape0_); + input_num_ = 1; + } else { + input_num_ = AnfAlgo::GetInputTensorNum(cnode_ptr_); + for (size_t i = 0; i < input_num_; ++i) { + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode_ptr_, i); + PadScalarShape(&input_shape); + input_shapes_.emplace_back(input_shape); + } + } + + output_num_ = AnfAlgo::GetOutputTensorNum(cnode_ptr_); + for (size_t i = 0; i < output_num_; ++i) { + auto output = AnfAlgo::GetOutputInferShape(cnode_ptr_, i); + PadScalarShape(&output); + output_shapes_.emplace_back(output); + } + AssignSupportFormat(kOpFormat_DEFAULT, support_format); + return true; +} + +bool TbeKernelBroadCastSelecter::IsBroadCastSupport5HD(SupportFormat *support_format) 
const { + MS_EXCEPTION_IF_NULL(support_format); + if (IsSameShape()) { + if (!HasScalarInput()) { + AssignSupportFormat(kOpFormat_NC1HWC0, support_format); + return true; + } else { + return false; + } + } + SupportFormatItem input_support_format; + SupportFormatItem output_support_format; + if (HasScalarInput()) { + for (const auto &shape : input_shapes_) { + if (IsScalarShape(shape)) { + input_support_format.emplace_back(kOpFormat_DEFAULT); + } else { + if (!Is4DShape(shape)) { + return false; + } + if (shape[kChannelC] % kAlignmented16 != 0) { + return false; + } + input_support_format.emplace_back(kOpFormat_NC1HWC0); + } + } + } else { + for (const auto &shape : input_shapes_) { + if (!Is4DShape(shape)) { + return false; + } + } + auto shape_tmp = input_shapes_[0]; + auto broadcast_c_axis = std::any_of( + input_shapes_.begin(), input_shapes_.end(), + [&shape_tmp](const std::vector &elem) { return shape_tmp.at(kChannelC) != elem.at(kChannelC); }); + if (broadcast_c_axis) { + MS_LOG(INFO) << "This node broadcast c channel."; + return false; + } + input_support_format.assign(input_num_, kOpFormat_NC1HWC0); + } + GenOutputSupportFormat(kOpFormat_NC1HWC0, &output_support_format); + support_format->input_format.emplace_back(input_support_format); + support_format->output_format.emplace_back(output_support_format); + return true; +} + +bool TbeKernelBroadCastSelecter::IsBroadCastSupportFracZ(SupportFormat *support_format) const { + MS_EXCEPTION_IF_NULL(support_format); + if (IsSameShape()) { + if (!HasScalarInput()) { + AssignSupportFormat(kOpFormat_FRAC_Z, support_format); + return true; + } else { + return false; + } + } + SupportFormatItem input_support_format; + SupportFormatItem output_support_format; + if (HasScalarInput()) { + for (const auto &shape : input_shapes_) { + if (IsScalarShape(shape)) { + input_support_format.emplace_back(kOpFormat_DEFAULT); + } else { + if (!Is4DShape(shape)) { + return false; + } + if (shape[kChannelN] % kAlignmented16 != 0 || 
shape[kChannelC] % kAlignmented16 != 0) { + return false; + } + input_support_format.emplace_back(kOpFormat_FRAC_Z); + } + } + } else { + return false; + } + GenOutputSupportFormat(kOpFormat_FRAC_Z, &output_support_format); + support_format->input_format.emplace_back(input_support_format); + support_format->output_format.emplace_back(output_support_format); + return true; +} +bool TbeKernelBroadCastSelecter::IsBroadCastSupportC1HWNCoC0(SupportFormat *support_format) const { + MS_EXCEPTION_IF_NULL(support_format); + if (IsSameShape()) { + if (!HasScalarInput()) { + AssignSupportFormat(kOpFormat_C1HWNCoC0, support_format); + return true; + } else { + return false; + } + } + SupportFormatItem input_support_format; + SupportFormatItem output_support_format; + if (HasScalarInput()) { + for (const auto &shape : input_shapes_) { + if (IsScalarShape(shape)) { + input_support_format.emplace_back(kOpFormat_DEFAULT); + } else { + if (!Is4DShape(shape)) { + return false; + } + if (shape[kChannelN] % kAlignmented16 != 0) { + return false; + } + input_support_format.emplace_back(kOpFormat_C1HWNCoC0); + } + } + } else { + for (const auto &shape : input_shapes_) { + if (!Is4DShape(shape)) { + return false; + } + } + auto shape_tmp = input_shapes_[0]; + auto broadcast_nc_axis = + std::any_of(input_shapes_.begin(), input_shapes_.end(), [&shape_tmp](const std::vector &elem) { + return (shape_tmp.at(kChannelC) != elem.at(kChannelC) || shape_tmp.at(kChannelN) != elem.at(kChannelN)); + }); + if (broadcast_nc_axis) { + MS_LOG(INFO) << "This node broadcast n || c channel."; + return false; + } + input_support_format.assign(input_num_, kOpFormat_C1HWNCoC0); + } + GenOutputSupportFormat(kOpFormat_C1HWNCoC0, &output_support_format); + support_format->input_format.emplace_back(input_support_format); + support_format->output_format.emplace_back(output_support_format); + return true; +} + +bool TbeKernelBroadCastSelecter::IsBroadCastSupportFracNZ(SupportFormat *support_format) const { + 
MS_EXCEPTION_IF_NULL(support_format); + if (IsSameShape()) { + if (!HasScalarInput()) { + AssignSupportFormat(kOpFormat_FRAC_NZ, support_format); + return true; + } else { + return false; + } + } + SupportFormatItem input_support_format; + SupportFormatItem output_support_format; + if (HasScalarInput()) { + for (const auto &shape : input_shapes_) { + if (IsScalarShape(shape)) { + input_support_format.emplace_back(kOpFormat_DEFAULT); + } else { + if (shape.size() < kShape2dDims) { + return false; + } + if (shape[shape.size() - 1] % kAlignmented16 != 0 || shape[shape.size() - 2] % kAlignmented16 != 0) { + return false; + } + input_support_format.emplace_back(kOpFormat_FRAC_NZ); + } + } + } else { + auto less_2dims = std::any_of(input_shapes_.begin(), input_shapes_.end(), + [](const std::vector &elem) { return elem.size() < kShape2dDims; }); + if (less_2dims) { + MS_LOG(INFO) << "This node dim less 2."; + return false; + } + + auto shape_tmp = input_shapes_[0]; + auto broadcast_last_dim = + std::any_of(input_shapes_.begin(), input_shapes_.end(), [&shape_tmp](const std::vector &elem) { + return (shape_tmp.at(shape_tmp.size() - 1) != elem.at(elem.size() - 1)) || + (shape_tmp.at(shape_tmp.size() - 2) != elem.at(elem.size() - 2)); + }); + if (broadcast_last_dim) { + MS_LOG(INFO) << "This node broadcast last channel."; + return false; + } + + input_support_format.assign(input_num_, kOpFormat_FRAC_NZ); + } + GenOutputSupportFormat(kOpFormat_FRAC_NZ, &output_support_format); + support_format->input_format.emplace_back(input_support_format); + support_format->output_format.emplace_back(output_support_format); + return true; +} + +bool TbeKernelBroadCastSelecter::IsBroadCastSupportNDC1HWC0(SupportFormat *support_format) const { + MS_EXCEPTION_IF_NULL(support_format); + return false; +} + +bool TbeKernelBroadCastSelecter::Is4DShape(const std::vector &shape) const { + return shape.size() == kShape4dDims; +} + +bool TbeKernelBroadCastSelecter::IsSameShape() const { + auto shape = 
input_shapes_.begin(); + for (const auto &item : input_shapes_) { + if (shape->size() != item.size()) { + return false; + } + for (size_t i = 0; i < shape->size(); ++i) { + if (shape->at(i) != item.at(i)) { + return false; + } + } + } + return true; +} + +void TbeKernelBroadCastSelecter::PadScalarShape(std::vector *shape) const { + MS_EXCEPTION_IF_NULL(shape); + if (shape->empty()) { + shape->emplace_back(1); + } +} + +bool TbeKernelBroadCastSelecter::IsScalarShape(const std::vector &shape) const { + return (shape.size() == 1 && shape[0] == 1); +} + +bool TbeKernelBroadCastSelecter::HasScalarInput() const { + bool ret = false; + for (const auto &shape : input_shapes_) { + if (IsScalarShape(shape)) { + ret = true; + break; + } + } + return ret; +} + +void TbeKernelBroadCastSelecter::GenOutputSupportFormat(const std::string &support_format, + SupportFormatItem *output_support_item) const { + MS_EXCEPTION_IF_NULL(output_support_item); + for (const auto &shape : output_shapes_) { + if (IsScalarShape(shape)) { + output_support_item->emplace_back(kOpFormat_DEFAULT); + } else { + output_support_item->emplace_back(support_format); + } + } +} + +void TbeKernelBroadCastSelecter::AssignSupportFormat(const std::string &support_format_str, + mindspore::kernel::SupportFormat *support_format) const { + MS_EXCEPTION_IF_NULL(support_format); + SupportFormatItem input_support_format; + SupportFormatItem output_support_format; + input_support_format.assign(input_num_, support_format_str); + output_support_format.assign(output_num_, support_format_str); + support_format->input_format.emplace_back(input_support_format); + support_format->output_format.emplace_back(output_support_format); +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h new file mode 100644 index 0000000000..af711ddf29 --- /dev/null +++ 
b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h @@ -0,0 +1,56 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_TBE_KERNEL_BROADCAST_SELECTER_H_ +#define MINDSPORE_CCSRC_KERNEL_TBE_KERNEL_BROADCAST_SELECTER_H_ + +#include +#include +#include +#include "ir/anf.h" +#include "kernel/tbe/tbe_kernel_select/common_utils.h" + +namespace mindspore { +namespace kernel { +class TbeKernelBroadCastSelecter { + public: + explicit TbeKernelBroadCastSelecter(CNodePtr cnode_ptr) : cnode_ptr_(std::move(cnode_ptr)) {} + ~TbeKernelBroadCastSelecter() = default; + bool GetShapeInfo(SupportFormat *support_format); + bool IsBroadCastSupport5HD(SupportFormat *support_format) const; + bool IsBroadCastSupportFracZ(SupportFormat *support_format) const; + bool IsBroadCastSupportC1HWNCoC0(SupportFormat *support_format) const; + bool IsBroadCastSupportFracNZ(SupportFormat *support_format) const; + bool IsBroadCastSupportNDC1HWC0(SupportFormat *support_format) const; + + private: + bool IsSameShape() const; + void PadScalarShape(std::vector *shape) const; + bool Is4DShape(const std::vector &shape) const; + bool IsScalarShape(const std::vector &shape) const; + bool HasScalarInput() const; + void GenOutputSupportFormat(const std::string &support_format, SupportFormatItem *output_support_item) const; + void AssignSupportFormat(const std::string &support_format_str, 
SupportFormat *support_format) const; + // broadcast + CNodePtr cnode_ptr_; + size_t input_num_{}; + size_t output_num_{}; + std::vector> input_shapes_; + std::vector> output_shapes_; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_TBE_KERNEL_BROADCAST_SELECTER_HELPER_H diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc new file mode 100644 index 0000000000..da0466feaa --- /dev/null +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc @@ -0,0 +1,179 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h" +#include +#include +#include "utils/utils.h" +#include "session/anf_runtime_algorithm.h" +#include "kernel/tbe/tbe_kernel_select/common_utils.h" + +namespace mindspore { +namespace kernel { +constexpr char kKeepDims[] = "keep_dims"; +constexpr char kAxis[] = "axis"; +constexpr char kTypeInt32[] = "Int32"; +constexpr size_t kInputIndex_0 = 0; +constexpr size_t kOutputIndex_0 = 0; +constexpr size_t kChannelN = 0; +constexpr size_t kChannelC = 1; +constexpr size_t kReduceNZMinDim = 3; + +bool TbeKernelReduceSelecter::GetShapeInfo(SupportFormat *support_format) { + MS_EXCEPTION_IF_NULL(support_format); + input_shape_.clear(); + output_shape_.clear(); + axis_.clear(); + auto input_num = AnfAlgo::GetInputTensorNum(cnode_ptr_); + auto output_num = AnfAlgo::GetOutputTensorNum(cnode_ptr_); + if (input_num != 1 || output_num != 1) { + MS_LOG(EXCEPTION) << "Reduce operator only support one input/output, input num: " << input_num + << ", output num: " << output_num; + } + // get input/output shape + input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(cnode_ptr_, kInputIndex_0); + PadScalarShape(&input_shape_); + output_shape_ = AnfAlgo::GetOutputInferShape(cnode_ptr_, kOutputIndex_0); + PadScalarShape(&output_shape_); + // get keep dim attr + GetReduceAttrKeepDim(); + // get axis attr + GetReduceAttrAxis(); + AssignSupportFormat(kOpFormat_DEFAULT, support_format); + return true; +} + +bool TbeKernelReduceSelecter::IsReduceSupport5HD(SupportFormat *support_format) const { + MS_EXCEPTION_IF_NULL(support_format); + if (!Is4DShape(input_shape_)) { + return false; + } + if (!keep_dims_ || axis_.empty()) { + return false; + } + auto reduce_c_axis = std::any_of(axis_.begin(), axis_.end(), [](const size_t &elem) { return (elem == kChannelC); }); + if (reduce_c_axis) { + return false; + } + AssignSupportFormat(kOpFormat_NC1HWC0, support_format); + return true; +} + +bool 
TbeKernelReduceSelecter::IsReduceSupportNDC1HWC0(SupportFormat *support_format) const { + MS_EXCEPTION_IF_NULL(support_format); + // like to 5HD + return false; +} + +bool TbeKernelReduceSelecter::IsReduceSupportFracZ(SupportFormat *support_format) const { + return IsFracZAndC1HWNCoC0Common(kOpFormat_FRAC_Z, support_format); +} + +bool TbeKernelReduceSelecter::IsReduceSupportC1HWNCoC0(SupportFormat *support_format) const { + return IsFracZAndC1HWNCoC0Common(kOpFormat_C1HWNCoC0, support_format); +} + +bool TbeKernelReduceSelecter::IsReduceSupportFracNZ(SupportFormat *support_format) const { + MS_EXCEPTION_IF_NULL(support_format); + if (input_shape_.size() < kReduceNZMinDim) { + return false; + } + if (axis_.empty()) { + return false; + } + auto reduce_last_axis = std::any_of(axis_.begin(), axis_.end(), [this](const size_t &elem) { + return (elem == (this->input_shape_.size() - 1) || elem == (this->input_shape_.size() - 2)); + }); + if (reduce_last_axis) { + return false; + } + AssignSupportFormat(kOpFormat_FRAC_NZ, support_format); + return true; +} + +bool TbeKernelReduceSelecter::IsFracZAndC1HWNCoC0Common(const std::string &format, + mindspore::kernel::SupportFormat *support_format) const { + MS_EXCEPTION_IF_NULL(support_format); + if (!Is4DShape(input_shape_)) { + return false; + } + if (!keep_dims_ || axis_.empty()) { + return false; + } + auto reduce_n_c_axis = std::any_of(axis_.begin(), axis_.end(), + [](const size_t &elem) { return (elem == kChannelC || elem == kChannelN); }); + if (reduce_n_c_axis) { + return false; + } + AssignSupportFormat(format, support_format); + return true; +} + +void TbeKernelReduceSelecter::GetReduceAttrAxis() { + auto primitive = AnfAlgo::GetCNodePrimitive(cnode_ptr_); + MS_EXCEPTION_IF_NULL(primitive); + auto axis = primitive->GetAttr(kAxis); + if (axis == nullptr) { + MS_LOG(INFO) << "This node does't have axie attr."; + return; + } + auto type = axis->type(); + MS_EXCEPTION_IF_NULL(type); + std::vector axis_list; + if 
(type->ToString() == kTypeInt32) { + axis_list.emplace_back(GetValue(axis)); + } else { + axis_list = GetValue>(axis); + } + for (const auto &elem : axis_list) { + if (elem < 0) { + axis_.emplace_back(input_shape_.size() + elem); + } else { + axis_.emplace_back(IntToSize(elem)); + } + } +} + +void TbeKernelReduceSelecter::GetReduceAttrKeepDim() { + if (!AnfAlgo::HasNodeAttr(kKeepDims, cnode_ptr_)) { + MS_LOG(INFO) << "This node does't have keep_attr."; + keep_dims_ = false; + return; + } + keep_dims_ = AnfAlgo::GetNodeAttr(cnode_ptr_, kKeepDims); +} + +void TbeKernelReduceSelecter::AssignSupportFormat(const std::string &support_format_str, + mindspore::kernel::SupportFormat *support_format) const { + MS_EXCEPTION_IF_NULL(support_format); + SupportFormatItem input_support_format; + SupportFormatItem output_support_format; + input_support_format.emplace_back(support_format_str); + output_support_format.emplace_back(support_format_str); + support_format->input_format.emplace_back(input_support_format); + support_format->output_format.emplace_back(output_support_format); +} + +bool TbeKernelReduceSelecter::Is4DShape(const std::vector &shape) const { return shape.size() == kShape4dDims; } + +void TbeKernelReduceSelecter::PadScalarShape(std::vector *shape) const { + MS_EXCEPTION_IF_NULL(shape); + if (shape->empty()) { + shape->emplace_back(1); + } +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h new file mode 100644 index 0000000000..e66525fd64 --- /dev/null +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_TBE_KERNEL_REDUCE_SELECTER_H_ +#define MINDSPORE_CCSRC_KERNEL_TBE_KERNEL_REDUCE_SELECTER_H_ +#include +#include +#include +#include "ir/anf.h" +#include "kernel/tbe/tbe_kernel_select/common_utils.h" +namespace mindspore { +namespace kernel { +class TbeKernelReduceSelecter { + public: + explicit TbeKernelReduceSelecter(CNodePtr cnode_ptr) : cnode_ptr_(std::move(cnode_ptr)) {} + ~TbeKernelReduceSelecter() = default; + bool GetShapeInfo(SupportFormat *support_format); + bool IsReduceSupport5HD(SupportFormat *support_format) const; + bool IsReduceSupportNDC1HWC0(SupportFormat *support_format) const; + bool IsReduceSupportFracZ(SupportFormat *support_format) const; + bool IsReduceSupportC1HWNCoC0(SupportFormat *support_format) const; + bool IsReduceSupportFracNZ(SupportFormat *support_format) const; + + private: + bool IsFracZAndC1HWNCoC0Common(const std::string &format, SupportFormat *support_format) const; + void GetReduceAttrAxis(); + void GetReduceAttrKeepDim(); + void AssignSupportFormat(const std::string &support_format_str, SupportFormat *support_format) const; + bool Is4DShape(const std::vector &shape) const; + void PadScalarShape(std::vector *shape) const; + CNodePtr cnode_ptr_; + std::vector input_shape_{}; + std::vector output_shape_{}; + std::vector axis_{}; + bool keep_dims_ = false; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_TBE_KERNEL_REDUCE_SELECTER_H diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc 
b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc new file mode 100644 index 0000000000..573ad176cf --- /dev/null +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc @@ -0,0 +1,624 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/tbe/tbe_kernel_select/tbe_kernel_select.h" +#include +#include +#include +#include +#include "session/anf_runtime_algorithm.h" +#include "kernel/oplib/oplib.h" +#include "kernel/tbe/tbe_kernel_build.h" +#include "nlohmann/json.hpp" +#include "utils/context/ms_context.h" +#include "kernel/tbe/tbe_python_funcs.h" +#include "pre_activate/common/helper.h" +#include "kernel/tbe/tbe_convert_utils.h" +#include "parallel/ops_info/ops_utils.h" +#include "kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h" +#include "kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h" +#include "kernel/tbe/tbe_kernel_select/common_utils.h" + +namespace mindspore { +namespace kernel { +constexpr auto kName = "name"; +constexpr auto kDtype = "dtype"; +constexpr auto kFormat = "format"; +constexpr auto kPrefixInput = "input"; +constexpr auto kPrefixOutput = "output"; +constexpr char kDynInputKey[] = "dyn_input_sizes"; +constexpr char kParamTypeDynamic[] = "dynamic"; +constexpr char kParamTypeRequre[] = "required"; +constexpr char kParamTypeOptional[] = "optional"; +void TbeMetadataInfo(const CNodePtr &kernel_node, std::vector> 
*kernel_info_list) { + auto tbe_selecter = TbeKernelSelect(kernel_node, kernel_info_list); + tbe_selecter.TbeMetadataInfoEx(); +} + +TbeKernelSelect::TbeKernelSelect(CNodePtr kernel_node, std::vector> *kernel_info_list) + : cnode_ptr_(std::move(kernel_node)), kernel_info_list_(kernel_info_list) {} + +void TbeKernelSelect::TbeMetadataInfoEx() { + MS_EXCEPTION_IF_NULL(cnode_ptr_); + MS_EXCEPTION_IF_NULL(kernel_info_list_); + node_name_ = AnfAlgo::GetCNodeName(cnode_ptr_); + auto op_info_ptr = OpLib::FindOp(node_name_, kTBE); + if (!op_info_ptr) { + MS_LOG(INFO) << "Warning: Cann't find tbe core opinfo, node type: " << node_name_; + return; + } + MS_LOG(INFO) << "Start to tbe metadata info. node type: " << node_name_ + << ", node name: " << cnode_ptr_->fullname_with_scope(); + OpPattern pattern = op_info_ptr->op_pattern(); + if (pattern == kCommonPattern) { + GetCommonPatternKernelInfo(*op_info_ptr); + } else if (pattern == kDynamicFormatPattern) { + GetDynamicFormatPatternKernelInfo(*op_info_ptr); + } else if (pattern == kFormatAgnosticPattern) { + GetAgnosticPatternKernelInfo(*op_info_ptr); + } else if (pattern == kBroadcastPattern) { + GetBroadcastPatternKernelInfo(*op_info_ptr); + } else if (pattern == kReducePattern) { + GetReducePatternKernelInfo(*op_info_ptr); + } else { + MS_LOG(INFO) << "Warning: op pattern is invailed."; + } + // check support + FilterInVaildKernelInfo(); + MS_LOG(INFO) << "End get kernel build info size: " << kernel_info_list_->size() << ", after tbe select."; +} + +void TbeKernelSelect::GetCommonPatternKernelInfo(const OpInfo &op_info) { + MS_LOG(INFO) << "start."; + // get dynamic inputs + auto primitive = AnfAlgo::GetCNodePrimitive(cnode_ptr_); + MS_EXCEPTION_IF_NULL(primitive); + std::vector dyn_input_sizes; + if (primitive->HasAttr(kDynInputKey)) { + dyn_input_sizes = GetValue>(primitive->GetAttr(kDynInputKey)); + } + // get real input/output num + size_t real_input_tensor_num = AnfAlgo::GetInputTensorNum(cnode_ptr_); + const auto 
inputs_info = op_info.inputs_ptr(); + size_t real_output_tensor_num = AnfAlgo::GetOutputTensorNum(cnode_ptr_); + const auto outputs_info = op_info.outputs_ptr(); + if (inputs_info.empty() && outputs_info.empty()) { + MS_LOG(EXCEPTION) << "op info input & output is null, please check."; + } + // create kernel build info from opinfo + size_t kernel_build_info_num = + inputs_info.empty() ? outputs_info[0]->dtypes().size() : inputs_info[0]->dtypes().size(); + for (size_t kernel_build_info_index = 0; kernel_build_info_index < kernel_build_info_num; ++kernel_build_info_index) { + auto builder = KernelBuildInfo::KernelBuildInfoBuilder(); + SetTbeBuildCommonInfo(op_info, &builder); + std::vector inputs_format; + std::vector inputs_device_type; + std::vector> inputs_reshape_type; + // input + if (!GenBuilderItem(true, kernel_build_info_index, real_input_tensor_num, inputs_info, dyn_input_sizes, + &inputs_format, &inputs_device_type, &inputs_reshape_type)) { + break; + } + builder.SetInputsDeviceType(inputs_device_type); + builder.SetInputsFormat(inputs_format); + builder.SetInputReshapeType(inputs_reshape_type); + // output + std::vector outputs_format; + std::vector outputs_device_type; + std::vector> outputs_reshape_type; + if (!GenBuilderItem(false, kernel_build_info_index, real_output_tensor_num, outputs_info, dyn_input_sizes, + &outputs_format, &outputs_device_type, &outputs_reshape_type)) { + break; + } + builder.SetOutputsDeviceType(outputs_device_type); + builder.SetOutputsFormat(outputs_format); + builder.SetOutputReshapeType(outputs_reshape_type); + kernel_info_list_->emplace_back(builder.Build()); + } + MS_LOG(INFO) << "end."; +} + +void TbeKernelSelect::GetDynamicFormatPatternKernelInfo(const OpInfo &op_info) { + MS_LOG(INFO) << "start."; + // + OpInfo op_info_new; + CreateNewOpInfo(op_info, &op_info_new); + GetCommonPatternKernelInfo(op_info_new); + MS_LOG(INFO) << "end."; +} + +void TbeKernelSelect::GetAgnosticPatternKernelInfo(const OpInfo &op_info) { + 
MS_LOG(INFO) << "start."; + if (op_info.inputs_ptr().size() != 1) { + MS_LOG(EXCEPTION) << "AgnosticPattern only support one input."; + } + auto format = AnfAlgo::GetPrevNodeOutputFormat(cnode_ptr_, 0); + if (kOpFormatList.find(format) == kOpFormatList.end()) { + MS_LOG(INFO) << "Got the unknown format " << format; + format = kOpFormat_DEFAULT; + } + SupportFormat support_format; + SupportFormatItem input_item; + SupportFormatItem output_item; + input_item.assign(op_info.inputs_ptr().size(), format); + output_item.assign(op_info.outputs_ptr().size(), format); + support_format.input_format.emplace_back(input_item); + support_format.output_format.emplace_back(output_item); + PrintSupportedFormat(support_format); + OpInfo op_info_new; + CreateNewOpInfo(op_info, support_format, &op_info_new); + GetCommonPatternKernelInfo(op_info_new); + MS_LOG(INFO) << "end."; +} + +void TbeKernelSelect::GetBroadcastPatternKernelInfo(const OpInfo &op_info) { + MS_LOG(INFO) << "start."; + auto broadcast_selecter = TbeKernelBroadCastSelecter(cnode_ptr_); + SupportFormat support_format; + broadcast_selecter.GetShapeInfo(&support_format); + if (!broadcast_selecter.IsBroadCastSupport5HD(&support_format)) { + MS_LOG(INFO) << "Node(" << node_name_ << ") does not support 5HD."; + } + if (!broadcast_selecter.IsBroadCastSupportFracZ(&support_format)) { + MS_LOG(INFO) << "Node(" << node_name_ << ") does not support FracZ."; + } + if (!broadcast_selecter.IsBroadCastSupportC1HWNCoC0(&support_format)) { + MS_LOG(INFO) << "Node(" << node_name_ << ") does not support C1HWNCoC0."; + } + if (!broadcast_selecter.IsBroadCastSupportFracNZ(&support_format)) { + MS_LOG(INFO) << "Node(" << node_name_ << ") does not support FracNZ."; + } + PrintSupportedFormat(support_format); + OpInfo op_info_new; + CreateNewOpInfo(op_info, support_format, &op_info_new); + GetCommonPatternKernelInfo(op_info_new); + MS_LOG(INFO) << "end."; +} + +void TbeKernelSelect::GetReducePatternKernelInfo(const OpInfo &op_info) { + 
MS_LOG(INFO) << "start."; + auto reduce_selecter = TbeKernelReduceSelecter(cnode_ptr_); + SupportFormat support_format; + reduce_selecter.GetShapeInfo(&support_format); + if (!reduce_selecter.IsReduceSupport5HD(&support_format)) { + MS_LOG(INFO) << "Node (" << node_name_ << ") reduce not support 5HD."; + } + if (reduce_selecter.IsReduceSupportFracZ(&support_format)) { + MS_LOG(INFO) << "Node (" << node_name_ << ") reduce not support FracZ."; + } + if (reduce_selecter.IsReduceSupportC1HWNCoC0(&support_format)) { + MS_LOG(INFO) << "Node (" << node_name_ << ") reduce not support C1HWNCoC0."; + } + if (reduce_selecter.IsReduceSupportFracNZ(&support_format)) { + MS_LOG(INFO) << "Node (" << node_name_ << ") reduce not support FracNZ."; + } + PrintSupportedFormat(support_format); + OpInfo op_info_new; + CreateNewOpInfo(op_info, support_format, &op_info_new); + GetCommonPatternKernelInfo(op_info_new); + MS_LOG(INFO) << "end."; +} + +void TbeKernelSelect::FilterInVaildKernelInfo() { + if (kernel_info_list_->empty()) { + MS_LOG(INFO) << "Warning: get kernel build info failed."; + return; + } + auto kernel_build_info_iter = kernel_info_list_->begin(); + while (kernel_build_info_iter != kernel_info_list_->end()) { + if (!FilterInVaildShape(kernel_build_info_iter)) { + MS_LOG(INFO) << "Filter invaild shape, filter item info: " << (*kernel_build_info_iter)->ToString(); + kernel_build_info_iter = kernel_info_list_->erase(kernel_build_info_iter); + continue; + } + if (!TbeCheckSupported(kernel_build_info_iter)) { + MS_LOG(INFO) << "Check support shape, filter item info: " << (*kernel_build_info_iter)->ToString(); + kernel_build_info_iter = kernel_info_list_->erase(kernel_build_info_iter); + continue; + } + kernel_build_info_iter++; + } +} + +bool TbeKernelSelect::FilterInVaildShape( + const mindspore::kernel::TbeKernelSelect::KernelBuildInfoIter &kernel_build_info_iter) { + MS_EXCEPTION_IF_NULL((*kernel_build_info_iter)); + auto kernel_build_info_inputs_format = 
(*kernel_build_info_iter)->GetAllInputFormats(); + for (size_t i = 0; i < kernel_build_info_inputs_format.size(); ++i) { + auto shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode_ptr_, i); + auto format = kernel_build_info_inputs_format.at(i); + if (!IsShapeMatchFormat(shape, format)) { + MS_LOG(INFO) << "The " << i << "th input check failed."; + return false; + } + } + auto kernel_build_info_outputs_format = (*kernel_build_info_iter)->GetAllOutputFormats(); + for (size_t j = 0; j < kernel_build_info_outputs_format.size(); ++j) { + auto shape = AnfAlgo::GetOutputInferShape(cnode_ptr_, j); + auto format = kernel_build_info_outputs_format.at(j); + if (!IsShapeMatchFormat(shape, format)) { + MS_LOG(INFO) << "The " << j << "th input check failed."; + return false; + } + } + return true; +} + +bool TbeKernelSelect::IsShapeMatchFormat(const std::vector &shape, const std::string &format) { + if (format == kOpFormat_DEFAULT) { + return true; + } + static std::set kServerNotSupportFormat = {kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04}; + // if format is default, it remarkes support all format + if (kOpFormatList.find(format) == kOpFormatList.end()) { + MS_LOG(EXCEPTION) << "Got the unknown format " << format; + } + // server not support format with C04 suffix + if (std::find(kServerNotSupportFormat.begin(), kServerNotSupportFormat.end(), format) != + kServerNotSupportFormat.end()) { + MS_LOG(INFO) << "Warning: Server not support format with C04 suffix."; + return false; + } + // not support format: + // 1 NDHWC with shape size != 5 + // 2 FRAC_NZ with shape size < 2 + // 3 !NDHWC with shape size > 4 + if ((format == kOpFormat_NDHWC && shape.size() != kShape5dDims) || + (format == kOpFormat_FRAC_NZ && shape.size() < kShape2dDims) || + (format != kOpFormat_NDHWC && shape.size() > kShape4dDims)) { + MS_LOG(INFO) << "Warning: Shape format check failed, format: " << format << ", size: " << shape.size(); + return false; + } + return true; +} + +bool 
TbeKernelSelect::TbeCheckSupported( + const mindspore::kernel::TbeKernelSelect::KernelBuildInfoIter &kernel_build_info_iter) { + MS_EXCEPTION_IF_NULL((*kernel_build_info_iter)); + static const std::set kCheckSupportedOpType = {parallel::MATMUL, + parallel::BATCHMATMUL, + parallel::TOPK, + parallel::IN_TOPK, + parallel::PACK, + parallel::GATHER_ND, + parallel::UNSORTEF_SEGMENT_MIND, + parallel::UNSORTEF_SEGMENT_PRODD, + parallel::CAST}; + auto iter = std::find(kCheckSupportedOpType.begin(), kCheckSupportedOpType.end(), node_name_); + if (iter == kCheckSupportedOpType.end()) { + return true; + } + MS_LOG(INFO) << "Check support start."; + // replace kernel_info with current kernel info + auto kernel_build_info_tmp = AnfAlgo::GetSelectKernelBuildInfo(cnode_ptr_); + AnfAlgo::SetSelectKernelBuildInfo(*kernel_build_info_iter, cnode_ptr_.get()); + nlohmann::json kernel_json; + TbeKernelJsonCreator creator(CHECK_SUPPORTED); + bool ret = creator.GenTbeSingleKernelJson(cnode_ptr_, &kernel_json); + if (!ret) { + MS_LOG(EXCEPTION) << "Gen tbe single kernel json for check support failed."; + } + ret = TbePythonFuncs::CheckSupported(kernel_json); + AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_tmp, cnode_ptr_.get()); + return ret; +} + +void TbeKernelSelect::SetTbeBuildCommonInfo(const mindspore::kernel::OpInfo &op_info, + mindspore::kernel::KernelBuildInfo::KernelBuildInfoBuilder *builder) { + MS_EXCEPTION_IF_NULL(builder); + builder->SetProcessor(AICORE); + std::string fusion_type = op_info.fusion_type(); + if (tbe::GetFusionType(fusion_type) != UNKNOWN_FUSION_TYPE) { + builder->SetFusionType(tbe::GetFusionType(fusion_type)); + } + builder->SetOpPattern(op_info.op_pattern()); + builder->SetKernelType(TBE_KERNEL); +} + +bool TbeKernelSelect::GenBuilderItem(bool is_input, size_t kernel_build_info_index, size_t real_io_tensor_num, + const std::vector> &ios_info, + const std::vector &dyn_input_sizes, std::vector *formats, + std::vector *device_types, std::vector> 
*reshape_types) { + MS_EXCEPTION_IF_NULL(formats); + MS_EXCEPTION_IF_NULL(device_types); + MS_EXCEPTION_IF_NULL(reshape_types); + size_t dynamic_input_index = 0; + size_t real_io_tensor_index = 0; + size_t io_info_index = 0; + size_t io_info_num = ios_info.size(); + for (; io_info_index < io_info_num && real_io_tensor_index < real_io_tensor_num; io_info_index++) { + std::shared_ptr io_info_item = ios_info[io_info_index]; + auto kernel_build_info_dtype = io_info_item->dtypes().at(kernel_build_info_index); + std::string kernel_build_info_format; + if (!io_info_item->formats().empty()) { + kernel_build_info_format = io_info_item->formats().at(kernel_build_info_index); + } + std::string io_param_type = io_info_item->param_type(); + std::vector reshape_type; + StringToAxisVector(io_info_item->reshape_type(), &reshape_type); + if (io_param_type == kParamTypeDynamic) { + // dynamic io + if (is_input) { + if (dynamic_input_index >= dyn_input_sizes.size()) { + MS_LOG(EXCEPTION) << "dyn_input_sizes attr set error, dynamic_input_index: " << dynamic_input_index + << ", dyn_input_sizes size: " << dyn_input_sizes.size(); + } + int dynamic_input_size = dyn_input_sizes[dynamic_input_index]; + for (int i = 0; i < dynamic_input_size; ++i) { + device_types->emplace_back(tbe::DtypeToTypeId(kernel_build_info_dtype)); + formats->emplace_back(kernel_build_info_format); + reshape_types->emplace_back(reshape_type); + } + dynamic_input_index++; + real_io_tensor_index += dynamic_input_size; + } else { + if (ios_info.size() != 1) { + MS_LOG(EXCEPTION) << "if output is dynamic, so output must has one output."; + } + for (size_t i = 0; i < real_io_tensor_num; ++i) { + device_types->emplace_back(tbe::DtypeToTypeId(kernel_build_info_dtype)); + formats->emplace_back(kernel_build_info_format); + reshape_types->emplace_back(reshape_type); + } + real_io_tensor_index += real_io_tensor_num; + } + } else if (io_param_type == kParamTypeRequre || io_param_type == kParamTypeOptional) { + // requre or 
optional io + device_types->emplace_back(tbe::DtypeToTypeId(kernel_build_info_dtype)); + formats->emplace_back(kernel_build_info_format); + reshape_types->emplace_back(reshape_type); + real_io_tensor_index++; + } else { + MS_LOG(EXCEPTION) << "op info's param type is not match: " << io_param_type; + } + } + + if (io_info_index != io_info_num) { + MS_LOG(INFO) << "Warning: io_info_index(" << io_info_index << ") != io_info_num(" << io_info_num + << "), this node may has optional input/output."; + } + if (real_io_tensor_index != real_io_tensor_num) { + std::string io_type = is_input ? "inputs " : "outputs"; + MS_LOG(INFO) << node_name_ << "'s " << io_type << "op io info num: " << io_info_num + << ", real io tensor num:" << real_io_tensor_num << "real_io_tensor_index(" << real_io_tensor_index + << ") != real_io_tensor_num(" << real_io_tensor_num << ")"; + return false; + } + return true; +} + +void TbeKernelSelect::StringToAxisVector(const std::string &reshape_type_str, std::vector *reshape_type_vec) { + MS_EXCEPTION_IF_NULL(reshape_type_vec); + for (const auto &c : reshape_type_str) { + switch (c) { + case 'N': + reshape_type_vec->push_back(kernel::N); + break; + case 'C': + reshape_type_vec->push_back(kernel::C); + break; + case 'H': + reshape_type_vec->push_back(kernel::H); + break; + case 'W': + reshape_type_vec->push_back(kernel::W); + break; + default: + MS_LOG(EXCEPTION) << "Unknown axis " << c << "in reshape type."; + } + } +} + +void TbeKernelSelect::CreateNewOpIOInfo(const mindspore::kernel::OpIOInfo &op_io_info, + const std::vector> &support_format_item, size_t index, + mindspore::kernel::OpIOInfo *op_io_info_new) { + MS_EXCEPTION_IF_NULL(op_io_info_new); + op_io_info_new->set_index(op_io_info.index()); + op_io_info_new->set_name(op_io_info.name()); + op_io_info_new->set_param_type(op_io_info.param_type()); + op_io_info_new->set_need_compile(op_io_info.need_compile()); + op_io_info_new->set_reshape_type(op_io_info.reshape_type()); + 
op_io_info_new->set_shape(op_io_info.shape()); + // dtype + std::vector dtype_new; + auto dtype = op_io_info.dtypes(); + for (size_t i = 0; i < support_format_item.size(); ++i) { + dtype_new.insert(dtype_new.end(), dtype.begin(), dtype.end()); + } + op_io_info_new->set_dtypes(dtype_new); + // format + std::vector format_new; + for (const auto &formats : support_format_item) { + auto format = formats.at(index); + for (size_t j = 0; j < dtype.size(); ++j) { + format_new.emplace_back(format); + } + } + op_io_info_new->set_formats(format_new); +} + +std::vector TbeKernelSelect::SplitStrToVec(const std::string &op_select_json_item) { + const std::map kDynamicFormatMap = { + {"NCHW", "DefaultFormat"}, {"ND", "DefaultFormat"}, {"FRACTAL_Z", "FracZ"}}; + if (op_select_json_item.empty()) { + MS_LOG(EXCEPTION) << "Op select ret item is null."; + } + const char space = ' '; + const char sep = ','; + std::string op_select_tmp = op_select_json_item + ","; + std::vector ret; + auto begin = op_select_tmp.find_first_not_of(space, 0); + auto sep_pos = op_select_tmp.find(sep); + if (begin >= sep_pos) { + MS_LOG(EXCEPTION) << "Select ret json is error."; + } + while (sep_pos != std::string::npos) { + auto obj = op_select_tmp.substr(begin, sep_pos - begin); + if (kDynamicFormatMap.find(obj) != kDynamicFormatMap.end()) { + obj = kDynamicFormatMap.at(obj); + } + ret.emplace_back(obj); + begin = op_select_tmp.find_first_not_of(space, sep_pos + 1); + sep_pos = op_select_tmp.find(sep, begin); + } + return ret; +} + +std::string TbeKernelSelect::OpSelectFormat() { + nlohmann::json kernel_json; + std::string res_json_str; + TbeKernelJsonCreator creator(OP_SELECT_FORMAT); + bool ret = creator.GenTbeSingleKernelJson(cnode_ptr_, &kernel_json); + if (!ret) { + MS_LOG(EXCEPTION) << "GenTbeSingleKernelJson failed."; + } + res_json_str = TbePythonFuncs::OpSelectFormat(kernel_json); + if (res_json_str.empty()) { + MS_LOG(EXCEPTION) << "op select format error."; + } + MS_LOG(INFO) << "Dynamic select 
foramt response result:" << res_json_str; + return res_json_str; +} + +void TbeKernelSelect::CreateNewOpInfo(const mindspore::kernel::OpInfo &op_info, const SupportFormat &support_format, + mindspore::kernel::OpInfo *op_info_new) { + MS_EXCEPTION_IF_NULL(op_info_new); + if (op_info.inputs_ptr().size() != support_format.input_format[0].size() || + op_info.outputs_ptr().size() != support_format.output_format[0].size()) { + MS_LOG(EXCEPTION) << "BroadCast input/output size not match, op info input size:" << op_info.inputs_ptr().size() + << ", input support size: " << support_format.input_format[0].size() + << ", op info output size: " << op_info.outputs_ptr().size() + << ", output support size: " << support_format.output_format[0].size(); + } + *op_info_new = op_info; + op_info_new->ClearInputs(); + op_info_new->ClearOutputs(); + for (size_t i = 0; i < op_info.inputs_ptr().size(); ++i) { + auto input = op_info.inputs_ptr().at(i); + auto input_new = std::make_shared(); + CreateNewOpIOInfo(*input, support_format.input_format, i, input_new.get()); + op_info_new->add_inputs_ptr(input_new); + } + for (size_t j = 0; j < op_info.outputs_ptr().size(); ++j) { + auto output = op_info.outputs_ptr().at(j); + auto output_new = std::make_shared(); + CreateNewOpIOInfo(*output, support_format.output_format, j, output_new.get()); + op_info_new->add_outputs_ptr(output_new); + } +} + +struct SelectOpIOInfo { + std::string name; + std::vector dtypes; + std::vector formats; +}; + +void TbeKernelSelect::CreateNewOpInfo(const mindspore::kernel::OpInfo &op_info, + mindspore::kernel::OpInfo *op_info_new) { + MS_EXCEPTION_IF_NULL(op_info_new); + auto op_seclect_json = OpSelectFormat(); + if (!op_seclect_json.empty()) { + nlohmann::json json_obj = nlohmann::json::parse(op_seclect_json); + if (!json_obj.is_object()) { + MS_LOG(EXCEPTION) << "JsonStr is not an object, the jsonStr is:" << op_seclect_json; + } + std::vector inputs; + std::vector outputs; + for (const auto &item : json_obj.items()) 
{ + const std::string &item_name = item.key(); + bool is_input = (item_name.find(kPrefixInput) != std::string::npos); + bool is_output = (item_name.find(kPrefixOutput) != std::string::npos); + if (!is_input && !is_output) { + MS_LOG(EXCEPTION) << "op select ret json is error."; + } + if (is_input) { + SelectOpIOInfo select_input; + select_input.name = item.value().at(kName); + std::string input_dtype_item = item.value().at(kDtype); + select_input.dtypes = SplitStrToVec(input_dtype_item); + std::string input_format_item = item.value().at(kFormat); + select_input.formats = SplitStrToVec(input_format_item); + inputs.emplace_back(select_input); + } else if (is_output) { + SelectOpIOInfo select_output; + select_output.name = item.value().at(kName); + std::string input_dtype_item = item.value().at(kDtype); + select_output.dtypes = SplitStrToVec(input_dtype_item); + std::string input_format_item = item.value().at(kFormat); + select_output.formats = SplitStrToVec(input_format_item); + outputs.emplace_back(select_output); + } + } + + if (op_info.inputs_ptr().size() != inputs.size() || op_info.outputs_ptr().size() != outputs.size()) { + MS_LOG(EXCEPTION) << "select format input/output size not equal, please check register."; + } + + *op_info_new = op_info; + op_info_new->ClearInputs(); + op_info_new->ClearOutputs(); + for (size_t i = 0; i < op_info.inputs_ptr().size(); ++i) { + auto input_new = std::make_shared(); + CreateNewOpIOInfo(*op_info.inputs_ptr().at(i), inputs.at(i).dtypes, inputs.at(i).formats, input_new.get()); + op_info_new->add_inputs_ptr(input_new); + } + for (size_t i = 0; i < op_info.outputs_ptr().size(); ++i) { + auto output_new = std::make_shared(); + CreateNewOpIOInfo(*op_info.outputs_ptr().at(i), outputs.at(i).dtypes, outputs.at(i).formats, output_new.get()); + op_info_new->add_outputs_ptr(output_new); + } + } +} + +void TbeKernelSelect::CreateNewOpIOInfo(const mindspore::kernel::OpIOInfo &op_io_info, + const std::vector &support_dtype, + const 
std::vector &support_format, + mindspore::kernel::OpIOInfo *op_io_info_new) { + MS_EXCEPTION_IF_NULL(op_io_info_new); + op_io_info_new->set_index(op_io_info.index()); + op_io_info_new->set_name(op_io_info.name()); + op_io_info_new->set_param_type(op_io_info.param_type()); + op_io_info_new->set_need_compile(op_io_info.need_compile()); + op_io_info_new->set_reshape_type(op_io_info.reshape_type()); + op_io_info_new->set_shape(op_io_info.shape()); + // dtype && format + op_io_info_new->set_dtypes(support_dtype); + op_io_info_new->set_formats(support_format); +} + +void TbeKernelSelect::PrintSupportedFormat(const SupportFormat &support_format) { + if (support_format.input_format.size() != support_format.output_format.size()) { + MS_LOG(EXCEPTION) << "Input(" << support_format.input_format.size() << ")Output(" + << support_format.output_format.size() << ") size not match."; + } + for (size_t i = 0; i < support_format.input_format.size(); ++i) { + auto input_items = support_format.input_format.at(i); + auto output_items = support_format.output_format.at(i); + std::string print_str = "["; + for (const auto &input : input_items) { + print_str.append(input); + print_str.append(", "); + } + print_str.append("] -->"); + for (const auto &output : output_items) { + print_str.append(output); + print_str.append(", "); + } + MS_LOG(INFO) << "Support format: " << print_str; + } +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h new file mode 100644 index 0000000000..c400bdbb6f --- /dev/null +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h @@ -0,0 +1,77 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_TBE_KERNEL_SELECT_H +#define MINDSPORE_TBE_KERNEL_SELECT_H + +#include +#include +#include +#include "kernel/oplib/opinfo.h" +#include "kernel/kernel_build_info.h" +#include "kernel/tbe/tbe_kernel_select/common_utils.h" + +namespace mindspore { +namespace kernel { +void TbeMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list); + +class TbeKernelSelect { + using OpInfoPtr = std::shared_ptr; + using KernelBuildInfoIter = std::vector>::iterator; + + public: + TbeKernelSelect(CNodePtr kernel_node, std::vector> *kernel_info_list); + ~TbeKernelSelect() = default; + void TbeMetadataInfoEx(); + + private: + void GetCommonPatternKernelInfo(const OpInfo &op_info); + void GetDynamicFormatPatternKernelInfo(const OpInfo &op_info); + void GetAgnosticPatternKernelInfo(const OpInfo &op_info); + void GetBroadcastPatternKernelInfo(const OpInfo &op_info); + void GetReducePatternKernelInfo(const OpInfo &op_info); + void FilterInVaildKernelInfo(); + bool FilterInVaildShape(const KernelBuildInfoIter &kernel_build_info_iter); + static bool IsShapeMatchFormat(const std::vector &shape, const std::string &format); + bool TbeCheckSupported(const KernelBuildInfoIter &kernel_build_info_iter); + static void SetTbeBuildCommonInfo(const OpInfo &op_info, KernelBuildInfo::KernelBuildInfoBuilder *builder); + bool GenBuilderItem(bool is_input, size_t kernel_build_info_index, size_t real_io_tensor_num, + const std::vector> &ios_info, const std::vector &dyn_input_sizes, + std::vector *formats, std::vector *device_types, + std::vector> 
*reshape_types); + static void StringToAxisVector(const std::string &reshape_type_str, std::vector *reshape_type_vec); + static void CreateNewOpInfo(const OpInfo &op_info, const SupportFormat &support_format, OpInfo *op_info_new); + static void CreateNewOpIOInfo(const OpIOInfo &op_io_info, + const std::vector> &support_format_item, size_t index, + OpIOInfo *op_io_info_new); + // op select(dynamic) + void CreateNewOpInfo(const mindspore::kernel::OpInfo &op_info, mindspore::kernel::OpInfo *op_info_new); + static void CreateNewOpIOInfo(const OpIOInfo &op_io_info, const std::vector &support_dtype, + const std::vector &support_format, OpIOInfo *op_io_info_new); + static std::vector SplitStrToVec(const std::string &op_select_json_item); + std::string OpSelectFormat(); + + static void PrintSupportedFormat(const SupportFormat &support_format); + + private: + CNodePtr cnode_ptr_; + std::vector> *kernel_info_list_; + std::string node_name_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_TBE_KERNEL_SELECT_H diff --git a/mindspore/ccsrc/kernel/tbe/tbe_utils.cc b/mindspore/ccsrc/kernel/tbe/tbe_utils.cc index 5980a0fd88..a930fd3dca 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_utils.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_utils.cc @@ -67,12 +67,12 @@ void TbeUtils::SaveJsonInfo(const std::string &json_name, const std::string &inf filewrite << info << std::endl; filewrite.close(); if (nullptr == realpath(path.c_str(), real_path)) { - MS_LOG(DEBUG) << "dir: " << path << "does not exit."; + MS_LOG(INFO) << "dir: " << path << "does not exit."; return; } MS_LOG(INFO) << "real path is: " << real_path; if (chmod(real_path, S_IRUSR) == -1) { - MS_LOG(DEBUG) << "modify file: " << real_path << "to read only fail."; + MS_LOG(INFO) << "modify file: " << real_path << "to read only fail."; } } @@ -93,7 +93,7 @@ KernelPackPtr TbeUtils::SearchCache(const std::string &kernel_name, const std::s // search cache. 
KernelMeta *bin_map = KernelMeta::GetInstance(); if (bin_map == nullptr) { - MS_LOG(DEBUG) << "kernel cache is invalid."; + MS_LOG(INFO) << "kernel cache is invalid."; return nullptr; } return bin_map->GetKernelPack(kernel_name, processor); @@ -118,14 +118,14 @@ int KernelManager::BinaryRegister(const mindspore::kernel::FlexArray &kernel_buf dev_bin.data = kernel_buffer.contents; auto iter = magic_maps.find(magic); if (iter == magic_maps.end()) { - MS_LOG(DEBUG) << "Invalid magic number: " << magic; + MS_LOG(INFO) << "Invalid magic number: " << magic; return -1; } dev_bin.magic = iter->second; dev_bin.length = kernel_buffer.len; dev_bin.version = 2; if (RT_ERROR_NONE != rtDevBinaryRegister(&dev_bin, module)) { - MS_LOG(DEBUG) << "Call runtime rtDevBinaryRegister error."; + MS_LOG(INFO) << "Call runtime rtDevBinaryRegister error."; return -1; } return 0; @@ -158,14 +158,14 @@ uintptr_t KernelManager::GenFuncStub(const mindspore::kernel::KernelPack &kernel } void *module = nullptr; if (0 != BinaryRegister((*kernel_pack.GetKernel()), &module, magic)) { - MS_LOG(DEBUG) << "Call runtime BinaryRegister error."; + MS_LOG(INFO) << "Call runtime BinaryRegister error."; return 0; } // to diff different funcs. uintptr_t funcstub = ++kernel_stub_gen_; if (RT_ERROR_NONE != rtFunctionRegister(module, reinterpret_cast(funcstub), funcname.c_str(), funcname.c_str(), 0)) { - MS_LOG(DEBUG) << "Call runtime rtFunctionRegister error."; + MS_LOG(INFO) << "Call runtime rtFunctionRegister error."; return 0; } // cache the registered kernelmeta. 
@@ -236,7 +236,7 @@ KernelPackPtr KernelMeta::GetKernelPack(const std::string &kernel_name, const st (void)cce_json.append(kernel_name).append(kJsonSuffix); ret = std::make_shared(); if (!ret->LoadKernelMeta(cce_json, processor)) { - MS_LOG(DEBUG) << "Read cache json and bin file failed[" << cce_json << "]"; + MS_LOG(INFO) << "Read cache json and bin file failed[" << cce_json << "]"; return nullptr; } kernel_pack_map_[kernel_name] = ret; diff --git a/mindspore/ccsrc/mindrecord/include/common/shard_utils.h b/mindspore/ccsrc/mindrecord/include/common/shard_utils.h index 65a8d53e72..8aa5bdfbda 100644 --- a/mindspore/ccsrc/mindrecord/include/common/shard_utils.h +++ b/mindspore/ccsrc/mindrecord/include/common/shard_utils.h @@ -73,6 +73,10 @@ enum ShardType { kCV = 1, }; +enum TaskType { + kCommonTask = 0, + kPaddedTask = 1, +}; enum SamplerType { kCustomTopNSampler, kCustomTopPercentSampler, kSubsetRandomSampler, kPKSampler }; enum ShuffleType { kShuffleCategory, kShuffleSample }; diff --git a/mindspore/ccsrc/mindrecord/include/shard_column.h b/mindspore/ccsrc/mindrecord/include/shard_column.h index 496e7ec3ea..968d82e717 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_column.h +++ b/mindspore/ccsrc/mindrecord/include/shard_column.h @@ -67,7 +67,7 @@ class ShardColumn { /// \brief get column value by column name MSRStatus GetColumnValueByName(const std::string &column_name, const std::vector &columns_blob, const json &columns_json, const unsigned char **data, - std::unique_ptr *data_ptr, uint64_t *n_bytes, + std::unique_ptr *data_ptr, uint64_t *const n_bytes, ColumnDataType *column_data_type, uint64_t *column_data_type_size, std::vector *column_shape); @@ -88,13 +88,17 @@ class ShardColumn { /// \brief get column value from blob MSRStatus GetColumnFromBlob(const std::string &column_name, const std::vector &columns_blob, const unsigned char **data, std::unique_ptr *data_ptr, - uint64_t *n_bytes); + uint64_t *const n_bytes); + std::pair GetColumnTypeByName(const 
std::string &column_name, + ColumnDataType *column_data_type, + uint64_t *column_data_type_size, + std::vector *column_shape); - private: /// \brief get column value from json MSRStatus GetColumnFromJson(const std::string &column_name, const json &columns_json, std::unique_ptr *data_ptr, uint64_t *n_bytes); + private: /// \brief get float value from json template MSRStatus GetFloat(std::unique_ptr *data_ptr, const json &json_column_value, bool use_double); @@ -115,7 +119,7 @@ class ShardColumn { /// \brief uncompress integer array column template - static MSRStatus UncompressInt(const uint64_t &column_id, std::unique_ptr *data_ptr, + static MSRStatus UncompressInt(const uint64_t &column_id, std::unique_ptr *const data_ptr, const std::vector &columns_blob, uint64_t *num_bytes, uint64_t shift_idx); /// \brief convert big-endian bytes to unsigned int diff --git a/mindspore/ccsrc/mindrecord/include/shard_distributed_sample.h b/mindspore/ccsrc/mindrecord/include/shard_distributed_sample.h new file mode 100644 index 0000000000..ef0ad738c4 --- /dev/null +++ b/mindspore/ccsrc/mindrecord/include/shard_distributed_sample.h @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDRECORD_INCLUDE_SHARD_DISTRIBUTED_SAMPLE_H_ +#define MINDRECORD_INCLUDE_SHARD_DISTRIBUTED_SAMPLE_H_ + +#include +#include +#include +#include +#include "mindrecord/include/shard_operator.h" +#include "mindrecord/include/shard_shuffle.h" +#include "mindrecord/include/shard_sample.h" + +namespace mindspore { +namespace mindrecord { +class ShardDistributedSample : public ShardSample { + public: + ShardDistributedSample(int num_shards, int shard_id, int no_of_padded_samples, bool shuffle, uint32_t seed); + + ShardDistributedSample(int num_shards, int shard_id, bool shuffle, uint32_t seed); + + void SetNumPaddedSamples(int no_of_padded_samples) { no_of_padded_samples_ = no_of_padded_samples; } + + ~ShardDistributedSample() override{}; + + MSRStatus PreExecute(ShardTask &tasks) override; + + int64_t GetNumSamples(int64_t dataset_size, int64_t num_classes) override; + + private: + bool shuffle_; + int no_of_padded_samples_; + bool first_epoch_; // check (num_sample + num_padded) % num_shards == 0 in first epoch + ShardTask task_; // maintain the input tasks in first epoch +}; +} // namespace mindrecord +} // namespace mindspore + +#endif // MINDRECORD_INCLUDE_SHARD_DISTRIBUTED_SAMPLE_H_ diff --git a/mindspore/ccsrc/mindrecord/include/shard_index_generator.h b/mindspore/ccsrc/mindrecord/include/shard_index_generator.h index f91d0f17a7..b081b7a0a0 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_index_generator.h +++ b/mindspore/ccsrc/mindrecord/include/shard_index_generator.h @@ -91,7 +91,7 @@ class ShardIndexGenerator { INDEX_FIELDS GenerateIndexFields(const std::vector &schema_detail); - MSRStatus ExecuteTransaction(const int &shard_no, const std::pair &db, + MSRStatus ExecuteTransaction(const int &shard_no, std::pair &db, const std::vector &raw_page_ids, const std::map &blob_id_to_page_id); MSRStatus CreateShardNameTable(sqlite3 *db, const std::string &shard_name); diff --git a/mindspore/ccsrc/mindrecord/include/shard_operator.h 
b/mindspore/ccsrc/mindrecord/include/shard_operator.h index 59c77074a1..f33e3db5f4 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_operator.h +++ b/mindspore/ccsrc/mindrecord/include/shard_operator.h @@ -17,6 +17,7 @@ #ifndef MINDRECORD_INCLUDE_SHARD_OPERATOR_H_ #define MINDRECORD_INCLUDE_SHARD_OPERATOR_H_ +#include #include "mindrecord/include/shard_task.h" namespace mindspore { @@ -37,6 +38,14 @@ class ShardOperator { } return SUCCESS; } + virtual bool HasChildOp() { return child_op_ != nullptr; } + + virtual MSRStatus SetChildOp(std::shared_ptr child_op) { + if (child_op != nullptr) child_op_ = child_op; + return SUCCESS; + } + + virtual std::shared_ptr GetChildOp() { return child_op_; } virtual MSRStatus PreExecute(ShardTask &tasks) { return SUCCESS; } @@ -44,7 +53,10 @@ class ShardOperator { virtual MSRStatus SufExecute(ShardTask &tasks) { return SUCCESS; } - virtual int64_t GetNumSamples(int64_t dataset_size, int64_t num_classes) { return -1; } + virtual int64_t GetNumSamples(int64_t dataset_size, int64_t num_classes) { return 0; } + + private: + std::shared_ptr child_op_ = nullptr; }; } // namespace mindrecord } // namespace mindspore diff --git a/mindspore/ccsrc/mindrecord/include/shard_reader.h b/mindspore/ccsrc/mindrecord/include/shard_reader.h index 8db7761fb8..1f2138d6d5 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_reader.h +++ b/mindspore/ccsrc/mindrecord/include/shard_reader.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,7 @@ #include "mindrecord/include/common/shard_utils.h" #include "mindrecord/include/shard_category.h" #include "mindrecord/include/shard_column.h" +#include "mindrecord/include/shard_distributed_sample.h" #include "mindrecord/include/shard_error.h" #include "mindrecord/include/shard_index_generator.h" #include "mindrecord/include/shard_operator.h" @@ -58,7 +60,8 @@ using ROW_GROUPS = std::tuple>>, std::vector>>; using ROW_GROUP_BRIEF = std::tuple>, std::vector>; -using 
TASK_RETURN_CONTENT = std::pair, json>>>; +using TASK_RETURN_CONTENT = + std::pair, json>>>>; const int kNumBatchInMap = 1000; // iterator buffer size in row-reader mode const int kNumPageInBuffer = 16; // page buffer size in block-reader mode @@ -78,7 +81,8 @@ class ShardReader { /// \return MSRStatus the status of MSRStatus MSRStatus Open(const std::vector &file_paths, bool load_dataset, int n_consumer = 4, const std::vector &selected_columns = {}, - const std::vector> &operators = {}, const bool &block_reader = false); + const std::vector> &operators = {}, const bool &block_reader = false, + const int num_padded = 0); /// \brief open files and initialize reader, python API /// \param[in] file_paths the path of ONE file, any file in dataset is fine or file list @@ -127,7 +131,7 @@ class ShardReader { /// \param[out] count # of rows /// \return MSRStatus the status of MSRStatus MSRStatus CountTotalRows(const std::vector &file_paths, bool load_dataset, - const std::shared_ptr &op, int64_t *count); + const std::shared_ptr &op, int64_t *count, const int num_padded); /// \brief shuffle task with incremental seed /// \return void @@ -182,7 +186,8 @@ class ShardReader { /// \brief return a row by id /// \return a batch of images and image data - std::vector, json>> GetNextById(const int64_t &task_id, const int32_t &consumer_id); + std::pair, json>>> GetNextById(const int64_t &task_id, + const int32_t &consumer_id); /// \brief return a batch in block-reader mode, given that one is ready /// \return a batch of images and image data @@ -330,6 +335,8 @@ class ShardReader { bool all_in_index_ = true; // if all columns are stored in index-table bool interrupt_ = false; // reader interrupted + int num_padded_; // number of padding samples + // Delivery/Iterator mode begin const std::string kThreadName = "THRD_ITER_"; // prefix of thread name std::vector thread_set_; // thread list diff --git a/mindspore/ccsrc/mindrecord/include/shard_sample.h 
b/mindspore/ccsrc/mindrecord/include/shard_sample.h index 7905f328f9..a32acbff6e 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_sample.h +++ b/mindspore/ccsrc/mindrecord/include/shard_sample.h @@ -38,22 +38,22 @@ class ShardSample : public ShardOperator { ~ShardSample() override{}; - const std::pair GetPartitions() const; - MSRStatus Execute(ShardTask &tasks) override; MSRStatus SufExecute(ShardTask &tasks) override; int64_t GetNumSamples(int64_t dataset_size, int64_t num_classes) override; - private: + protected: int numerator_; int denominator_; - int no_of_samples_; int partition_id_; + int no_of_samples_; + std::shared_ptr shuffle_op_; + + private: std::vector indices_; SamplerType sampler_type_; - std::shared_ptr shuffle_op_; }; } // namespace mindrecord } // namespace mindspore diff --git a/mindspore/ccsrc/mindrecord/include/shard_sequential_sample.h b/mindspore/ccsrc/mindrecord/include/shard_sequential_sample.h new file mode 100644 index 0000000000..a8ee3a36db --- /dev/null +++ b/mindspore/ccsrc/mindrecord/include/shard_sequential_sample.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDRECORD_INCLUDE_SHARD_SEQUENTIAL_SAMPLE_H_ +#define MINDRECORD_INCLUDE_SHARD_SEQUENTIAL_SAMPLE_H_ + +#include +#include +#include +#include +#include "mindrecord/include/shard_sample.h" + +namespace mindspore { +namespace mindrecord { +class ShardSequentialSample : public ShardSample { + public: + ShardSequentialSample(int n, int offset); + + ShardSequentialSample(float per, float per_offset); + + ~ShardSequentialSample() override{}; + + MSRStatus Execute(ShardTask &tasks) override; + + int64_t GetNumSamples(int64_t dataset_size, int64_t num_classes) override; + + private: + int offset_; + float per_; + float per_offset_; +}; +} // namespace mindrecord +} // namespace mindspore + +#endif // MINDRECORD_INCLUDE_SHARD_SEQUENTIAL_SAMPLE_H_ diff --git a/mindspore/ccsrc/mindrecord/include/shard_shuffle.h b/mindspore/ccsrc/mindrecord/include/shard_shuffle.h index a9c54e6239..adb172bdcc 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_shuffle.h +++ b/mindspore/ccsrc/mindrecord/include/shard_shuffle.h @@ -26,12 +26,20 @@ class ShardShuffle : public ShardOperator { public: explicit ShardShuffle(uint32_t seed = 0, ShuffleType shuffle_type = kShuffleCategory); + ShardShuffle(uint32_t seed, int64_t no_of_samples, bool replacement, bool reshuffle_each_epoch, + ShuffleType shuffle_type = kShuffleSample); + ~ShardShuffle() override{}; MSRStatus Execute(ShardTask &tasks) override; + int64_t GetNumSamples(int64_t dataset_size, int64_t num_classes) override; + private: uint32_t shuffle_seed_; + int64_t no_of_samples_; + bool replacement_; + bool reshuffle_each_epoch_; ShuffleType shuffle_type_; }; } // namespace mindrecord diff --git a/mindspore/ccsrc/mindrecord/include/shard_task.h b/mindspore/ccsrc/mindrecord/include/shard_task.h index d48c25c9cd..4a12eb9e45 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_task.h +++ b/mindspore/ccsrc/mindrecord/include/shard_task.h @@ -17,6 +17,7 @@ #ifndef MINDRECORD_INCLUDE_SHARD_TASK_H_ #define 
MINDRECORD_INCLUDE_SHARD_TASK_H_ +#include #include #include #include @@ -27,11 +28,20 @@ namespace mindspore { namespace mindrecord { class ShardTask { public: + ShardTask(); + + ShardTask(const ShardTask &task); // copy construction + + ShardTask &operator=(const ShardTask &task); // assignment operator + + ~ShardTask() = default; + void MakePerm(); - void InsertTask(int shard_id, int group_id, const std::vector &offset, const json &label); + void InsertTask(TaskType task_type, int shard_id, int group_id, const std::vector &offset, + const json &label); - void InsertTask(std::tuple, std::vector, json> task); + void InsertTask(std::tuple, std::vector, json> task); void PopBack(); @@ -39,16 +49,17 @@ class ShardTask { uint32_t SizeOfRows() const; - std::tuple, std::vector, json> &GetTaskByID(size_t id); + std::tuple, std::vector, json> &GetTaskByID(size_t id); - std::tuple, std::vector, json> &GetRandomTask(); + std::tuple, std::vector, json> &GetRandomTask(); static ShardTask Combine(std::vector &category_tasks, bool replacement, int64_t num_elements); - uint32_t categories = 1; + uint32_t categories; - std::vector, std::vector, json>> task_list_; std::vector permutation_; + + std::vector, std::vector, json>> task_list_; }; } // namespace mindrecord } // namespace mindspore diff --git a/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc b/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc index 905968e3a2..16c730bd4c 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc +++ b/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc @@ -335,15 +335,15 @@ MSRStatus ShardIndexGenerator::BindParameterExecuteSQL( int index = sqlite3_bind_parameter_index(stmt, common::SafeCStr(place_holder)); if (field_type == "INTEGER") { - if (sqlite3_bind_int(stmt, index, std::stoi(field_value)) != SQLITE_OK) { + if (sqlite3_bind_int64(stmt, index, std::stoll(field_value)) != SQLITE_OK) { MS_LOG(ERROR) << "SQL error: could not bind parameter, index: " << index - 
<< ", field value: " << std::stoi(field_value); + << ", field value: " << std::stoll(field_value); return FAILED; } } else if (field_type == "NUMERIC") { - if (sqlite3_bind_double(stmt, index, std::stod(field_value)) != SQLITE_OK) { + if (sqlite3_bind_double(stmt, index, std::stold(field_value)) != SQLITE_OK) { MS_LOG(ERROR) << "SQL error: could not bind parameter, index: " << index - << ", field value: " << std::stoi(field_value); + << ", field value: " << std::stold(field_value); return FAILED; } } else if (field_type == "NULL") { @@ -514,7 +514,7 @@ INDEX_FIELDS ShardIndexGenerator::GenerateIndexFields(const std::vector &s return {SUCCESS, std::move(fields)}; } -MSRStatus ShardIndexGenerator::ExecuteTransaction(const int &shard_no, const std::pair &db, +MSRStatus ShardIndexGenerator::ExecuteTransaction(const int &shard_no, std::pair &db, const std::vector &raw_page_ids, const std::map &blob_id_to_page_id) { // Add index data to database @@ -556,6 +556,7 @@ MSRStatus ShardIndexGenerator::ExecuteTransaction(const int &shard_no, const std MS_LOG(ERROR) << "Close database failed"; return FAILED; } + db.second = nullptr; return SUCCESS; } diff --git a/mindspore/ccsrc/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/mindrecord/io/shard_reader.cc index fcb588fff8..99fa0c447d 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_reader.cc +++ b/mindspore/ccsrc/mindrecord/io/shard_reader.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "mindrecord/include/shard_distributed_sample.h" #include "mindrecord/include/shard_reader.h" #include "common/utils.h" @@ -45,6 +46,7 @@ ShardReader::ShardReader() { row_id_ = 0; num_blocks_ = 0; block_reader_ = false; + num_padded_ = 0; } std::pair> ShardReader::GetMeta(const std::string &file_path, json &meta_data) { @@ -113,6 +115,7 @@ MSRStatus ShardReader::Init(const std::vector &file_paths, bool loa MS_LOG(ERROR) << "Error in select statement, sql: " << sql << ", error: " << errmsg; sqlite3_free(errmsg); sqlite3_close(db); + db = nullptr; return FAILED; } else { MS_LOG(DEBUG) << "Get " << static_cast(name.size()) << " records from index."; @@ -121,6 +124,7 @@ MSRStatus ShardReader::Init(const std::vector &file_paths, bool loa MS_LOG(ERROR) << "DB file can not match file " << file; sqlite3_free(errmsg); sqlite3_close(db); + db = nullptr; return FAILED; } } @@ -218,7 +222,11 @@ void ShardReader::FileStreamsOperator() { } for (int i = static_cast(database_paths_.size()) - 1; i >= 0; --i) { if (database_paths_[i] != nullptr) { - (void)sqlite3_close(database_paths_[i]); + auto ret = sqlite3_close(database_paths_[i]); + if (ret != SQLITE_OK) { + MS_LOG(ERROR) << "Close db failed. 
Error code: " << ret << "."; + } + database_paths_[i] = nullptr; } } } @@ -346,6 +354,7 @@ MSRStatus ShardReader::ReadAllRowsInShard(int shard_id, const std::string &sql, MS_LOG(ERROR) << "Error in select statement, sql: " << sql << ", error: " << errmsg; sqlite3_free(errmsg); sqlite3_close(db); + db = nullptr; return FAILED; } MS_LOG(INFO) << "Get " << static_cast(labels.size()) << " records from shard " << shard_id << " index."; @@ -399,6 +408,7 @@ void ShardReader::GetClassesInShard(sqlite3 *db, int shard_id, const std::string if (ret != SQLITE_OK) { sqlite3_free(errmsg); sqlite3_close(db); + db = nullptr; MS_LOG(ERROR) << "Error in select sql statement, sql:" << common::SafeCStr(sql) << ", error: " << errmsg; return; } @@ -523,6 +533,7 @@ std::vector> ShardReader::GetImageOffset(int page_id, int MS_LOG(ERROR) << "Error in select statement, sql: " << sql << ", error: " << errmsg; sqlite3_free(errmsg); sqlite3_close(db); + db = nullptr; return std::vector>(); } else { MS_LOG(DEBUG) << "Get " << static_cast(image_offsets.size()) << "records from index."; @@ -662,6 +673,7 @@ std::pair> ShardReader::GetLabelsFromPage( MS_LOG(ERROR) << "Error in select statement, sql: " << sql << ", error: " << errmsg; sqlite3_free(errmsg); sqlite3_close(db); + db = nullptr; return {FAILED, {}}; } MS_LOG(DEBUG) << "Get " << label_offsets.size() << "records from index."; @@ -698,6 +710,7 @@ std::pair> ShardReader::GetLabels(int page_id, int MS_LOG(ERROR) << "Error in select statement, sql: " << sql << ", error: " << errmsg; sqlite3_free(errmsg); sqlite3_close(db); + db = nullptr; return {FAILED, {}}; } else { MS_LOG(DEBUG) << "Get " << static_cast(labels.size()) << "records from index."; @@ -790,23 +803,51 @@ int64_t ShardReader::GetNumClasses(const std::string &category_field) { } MSRStatus ShardReader::CountTotalRows(const std::vector &file_paths, bool load_dataset, - const std::shared_ptr &op, int64_t *count) { + const std::shared_ptr &ops, int64_t *count, const int num_padded) { 
if (SUCCESS != Init(file_paths, load_dataset)) { return FAILED; } int64_t num_samples = num_rows_; - if (std::dynamic_pointer_cast(op)) { - auto category_op = std::dynamic_pointer_cast(op); - std::string category_field = category_op->GetCategoryField(); - auto num_classes = GetNumClasses(category_field); - num_samples = category_op->GetNumSamples(num_rows_, num_classes); - } else if (std::dynamic_pointer_cast(op)) { - num_samples = op->GetNumSamples(num_rows_, 0); - } else { - } - if (-1 == num_samples) { - MS_LOG(ERROR) << "Failed to get dataset size."; - return FAILED; + bool root = true; + std::stack> stack_ops; + std::shared_ptr op(ops); + while (op != nullptr) { + stack_ops.push(op); + op = op->GetChildOp(); + } + while (!stack_ops.empty()) { + op = stack_ops.top(); + stack_ops.pop(); + if (std::dynamic_pointer_cast(op)) { + num_samples = op->GetNumSamples(num_samples, 0); + if (num_padded > 0 && root == true) { + num_samples += num_padded; + MS_LOG(DEBUG) << "Padding samples work on shuffle sampler."; + root = false; + } + } else if (std::dynamic_pointer_cast(op)) { + auto category_op = std::dynamic_pointer_cast(op); + std::string category_field = category_op->GetCategoryField(); + auto num_classes = GetNumClasses(category_field); + num_samples = category_op->GetNumSamples(num_samples, num_classes); + } else if (std::dynamic_pointer_cast(op)) { + if (std::dynamic_pointer_cast(op)) { + auto sampler_op = std::dynamic_pointer_cast(op); + if (root == true) { + sampler_op->SetNumPaddedSamples(num_padded); + num_samples = op->GetNumSamples(num_samples, 0); + if (-1 == num_samples) { + MS_LOG(ERROR) << "Dataset size plus number of padded samples is not divisible by number of shards."; + return FAILED; + } + root = false; + } + } else { + num_samples = op->GetNumSamples(num_samples, 0); + } + } else { + if (num_padded > 0) num_samples += num_padded; + } } *count = num_samples; return SUCCESS; @@ -814,7 +855,8 @@ MSRStatus ShardReader::CountTotalRows(const std::vector 
&file_paths MSRStatus ShardReader::Open(const std::vector &file_paths, bool load_dataset, int n_consumer, const std::vector &selected_columns, - const std::vector> &operators, const bool &block_reader) { + const std::vector> &operators, const bool &block_reader, + int num_padded) { // Open file and set header by ShardReader auto ret = Init(file_paths, load_dataset); if (SUCCESS != ret) { @@ -844,6 +886,7 @@ MSRStatus ShardReader::Open(const std::vector &file_paths, bool loa // Initialize argument shard_count_ = static_cast(file_paths_.size()); n_consumer_ = n_consumer; + num_padded_ = num_padded; operators_ = operators; @@ -935,7 +978,7 @@ MSRStatus ShardReader::CreateTasksByBlock(const std::vector(rg); auto group_id = std::get<1>(rg); auto n_Rows = std::get<3>(rg); - tasks_.InsertTask(shard_id, group_id, std::vector{n_Rows}, json{}); + tasks_.InsertTask(TaskType::kCommonTask, shard_id, group_id, std::vector{n_Rows}, json{}); } return SUCCESS; } @@ -986,7 +1029,7 @@ MSRStatus ShardReader::CreateTasksByCategory(const std::vector(details)[iStart], + categoryTasks[categoryNo].InsertTask(TaskType::kCommonTask, shard_id, group_id, std::get<4>(details)[iStart], std::get<5>(details)[iStart]); category_index++; } @@ -1014,7 +1057,7 @@ MSRStatus ShardReader::CreateTasksByRow(const std::vector{offsets[shard_id][i][2], offsets[shard_id][i][3]}, local_columns[shard_id][i]); } @@ -1044,6 +1087,11 @@ MSRStatus ShardReader::CreateTasks(const std::vector 0) { + for (int i = 0; i < num_padded_; ++i) { + tasks_.InsertTask(TaskType::kPaddedTask, 0, 0, {}, json()); + } + } } else { if (SUCCESS != CreateTasksByCategory(row_group_summary, operators[category_operator])) { return FAILED; @@ -1070,18 +1118,27 @@ MSRStatus ShardReader::CreateTasks(const std::vector= static_cast(tasks_.Size())) { - return std::make_pair(FAILED, std::vector, json>>()); + return std::make_pair(FAILED, + std::make_pair(TaskType::kCommonTask, std::vector, json>>())); } // Pick up task from task list auto task = 
tasks_.GetTaskByID(tasks_.permutation_[task_id]); - auto shard_id = std::get<0>(std::get<0>(task)); - auto group_id = std::get<1>(std::get<0>(task)); - auto addr = std::get<1>(task); + // check task type + auto task_type = std::get<0>(task); + if (task_type == TaskType::kPaddedTask) { + return std::make_pair(SUCCESS, + std::make_pair(TaskType::kPaddedTask, std::vector, json>>())); + } + + auto shard_id = std::get<0>(std::get<1>(task)); + auto group_id = std::get<1>(std::get<1>(task)); + auto addr = std::get<2>(task); const auto &ret = shard_header_->GetPageByGroupId(group_id, shard_id); if (SUCCESS != ret.first) { - return std::make_pair(FAILED, std::vector, json>>()); + return std::make_pair(FAILED, + std::make_pair(TaskType::kCommonTask, std::vector, json>>())); } const std::shared_ptr &page = ret.second; @@ -1093,7 +1150,8 @@ TASK_RETURN_CONTENT ShardReader::ConsumerOneTask(int task_id, uint32_t consumer_ if (!io_seekg.good() || io_seekg.fail() || io_seekg.bad()) { MS_LOG(ERROR) << "File seekg failed"; file_streams_random_[consumer_id][shard_id]->close(); - return std::make_pair(FAILED, std::vector, json>>()); + return std::make_pair(FAILED, + std::make_pair(TaskType::kCommonTask, std::vector, json>>())); } auto &io_read = @@ -1101,14 +1159,15 @@ TASK_RETURN_CONTENT ShardReader::ConsumerOneTask(int task_id, uint32_t consumer_ if (!io_read.good() || io_read.fail() || io_read.bad()) { MS_LOG(ERROR) << "File read failed"; file_streams_random_[consumer_id][shard_id]->close(); - return std::make_pair(FAILED, std::vector, json>>()); + return std::make_pair(FAILED, + std::pair(TaskType::kCommonTask, std::vector, json>>())); } // Deliver batch data to output map std::vector, json>> batch; - batch.emplace_back(std::move(images), std::move(std::get<2>(task))); + batch.emplace_back(std::move(images), std::move(std::get<3>(task))); - return std::make_pair(SUCCESS, std::move(batch)); + return std::make_pair(SUCCESS, std::make_pair(TaskType::kCommonTask, std::move(batch))); } 
MSRStatus ShardReader::ConsumerByRow(int consumer_id) { @@ -1133,7 +1192,7 @@ MSRStatus ShardReader::ConsumerByRow(int consumer_id) { if (SUCCESS != ret.first) { return FAILED; } - const auto &batch = ret.second; + const auto &batch = (ret.second).second; // Hanging if maximum map size exceeded // otherwise, set batch data in map { @@ -1193,8 +1252,8 @@ MSRStatus ShardReader::ConsumerByBlock(int consumer_id) { // Pick up task from task list auto task = tasks_.GetTaskByID(tasks_.permutation_[task_id]); - auto shard_id = std::get<0>(std::get<0>(task)); - auto group_id = std::get<1>(std::get<0>(task)); + auto shard_id = std::get<0>(std::get<1>(task)); + auto group_id = std::get<1>(std::get<1>(task)); auto row_group_brief = ReadRowGroupBrief(group_id, shard_id, selected_columns_); if (SUCCESS != std::get<0>(row_group_brief)) { return FAILED; @@ -1302,17 +1361,17 @@ std::vector, json>> ShardReader::GetNext() { return *res; } -std::vector, json>> ShardReader::GetNextById(const int64_t &task_id, - const int32_t &consumer_id) { +std::pair, json>>> ShardReader::GetNextById( + const int64_t &task_id, const int32_t &consumer_id) { if (interrupt_) { - return std::vector, json>>(); + return std::make_pair(TaskType::kCommonTask, std::vector, json>>()); } if (block_reader_) { - return GetBlockNext(); + return std::make_pair(TaskType::kCommonTask, GetBlockNext()); } const auto &ret = ConsumerOneTask(task_id, consumer_id); if (SUCCESS != ret.first) { - return std::vector, json>>(); + return std::make_pair(TaskType::kCommonTask, std::vector, json>>()); } return std::move(ret.second); } @@ -1364,12 +1423,26 @@ void ShardReader::Reset() { } void ShardReader::ShuffleTask() { + if (block_reader_) return; + // exist shuffle and distributed sampler in ops, skip shuffle + bool has_sharding = false; for (const auto &op : operators_) { - if (block_reader_ || !std::dynamic_pointer_cast(op)) continue; - if (SUCCESS != (*op)(tasks_)) { - MS_LOG(WARNING) << "Reshuffle reader tasks failed."; + if 
(std::dynamic_pointer_cast(op)) { + has_sharding = true; } } + for (const auto &op : operators_) { + if (std::dynamic_pointer_cast(op) && has_sharding == false) { + if (SUCCESS != (*op)(tasks_)) { + MS_LOG(WARNING) << "Redo randomSampler failed."; + } + } else if (std::dynamic_pointer_cast(op)) { + if (SUCCESS != (*op)(tasks_)) { + MS_LOG(WARNING) << "Redo distributeSampler failed."; + } + } + } + if (tasks_.permutation_.empty()) tasks_.MakePerm(); } } // namespace mindrecord diff --git a/mindspore/ccsrc/mindrecord/io/shard_segment.cc b/mindspore/ccsrc/mindrecord/io/shard_segment.cc index 86c79ca05a..fb1120b178 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_segment.cc +++ b/mindspore/ccsrc/mindrecord/io/shard_segment.cc @@ -43,6 +43,7 @@ std::pair> ShardSegment::GetCategoryFields() { MS_LOG(ERROR) << "Error in select statement, sql: " << sql << ", error: " << errmsg; sqlite3_free(errmsg); sqlite3_close(database_paths_[0]); + database_paths_[0] = nullptr; return {FAILED, vector{}}; } else { MS_LOG(INFO) << "Get " << static_cast(field_names.size()) << " records from index."; @@ -53,6 +54,7 @@ std::pair> ShardSegment::GetCategoryFields() { if (field_names[idx].size() < 2) { sqlite3_free(errmsg); sqlite3_close(database_paths_[0]); + database_paths_[0] = nullptr; return {FAILED, vector{}}; } candidate_category_fields_.push_back(field_names[idx][1]); @@ -107,6 +109,7 @@ std::pair>> ShardSegmen MS_LOG(ERROR) << "Error in select statement, sql: " << sql << ", error: " << errmsg; sqlite3_free(errmsg); sqlite3_close(db); + db = nullptr; return {FAILED, std::vector>()}; } else { MS_LOG(INFO) << "Get " << static_cast(field_count.size()) << " records from index."; diff --git a/mindspore/ccsrc/mindrecord/io/shard_writer.cc b/mindspore/ccsrc/mindrecord/io/shard_writer.cc index 9756b475e5..913caab550 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_writer.cc +++ b/mindspore/ccsrc/mindrecord/io/shard_writer.cc @@ -90,7 +90,7 @@ MSRStatus ShardWriter::OpenDataFiles(bool append) { 
fs->close(); // open the mindrecord file to write - fs->open(common::SafeCStr(file), std::ios::out | std::ios::binary); + fs->open(common::SafeCStr(file), std::ios::out | std::ios::in | std::ios::binary | std::ios::trunc); if (!fs->good()) { MS_LOG(ERROR) << "MindRecord file could not opened."; return FAILED; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_category.cc b/mindspore/ccsrc/mindrecord/meta/shard_category.cc index dfca92a08c..bd427a330a 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_category.cc +++ b/mindspore/ccsrc/mindrecord/meta/shard_category.cc @@ -41,7 +41,7 @@ int64_t ShardCategory::GetNumSamples(int64_t dataset_size, int64_t num_classes) if (dataset_size > 0 && num_classes > 0 && num_categories_ > 0 && num_elements_ > 0) { return std::min(num_categories_, num_classes) * num_elements_; } - return -1; + return 0; } } // namespace mindrecord } // namespace mindspore diff --git a/mindspore/ccsrc/mindrecord/meta/shard_column.cc b/mindspore/ccsrc/mindrecord/meta/shard_column.cc index 86ad0c96d7..28dc243e17 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_column.cc +++ b/mindspore/ccsrc/mindrecord/meta/shard_column.cc @@ -66,9 +66,28 @@ ShardColumn::ShardColumn(const std::shared_ptr &shard_header, bool num_blob_column_ = blob_column_.size(); } +std::pair ShardColumn::GetColumnTypeByName(const std::string &column_name, + ColumnDataType *column_data_type, + uint64_t *column_data_type_size, + std::vector *column_shape) { + // Skip if column not found + auto column_category = CheckColumnName(column_name); + if (column_category == ColumnNotFound) { + return {FAILED, ColumnNotFound}; + } + + // Get data type and size + auto column_id = column_name_id_[column_name]; + *column_data_type = column_data_type_[column_id]; + *column_data_type_size = ColumnDataTypeSize[*column_data_type]; + *column_shape = column_shape_[column_id]; + + return {SUCCESS, column_category}; +} + MSRStatus ShardColumn::GetColumnValueByName(const std::string &column_name, const 
std::vector &columns_blob, const json &columns_json, const unsigned char **data, - std::unique_ptr *data_ptr, uint64_t *n_bytes, + std::unique_ptr *data_ptr, uint64_t *const n_bytes, ColumnDataType *column_data_type, uint64_t *column_data_type_size, std::vector *column_shape) { // Skip if column not found @@ -231,7 +250,7 @@ MSRStatus ShardColumn::GetInt(std::unique_ptr *data_ptr, const MSRStatus ShardColumn::GetColumnFromBlob(const std::string &column_name, const std::vector &columns_blob, const unsigned char **data, std::unique_ptr *data_ptr, - uint64_t *n_bytes) { + uint64_t *const n_bytes) { uint64_t offset_address = 0; auto column_id = column_name_id_[column_name]; if (GetColumnAddressInBlock(column_id, columns_blob, n_bytes, &offset_address) == FAILED) { @@ -304,7 +323,7 @@ std::vector ShardColumn::CompressBlob(const std::vector &blob) } vector ShardColumn::CompressInt(const vector &src_bytes, const IntegerType &int_type) { - uint64_t i_size = kUnsignedOne << int_type; + uint64_t i_size = kUnsignedOne << static_cast(int_type); // Get number of elements uint64_t src_n_int = src_bytes.size() / i_size; // Calculate bitmap size (bytes) @@ -325,20 +344,20 @@ vector ShardColumn::CompressInt(const vector &src_bytes, const // Initialize destination data type IntegerType dst_int_type = kInt8Type; // Shift to next int position - uint64_t pos = i * (kUnsignedOne << int_type); + uint64_t pos = i * (kUnsignedOne << static_cast(int_type)); // Narrow down this int int64_t i_n = BytesLittleToMinIntType(src_bytes, pos, int_type, &dst_int_type); // Write this int to destination blob uint64_t u_n = *reinterpret_cast(&i_n); auto temp_bytes = UIntToBytesLittle(u_n, dst_int_type); - for (uint64_t j = 0; j < (kUnsignedOne << dst_int_type); j++) { + for (uint64_t j = 0; j < (kUnsignedOne << static_cast(dst_int_type)); j++) { dst_bytes[i_dst++] = temp_bytes[j]; } // Update date type in bit map dst_bytes[i / kNumDataOfByte + kBytesOfColumnLen] |= - (dst_int_type << (kDataTypeBits * 
(kNumDataOfByte - kUnsignedOne - (i % kNumDataOfByte)))); + (static_cast(dst_int_type) << (kDataTypeBits * (kNumDataOfByte - kUnsignedOne - (i % kNumDataOfByte)))); } // Resize destination blob dst_bytes.resize(i_dst); @@ -366,7 +385,7 @@ MSRStatus ShardColumn::GetColumnAddressInBlock(const uint64_t &column_id, const } template -MSRStatus ShardColumn::UncompressInt(const uint64_t &column_id, std::unique_ptr *data_ptr, +MSRStatus ShardColumn::UncompressInt(const uint64_t &column_id, std::unique_ptr *const data_ptr, const std::vector &columns_blob, uint64_t *num_bytes, uint64_t shift_idx) { auto num_elements = BytesBigToUInt64(columns_blob, shift_idx, kInt32Type); @@ -387,7 +406,10 @@ MSRStatus ShardColumn::UncompressInt(const uint64_t &column_id, std::unique_ptr< auto data = reinterpret_cast(array_data.get()); *data_ptr = std::make_unique(*num_bytes); - memcpy(data_ptr->get(), data, *num_bytes); + int ret_code = memcpy_s(data_ptr->get(), *num_bytes, data, *num_bytes); + if (ret_code != 0) { + MS_LOG(ERROR) << "Failed to copy data!"; + } return SUCCESS; } @@ -395,14 +417,14 @@ MSRStatus ShardColumn::UncompressInt(const uint64_t &column_id, std::unique_ptr< uint64_t ShardColumn::BytesBigToUInt64(const std::vector &bytes_array, const uint64_t &pos, const IntegerType &i_type) { uint64_t result = 0; - for (uint64_t i = 0; i < (kUnsignedOne << i_type); i++) { + for (uint64_t i = 0; i < (kUnsignedOne << static_cast(i_type)); i++) { result = (result << kBitsOfByte) + bytes_array[pos + i]; } return result; } std::vector ShardColumn::UIntToBytesBig(uint64_t value, const IntegerType &i_type) { - uint64_t n_bytes = kUnsignedOne << i_type; + uint64_t n_bytes = kUnsignedOne << static_cast(i_type); std::vector result(n_bytes, 0); for (uint64_t i = 0; i < n_bytes; i++) { result[n_bytes - 1 - i] = value & std::numeric_limits::max(); @@ -412,7 +434,7 @@ std::vector ShardColumn::UIntToBytesBig(uint64_t value, const IntegerTy } std::vector ShardColumn::UIntToBytesLittle(uint64_t value, 
const IntegerType &i_type) { - uint64_t n_bytes = kUnsignedOne << i_type; + uint64_t n_bytes = kUnsignedOne << static_cast(i_type); std::vector result(n_bytes, 0); for (uint64_t i = 0; i < n_bytes; i++) { result[i] = value & std::numeric_limits::max(); @@ -424,8 +446,9 @@ std::vector ShardColumn::UIntToBytesLittle(uint64_t value, const Intege int64_t ShardColumn::BytesLittleToMinIntType(const std::vector &bytes_array, const uint64_t &pos, const IntegerType &src_i_type, IntegerType *dst_i_type) { uint64_t u_temp = 0; - for (uint64_t i = 0; i < (kUnsignedOne << src_i_type); i++) { - u_temp = (u_temp << kBitsOfByte) + bytes_array[pos + (kUnsignedOne << src_i_type) - kUnsignedOne - i]; + for (uint64_t i = 0; i < (kUnsignedOne << static_cast(src_i_type)); i++) { + u_temp = (u_temp << kBitsOfByte) + + bytes_array[pos + (kUnsignedOne << static_cast(src_i_type)) - kUnsignedOne - i]; } int64_t i_out; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_distributed_sample.cc b/mindspore/ccsrc/mindrecord/meta/shard_distributed_sample.cc new file mode 100644 index 0000000000..b7e890da7c --- /dev/null +++ b/mindspore/ccsrc/mindrecord/meta/shard_distributed_sample.cc @@ -0,0 +1,79 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mindrecord/include/shard_distributed_sample.h" + +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::ERROR; + +namespace mindspore { +namespace mindrecord { +ShardDistributedSample::ShardDistributedSample(int num_shards, int shard_id, int no_of_padded_samples, bool shuffle, + uint32_t seed) + : ShardSample(1, num_shards, shard_id), + shuffle_(shuffle), + no_of_padded_samples_(no_of_padded_samples), + first_epoch_(true) { + shuffle_op_ = std::make_shared(seed, kShuffleSample); +} + +ShardDistributedSample::ShardDistributedSample(int num_shards, int shard_id, bool shuffle, uint32_t seed) + : ShardDistributedSample(num_shards, shard_id, 0, shuffle, seed) {} + +int64_t ShardDistributedSample::GetNumSamples(int64_t dataset_size, int64_t num_classes) { + if (no_of_padded_samples_ <= 0) { + if (dataset_size % denominator_ == 0) { + return dataset_size / denominator_ * numerator_; + } else { + return dataset_size / denominator_ * numerator_ + 1; + } + } else { + auto padded_size = dataset_size + no_of_padded_samples_; + if (padded_size % denominator_ == 0) { + return padded_size / denominator_ * numerator_; + } else { + return -1; + } + } + return 0; +} + +MSRStatus ShardDistributedSample::PreExecute(ShardTask &tasks) { + auto total_no = tasks.Size(); + if (no_of_padded_samples_ > 0 && first_epoch_) { + if (total_no % denominator_ != 0) { + MS_LOG(ERROR) << "Dataset size plus number of padded samples is not divisible by number of shards. 
" + << "task size: " << total_no << ", number padded: " << no_of_padded_samples_ + << ", denominator: " << denominator_; + return FAILED; + } + } + if (first_epoch_) { + first_epoch_ = false; + task_ = tasks; + } else { + tasks = task_; + } + if (shuffle_ == true) { + if (SUCCESS != (*shuffle_op_)(tasks)) { + return FAILED; + } + } + return SUCCESS; +} +} // namespace mindrecord +} // namespace mindspore diff --git a/mindspore/ccsrc/mindrecord/meta/shard_sample.cc b/mindspore/ccsrc/mindrecord/meta/shard_sample.cc index d7842a11a3..c207747194 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_sample.cc +++ b/mindspore/ccsrc/mindrecord/meta/shard_sample.cc @@ -25,32 +25,32 @@ namespace mindrecord { ShardSample::ShardSample(int n) : numerator_(0), denominator_(0), - no_of_samples_(n), partition_id_(0), + no_of_samples_(n), indices_({}), sampler_type_(kCustomTopNSampler) {} ShardSample::ShardSample(int num, int den) : numerator_(num), denominator_(den), - no_of_samples_(0), partition_id_(0), + no_of_samples_(0), indices_({}), sampler_type_(kCustomTopPercentSampler) {} ShardSample::ShardSample(int num, int den, int par) : numerator_(num), denominator_(den), - no_of_samples_(0), partition_id_(par), + no_of_samples_(0), indices_({}), sampler_type_(kCustomTopPercentSampler) {} ShardSample::ShardSample(const std::vector &indices, uint32_t seed) : numerator_(0), denominator_(0), - no_of_samples_(0), partition_id_(0), + no_of_samples_(0), indices_(indices), sampler_type_(kSubsetRandomSampler) { shuffle_op_ = std::make_shared(seed); @@ -71,19 +71,12 @@ int64_t ShardSample::GetNumSamples(int64_t dataset_size, int64_t num_classes) { if (sampler_type_ == kSubsetRandomSampler) { return indices_.size(); } - return -1; -} - -const std::pair ShardSample::GetPartitions() const { - if (numerator_ == 1 && denominator_ > 1) { - return std::pair(denominator_, partition_id_); - } - return std::pair(-1, -1); + return 0; } MSRStatus ShardSample::Execute(ShardTask &tasks) { int 
no_of_categories = static_cast(tasks.categories); - int total_no = static_cast(tasks.Size()); + int total_no = static_cast(tasks.Size()); // make sure task_size int taking = 0; if (sampler_type_ == kCustomTopNSampler) { // non sharding case constructor #1 @@ -97,7 +90,7 @@ MSRStatus ShardSample::Execute(ShardTask &tasks) { } else { // constructor TopPercent if (numerator_ > 0 && denominator_ > 0 && numerator_ <= denominator_) { if (numerator_ == 1 && denominator_ > 1) { // sharding - taking = (total_no / denominator_) + (total_no % denominator_ == 0 ? 0 : 1); + taking = (total_no + denominator_ - 1) / denominator_; } else { // non sharding taking = total_no * numerator_ / denominator_; taking -= (taking % no_of_categories); diff --git a/mindspore/ccsrc/mindrecord/meta/shard_sequential_sample.cc b/mindspore/ccsrc/mindrecord/meta/shard_sequential_sample.cc new file mode 100644 index 0000000000..a7fa4e7343 --- /dev/null +++ b/mindspore/ccsrc/mindrecord/meta/shard_sequential_sample.cc @@ -0,0 +1,74 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mindrecord/include/shard_sequential_sample.h" + +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::ERROR; + +namespace mindspore { +namespace mindrecord { +ShardSequentialSample::ShardSequentialSample(int n, int offset) + : ShardSample(n), offset_(offset), per_(0.0f), per_offset_(0.0f) {} + +ShardSequentialSample::ShardSequentialSample(float per, float per_offset) + : ShardSample(0), offset_(0), per_(per), per_offset_(per_offset) {} + +int64_t ShardSequentialSample::GetNumSamples(int64_t dataset_size, int64_t num_classes) { + if (no_of_samples_ == 0 && (per_ >= -kEpsilon && per_ <= kEpsilon)) { + return dataset_size; + } + if (per_ > kEpsilon && per_ <= 1.0f) { + return dataset_size * kEpsilon; + } + return no_of_samples_; +} + +MSRStatus ShardSequentialSample::Execute(ShardTask &tasks) { + int total_no = static_cast(tasks.Size()); + int taking; + if (no_of_samples_ == 0 && (per_ >= -kEpsilon && per_ <= kEpsilon)) { + taking = total_no; + } else if (per_ > kEpsilon && per_ <= 1.0f) { + taking = total_no * kEpsilon; + } else { + taking = no_of_samples_; + } + + if (tasks.permutation_.empty()) { + ShardTask new_tasks; + total_no = static_cast(tasks.Size()); + for (int i = offset_; i < taking + offset_; ++i) { + new_tasks.InsertTask(tasks.GetTaskByID(i % total_no)); + } + std::swap(tasks, new_tasks); + } else { // shuffled + ShardTask new_tasks; + if (taking > static_cast(tasks.permutation_.size())) { + return FAILED; + } + total_no = static_cast(tasks.permutation_.size()); + for (size_t i = offset_; i < taking + offset_; ++i) { + new_tasks.InsertTask(tasks.GetTaskByID(tasks.permutation_[i % total_no])); + } + std::swap(tasks, new_tasks); + } + return SUCCESS; +} + +} // namespace mindrecord +} // namespace mindspore diff --git a/mindspore/ccsrc/mindrecord/meta/shard_shuffle.cc b/mindspore/ccsrc/mindrecord/meta/shard_shuffle.cc index d33400ef38..5cf49b04f0 100644 --- 
a/mindspore/ccsrc/mindrecord/meta/shard_shuffle.cc +++ b/mindspore/ccsrc/mindrecord/meta/shard_shuffle.cc @@ -21,17 +21,53 @@ namespace mindspore { namespace mindrecord { ShardShuffle::ShardShuffle(uint32_t seed, ShuffleType shuffle_type) - : shuffle_seed_(seed), shuffle_type_(shuffle_type) {} + : shuffle_seed_(seed), + no_of_samples_(0), + replacement_(false), + reshuffle_each_epoch_(true), + shuffle_type_(shuffle_type) {} + +ShardShuffle::ShardShuffle(uint32_t seed, int64_t no_of_samples, bool replacement, bool reshuffle_each_epoch, + ShuffleType shuffle_type) + : shuffle_seed_(seed), + no_of_samples_(no_of_samples), + replacement_(replacement), + reshuffle_each_epoch_(reshuffle_each_epoch), + shuffle_type_(shuffle_type) {} + +int64_t ShardShuffle::GetNumSamples(int64_t dataset_size, int64_t num_classes) { + if (replacement_) { + return no_of_samples_ == 0 ? dataset_size : no_of_samples_; + } + return dataset_size; +} MSRStatus ShardShuffle::Execute(ShardTask &tasks) { + if (reshuffle_each_epoch_) shuffle_seed_++; if (tasks.categories < 1) { return FAILED; } - if (shuffle_type_ == kShuffleSample) { + if (shuffle_type_ == kShuffleSample) { // shuffle each sample if (tasks.permutation_.empty() == true) { tasks.MakePerm(); } - std::shuffle(tasks.permutation_.begin(), tasks.permutation_.end(), std::default_random_engine(shuffle_seed_)); + if (replacement_ == true) { + ShardTask new_tasks; + if (no_of_samples_ == 0) { + no_of_samples_ = static_cast(tasks.Size()); + } + if (no_of_samples_ <= 0) { + MS_LOG(ERROR) << "no_of_samples need to be positive."; + return FAILED; + } + new_tasks.task_list_.reserve(no_of_samples_); + for (uint32_t i = 0; i < no_of_samples_; ++i) { + new_tasks.InsertTask(tasks.GetRandomTask()); + } + std::swap(tasks, new_tasks); + } else { + std::shuffle(tasks.permutation_.begin(), tasks.permutation_.end(), std::default_random_engine(shuffle_seed_)); + } } else { // shuffle unit like: (a1, b1, c1),(a2, b2, c2),..., (an, bn, cn) uint32_t 
individual_size = tasks.Size() / tasks.categories; std::vector> new_permutations(tasks.categories, std::vector(individual_size)); @@ -46,7 +82,6 @@ MSRStatus ShardShuffle::Execute(ShardTask &tasks) { } } } - shuffle_seed_++; return SUCCESS; } } // namespace mindrecord diff --git a/mindspore/ccsrc/mindrecord/meta/shard_task.cc b/mindspore/ccsrc/mindrecord/meta/shard_task.cc index 3abc725a7b..8baa3c26cd 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_task.cc +++ b/mindspore/ccsrc/mindrecord/meta/shard_task.cc @@ -24,6 +24,19 @@ using mindspore::MsLogLevel::DEBUG; namespace mindspore { namespace mindrecord { +ShardTask::ShardTask() : categories(1) {} + +ShardTask::ShardTask(const ShardTask &other) + : categories(other.categories), permutation_(other.permutation_), task_list_(other.task_list_) {} + +ShardTask &ShardTask::operator=(const ShardTask &other) { + ShardTask tmp(other); + std::swap(categories, tmp.categories); + permutation_.swap(tmp.permutation_); + task_list_.swap(tmp.task_list_); + return *this; +} + void ShardTask::MakePerm() { permutation_ = std::vector(task_list_.size()); for (uint32_t i = 0; i < task_list_.size(); i++) { @@ -31,16 +44,18 @@ void ShardTask::MakePerm() { } } -void ShardTask::InsertTask(int shard_id, int group_id, const std::vector &offset, const json &label) { +void ShardTask::InsertTask(TaskType task_type, int shard_id, int group_id, const std::vector &offset, + const json &label) { MS_LOG(DEBUG) << "Into insert task, shard_id: " << shard_id << ", group_id: " << group_id << ", label: " << label.dump() << ", size of task_list_: " << task_list_.size() << "."; - task_list_.emplace_back(std::make_tuple(shard_id, group_id), offset, label); + task_list_.emplace_back(task_type, std::make_tuple(shard_id, group_id), offset, label); } -void ShardTask::InsertTask(std::tuple, std::vector, json> task) { - MS_LOG(DEBUG) << "Into insert task, shard_id: " << std::get<0>(std::get<0>(task)) - << ", group_id: " << std::get<1>(std::get<0>(task)) << ", 
label: " << std::get<2>(task).dump() +void ShardTask::InsertTask(std::tuple, std::vector, json> task) { + MS_LOG(DEBUG) << "Into insert task, shard_id: " << std::get<0>(std::get<1>(task)) + << ", group_id: " << std::get<1>(std::get<1>(task)) << ", label: " << std::get<3>(task).dump() << ", size of task_list_: " << task_list_.size() << "."; + task_list_.push_back(std::move(task)); } @@ -52,24 +67,25 @@ uint32_t ShardTask::SizeOfRows() const { if (task_list_.size() == 0) return static_cast(0); // 1 task is 1 page - auto sum_num_rows = [](int x, std::tuple, std::vector, json> y) { - return x + std::get<1>(y)[0]; + auto sum_num_rows = [](int x, std::tuple, std::vector, json> y) { + return x + std::get<2>(y)[0]; }; uint32_t nRows = std::accumulate(task_list_.begin(), task_list_.end(), 0, sum_num_rows); return nRows; } -std::tuple, std::vector, json> &ShardTask::GetTaskByID(size_t id) { +std::tuple, std::vector, json> &ShardTask::GetTaskByID(size_t id) { MS_ASSERT(id < task_list_.size()); return task_list_[id]; } -std::tuple, std::vector, json> &ShardTask::GetRandomTask() { +std::tuple, std::vector, json> &ShardTask::GetRandomTask() { std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<> dis(0, task_list_.size() - 1); return task_list_[dis(gen)]; } + ShardTask ShardTask::Combine(std::vector &category_tasks, bool replacement, int64_t num_elements) { ShardTask res; if (category_tasks.empty()) return res; diff --git a/mindspore/ccsrc/minnie/tensor_minnie.h b/mindspore/ccsrc/minnie/tensor_minnie.h deleted file mode 100644 index 1d4ff705d2..0000000000 --- a/mindspore/ccsrc/minnie/tensor_minnie.h +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef MINDSPORE_CCSRC_MINNIE_TENSOR_MINNIE_H_ -#define MINDSPORE_CCSRC_MINNIE_TENSOR_MINNIE_H_ - -#include - -#include "ir/meta_tensor.h" - -namespace mindspore { -namespace tensor { -// definition of Tensor Minnie -class TensorMinnie : public MetaTensor { - public: - TensorMinnie() : MetaTensor() {} - ~TensorMinnie() override = default; - MS_DECLARE_PARENT(TensorMinnie, MetaTensor) - - // brief Overloads operator = for TensorMinnie. - // - // The constructed TensorMinnie object has the same type and shape with tensor_base. - // - // param meta_tensor An existing TensorMinnie object. - virtual TensorMinnie &operator=(const TensorMinnie &tensor); - - // brief Compares two TensorMinnie objects. - // - // The constructed TensorMinnie object has the same type and shape with tensor_base. - // - // param meta_tensor The TensorMinnie object to be compared. - // return true: If having same type and shape, return true, or return false. - virtual bool operator==(const TensorMinnie &tensor); - - // brief Get the tensor's size for C++ - // - // return size_t - size_t tensor_size() const { return tensor_size_; } - - // brief Set Tensor data size for c++ type - void set_tensor_size(size_t size) { tensor_size_ = size; } - - // brief Get Tensor data pointer for c++ type - // - // return The pointer to the object - void *tensor_addr() const { return tensor_addr_; } - - // brief Set Tensor data pointer for c++ type - void set_tensor_addr(void *addr) { tensor_addr_ = addr; } - - protected: - // brief Data addr of the tensor. 
- void *tensor_addr_; - - // brief Data size of the tensor. - size_t tensor_size_; -}; - -using TensorMinniePtr = std::shared_ptr; - -} // namespace tensor -} // namespace mindspore - -#endif // MINDSPORE_CCSRC_MINNIE_TENSOR_MINNIE_H_ diff --git a/mindspore/ccsrc/onnx/ir_exporter.cc b/mindspore/ccsrc/onnx/ir_exporter.cc new file mode 100644 index 0000000000..d74233d79a --- /dev/null +++ b/mindspore/ccsrc/onnx/ir_exporter.cc @@ -0,0 +1,621 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ir/param_value_py.h" +#include "debug/anf_ir_utils.h" +#include "operator/ops.h" +#include "proto/onnx.pb.h" + +namespace mindspore { +using FloatPtr = std::shared_ptr; +using IntPtr = std::shared_ptr; + +// anf type to onnx type map +static std::unordered_map g_data_type_map = { + {kNumberTypeBool, onnx::TensorProto_DataType_BOOL}, {kNumberTypeInt8, onnx::TensorProto_DataType_INT8}, + {kNumberTypeInt16, onnx::TensorProto_DataType_INT16}, {kNumberTypeInt32, onnx::TensorProto_DataType_INT32}, + {kNumberTypeInt64, onnx::TensorProto_DataType_INT64}, {kNumberTypeUInt8, onnx::TensorProto_DataType_UINT8}, + {kNumberTypeUInt16, onnx::TensorProto_DataType_UINT16}, {kNumberTypeUInt32, onnx::TensorProto_DataType_UINT32}, + {kNumberTypeUInt64, onnx::TensorProto_DataType_UINT64}, {kNumberTypeFloat16, onnx::TensorProto_DataType_FLOAT16}, + {kNumberTypeFloat32, onnx::TensorProto_DataType_FLOAT}, {kNumberTypeFloat64, onnx::TensorProto_DataType_DOUBLE}, + {kObjectTypeString, onnx::TensorProto_DataType_STRING}, +}; + +static std::unordered_map g_data_bits_int_map = { + {8, onnx::TensorProto_DataType_INT8}, + {16, onnx::TensorProto_DataType_INT16}, + {32, onnx::TensorProto_DataType_INT32}, + {64, onnx::TensorProto_DataType_INT64}, +}; + +static std::unordered_map g_data_bits_float_map = { + {16, onnx::TensorProto_DataType_FLOAT16}, + {32, onnx::TensorProto_DataType_FLOAT}, +}; + +// Can build different builder according to format +class IrExportBuilder; +using IrExportBuilderPtr = std::shared_ptr; + +class IrExporter { + public: + explicit IrExporter(IrExportBuilderPtr builder) : builder_(builder) {} + virtual ~IrExporter() = default; + std::string GetDumpString(const FuncGraphPtr &func_graph); + + private: + IrExportBuilderPtr builder_; +}; + +class IrExportBuilder { + public: + IrExportBuilder() = default; + ~IrExportBuilder() { google::protobuf::ShutdownProtobufLibrary(); } + 
std::string GetProtoString(const FuncGraphPtr &func_graph); + void BuildModelInfo(); + void BuildModel(const FuncGraphPtr &func_graph); + + private: + void BuildFuncGraph(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto); + void BuildParameters(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto); + void BuildNodes(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto); + void BuildOutput(const CNodePtr &node, onnx::GraphProto *const graph_proto); + void BuildCNode(const CNodePtr &node, onnx::GraphProto *const graph_proto); + std::string BuildInputNode(const AnfNodePtr &node, onnx::GraphProto *const graph_proto); + + void SetValueInfoProto(const AnfNodePtr &node, onnx::ValueInfoProto *const value_proto); + void SetValueInfoProto(const TypePtr &type, const BaseShapePtr &shape, onnx::ValueInfoProto *const value_proto); + void SetParamToTensorProto(const ParameterPtr ¶m, onnx::TensorProto *const tensor_proto); + void SetTensorProto(const TypePtr &type, const BaseShapePtr &shape, onnx::TensorProto *const tensor_proto); + void SetAttributeProto(const AnfNodePtr &node, onnx::NodeProto *const node_proto); + void SetShapeToNodeProto(const CNodePtr &node, onnx::NodeProto *const node_proto); + void SetShapeToNodeProto(const TypePtr &type, const BaseShapePtr &shape, onnx::NodeProto *const node_proto, + std::string suffix = "0"); + void SetValueToAttributeProto(const ValuePtr &value, onnx::AttributeProto *const attr_proto); + void SetTypeToAttributeProto(const ValuePtr &value, onnx::AttributeProto *const attr_proto); + void SetScalarToAttributeProto(const ValuePtr &value, onnx::AttributeProto *const attr_proto); + void SetTensorToAttributeProto(const ValuePtr &value, onnx::AttributeProto *const attr_proto); + void SetScalarToProto(const ValuePtr &value, onnx::TensorProto *const tensor_proto); + void SetSequenceToAttributeProto(const ValueSequeuePtr &value, onnx::AttributeProto *const attr_proto); + + onnx::TensorProto_DataType 
GetOnnxDataType(TypeId type_id); + onnx::TensorProto_DataType GetOnnxDataBitsIntType(int bits); + onnx::TensorProto_DataType GetOnnxDataBitsFloatType(int bits); + std::string GetNodeName(const AnfNodePtr &node); + std::string GetUniqueNodeName(const AnfNodePtr &node); + std::string GetOpTypeName(const AnfNodePtr &node); + size_t AllocateIndex() { return ++node_index_; } + void ResetIndex() { node_index_ = 0; } + + private: + onnx::ModelProto model_; + onnx::NodeProto *last_node_{nullptr}; + std::list todo_; + std::map node_index_map_; + size_t node_index_{0}; +}; + +using IrExporterPtr = std::shared_ptr; + +std::string IrExporter::GetDumpString(const FuncGraphPtr &func_graph) { + if ((builder_ == nullptr) || (func_graph == nullptr)) { + MS_LOG(EXCEPTION) << "Input params is null."; + } + + // Export model info + builder_->BuildModelInfo(); + + // Export model and return string + builder_->BuildModel(func_graph); + + return builder_->GetProtoString(func_graph); +} + +std::string IrExportBuilder::GetProtoString(const FuncGraphPtr &func_graph) { + MS_LOG(DEBUG) << "BuildModel complete!"; + return model_.SerializeAsString(); +} + +void IrExportBuilder::BuildModelInfo() { + model_.set_ir_version(onnx::IR_VERSION_2019_1_22); + model_.set_producer_name("MindSpore"); + model_.set_model_version(1); +} + +void IrExportBuilder::BuildModel(const FuncGraphPtr &func_graph) { + onnx::GraphProto *graph_proto = model_.mutable_graph(); + graph_proto->set_name(func_graph->ToString()); + ResetIndex(); + todo_.clear(); + todo_.push_back(func_graph); + while (!todo_.empty()) { + FuncGraphPtr fg = todo_.back(); + todo_.pop_back(); + BuildFuncGraph(fg, graph_proto); + } +} + +void IrExportBuilder::BuildFuncGraph(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto) { + // Export parameters + // 1. parameters should be mapped to ValueInfoProto + // 2. 
parameters with default value should be mapped to Initializer + BuildParameters(func_graph, graph_proto); + + // Export operator nodes(include output) + BuildNodes(func_graph, graph_proto); +} + +void IrExportBuilder::BuildParameters(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto) { + for (auto &item : func_graph->parameters()) { + auto param = item->cast(); + if (param == nullptr) { + MS_LOG(EXCEPTION) << "Parameter: '" << item->ToString() << "' could not cast to parameter."; + } + onnx::ValueInfoProto *input_proto = graph_proto->add_input(); + std::string param_name = GetUniqueNodeName(param); + input_proto->set_name(param_name); + SetValueInfoProto(param, input_proto); + if (!param->has_default()) { + MS_LOG(DEBUG) << "Parameter: '" << item->ToString() << "' has no default"; + continue; + } + + // Using ONNX initializer to set parameter's default value + onnx::TensorProto *initializer_proto = graph_proto->add_initializer(); + initializer_proto->set_name(param_name); + SetParamToTensorProto(param, initializer_proto); + auto param_value = std::dynamic_pointer_cast(param->default_param()); + py::object obj = param_value->value(); + py::object data = obj.attr("data"); + if (py::isinstance(data)) { + auto method = data.attr("asnumpy"); + py::array npy_data = method(); + initializer_proto->set_raw_data(npy_data.request(true).ptr, static_cast(npy_data.nbytes())); + } + } +} + +onnx::TensorProto_DataType IrExportBuilder::GetOnnxDataType(TypeId type_id) { + auto iter = g_data_type_map.find(type_id); + if (iter == g_data_type_map.end()) { + MS_LOG(EXCEPTION) << "Convert type error, unsupported type! " << type_id; + } + return iter->second; +} + +onnx::TensorProto_DataType IrExportBuilder::GetOnnxDataBitsIntType(int bits) { + auto iter = g_data_bits_int_map.find(bits); + if (iter == g_data_bits_int_map.end()) { + MS_LOG(EXCEPTION) << "Convert bits int error, unsupported bits! 
" << bits; + } + return iter->second; +} + +onnx::TensorProto_DataType IrExportBuilder::GetOnnxDataBitsFloatType(int bits) { + auto iter = g_data_bits_float_map.find(bits); + if (iter == g_data_bits_float_map.end()) { + MS_LOG(EXCEPTION) << "Convert bits float error, unsupported bits! " << bits; + } + return iter->second; +} + +void IrExportBuilder::SetValueInfoProto(const AnfNodePtr &node, onnx::ValueInfoProto *const value_proto) { + if (node == nullptr || value_proto == nullptr) { + MS_LOG(EXCEPTION) << "AnfNode or ValueInfo is null!"; + } + MS_LOG(DEBUG) << "SetValueInfoProto: " << node->DebugString(); + SetValueInfoProto(node->Type(), node->Shape(), value_proto); +} + +void IrExportBuilder::SetValueInfoProto(const TypePtr &type, const BaseShapePtr &shape, + onnx::ValueInfoProto *const value_proto) { + onnx::TypeProto *type_proto = value_proto->mutable_type(); + if (type->isa() && shape->isa()) { + auto tensor = type->cast(); + auto elem_type = tensor->element(); + const auto &dims = shape->cast()->shape(); + type_proto->mutable_tensor_type()->set_elem_type(GetOnnxDataType(elem_type->type_id())); + for (const auto &dim : dims) { + MS_LOG(DEBUG) << "SetValueInfoProto dim: " << dim; + type_proto->mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(dim); + } + } else if (type->isa()) { + auto tup_shape = shape->cast(); + type_proto->set_denotation(std::to_string(tup_shape->shape().size())); + } else { + MS_LOG(EXCEPTION) << "Value type: " << type->type_name() << " is not supported!"; + } +} + +void IrExportBuilder::SetTensorToAttributeProto(const ValuePtr &value, onnx::AttributeProto *const attr_proto) { + if (value == nullptr || attr_proto == nullptr) { + MS_LOG(EXCEPTION) << "ValuePtr or AttributeProto is null!"; + } + attr_proto->set_ref_attr_name("tensor"); + attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); + onnx::TensorProto *tensor_proto = attr_proto->mutable_t(); + auto data = value->cast(); + 
tensor_proto->set_raw_data(data->data().request(true).ptr, static_cast(data->data().nbytes())); + auto dtype = data->data_type(); + auto shape = data->shape_c(); + tensor_proto->set_data_type(GetOnnxDataType(dtype)); + for (const auto &dim : shape) { + tensor_proto->add_dims(dim); + } +} + +void IrExportBuilder::SetTensorProto(const TypePtr &type, const BaseShapePtr &shape, + onnx::TensorProto *const tensor_proto) { + if (!type->isa() || !shape->isa()) { + MS_LOG(EXCEPTION) << "Type or shape is not supported! " << type->ToString(); + } + auto tensor = type->cast(); + const auto &dims = shape->cast()->shape(); + tensor_proto->set_data_type(GetOnnxDataType(tensor->element()->type_id())); + for (const auto &dim : dims) { + tensor_proto->add_dims(dim); + } +} + +void IrExportBuilder::SetParamToTensorProto(const ParameterPtr ¶m, onnx::TensorProto *const tensor_proto) { + if (param == nullptr || tensor_proto == nullptr) { + MS_LOG(EXCEPTION) << "Parameter or TensorProto is null!"; + } + MS_LOG(DEBUG) << "SetParamToTensorProto: " << param->DebugString(); + SetTensorProto(param->Type(), param->Shape(), tensor_proto); +} + +void IrExportBuilder::BuildNodes(const FuncGraphPtr &func_graph, onnx::GraphProto *const graph_proto) { + std::vector nodes = TopoSort(func_graph->get_return(), SuccIncoming, AlwaysInclude); + for (const AnfNodePtr &node : nodes) { + if (!node->isa()) { + MS_LOG(DEBUG) << "Node: '" << node->ToString() << "' is not cnode"; + continue; + } + auto cnode = node->cast(); + if (cnode == func_graph->get_return()) { + BuildOutput(cnode, graph_proto); + } else { + BuildCNode(cnode, graph_proto); + } + } +} + +void IrExportBuilder::BuildOutput(const CNodePtr &node, onnx::GraphProto *const graph_proto) { + if (node->size() != 2) { + MS_LOG(EXCEPTION) << "Number of inputs of return node is not equal to 2."; + } + AnfNodePtr arg = node->input(1); + // Using make_tuple to set multi-output + if (IsPrimitiveCNode(arg, prim::kPrimMakeTuple)) { + auto tuple_node = 
arg->cast(); + for (size_t i = 1; i < tuple_node->size(); i++) { + auto input_node = arg->cast()->input(i); + onnx::ValueInfoProto *output_proto = graph_proto->add_output(); + auto output_name = GetUniqueNodeName(tuple_node->input(i)); + output_proto->set_name(output_name); + last_node_->add_output(output_name); + SetValueInfoProto(tuple_node->input(i), output_proto); + } + } else { + onnx::ValueInfoProto *output_proto = graph_proto->add_output(); + std::string output_name = GetUniqueNodeName(node); + output_proto->set_name(output_name); + last_node_->add_output(output_name); + SetValueInfoProto(arg, output_proto); + } +} + +std::string IrExportBuilder::GetOpTypeName(const AnfNodePtr &node) { + // May be ValueNode/CNode/Parameter + std::string type_name = ""; + if (IsValueNode(node)) { + PrimitivePtr prim = GetValueNode(node); + type_name = prim->ToString(); + } else if (IsValueNode(node)) { + FuncGraphPtr fg = GetValueNode(node); + todo_.push_back(fg); + type_name = fg->ToString(); + } else if (node->isa() || node->isa()) { + type_name = node->ToString(); + } else { + MS_LOG(EXCEPTION) << "Need to support op type: " << node->type_name(); + } + MS_LOG(DEBUG) << "ExportType: " << type_name; + return type_name; +} + +void IrExportBuilder::SetShapeToNodeProto(const TypePtr &type, const BaseShapePtr &shape, + onnx::NodeProto *const node_proto, std::string suffix) { + onnx::AttributeProto *attr_proto = node_proto->add_attribute(); + attr_proto->set_ref_attr_name("shape"); + if (suffix.compare("0") != 0) { + attr_proto->set_name("shape" + suffix); + } else { + attr_proto->set_name("shape"); + } + onnx::TensorProto *tensor_proto = attr_proto->mutable_t(); + SetTensorProto(type, shape, tensor_proto); +} + +void IrExportBuilder::SetShapeToNodeProto(const CNodePtr &node, onnx::NodeProto *const node_proto) { + // Get shape of cnode + // 1. prim ArgMaxWithValue need to get shape from tuple element + // 2. some cnode doesn't has shape, such as LayerNorm + // 3. 
other cnodes have shape + if (node->IsApply(prim::kPrimArgMaxWithValue) || node->IsApply(prim::kPrimLayerNorm)) { + auto type = node->Type(); + auto shape = node->Shape(); + if (!type->isa()) { + MS_LOG(EXCEPTION) << "Output data of ArgMaxWithValue cnode must be tuple: " << type->type_name(); + } + auto elements = type->cast()->elements(); + auto tuple_shape = shape->cast()->shape(); + for (size_t i = 0; i < elements.size(); i++) { + SetShapeToNodeProto(elements[i], tuple_shape[i], node_proto, std::to_string(i)); + } + } else { + auto type = node->Type(); + auto shape = node->Shape(); + if (!type->isa() || !shape->isa()) { + MS_LOG(DEBUG) << "Cnode has no shape: " << node->ToString(); + return; + } + SetShapeToNodeProto(type, shape, node_proto); + } +} + +void IrExportBuilder::BuildCNode(const CNodePtr &node, onnx::GraphProto *const graph_proto) { + auto inputs_size = node->size(); + if (inputs_size < 1) { + MS_LOG(EXCEPTION) << "Inputs of apply node is empty"; + } + + // Need to build input node before dealing with cnode + std::vector op_inputs; + std::vector input_names; + for (size_t i = 1; i < inputs_size; i++) { + auto input = node->input(i); + op_inputs.push_back(input); + input_names.push_back(BuildInputNode(input, graph_proto)); + } + + // Build cnode + onnx::NodeProto *node_proto = graph_proto->add_node(); + std::string output_name = GetUniqueNodeName(node); + node_proto->add_output(output_name); + node_proto->set_name(output_name); + node_proto->set_domain(node->fullname_with_scope()); + AnfNodePtr op = node->input(0); + std::string type_name = GetOpTypeName(op); + node_proto->set_op_type(type_name); + last_node_ = node_proto; + SetShapeToNodeProto(node, node_proto); + (void)std::for_each(input_names.begin(), input_names.end(), + [&node_proto](const string &name) { node_proto->add_input(name); }); + + // Add primitive attrs + if (IsValueNode(op)) { + auto prim = GetValueNode(op); + for (auto attr : prim->attrs()) { + MS_LOG(DEBUG) << "attr: " << 
attr.first << " " << attr.second->DumpText() << " " << attr.second->type_name(); + onnx::AttributeProto *attr_proto = node_proto->add_attribute(); + attr_proto->set_name(attr.first); + SetValueToAttributeProto(attr.second, attr_proto); + } + } else { + MS_LOG(EXCEPTION) << "Need to support op type: " << op->type_name(); + } +} + +std::string IrExportBuilder::BuildInputNode(const AnfNodePtr &node, onnx::GraphProto *const graph_proto) { + std::string node_name = GetUniqueNodeName(node); + if (node->isa()) { + // When node input is a ValueNode, need to create a Constant Node + onnx::NodeProto *node_proto = graph_proto->add_node(); + node_proto->add_output(node_name); + SetAttributeProto(node, node_proto); + } + return node_name; +} + +std::string IrExportBuilder::GetUniqueNodeName(const AnfNodePtr &node) { + // Naming anfnode + // 1. parameter is unique in one func_graph + // 2. cnode and valuenode may be reduplicative, so add index to identify. + std::string node_name = ""; + if (node->isa()) { + node_name = GetNodeName(node); + } else if (node->isa() || node->isa()) { + auto iter = node_index_map_.find(node); + if (iter != node_index_map_.end()) { + node_name = GetNodeName(node) + ":" + std::to_string(iter->second); + } else { + auto node_idx = AllocateIndex(); + node_index_map_[node] = node_idx; + node_name = GetNodeName(node) + ":" + std::to_string(node_idx); + } + } else { + MS_LOG(EXCEPTION) << "Can not support type of node:" << node->ToString(); + } + MS_LOG(DEBUG) << "Node name: " << node_name; + return node_name; +} + +std::string IrExportBuilder::GetNodeName(const AnfNodePtr &node) { + std::string node_name = ""; + if ((node != nullptr) && (node->func_graph() != nullptr)) { + node_name = node->func_graph()->ToString() + ":"; + } + node_name += node->ToString(); + MS_LOG(DEBUG) << "GetNodeName: " << node_name; + return node_name; +} + +void IrExportBuilder::SetAttributeProto(const AnfNodePtr &node, onnx::NodeProto *const node_proto) { + if (node == nullptr || 
node_proto == nullptr) { + MS_LOG(EXCEPTION) << "AnfNode or NodeProto is null!"; + } + auto value = node->cast()->value(); + node_proto->set_op_type("Constant"); + onnx::AttributeProto *attr_proto = node_proto->add_attribute(); + attr_proto->set_name("value"); + MS_LOG(DEBUG) << "Set Constant attribute: " << value->ToString(); + SetValueToAttributeProto(value, attr_proto); +} + +void IrExportBuilder::SetTypeToAttributeProto(const ValuePtr &value, onnx::AttributeProto *const attr_proto) { + if (value == nullptr || attr_proto == nullptr) { + MS_LOG(EXCEPTION) << "ValuePtr or AttributeProto is null!"; + } + attr_proto->set_ref_attr_name("type"); + attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); + onnx::TensorProto *tensor_proto = attr_proto->mutable_t(); + if (value->isa()) { + auto int_value = value->cast(); + tensor_proto->set_data_type(GetOnnxDataBitsIntType(int_value->nbits())); + } else if (value->isa()) { + auto float_value = value->cast(); + tensor_proto->set_data_type(GetOnnxDataBitsFloatType(float_value->nbits())); + } else if (value->isa()) { + tensor_proto->set_name("tensor"); + auto elem_type = value->cast()->element(); + if (elem_type->isa()) { + auto int_value = elem_type->cast(); + tensor_proto->set_data_type(GetOnnxDataBitsIntType(int_value->nbits())); + } else if (elem_type->isa()) { + auto float_value = elem_type->cast(); + tensor_proto->set_data_type(GetOnnxDataBitsFloatType(float_value->nbits())); + } else { + MS_LOG(EXCEPTION) << "Unsupported type " << elem_type->type_name(); + } + } else { + MS_LOG(EXCEPTION) << "Unsupported type: " << value->type_name(); + } +} + +void IrExportBuilder::SetValueToAttributeProto(const ValuePtr &value, onnx::AttributeProto *const attr_proto) { + if (value == nullptr || attr_proto == nullptr) { + MS_LOG(EXCEPTION) << "ValuePtr or AttributeProto is null!"; + } + if (value->isa() || value->isa()) { + SetScalarToAttributeProto(value, attr_proto); + } else if (value->isa() || value->isa()) { + 
SetTypeToAttributeProto(value, attr_proto); + } else if (value->isa()) { + SetSequenceToAttributeProto(value->cast(), attr_proto); + } else if (value->isa()) { + SetTensorToAttributeProto(value, attr_proto); + } else { + MS_LOG(EXCEPTION) << "Unsupported type: " << value->type_name(); + } +} + +void IrExportBuilder::SetScalarToAttributeProto(const ValuePtr &value, onnx::AttributeProto *const attr_proto) { + if (value == nullptr || attr_proto == nullptr) { + MS_LOG(EXCEPTION) << "ValuePtr or AttributeProto is null!"; + } + attr_proto->set_ref_attr_name("scalar"); + attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); + onnx::TensorProto *tensor_proto = attr_proto->mutable_t(); + SetScalarToProto(value, tensor_proto); +} + +void IrExportBuilder::SetScalarToProto(const ValuePtr &value, onnx::TensorProto *const tensor_proto) { + if (value == nullptr || tensor_proto == nullptr) { + MS_LOG(EXCEPTION) << "ValuePtr or TensorProto is null!"; + } + if (value->isa()) { + tensor_proto->set_data_type(onnx::TensorProto_DataType_STRING); + tensor_proto->add_string_data(GetValue(value)); + } else if (value->isa()) { + tensor_proto->set_data_type(onnx::TensorProto_DataType_BOOL); + tensor_proto->add_int32_data(GetValue(value)); + } else if (value->isa()) { + tensor_proto->set_data_type(onnx::TensorProto_DataType_INT8); + tensor_proto->add_int32_data(value->cast()->value()); + } else if (value->isa()) { + tensor_proto->set_data_type(onnx::TensorProto_DataType_INT16); + tensor_proto->add_int32_data(value->cast()->value()); + } else if (value->isa()) { + tensor_proto->set_data_type(onnx::TensorProto_DataType_INT32); + tensor_proto->add_int32_data(value->cast()->value()); + } else if (value->isa()) { + tensor_proto->set_data_type(onnx::TensorProto_DataType_INT64); + tensor_proto->add_int64_data(value->cast()->value()); + } else if (value->isa()) { + tensor_proto->set_data_type(onnx::TensorProto_DataType_FLOAT); + tensor_proto->add_float_data(GetValue(value)); + } else { + 
MS_LOG(EXCEPTION) << "Unsupported scalar type: " << value->type_name(); + } +} + +void IrExportBuilder::SetSequenceToAttributeProto(const ValueSequeuePtr &value, + onnx::AttributeProto *const attr_proto) { + if (value == nullptr || attr_proto == nullptr) { + MS_LOG(EXCEPTION) << "ValueSequeuePtr or AttributeProto is null!"; + } + attr_proto->set_ref_attr_name("scalar"); + attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); + onnx::TensorProto *tensor_proto = attr_proto->mutable_t(); + if (value->isa()) { + const ValueTuplePtr &tuple_value = value->cast(); + if (tuple_value->value().size() == 0) { + MS_LOG(DEBUG) << "SetSequenceToAttributeProto tuple size is 0"; + return; + } + auto type_id = tuple_value->value()[0]->type()->type_id(); + tensor_proto->set_data_type(GetOnnxDataType(type_id)); + for (const auto &item : tuple_value->value()) { + SetScalarToProto(item, tensor_proto); + } + } else if (value->isa()) { + const ValueListPtr &list_value = value->cast(); + if (list_value->value().size() == 0) { + MS_LOG(DEBUG) << "SetSequenceToAttributeProto list size is 0"; + return; + } + auto type_id = list_value->value()[0]->type()->type_id(); + tensor_proto->set_data_type(GetOnnxDataType(type_id)); + for (const auto &item : list_value->value()) { + SetScalarToProto(item, tensor_proto); + } + } +} + +std::string GetBinaryProtoString(const FuncGraphPtr &func_graph) { + auto builder = std::make_shared(); + if (builder == nullptr) { + MS_LOG(ERROR) << "Create ir exporter failed!"; + return ""; + } + auto exporter = std::make_shared(builder); + if (exporter == nullptr) { + return ""; + } + return exporter->GetDumpString(func_graph); +} +} // namespace mindspore diff --git a/mindspore/ccsrc/operator/composite/composite.cc b/mindspore/ccsrc/operator/composite/composite.cc index 31ba49fa0b..75532b9fbd 100644 --- a/mindspore/ccsrc/operator/composite/composite.cc +++ b/mindspore/ccsrc/operator/composite/composite.cc @@ -334,8 +334,8 @@ ArgsPairList 
HyperMap::Harmonize(const FuncGraphPtr &func_graph, const ArgsPairL FuncGraphPtr HyperMap::GenerateFromTypes(const TypePtrList &args_spec_list) { FuncGraphPtr ptrGraph = std::make_shared(); - ptrGraph->set_flags(FUNC_GRAPH_FLAG_CORE, true); - ptrGraph->set_flags(FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER, true); + ptrGraph->set_flag(FUNC_GRAPH_FLAG_CORE, true); + ptrGraph->set_flag(FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER, true); ptrGraph->debug_info()->set_name("hyper_map"); AnfNodePtr ptrFnArg = nullptr; @@ -389,7 +389,7 @@ FuncGraphPtr Tail::GenerateTupleFuncGraph(const abstract::AbstractTuplePtr &a_tu MS_EXCEPTION_IF_NULL(a_tuple); FuncGraphPtr ret = std::make_shared(); - ret->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret->set_flag(FUNC_GRAPH_FLAG_CORE, true); ret->debug_info()->set_name("tail"); AnfNodePtr ptrTup = ret->add_parameter(); @@ -409,7 +409,7 @@ FuncGraphPtr Tail::GenerateListFuncGraph(const abstract::AbstractListPtr &a_list MS_EXCEPTION_IF_NULL(a_list); FuncGraphPtr ret = std::make_shared(); - ret->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret->set_flag(FUNC_GRAPH_FLAG_CORE, true); ret->debug_info()->set_name("tail"); AnfNodePtr ptrList = ret->add_parameter(); @@ -481,10 +481,10 @@ FuncGraphPtr MakeTupleGradient::GenerateFuncGraph(const AbstractBasePtrList &arg grads.push_back(b->NewCNode({NewValueNode(prim::kPrimTupleGetItem), dout, NewValueNode(i)})); } - b->set_flags(FUNC_GRAPH_FLAG_CORE, true); + b->set_flag(FUNC_GRAPH_FLAG_CORE, true); b->set_output(b->NewCNode(grads)); - fg->set_flags(FUNC_GRAPH_FLAG_CORE, true); + fg->set_flag(FUNC_GRAPH_FLAG_CORE, true); fg->set_output(fg->NewCNode({NewValueNode(prim::kPrimMakeTuple), out, NewValueNode(b)})); (void)fg->transforms().emplace("primal", FuncGraphTransform(prim::kPrimMakeTuple)); return fg; @@ -501,9 +501,15 @@ GradOperation::GradOperation(const std::string &name, bool get_all, bool get_by_ } FuncGraphPtr GradOperation::GetGrad(AnfNodePtr node, const AnfNodePtr &weights, - const std::vector ¶ms_list, bool 
applyJ) { + const std::vector ¶ms_list, const std::vector &args, + bool applyJ) { FuncGraphPtr ret = std::make_shared(); - ret->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret->set_flag(FUNC_GRAPH_FLAG_CORE, true); + + auto weights_node = weights; + if (weights == nullptr && !args.empty()) { + weights_node = ret->NewCNode(args); + } ValueNodePtr opsJ = NewValueNode(prim::kPrimJ); ValueNodePtr opsTupleItem = NewValueNode(prim::kPrimTupleGetItem); @@ -537,7 +543,7 @@ FuncGraphPtr GradOperation::GetGrad(AnfNodePtr node, const AnfNodePtr &weights, inputs.push_back(NewValueNode(1)); AnfNodePtr ptrBprop = ret->NewCNode(inputs); - doGetGrad(ret, out, ptrBprop, weights, opsTupleItem); + doGetGrad(ret, out, ptrBprop, weights_node, opsTupleItem); return ret; } @@ -619,7 +625,7 @@ FuncGraphPtr GradOperation::GenerateFuncGraph(const AbstractBasePtrList &args_sp std::ostringstream ss; ss << "grad{" << nparam << "}"; - dfBuilder->set_flags(FUNC_GRAPH_FLAG_CORE, true); + dfBuilder->set_flag(FUNC_GRAPH_FLAG_CORE, true); dfBuilder->debug_info()->set_name(ss.str()); ParameterPtr param_graph = dfBuilder->add_parameter(); @@ -665,7 +671,7 @@ FuncGraphPtr ListMap::GenerateFuncGraph(const AbstractBasePtrList &args_spec_lis } FuncGraphPtr fg_ptr = std::make_shared(); - fg_ptr->set_flags(FUNC_GRAPH_FLAG_CORE, true); + fg_ptr->set_flag(FUNC_GRAPH_FLAG_CORE, true); fg_ptr->debug_info()->set_name("list_map"); AnfNodePtr fn = fg_ptr->add_parameter(); @@ -735,7 +741,7 @@ void ListMap::MakeCond(const std::vector &lists, const FuncGraphPtr // cond = reduce(lambda a, b: g.apply(P.bool_and, a, b), hasnexts) FuncGraphPtr fgtrue_ptr = std::make_shared(); fgtrue_ptr->debug_info()->set_name("ftrue"); - fgtrue_ptr->set_flags(FUNC_GRAPH_FLAG_CORE, true); + fgtrue_ptr->set_flag(FUNC_GRAPH_FLAG_CORE, true); CNodePtr fgtrue_output_cnode = fgtrue_ptr->NewCNode({NewValueNode(fgnext_ptr), fn, resl}); auto inputs = fgtrue_output_cnode->inputs(); @@ -745,7 +751,7 @@ void ListMap::MakeCond(const std::vector &lists, 
const FuncGraphPtr FuncGraphPtr fgfalse_ptr = std::make_shared(); fgfalse_ptr->debug_info()->set_name("ffalse"); - fgfalse_ptr->set_flags(FUNC_GRAPH_FLAG_CORE, true); + fgfalse_ptr->set_flag(FUNC_GRAPH_FLAG_CORE, true); fgfalse_ptr->set_output(resl); AnfNodePtr output_cnode = fg_ptr->NewCNode({NewValueNode(prim::kPrimSwitch), NewValueNode(std::string("cond")), @@ -802,7 +808,7 @@ FuncGraphPtr TupleAdd::GenerateFuncGraph(const AbstractBasePtrList &args_spec_li } FuncGraphPtr ret = std::make_shared(); - ret->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret->set_flag(FUNC_GRAPH_FLAG_CORE, true); AnfNodePtr p_tup_a = ret->add_parameter(); AnfNodePtr p_tup_b = ret->add_parameter(); @@ -906,7 +912,7 @@ FuncGraphPtr TupleSlice::GenerateFuncGraph(const AbstractBasePtrList &args_spec_ GenerateTupleSliceParameter(tuple, slice, &start_index, &stop_index, &step_value); FuncGraphPtr ret = std::make_shared(); - ret->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret->set_flag(FUNC_GRAPH_FLAG_CORE, true); AnfNodePtr p_tuple = ret->add_parameter(); (void)ret->add_parameter(); @@ -926,206 +932,6 @@ FuncGraphPtr TupleSlice::GenerateFuncGraph(const AbstractBasePtrList &args_spec_ return ret; } -int ConvertBinaryToDecimal(const std::vector &number_bin) { - unsigned int number_dec = 0; - for (size_t index = 0; index < number_bin.size(); index++) { - number_dec |= number_bin[index] << index; - } - return static_cast(number_dec); -} - -void ParseSlice(const AbstractSlicePtr &slice, std::vector *begin, std::vector *end, - std::vector *strides, int length) { - MS_EXCEPTION_IF_NULL(slice); - MS_EXCEPTION_IF_NULL(begin); - MS_EXCEPTION_IF_NULL(end); - MS_EXCEPTION_IF_NULL(strides); - if (length <= 0) { - MS_LOG(EXCEPTION) << "Could not slice a dim when it's length less than 1"; - } - - int start_default = 0; - int stop_default = length; - int step_default = 1; - int step_value = CheckSliceMember(slice->step(), step_default, "step"); - if (step_value < 0) { - start_default = -1; - stop_default = 
-(length + 1); - } - - begin->push_back(CheckSliceMember(slice->start(), start_default, "begin")); - end->push_back(CheckSliceMember(slice->stop(), stop_default, "stop")); - strides->push_back(step_value); -} - -int GenerateStridedSliceParametersFromTuple(const AbstractTuplePtr &slice_tuple, const std::vector &shape, - std::vector *begin, std::vector *end, std::vector *strides) { - MS_EXCEPTION_IF_NULL(slice_tuple); - MS_EXCEPTION_IF_NULL(begin); - MS_EXCEPTION_IF_NULL(end); - MS_EXCEPTION_IF_NULL(strides); - - size_t slice_tuple_size = slice_tuple->size(); - size_t shape_size = shape.size(); - if (slice_tuple_size > shape_size) { - MS_LOG(EXCEPTION) << "The number of slice data to slice tensor should be less than the rank of tensor," - "when the rank of tensor is " - << shape_size << ", the number of slice is " << slice_tuple_size; - } - - std::vector shrink; - auto slice_tuple_eles = slice_tuple->elements(); - size_t ellipsis_num = 0; - - for (size_t index = 0; index < slice_tuple_size; index++) { - if (slice_tuple_eles[index]->isa()) { - AbstractSlicePtr slice = dyn_cast(slice_tuple_eles[index]); - ParseSlice(slice, begin, end, strides, shape[index]); - shrink.push_back(0); - continue; - } - - if (slice_tuple_eles[index]->isa()) { - int ele_index = GetArgScalarValue(dyn_cast(slice_tuple_eles[index]), "slice_tuple"); - begin->push_back(ele_index); - end->push_back(ele_index + 1); - strides->push_back(1); - shrink.push_back(1); - continue; - } - - if (slice_tuple_eles[index]->isa()) { - ellipsis_num++; - if (ellipsis_num > 1) { - MS_LOG(EXCEPTION) << "Tensor slice supports at most one ellipsis"; - } - size_t ellipsis_len = shape_size - (slice_tuple_size - 1); - begin->insert(begin->end(), ellipsis_len, 0); - end->insert(end->end(), shape.begin() + index, shape.begin() + index + ellipsis_len); - strides->insert(strides->end(), ellipsis_len, 1); - shrink.insert(shrink.end(), ellipsis_len, 0); - continue; - } - - MS_LOG(EXCEPTION) << "Slice tuple only could contain 
slice, int number or ellipsis, but got " - << slice_tuple_eles[index]->ToString(); - } - - if (ellipsis_num == 0) { - for (size_t index = slice_tuple_size; index < shape_size; index++) { - begin->push_back(0); - end->push_back(shape[index]); - strides->push_back(1); - } - } - return ConvertBinaryToDecimal(shrink); -} - -int GenerateStridedSliceParametersFromSlice(const AbstractSlicePtr &slice, const std::vector &shape, - std::vector *begin, std::vector *end, std::vector *strides) { - MS_EXCEPTION_IF_NULL(begin); - MS_EXCEPTION_IF_NULL(end); - MS_EXCEPTION_IF_NULL(strides); - size_t shape_size = shape.size(); - if (shape_size == 0) { - MS_LOG(EXCEPTION) << "Could slice a scalar tensor"; - } - - ParseSlice(slice, begin, end, strides, shape[0]); - - for (size_t index = 1; index < shape_size; index++) { - begin->push_back(0); - end->push_back(shape[index]); - strides->push_back(1); - } - - return 0; -} - -int GenerateStridedSliceParametersFromNumber(const AbstractScalarPtr &scalar, const std::vector &shape, - std::vector *begin, std::vector *end, - std::vector *strides) { - MS_EXCEPTION_IF_NULL(begin); - MS_EXCEPTION_IF_NULL(end); - MS_EXCEPTION_IF_NULL(strides); - int ele_index = GetArgScalarValue(scalar, "slice_tuple"); - - begin->push_back(ele_index); - end->push_back(ele_index + 1); - strides->push_back(1); - - for (size_t index = 1; index < shape.size(); index++) { - begin->push_back(0); - end->push_back(shape[index]); - strides->push_back(1); - } - - return 1; -} - -FuncGraphPtr ExpandADim(const FuncGraphPtr &ret_graph, const AnfNodePtr &tensor_node) { - auto PrimExpandDims = GetPythonOps("expand_dims", "mindspore.ops.functional"); - ret_graph->set_output(NewCNode({NewValueNode(PrimExpandDims), tensor_node, NewValueNode(0)}, ret_graph)); - return ret_graph; -} - -FuncGraphPtr TensorSlice::GenerateFuncGraph(const AbstractBasePtrList &args_spec_list) { - // slice a tensor - // args: tensor, slice or slice tuple - const std::string op_name = 
std::string("TensorSlice"); - abstract::CheckArgsSize(op_name, args_spec_list, 2); - AbstractTensorPtr tensorPtr = abstract::CheckArg(op_name, args_spec_list, 0); - - FuncGraphPtr ret_graph = std::make_shared(); - ret_graph->set_flags(FUNC_GRAPH_FLAG_CORE, true); - AnfNodePtr tensor_node = ret_graph->add_parameter(); - (void)ret_graph->add_parameter(); - - auto shape = tensorPtr->shape()->shape(); - std::vector begin; - std::vector end; - std::vector strides; - int shrink_axis_mask; - - if (args_spec_list[1]->isa()) { - AbstractTuplePtr tuple_ptr = dyn_cast(args_spec_list[1]); - shrink_axis_mask = GenerateStridedSliceParametersFromTuple(tuple_ptr, shape, &begin, &end, &strides); - } else if (args_spec_list[1]->isa()) { - AbstractSlicePtr slice_ptr = dyn_cast(args_spec_list[1]); - shrink_axis_mask = GenerateStridedSliceParametersFromSlice(slice_ptr, shape, &begin, &end, &strides); - } else if (args_spec_list[1]->isa()) { - AbstractScalarPtr scalar_ptr = dyn_cast(args_spec_list[1]); - if (scalar_ptr->BuildValue()->isa()) { - if (scalar_ptr->BuildValue()->cast()->value()) { - return ExpandADim(ret_graph, tensor_node); - } - MS_LOG(EXCEPTION) << "TensorSlice not support the index is False."; - } - shrink_axis_mask = GenerateStridedSliceParametersFromNumber(scalar_ptr, shape, &begin, &end, &strides); - } else if (args_spec_list[1]->isa()) { - ret_graph->set_output(tensor_node); - return ret_graph; - } else if (args_spec_list[1]->isa()) { - return ExpandADim(ret_graph, tensor_node); - } else { - std::ostringstream args_info; - for (const auto &arg : args_spec_list) { - MS_EXCEPTION_IF_NULL(arg); - args_info << arg->ToString() << "\n"; - } - MS_LOG(EXCEPTION) - << "TensorSlice requires the input should be one of [slice, ellipsis, int number, bool, none, tuple] , but got " - << args_info.str(); - } - - auto PrimStridedSliceClass = prim::GetPythonOps("StridedSlice", "mindspore.ops.operations"); - auto PrimStridedSlice = 
ret_graph->NewCNode({NewValueNode(PrimStridedSliceClass), NewValueNode(0), NewValueNode(0), - NewValueNode(0), NewValueNode(0), NewValueNode(shrink_axis_mask)}); - ret_graph->set_output(ret_graph->NewCNode( - {PrimStridedSlice, tensor_node, NewValueNode(begin), NewValueNode(end), NewValueNode(strides)})); - return ret_graph; -} - FuncGraphPtr TupleGetItemTensor::GenerateFuncGraph(const AbstractBasePtrList &args_spec_list) { // select indexed item // args: tuple of items, index @@ -1135,7 +941,7 @@ FuncGraphPtr TupleGetItemTensor::GenerateFuncGraph(const AbstractBasePtrList &ar AbstractBasePtrList branches = branches_abs->elements(); if (branches.size() > 0 && branches[0] != nullptr && branches[0]->isa()) { FuncGraphPtr ret_graph = std::make_shared(); - ret_graph->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret_graph->set_flag(FUNC_GRAPH_FLAG_CORE, true); AnfNodePtr functions = ret_graph->add_parameter(); auto index = ret_graph->add_parameter(); @@ -1156,11 +962,6 @@ REGISTER_PYBIND_DEFINE(TupleSlice_, ([](const py::module *m) { .def(py::init()); })); -REGISTER_PYBIND_DEFINE(TensorSlice_, ([](const py::module *m) { - (void)py::class_>(*m, "TensorSlice_") - .def(py::init()); - })); - REGISTER_PYBIND_DEFINE(TupleGetItemTensor_, ([](const py::module *m) { (void)py::class_>( *m, "TupleGetItemTensor_") diff --git a/mindspore/ccsrc/operator/composite/composite.h b/mindspore/ccsrc/operator/composite/composite.h index 0ec8723396..5944c81fb0 100644 --- a/mindspore/ccsrc/operator/composite/composite.h +++ b/mindspore/ccsrc/operator/composite/composite.h @@ -129,7 +129,7 @@ class GradOperation : public MetaFuncGraph { MS_DECLARE_PARENT(GradOperation, MetaFuncGraph) FuncGraphPtr GetGrad(AnfNodePtr ptrNode, const AnfNodePtr &weights, const std::vector &ptrParams, - bool applyJ = false); + const std::vector &args = {}, bool applyJ = false); FuncGraphPtr GenerateFuncGraph(const AbstractBasePtrList &args_spec_list) override; bool sens_param() const { return sens_param_; } bool 
get_all_; @@ -175,16 +175,6 @@ class TupleSlice : public MetaFuncGraph { }; using TupleSlicePtr = std::shared_ptr; -class TensorSlice : public MetaFuncGraph { - public: - explicit TensorSlice(const std::string &name) : MetaFuncGraph(name) {} - ~TensorSlice() override = default; - MS_DECLARE_PARENT(TensorSlice, MetaFuncGraph) - FuncGraphPtr GenerateFuncGraph(const AbstractBasePtrList &args_spec_list) override; - friend bool operator==(const TensorSlice &lhs, const TensorSlice &rhs) { return lhs.name_ == rhs.name_; } -}; -using TensorSlicePtr = std::shared_ptr; - class TupleGetItemTensor : public MetaFuncGraph { public: explicit TupleGetItemTensor(const std::string &name) : MetaFuncGraph(name) {} diff --git a/mindspore/ccsrc/operator/composite/do_signature.cc b/mindspore/ccsrc/operator/composite/do_signature.cc index 0cc4ee0483..d9bcef3031 100644 --- a/mindspore/ccsrc/operator/composite/do_signature.cc +++ b/mindspore/ccsrc/operator/composite/do_signature.cc @@ -65,55 +65,57 @@ void ProcessDefault(const std::string &func_name, const AbstractBasePtrList &arg } } } -bool CompareTensorScalarType(const TypeId &tensor_type, const size_t &t_type_number, const TypeId &scalar_type, - const size_t &s_type_number) { - if (scalar_type == kNumberTypeFloat16 || scalar_type == kNumberTypeFloat32 || scalar_type == kNumberTypeFloat64) { - if (tensor_type == kNumberTypeFloat16 || tensor_type == kNumberTypeFloat32 || tensor_type == kNumberTypeFloat64) { - return t_type_number >= s_type_number; - } - return false; - } - return true; -} -void setMaxType(TypeId *max_type_id, TypeId *max_type, size_t *max_type_number, const TypeId type_id, const TypeId type, - const size_t type_number) { +void SetMaxType(TypeId *max_type_id, size_t *max_type_number, const TypeId type_id, const size_t type_number) { *max_type_id = type_id; - *max_type = type; *max_type_number = type_number; } -TypeId GetMaxTypeId(const abstract::AbstractBasePtrList &args_spec_list, std::vector indexs, - const std::set 
&write_indexs) { +bool GetTensorOrScalarTypeInfo(AbstractBasePtr arg_value, bool is_write, TypeId *arg_type_id, + TypeId *arg_type = nullptr) { + if (arg_value->isa()) { + if (is_write) { + arg_value = arg_value->cast()->ref_origin(); + } else { + arg_value = arg_value->cast()->ref(); + } + } + if (arg_value->isa()) { + auto tensor = arg_value->cast(); + auto tensor_type = tensor->element()->BuildType(); + MS_EXCEPTION_IF_NULL(tensor_type); + *arg_type_id = tensor_type->type_id(); + if (arg_type != nullptr) { + *arg_type = kObjectTypeTensorType; + } + return true; + } + if (arg_value->isa()) { + auto scalar = arg_value->cast(); + auto scalar_type = scalar->BuildType(); + MS_EXCEPTION_IF_NULL(scalar_type); + *arg_type_id = scalar_type->type_id(); + if (arg_type != nullptr) { + *arg_type = kObjectTypeNumber; + } + return true; + } + return false; +} + +TypeId GetMaxTypeId(const abstract::AbstractBasePtrList &args_spec_list, std::vector indices, + const std::set &write_indices) { TypeId max_type_id = kTypeUnknown; - TypeId max_type = kTypeUnknown; size_t max_type_number = 0; bool has_int8 = false; - for (const auto &index : indexs) { + for (const auto &index : indices) { TypeId arg_type_id = kTypeUnknown; TypeId arg_type = kTypeUnknown; - AbstractBasePtr arg_value = args_spec_list[index]; - if (arg_value->isa()) { - auto is_write = (write_indexs.find(index) != write_indexs.end()); - if (is_write) { - arg_value = arg_value->cast()->ref_origin(); - } else { - arg_value = arg_value->cast()->ref(); - } + auto is_write = (write_indices.find(index) != write_indices.end()); + if (!GetTensorOrScalarTypeInfo(args_spec_list[index], is_write, &arg_type_id, &arg_type)) { + continue; } - if (arg_value->isa()) { - auto tensor = arg_value->cast(); - auto tensor_type = tensor->element()->BuildType(); - MS_EXCEPTION_IF_NULL(tensor_type); - arg_type_id = tensor_type->type_id(); - arg_type = kObjectTypeTensorType; - } else if (arg_value->isa()) { - auto scalar = arg_value->cast(); - 
auto scalar_type = scalar->BuildType(); - MS_EXCEPTION_IF_NULL(scalar_type); - arg_type_id = scalar_type->type_id(); - arg_type = kObjectTypeNumber; - } else { + if (arg_type != kObjectTypeTensorType) { continue; } auto it = type_map.find(arg_type_id); @@ -124,24 +126,11 @@ TypeId GetMaxTypeId(const abstract::AbstractBasePtrList &args_spec_list, std::ve has_int8 = true; } if (max_type_id == kTypeUnknown) { - setMaxType(&max_type_id, &max_type, &max_type_number, arg_type_id, arg_type, it->second); + SetMaxType(&max_type_id, &max_type_number, arg_type_id, it->second); continue; } - - if (max_type == arg_type) { - if (it->second > max_type_number) { - setMaxType(&max_type_id, &max_type, &max_type_number, arg_type_id, arg_type, it->second); - } - } else { - if (arg_type == kObjectTypeTensorType) { - if (CompareTensorScalarType(arg_type_id, it->second, max_type_id, max_type_number)) { - setMaxType(&max_type_id, &max_type, &max_type_number, arg_type_id, arg_type, it->second); - } - } else { - if (!CompareTensorScalarType(max_type_id, max_type_number, arg_type_id, it->second)) { - setMaxType(&max_type_id, &max_type, &max_type_number, arg_type_id, arg_type, it->second); - } - } + if (it->second > max_type_number) { + SetMaxType(&max_type_id, &max_type_number, arg_type_id, it->second); } } @@ -154,28 +143,28 @@ TypeId GetMaxTypeId(const abstract::AbstractBasePtrList &args_spec_list, std::ve // Get the largest type of index in the same SignatureEnumDType of arguments. std::map GetMaxDtype(const std::vector &dtypes, const abstract::AbstractBasePtrList &args_spec_list, - const std::set &write_indexs) { + const std::set &write_indices) { // record index for signature.dtypes of the same type // eg. 
[T, T1, T, T2, T, T1, T3] -> {{T:(0,2,4)}, {T1:(1,5)}, {T2:(3)}, {T3:(6)}} - std::map> type_indexs; + std::map> type_indices; for (size_t i = 0; i < dtypes.size(); ++i) { - auto it = type_indexs.find(dtypes[i]); - if (it == type_indexs.end()) { - (void)type_indexs.insert(std::make_pair(dtypes[i], std::vector{i})); + auto it = type_indices.find(dtypes[i]); + if (it == type_indices.end()) { + (void)type_indices.insert(std::make_pair(dtypes[i], std::vector{i})); } else { it->second.push_back(i); } } std::map dst_type; - for (auto it = type_indexs.begin(); it != type_indexs.end(); (void)++it) { + for (auto it = type_indices.begin(); it != type_indices.end(); (void)++it) { auto type = it->first; - auto indexs = it->second; + auto indices = it->second; // If the number of arguments belonging to the same SignatureEnumDType is less than 2, skip it. - if (indexs.size() < 2) { + if (indices.size() < 2) { continue; } bool has_tensor = false; - for (const auto &index : indexs) { + for (const auto &index : indices) { AbstractBasePtr arg_value = args_spec_list[index]; if (arg_value->isa()) { arg_value = arg_value->cast()->ref(); @@ -189,7 +178,7 @@ std::map GetMaxDtype(const std::vector &signature, const abstract::AbstractBasePtrList &args_spec_list, const FuncGraphPtr &graph, - std::vector *const op_inputs, const std::set &write_indexs) { + std::vector *const op_inputs, const std::set &write_indices) { std::vector dtypes; (void)std::transform(signature.begin(), signature.end(), std::back_inserter(dtypes), [](const Signature &sig) { return sig.dtype; }); @@ -213,54 +202,40 @@ void DoAutoCast(const std::string &func_name, const std::vector &sign return; } // Stat the index of the arguments with the largest type in the same SignatureEnumDType. 
- std::map dst_type = GetMaxDtype(dtypes, args_spec_list, write_indexs); + std::map dst_type = GetMaxDtype(dtypes, args_spec_list, write_indices); // Identify which arg requires auto cast for (size_t i = 0; i < args_spec_list.size(); ++i) { auto it = dst_type.find(dtypes[i]); if (it == dst_type.end() || it->second == kTypeUnknown) { continue; } - auto rw_it = write_indexs.find(i); - auto is_write = (rw_it != write_indexs.end()); + auto rw_it = write_indices.find(i); + auto is_write = (rw_it != write_indices.end()); - AbstractBasePtr arg_value = args_spec_list[i]; - if (arg_value->isa()) { - if (is_write) { - arg_value = arg_value->cast()->ref_origin(); - } else { - arg_value = arg_value->cast()->ref(); - } - } TypeId arg_type_id = kTypeUnknown; - if (arg_value->isa()) { - auto tensor = arg_value->cast(); - auto tensor_type = tensor->element()->BuildType(); - MS_EXCEPTION_IF_NULL(tensor_type); - arg_type_id = tensor_type->type_id(); - } else if (arg_value->isa()) { - auto scalar = arg_value->cast(); - auto scalar_type = scalar->BuildType(); - MS_EXCEPTION_IF_NULL(scalar_type); - arg_type_id = scalar_type->type_id(); - } - auto it_map = type_map.find(arg_type_id); - if (it_map == type_map.end()) { + AbstractBasePtr arg_value = args_spec_list[i]; + (void)GetTensorOrScalarTypeInfo(arg_value, is_write, &arg_type_id); + auto it_map = type_name_map.find(arg_type_id); + if (it_map == type_name_map.end()) { continue; } if (is_write) { if (arg_type_id != it->second) { - MS_LOG(EXCEPTION) << "In op '" << func_name << "', argument '" << args_spec_list[i] - << "' can not cast type from '" << TypeIdLabel(arg_type_id) << "' to '" - << TypeIdLabel(it->second) << "' automatically."; + auto it_name_map = type_name_map.find(it->second); + if (it_name_map == type_name_map.end()) { + continue; + } + MS_LOG(EXCEPTION) << "In op '" << func_name << "', \n" + << "the type of writable argument is '" << it_map->second << "', " + << "but the largest type in the same SignatureEumDtype is '" << 
it_name_map->second + << "'. The writable arg type is not equal to the largest type, " + << "so can not cast automatically."; } continue; } if (arg_value->isa() && arg_type_id == it->second) { continue; } - if ((arg_type_id == kNumberTypeBool || it->second == kNumberTypeBool) && arg_type_id != it->second) { - continue; - } (*op_inputs)[i + 1] = DoCast((*op_inputs)[i + 1], it->second, graph); } } @@ -282,12 +257,16 @@ AnfNodePtr BuildNewCNode(const FuncGraphPtr &func_graph, const std::string &func } } std::vector op_inputs; - std::set write_indexs; + std::set write_indices; op_inputs.push_back(NewValueNode(function)); // Assume, the write input of op is always the first input. We check if any write op, // and add cast op on other inputs to keep the same type with assigned parameter. for (size_t i = 0; i < args_spec_list.size(); ++i) { AnfNodePtr param = params_list[i]; + if (args_spec_list[i] == nullptr) { + op_inputs.push_back(param); + continue; + } SignatureEnumRW sig = SignatureEnumRW::kRWDefault; // If sig_size is 0 use defalut. if (sig_size > 0 && i < sig_size) { @@ -295,13 +274,14 @@ AnfNodePtr BuildNewCNode(const FuncGraphPtr &func_graph, const std::string &func } else if (has_var && i >= sig_size) { sig = signature[sig_size - 1].rw; } + TypePtr type = args_spec_list[i]->GetTypeTrack(); if (type && type->type_id() == kObjectTypeRef) { if (sig == SignatureEnumRW::kRWRead) { param = func_graph->NewCNode({NewValueNode(prim::kPrimGetRefValue), param}); } else if (sig == SignatureEnumRW::kRWWrite) { param = func_graph->NewCNode({NewValueNode(prim::kPrimGetRefOrigin), param}); - write_indexs.insert(i); + write_indices.insert(i); } // If sig is SignatureEnumRW::kRWRef, not do anything. 
} else if (sig == SignatureEnumRW::kRWWrite && type->type_id() != kObjectTypeRefKey) { @@ -311,7 +291,7 @@ AnfNodePtr BuildNewCNode(const FuncGraphPtr &func_graph, const std::string &func } // process default ProcessDefault(func_name, args_spec_list, signature, has_var, &op_inputs); - DoAutoCast(func_name, signature, args_spec_list, func_graph, &op_inputs, write_indexs); + DoAutoCast(func_name, signature, args_spec_list, func_graph, &op_inputs, write_indices); return func_graph->NewCNode(op_inputs); } } // namespace @@ -330,7 +310,7 @@ FuncGraphPtr DoSignatureMetaFuncGraph::GenerateFuncGraph(const AbstractBasePtrLi } auto new_cnode = BuildNewCNode(func_graph, name_, function_, args_spec_list, func_graph->parameters()); func_graph->set_output(new_cnode); - func_graph->set_flags(FUNC_GRAPH_FLAG_CORE, true); + func_graph->set_flag(FUNC_GRAPH_FLAG_CORE, true); return func_graph; } } // namespace prim diff --git a/mindspore/ccsrc/operator/composite/list_append_operation.cc b/mindspore/ccsrc/operator/composite/list_append_operation.cc index b5a4fc626e..236a5b7062 100644 --- a/mindspore/ccsrc/operator/composite/list_append_operation.cc +++ b/mindspore/ccsrc/operator/composite/list_append_operation.cc @@ -35,7 +35,7 @@ FuncGraphPtr ListAppend::GenerateFuncGraph(const abstract::AbstractBasePtrList & MS_EXCEPTION_IF_NULL(arg0_list); FuncGraphPtr ret = std::make_shared(); - ret->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret->set_flag(FUNC_GRAPH_FLAG_CORE, true); ret->debug_info()->set_name("append"); AnfNodePtr arg0_node = ret->add_parameter(); diff --git a/mindspore/ccsrc/operator/composite/map.cc b/mindspore/ccsrc/operator/composite/map.cc new file mode 100644 index 0000000000..a054da5f4d --- /dev/null +++ b/mindspore/ccsrc/operator/composite/map.cc @@ -0,0 +1,289 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "operator/composite/map.h" +#include +#include +#include +#include + +#include "ir/anf.h" +#include "ir/func_graph.h" +#include "pipeline/static_analysis/abstract_value.h" +#include "pipeline/static_analysis/abstract_function.h" +#include "pipeline/static_analysis/dshape.h" +#include "pybind_api/api_register.h" +#include "debug/trace.h" +#include "operator/ops.h" +#include "./common.h" + +namespace mindspore { +// namespace to support composite operators definition +namespace prim { +using FuncGraphAbstractClosure = mindspore::abstract::FuncGraphAbstractClosure; + +AnfNodePtr Map::FullMakeLeaf(const FuncGraphPtr &func_graph, const AnfNodePtr &fn_arg, const AnfNodePtrList &args) { + MS_LOG(DEBUG) << "Map FullMakeLeaf non recursive.\n"; + MS_EXCEPTION_IF_NULL(func_graph); + std::vector inputs; + if (fn_arg != nullptr) { + inputs.emplace_back(fn_arg); + } else { + inputs.emplace_back(NewValueNode(fn_leaf_)); + } + inputs.insert(inputs.end(), args.begin(), args.end()); + return func_graph->NewCNode(inputs); +} + +FuncGraphPtr Map::GenerateLeafFunc(const size_t &args_size) { + // Generate func for leaf nodes + FuncGraphPtr ptrGraph = std::make_shared(); + ptrGraph->set_flag(FUNC_GRAPH_FLAG_CORE, true); + ptrGraph->set_flag(FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER, true); + ptrGraph->debug_info()->set_name("map"); + AnfNodePtr ptrFnArg = nullptr; + if (fn_leaf_ == nullptr) { + ptrFnArg = ptrGraph->add_parameter(); + } + AnfNodePtrList args; + for (size_t i = 0; i < args_size; ++i) { + args.emplace_back(ptrGraph->add_parameter()); + } + 
ptrGraph->set_output(FullMakeLeaf(ptrGraph, ptrFnArg, args)); + return ptrGraph; +} + +AnfNodePtr Map::FullMakeList(const std::shared_ptr &type, const FuncGraphPtr &func_graph, + const AnfNodePtr &fn_arg, const ArgsPairList &arg_pairs) { + MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(type); + + std::size_t size = type->elements().size(); + bool is_not_same = + std::any_of(arg_pairs.begin(), arg_pairs.end(), [size](const std::pair &item) { + auto lhs = std::dynamic_pointer_cast(item.second); + MS_EXCEPTION_IF_NULL(lhs); + return lhs->elements().size() != size; + }); + if (is_not_same) { + MS_LOG(EXCEPTION) << "List in Map should have same length"; + } + + std::vector inputs; + inputs.push_back(NewValueNode(prim::kPrimMakeList)); + + for (int i = 0; i < SizeToInt(size); ++i) { + MS_LOG(DEBUG) << "GenerateLeafFunc for the " << i << "th arg of the target"; + auto ptrGraph = GenerateLeafFunc(arg_pairs.size()); + auto fn = NewValueNode(ptrGraph); + + std::vector inputs2; + inputs2.push_back(fn); + if (fn_arg != nullptr) { + inputs2.push_back(fn_arg); + } + + (void)std::transform( + arg_pairs.begin(), arg_pairs.end(), std::back_inserter(inputs2), + [&func_graph, i](const std::pair &item) { + return func_graph->NewCNode({NewValueNode(prim::kPrimListGetItem), item.first, NewValueNode(i)}); + }); + + inputs.push_back(func_graph->NewCNode(inputs2)); + } + return func_graph->NewCNode(inputs); +} + +AnfNodePtr Map::FullMakeTuple(const std::shared_ptr &type, const FuncGraphPtr &func_graph, + const AnfNodePtr &fn_arg, const ArgsPairList &arg_pairs) { + MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(type); + + std::size_t size = type->elements().size(); + bool is_not_same = + std::any_of(arg_pairs.begin(), arg_pairs.end(), [size](const std::pair &item) { + auto lhs = std::dynamic_pointer_cast(item.second); + MS_EXCEPTION_IF_NULL(lhs); + return lhs->elements().size() != size; + }); + if (is_not_same) { + MS_LOG(EXCEPTION) << "tuple in Map should have same 
length"; + } + + std::vector inputs; + inputs.push_back(NewValueNode(prim::kPrimMakeTuple)); + + for (int i = 0; i < SizeToInt(size); ++i) { + MS_LOG(DEBUG) << "GenerateLeafFunc for the " << i << "th arg of the tuple inputs"; + auto ptrGraph = GenerateLeafFunc(arg_pairs.size()); + auto fn = NewValueNode(ptrGraph); + + std::vector inputs2; + inputs2.push_back(fn); + if (fn_arg != nullptr) { + inputs2.push_back(fn_arg); + } + + (void)std::transform( + arg_pairs.begin(), arg_pairs.end(), std::back_inserter(inputs2), + [&func_graph, &i](std::pair item) { + return func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), item.first, NewValueNode(i)}); + }); + + inputs.push_back(func_graph->NewCNode(inputs2)); + } + return func_graph->NewCNode(inputs); +} + +AnfNodePtr Map::FullMakeClass(const std::shared_ptr &type, const FuncGraphPtr &func_graph, + const AnfNodePtr &fn_arg, const ArgsPairList &arg_pairs) { + MS_EXCEPTION_IF_NULL(type); + MS_EXCEPTION_IF_NULL(func_graph); + + std::vector inputs; + inputs.push_back(NewValueNode(prim::kPrimMakeRecord)); + inputs.push_back(NewValueNode(type)); + + std::size_t attrSize = type->GetAttributes().size(); + for (std::size_t i = 0; i < attrSize; ++i) { + MS_LOG(DEBUG) << "GenerateLeafFunc for the " << i << "th element of the inputs"; + auto ptrGraph = GenerateLeafFunc(arg_pairs.size()); + auto fn = NewValueNode(ptrGraph); + + std::vector inputs2; + inputs2.push_back(fn); + if (fn_arg != nullptr) { + inputs2.push_back(fn_arg); + } + + int j = 0; + for (auto item : arg_pairs) { + inputs2.push_back(func_graph->NewCNode({NewValueNode(prim::kPrimGetAttr), item.first, NewValueNode(j)})); + j++; + } + + inputs.push_back(func_graph->NewCNode(inputs2)); + } + return func_graph->NewCNode(inputs); +} + +AnfNodePtr Map::Make(const FuncGraphPtr &func_graph, const AnfNodePtr &fn_arg, const ArgsPairList &arg_pairs) { + bool found = false; + TypeId id = kObjectTypeEnd; + std::pair pair; + for (auto &item : arg_pairs) { + pair = item; + 
MS_LOG(DEBUG) << "Map " << pair.second->ToString(); + id = item.second->type_id(); + if (nonleaf_.count(id)) { + found = true; + break; + } + } + + if (found) { + // In a nonleaf situation, all arguments must have the same generic. + bool is_not_same = + std::any_of(arg_pairs.begin(), arg_pairs.end(), [pair](const std::pair &item) { + if (item.first != pair.first) { + return item.second->type_id() != pair.second->type_id(); + } + return false; + }); + if (is_not_same) { + std::ostringstream oss; + oss << "There are " << arg_pairs.size() << " inputs of `" << name_ << "`, corresponding type info:\n" + << trace::GetDebugInfo(func_graph->debug_info()) << "\n"; + int idx = 0; + for (auto &item : arg_pairs) { + oss << ++idx << ": " << item.second->ToString() << "\n"; + } + MS_LOG(EXCEPTION) << "Map cannot match up all input types of arguments.\n" + << oss.str() << pair.second->ToString() << "\n"; + } + } + + switch (id) { + case kObjectTypeList: { + auto type = std::static_pointer_cast(pair.second); + return FullMakeList(type, func_graph, fn_arg, arg_pairs); + } + case kObjectTypeTuple: { + auto type = std::static_pointer_cast(pair.second); + return FullMakeTuple(type, func_graph, fn_arg, arg_pairs); + } + case kObjectTypeClass: { + auto type = std::static_pointer_cast(pair.second); + return FullMakeClass(type, func_graph, fn_arg, arg_pairs); + } + default: + MS_LOG(EXCEPTION) << "Map can only be applied to list, tuple and class " + << ", but got " << pair.second->ToString(); + } +} + +FuncGraphPtr Map::GenerateFromTypes(const TypePtrList &args_spec_list) { + FuncGraphPtr ptrGraph = std::make_shared(); + ptrGraph->set_flag(FUNC_GRAPH_FLAG_CORE, true); + ptrGraph->set_flag(FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER, true); + ptrGraph->debug_info()->set_name("map"); + + AnfNodePtr ptrFnArg = nullptr; + std::size_t i = 0; + if (fn_leaf_ == nullptr) { + ptrFnArg = ptrGraph->add_parameter(); + i = 1; + } + ArgsPairList arg_pairs; + std::size_t size = args_spec_list.size(); + for (; 
i < size; ++i) { + MS_LOG(DEBUG) << "GenerateFromTypes for elements from " << args_spec_list[i]->ToString(); + arg_pairs.push_back(std::make_pair(ptrGraph->add_parameter(), args_spec_list[i])); + } + + ptrGraph->set_output(Make(ptrGraph, ptrFnArg, arg_pairs)); + return ptrGraph; +} + +abstract::AbstractBasePtrList Map::NormalizeArgs(const AbstractBasePtrList &args_spec_list) const { + if (fn_leaf_ == nullptr) { + MS_EXCEPTION_IF_NULL(args_spec_list[0]); + // Assert that map's function param does not contain free variables + if (args_spec_list[0]->isa()) { + auto graph_func = dyn_cast(args_spec_list[0]); + auto func_graph = graph_func->func_graph(); + if (func_graph->parent() != nullptr) { + MS_LOG(EXCEPTION) << "Map don't support Closure with free variable yet."; + } + } + } + + AbstractBasePtrList broadened; + (void)std::transform(args_spec_list.begin(), args_spec_list.end(), std::back_inserter(broadened), + [](const AbstractBasePtr &arg) -> AbstractBasePtr { + MS_EXCEPTION_IF_NULL(arg); + return arg->Broaden(); + }); + return broadened; +} + +REGISTER_PYBIND_DEFINE(Map_, ([](const py::module *m) { + (void)py::class_>(*m, "Map_") + .def(py::init>(), py::arg("leaf")) + .def(py::init<>()); + })); +} // namespace prim +} // namespace mindspore diff --git a/mindspore/ccsrc/operator/composite/map.h b/mindspore/ccsrc/operator/composite/map.h new file mode 100644 index 0000000000..02d374214a --- /dev/null +++ b/mindspore/ccsrc/operator/composite/map.h @@ -0,0 +1,98 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_OPERATOR_COMPOSITE_MAP_H_ +#define MINDSPORE_CCSRC_OPERATOR_COMPOSITE_MAP_H_ + +#include +#include +#include +#include + +#include "ir/dtype.h" +#include "ir/meta_func_graph.h" +#include "operator/composite/multitype_funcgraph.h" + +namespace mindspore { +// namespace to support composite operators definition +namespace prim { +using ArgsPairList = std::vector>; + +class Map : public MetaFuncGraph { + public: + explicit Map(const std::shared_ptr &fn_leaf = nullptr) + : MetaFuncGraph("map"), + fn_leaf_(fn_leaf), + broadcast_(false), + nonleaf_({kObjectTypeList, kObjectTypeTuple, kObjectTypeClass}) { + Init(); + } + Map(const Map &h) : MetaFuncGraph("map"), fn_leaf_(h.fn_leaf_), broadcast_(h.broadcast_), nonleaf_(h.nonleaf_) { + Init(); + } + Map &operator=(const Map &h) { + if (this != &h) { + fn_leaf_ = h.fn_leaf_; + broadcast_ = h.broadcast_; + nonleaf_ = h.nonleaf_; + if (fn_leaf_) { + name_ = "map[" + fn_leaf_->name() + "]"; + } + } + return *this; + } + ~Map() override = default; + MS_DECLARE_PARENT(Map, MetaFuncGraph) + abstract::AbstractBasePtrList NormalizeArgs(const abstract::AbstractBasePtrList &args_spec_list) const override; + FuncGraphPtr GenerateFromTypes(const TypePtrList &args_spec_list) override; + MetaFuncGraphPtr GetFnLeaf() { return fn_leaf_; } + + private: + FuncGraphPtr GenerateLeafFunc(const size_t &args_size); + AnfNodePtr FullMakeLeaf(const FuncGraphPtr &func_graph, const AnfNodePtr &fn_arg, const AnfNodePtrList &args); + AnfNodePtr FullMakeList(const std::shared_ptr &type, const FuncGraphPtr &func_graph, const AnfNodePtr &fn_arg, + const ArgsPairList &arg_pairs); + AnfNodePtr FullMakeTuple(const std::shared_ptr &type, const FuncGraphPtr &func_graph, const AnfNodePtr &fn_arg, + const ArgsPairList &arg_pairs); + AnfNodePtr FullMakeClass(const std::shared_ptr &type, const FuncGraphPtr &func_graph, const 
AnfNodePtr &fn_arg, + const ArgsPairList &arg_pairs); + AnfNodePtr Make(const FuncGraphPtr &graph, const AnfNodePtr &fn_arg, const ArgsPairList &arg_pairs); + void Init() { + if (fn_leaf_ != nullptr) { + name_ = "map[" + fn_leaf_->name() + "]"; + } + signatures_ = + // def map(func:read, *args:ref): + std::vector({{"func", SignatureEnumRW::kRWRead, SignatureEnumKind::kKindDefault}, + {"args", SignatureEnumRW::kRWRef, SignatureEnumKind::kKindVarPositional}}); + } + + MultitypeFuncGraphPtr fn_leaf_; + bool broadcast_; + std::set nonleaf_; +}; +using MapPtr = std::shared_ptr; +class MapPy : public Map { + public: + explicit MapPy(const std::shared_ptr &fn_leaf = nullptr) : Map(fn_leaf) {} + ~MapPy() override = default; + MS_DECLARE_PARENT(MapPy, Map) +}; +using MapPyPtr = std::shared_ptr; +} // namespace prim +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_OPERATOR_COMPOSITE_MAP_H_ diff --git a/mindspore/ccsrc/operator/composite/multitype_funcgraph.cc b/mindspore/ccsrc/operator/composite/multitype_funcgraph.cc index e3957d044a..88b3134508 100644 --- a/mindspore/ccsrc/operator/composite/multitype_funcgraph.cc +++ b/mindspore/ccsrc/operator/composite/multitype_funcgraph.cc @@ -39,7 +39,6 @@ namespace mindspore { // namespace to support composite operators definition namespace prim { - MultitypeFuncGraph::MultitypeFuncGraph(const std::string &name) : MetaFuncGraph(name) { fn_cache_.clear(); signatures_ = std::vector({// def multitype(*args:ref): @@ -148,6 +147,5 @@ REGISTER_PYBIND_DEFINE(MultitypeFuncGraph_, ([](const py::module *m) { .def(py::init()) .def("register_fn", &MultitypeFuncGraph::PyRegister); })); - } // namespace prim } // namespace mindspore diff --git a/mindspore/ccsrc/operator/composite/multitype_funcgraph.h b/mindspore/ccsrc/operator/composite/multitype_funcgraph.h index b38625d62c..feb38f17ba 100644 --- a/mindspore/ccsrc/operator/composite/multitype_funcgraph.h +++ b/mindspore/ccsrc/operator/composite/multitype_funcgraph.h @@ -34,7 +34,6 @@ 
namespace mindspore { // namespace to support composite operators definition namespace prim { - class MultitypeFuncGraph : public MetaFuncGraph { public: explicit MultitypeFuncGraph(const std::string &name); @@ -59,7 +58,6 @@ class MultitypeFuncGraph : public MetaFuncGraph { std::unordered_map fn_cache_py_; }; using MultitypeFuncGraphPtr = std::shared_ptr; - } // namespace prim } // namespace mindspore diff --git a/mindspore/ccsrc/operator/composite/unpack_call.cc b/mindspore/ccsrc/operator/composite/unpack_call.cc index 6363d495c5..3993d41597 100644 --- a/mindspore/ccsrc/operator/composite/unpack_call.cc +++ b/mindspore/ccsrc/operator/composite/unpack_call.cc @@ -51,7 +51,7 @@ FuncGraphPtr UnpackCall::GenerateFuncGraph(const AbstractBasePtrList &args_spec_ (void)abstract::CheckArg(op_name, args_spec_list, 0); auto ret_graph = std::make_shared(); - ret_graph->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret_graph->set_flag(FUNC_GRAPH_FLAG_CORE, true); AnfNodePtr fnNode = ret_graph->add_parameter(); std::vector elems; diff --git a/mindspore/ccsrc/operator/composite/zip_operation.cc b/mindspore/ccsrc/operator/composite/zip_operation.cc index 4d34163f28..33e21da044 100644 --- a/mindspore/ccsrc/operator/composite/zip_operation.cc +++ b/mindspore/ccsrc/operator/composite/zip_operation.cc @@ -57,7 +57,7 @@ FuncGraphPtr ZipOperation::GenerateFuncGraph(const AbstractBasePtrList &args_spe return (x->cast()->size() < y->cast()->size()); }); FuncGraphPtr ret_graph = std::make_shared(); - ret_graph->set_flags(FUNC_GRAPH_FLAG_CORE, true); + ret_graph->set_flag(FUNC_GRAPH_FLAG_CORE, true); for (size_t idx = 0; idx < args_spec_list.size(); idx++) { (void)ret_graph->add_parameter(); } diff --git a/mindspore/ccsrc/operator/ops.cc b/mindspore/ccsrc/operator/ops.cc index 36bbe1a650..f86cbd7fd2 100755 --- a/mindspore/ccsrc/operator/ops.cc +++ b/mindspore/ccsrc/operator/ops.cc @@ -50,6 +50,12 @@ const PrimitivePtr kPrimBoolNot = std::make_shared("bool_not"); const PrimitivePtr kPrimBoolAnd 
= std::make_shared("bool_and"); const PrimitivePtr kPrimBoolOr = std::make_shared("bool_or"); const PrimitivePtr kPrimBoolEq = std::make_shared("bool_eq"); +const PrimitivePtr kPrimGreater = std::make_shared("Greater"); +const PrimitivePtr kPrimGreaterEqual = std::make_shared("GreaterEqual"); +const PrimitivePtr kPrimLess = std::make_shared("Less"); +const PrimitivePtr kPrimLessEqual = std::make_shared("LessEqual"); +const PrimitivePtr kPrimEqual = std::make_shared("Equal"); +const PrimitivePtr kPrimNotEqual = std::make_shared("NotEqual"); // Type introspection const PrimitivePtr kPrimTypeOf = std::make_shared("typeof"); @@ -133,6 +139,8 @@ const PrimitivePtr kPrimConcat = std::make_shared("Concat"); const PrimitivePtr kPrimSqueeze = std::make_shared("Squeeze"); const PrimitivePtr kPrimTranspose = std::make_shared("Transpose"); const PrimitivePtr kPrimGatherV2 = std::make_shared("GatherV2"); +const PrimitivePtr kPrimEmbeddingLookup = std::make_shared("EmbeddingLookup"); +const PrimitivePtr kPrimEmbeddingLookupCommGrad = std::make_shared("EmbeddingLookupCommGrad"); const PrimitivePtr kPrimSize = std::make_shared("Size"); const PrimitivePtr kPrimArgMax = std::make_shared("Argmax"); const PrimitivePtr kPrimPack = std::make_shared("Pack"); @@ -145,6 +153,7 @@ const PrimitivePtr kPrimAddN = std::make_shared("AddN"); const PrimitivePtr KPrimTransData = std::make_shared("TransData"); const PrimitivePtr kPrimNMSWithMask = std::make_shared("NMSWithMask"); const PrimitivePtr kPrimPad = std::make_shared("Pad"); +const PrimitivePtr kPrimArgMaxWithValue = std::make_shared("ArgMaxWithValue"); // Maths const PrimitivePtr kPrimTensorAdd = std::make_shared("TensorAdd"); @@ -163,14 +172,20 @@ const PrimitivePtr kPrimMul = std::make_shared("Mul"); const PrimitivePtr kPrimMinimum = std::make_shared("Minimum"); const PrimitivePtr kPrimMaximum = std::make_shared("Maximum"); const PrimitivePtr kPrimSquare = std::make_shared("Square"); -const PrimitivePtr kPrimEqual = 
std::make_shared("Equal"); -const PrimitivePtr kPrimLess = std::make_shared("Less"); -const PrimitivePtr kPrimLessEqual = std::make_shared("LessEqual"); const PrimitivePtr kPrimCumSum = std::make_shared("CumSum"); const PrimitivePtr kPrimCumProd = std::make_shared("CumProd"); +const PrimitivePtr kPrimSubscalar = std::make_shared("Subscalar"); +const PrimitivePtr kPrimInplaceAdd = std::make_shared("InplaceAdd"); +const PrimitivePtr kPrimInplaceSub = std::make_shared("InplaceSub"); +const PrimitivePtr kPrimPow = std::make_shared("Pow"); +const PrimitivePtr kPrimRealDiv = std::make_shared("RealDiv"); +const PrimitivePtr kPrimSqrt = std::make_shared("Sqrt"); +const PrimitivePtr kPrimReciprocal = std::make_shared("Reciprocal"); +const PrimitivePtr kPrimExpandDims = std::make_shared("ExpandDims"); // NN const PrimitivePtr kPrimFlatten = std::make_shared("Flatten"); +const PrimitivePtr kPrimSoftmax = std::make_shared("Softmax"); const PrimitivePtr kPrimLogSoftmax = std::make_shared("LogSoftmax"); const PrimitivePtr kPrimLogSoftmaxGrad = std::make_shared("LogSoftmaxGrad"); const PrimitivePtr kPrimTanh = std::make_shared("Tanh"); @@ -205,18 +220,21 @@ const PrimitivePtr kPrimLayerNormGrad = std::make_shared("LayerNormGr const PrimitivePtr kPrimLayerNormXBackprop = std::make_shared("LayerNormXBackprop"); const PrimitivePtr kPrimLayerNormBetaGammaBackprop = std::make_shared("LayerNormBetaGammaBackprop"); const PrimitivePtr kPrimDropoutGenMask = std::make_shared("DropoutGenMask"); +const PrimitivePtr kPrimDropoutDoMask = std::make_shared("DropoutDoMask"); const PrimitivePtr kPrimOneHot = std::make_shared("OneHot"); const PrimitivePtr kPrimGelu = std::make_shared("Gelu"); const PrimitivePtr kPrimGeluGrad = std::make_shared("GeluGrad"); const PrimitivePtr kPrimRelu = std::make_shared("ReLU"); const PrimitivePtr kPrimReluV2 = std::make_shared("ReLUV2"); -const PrimitivePtr kPrimZerosLikeTensor = std::make_shared("zeros_like_tensor"); +const PrimitivePtr kPrimZerosLike = 
std::make_shared("ZerosLike"); const PrimitivePtr kPrimFakeBprop = std::make_shared("fake_bprop"); const PrimitivePtr kPrimBpropCut = std::make_shared("bprop_cut"); +const PrimitivePtr kPrimFakeQuantPerLayer = std::make_shared("FakeQuantPerLayer"); +const PrimitivePtr kPrimFakeQuantPerChannel = std::make_shared("FakeQuantPerChannel"); // Other miscellaneous const PrimitivePtr kPrimIdentity = std::make_shared("identity"); -const PrimitivePtr kPrimPartial = std::make_shared("partial"); +const PrimitivePtr kPrimPartial = std::make_shared("Partial"); const PrimitivePtr kPrimJ = std::make_shared("J"); const PrimitivePtr kPrimEnvSetItem = std::make_shared("env_setitem"); const PrimitivePtr kPrimEnvGetItem = std::make_shared("env_getitem"); @@ -233,7 +251,7 @@ const PrimitivePtr kPrimCheckBprop = std::make_shared("CheckBprop"); const PrimitivePtr kPrimPrint = std::make_shared("Print"); const PrimitivePtr kPrimMakeRef = std::make_shared("make_ref"); -const PrimitivePtr kPrimDepend = std::make_shared("depend"); +const PrimitivePtr kPrimDepend = std::make_shared("Depend"); const PrimitivePtr kPrimStateSetItem = std::make_shared("state_setitem"); const PrimitivePtr kPrimBroadcastGradientArgs = std::make_shared("BroadcastGradientArgs"); @@ -242,11 +260,15 @@ const PrimitivePtr kPrimIs_ = std::make_shared("is_"); const PrimitivePtr kPrimIsNot = std::make_shared("is_not"); const PrimitivePtr kPrimInDict = std::make_shared("in_dict"); const PrimitivePtr kPrimNotInDict = std::make_shared("not_in_dict"); +const PrimitivePtr kPrimMixedPrecisionCast = std::make_shared("mixed_precision_cast"); +const PrimitivePtr kPrimIsConsant = std::make_shared("is_constant"); +const PrimitivePtr kPrimEquivFormat = std::make_shared("EquivFormat"); // Comm ops const PrimitivePtr kPrimMirror = std::make_shared("_MirrorOperator"); const PrimitivePtr kPrimVirtualDiv = std::make_shared("_VirtualDiv"); const PrimitivePtr kPrimVirtualDataset = std::make_shared("_VirtualDataset"); +const PrimitivePtr 
kPrimAllReduce = std::make_shared("AllReduce"); // Debug ops const PrimitivePtr kPrimScalarSummary = std::make_shared("ScalarSummary"); diff --git a/mindspore/ccsrc/operator/ops.h b/mindspore/ccsrc/operator/ops.h index 03527f7be2..65327cf407 100755 --- a/mindspore/ccsrc/operator/ops.h +++ b/mindspore/ccsrc/operator/ops.h @@ -27,7 +27,8 @@ namespace mindspore { // namespace to support primitive operators namespace prim { ValuePtr GetPythonOps(const std::string &op_name, - const std::string &module_name = "mindspore._extends.parse.standard_method"); + const std::string &module_name = "mindspore._extends.parse.standard_method", + bool use_signature = false); // Arithmetic extern const PrimitivePtr kPrimScalarAdd; @@ -58,6 +59,12 @@ extern const PrimitivePtr kPrimBoolNot; extern const PrimitivePtr kPrimBoolAnd; extern const PrimitivePtr kPrimBoolOr; extern const PrimitivePtr kPrimBoolEq; +extern const PrimitivePtr kPrimGreater; +extern const PrimitivePtr kPrimGreaterEqual; +extern const PrimitivePtr kPrimLess; +extern const PrimitivePtr kPrimLessEqual; +extern const PrimitivePtr kPrimEqual; +extern const PrimitivePtr kPrimNotEqual; // Type introspection extern const PrimitivePtr kPrimTypeOf; @@ -140,6 +147,8 @@ extern const PrimitivePtr kPrimConcat; extern const PrimitivePtr kPrimSqueeze; extern const PrimitivePtr kPrimTranspose; extern const PrimitivePtr kPrimGatherV2; +extern const PrimitivePtr kPrimEmbeddingLookup; +extern const PrimitivePtr kPrimEmbeddingLookupCommGrad; extern const PrimitivePtr kPrimSize; extern const PrimitivePtr kPrimArgMax; extern const PrimitivePtr kPrimPack; @@ -153,6 +162,11 @@ extern const PrimitivePtr kPrimAddN; extern const PrimitivePtr KPrimTransData; extern const PrimitivePtr kPrimNMSWithMask; extern const PrimitivePtr kPrimPad; +extern const PrimitivePtr kPrimArgMaxWithValue; +extern const PrimitivePtr kPrimRealDiv; +extern const PrimitivePtr kPrimSqrt; +extern const PrimitivePtr kPrimReciprocal; +extern const PrimitivePtr 
kPrimExpandDims; // Maths extern const PrimitivePtr kPrimTensorAdd; @@ -176,9 +190,14 @@ extern const PrimitivePtr kPrimLess; extern const PrimitivePtr kPrimLessEqual; extern const PrimitivePtr kPrimCumSum; extern const PrimitivePtr kPrimCumProd; +extern const PrimitivePtr kPrimSubscalar; +extern const PrimitivePtr kPrimInplaceAdd; +extern const PrimitivePtr kPrimInplaceSub; +extern const PrimitivePtr kPrimPow; // NN extern const PrimitivePtr kPrimFlatten; +extern const PrimitivePtr kPrimSoftmax; extern const PrimitivePtr kPrimLogSoftmax; extern const PrimitivePtr kPrimLogSoftmaxGrad; extern const PrimitivePtr kPrimApplyCenteredRMSProp; @@ -211,15 +230,18 @@ extern const PrimitivePtr kPrimLayerNormGrad; extern const PrimitivePtr kPrimLayerNormXBackprop; extern const PrimitivePtr kPrimLayerNormBetaGammaBackprop; extern const PrimitivePtr kPrimDropoutGenMask; +extern const PrimitivePtr kPrimDropoutDoMask; extern const PrimitivePtr kPrimOneHot; extern const PrimitivePtr kPrimGelu; extern const PrimitivePtr kPrimGeluGrad; extern const PrimitivePtr kPrimRelu; extern const PrimitivePtr kPrimReluV2; extern const PrimitivePtr kPrimActivation; -extern const PrimitivePtr kPrimZerosLikeTensor; +extern const PrimitivePtr kPrimZerosLike; extern const PrimitivePtr kPrimFakeBprop; extern const PrimitivePtr kPrimBpropCut; +extern const PrimitivePtr kPrimFakeQuantPerLayer; +extern const PrimitivePtr kPrimFakeQuantPerChannel; // Other Miscellaneous extern const PrimitivePtr kPrimIdentity; @@ -251,8 +273,12 @@ extern const PrimitivePtr kPrimIs_; extern const PrimitivePtr kPrimIsNot; extern const PrimitivePtr kPrimInDict; extern const PrimitivePtr kPrimNotInDict; +extern const PrimitivePtr kPrimMixedPrecisionCast; +extern const PrimitivePtr kPrimIsConsant; +extern const PrimitivePtr kPrimEquivFormat; // Comm ops +extern const PrimitivePtr kPrimAllReduce; extern const PrimitivePtr kPrimMirror; extern const PrimitivePtr kPrimVirtualDiv; extern const PrimitivePtr kPrimVirtualDataset; 
diff --git a/mindspore/ccsrc/operator/ops_extends.cc b/mindspore/ccsrc/operator/ops_extends.cc index 6a192eca10..d415b45adf 100755 --- a/mindspore/ccsrc/operator/ops_extends.cc +++ b/mindspore/ccsrc/operator/ops_extends.cc @@ -23,10 +23,10 @@ namespace mindspore { // namespace to support primitive operators namespace prim { -ValuePtr GetPythonOps(const std::string &op_name, const std::string &module_name) { +ValuePtr GetPythonOps(const std::string &op_name, const std::string &module_name, bool use_signature) { py::object obj = parse::python_adapter::GetPyFn(module_name, op_name); ValuePtr node = nullptr; - bool succ = parse::ConvertData(obj, &node); + bool succ = parse::ConvertData(obj, &node, use_signature); if (!succ) { MS_LOG(EXCEPTION) << "get Python op " << op_name << " from " << module_name << " fail"; } diff --git a/mindspore/ccsrc/operator/prim_nn.cc b/mindspore/ccsrc/operator/prim_nn.cc index d057fd925d..d9a0071757 100644 --- a/mindspore/ccsrc/operator/prim_nn.cc +++ b/mindspore/ccsrc/operator/prim_nn.cc @@ -271,8 +271,8 @@ AbstractBasePtr InferImplRelu(const AnalysisEnginePtr &, const PrimitivePtr &pri return args_spec_list[0]->Broaden(); } -AbstractBasePtr InferImplZerosLikeTensor(const AnalysisEnginePtr &, const PrimitivePtr &primitive, - const AbstractBasePtrList &args_spec_list) { +AbstractBasePtr InferImplZerosLike(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { // Inputs: a tensor. CheckArgsSize(primitive->name(), args_spec_list, 1); return args_spec_list[0]->Broaden(); diff --git a/mindspore/ccsrc/operator/prim_others.cc b/mindspore/ccsrc/operator/prim_others.cc index b8e89378e6..432b12f83b 100644 --- a/mindspore/ccsrc/operator/prim_others.cc +++ b/mindspore/ccsrc/operator/prim_others.cc @@ -14,9 +14,14 @@ * limitations under the License. 
*/ +#include +#include + +#include "ir/dtype.h" +#include "common/utils.h" +#include "operator/ops.h" #include "pipeline/static_analysis/param_validator.h" #include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" #include "pipeline/static_analysis/utils.h" #include "utils/symbolic.h" @@ -50,6 +55,81 @@ AbstractBasePtr InferImplJ(const AnalysisEnginePtr &, const PrimitivePtr &primit return AbstractFunction::MakeAbstractFunction(jv); } +class UndeterminedShapeType { + public: + explicit UndeterminedShapeType(const std::string &env_str) { + // param_name indices_shape indices_type values_shape values_type dense_shape + // export UNDETERMINED_SPARSE_SHAPE_TYPES="sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 + // 2:Float32:3 1 2" + std::vector fields; + string tmp; + std::stringstream input(env_str); + while (std::getline(input, tmp, ':')) { + fields.push_back(tmp); + } + if (fields.size() != fields_num) { + MS_LOG(EXCEPTION) << "Expect " << fields_num << " fields, but got " << fields.size(); + } + + param_name_ = fields[0]; + + indices_shape_ = GetShape(fields[1]); + indices_type_ = StringToType(fields[2]); + + values_shape_ = GetShape(fields[3]); + values_type_ = StringToType(fields[4]); + + auto dense_shape_vec = GetShape(fields[5]); + AbstractBasePtrList dense_shape_list; + (void)std::transform(dense_shape_vec.begin(), dense_shape_vec.end(), std::back_inserter(dense_shape_list), + [](const auto &elem) { return FromValue(elem, false); }); + dense_shape_ = dense_shape_list; + } + ~UndeterminedShapeType() = default; + const std::string ¶m_name() { return param_name_; } + const std::vector &indices_shape() { return indices_shape_; } + const TypePtr &indices_type() { return indices_type_; } + const std::vector &values_shape() { return values_shape_; } + const TypePtr &values_type() { return values_type_; } + const AbstractBasePtrList &dense_shape() { return dense_shape_; } + + private: + std::string param_name_; + std::vector 
indices_shape_; + TypePtr indices_type_; + std::vector values_shape_; + TypePtr values_type_; + AbstractBasePtrList dense_shape_; + static const size_t fields_num; + + std::vector GetShape(const std::string &shape_str); +}; +std::vector UndeterminedShapeType::GetShape(const std::string &shape_str) { + std::vector ret; + std::istringstream iss(shape_str); + int elem; + while (iss.good()) { + iss >> elem; + ret.emplace_back(elem); + } + return ret; +} +const size_t UndeterminedShapeType::fields_num = 6; + +std::unordered_map g_undetermined_configs; +void InitUndeterminedFromEnv(const std::string &sparse_shape_types) { + if (!g_undetermined_configs.empty()) { + return; + } + std::string tmp; + std::stringstream input(sparse_shape_types); + while (std::getline(input, tmp, ';')) { + auto config = UndeterminedShapeType(tmp); + g_undetermined_configs.insert(std::make_pair(config.param_name(), config)); + MS_LOG(DEBUG) << "Undetermined config from env: " << tmp; + } +} + AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list) { MS_EXCEPTION_IF_NULL(primitive); @@ -62,6 +142,37 @@ AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePt if (type->type_id() != kObjectTypeSymbolicKeyType) { MS_LOG(EXCEPTION) << "EnvGetItem evaluator args[1] should be a SymbolicKeyInstance but: " << key->ToString(); } + + if (!key->sparse_grad().empty()) { + // Will be fixed once undetermined type ready + auto sparse_shape_types = common::GetEnv("UNDETERMINED_SPARSE_SHAPE_TYPES"); + if (sparse_shape_types.empty()) { + sparse_shape_types = "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2"; + } + InitUndeterminedFromEnv(sparse_shape_types); + + auto shape_types = g_undetermined_configs.find(key->sparse_grad()); + if (shape_types == g_undetermined_configs.end()) { + MS_LOG(EXCEPTION) << "Param " << key->ToString() + << " has sparse_grad, but shape/type is 
not configured in env UNDETERMINED_SPARSE_SHAPE_TYPES: " + << sparse_shape_types; + } + MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString(); + AbstractBasePtrList sparse_list; + // indices + auto indices_ele = std::make_shared(kAnyValue, shape_types->second.indices_type()); + auto indices = + std::make_shared(indices_ele, std::make_shared(shape_types->second.indices_shape())); + sparse_list.emplace_back(indices); + // values + auto dout_ele = std::make_shared(kAnyValue, shape_types->second.values_type()); + auto dout = std::make_shared(dout_ele, std::make_shared(shape_types->second.values_shape())); + sparse_list.emplace_back(dout); + // dense_shape + sparse_list.emplace_back(std::make_shared(shape_types->second.dense_shape())); + return std::make_shared(sparse_list); + } + if (!key->GetValueTrack()->isa()) { return dflt; } @@ -80,8 +191,6 @@ AbstractBasePtr InferImplEnvSetItem(const AnalysisEnginePtr &, const PrimitivePt CheckArgsSize(primitive->name(), args_spec_list, 3); auto key = args_spec_list[1]; - auto value = args_spec_list[2]; - ValuePtr key_value_ptr = key->GetValueTrack(); MS_EXCEPTION_IF_NULL(key_value_ptr); auto key_value_track = key_value_ptr->cast(); @@ -91,7 +200,6 @@ AbstractBasePtr InferImplEnvSetItem(const AnalysisEnginePtr &, const PrimitivePt } auto expected = key_value_track->abstract(); MS_EXCEPTION_IF_NULL(expected); - (void)expected->Join(value); return std::make_shared(kAnyValue, std::make_shared()); } @@ -126,7 +234,9 @@ AbstractBasePtr InferImplMakeRef(const AnalysisEnginePtr &, const PrimitivePtr & if (type->type_id() != kObjectTypeRefKey) { MS_LOG(EXCEPTION) << "First input of make_ref should be a RefKey but a " << type->ToString(); } - return std::make_shared(args_spec_list[0], args_spec_list[1], args_spec_list[2]); + auto ret = std::make_shared(args_spec_list[0], args_spec_list[1], args_spec_list[2]); + ret->set_sparse_grad(args_spec_list[2]->sparse_grad()); + return ret; } AbstractBasePtr InferImplGetRefKey(const 
AnalysisEnginePtr &, const PrimitivePtr &, diff --git a/mindspore/ccsrc/operator/prim_statement.cc b/mindspore/ccsrc/operator/prim_statement.cc index c297e128e2..5eb8d39996 100644 --- a/mindspore/ccsrc/operator/prim_statement.cc +++ b/mindspore/ccsrc/operator/prim_statement.cc @@ -110,7 +110,8 @@ AbstractBasePtr InferImplSwitch(const AnalysisEnginePtr &, const PrimitivePtr &, ValuePtr v = cond->GetValueTrack(); MS_EXCEPTION_IF_NULL(v); - if (v->isa()) { + // for tensor as condition, keeps both true and false branch. + if (v->isa() || cond->isa()) { MS_EXCEPTION_IF_NULL(tb); return tb->Join(fb); } @@ -228,5 +229,15 @@ AbstractBasePtr InferImplNotInDict(const AnalysisEnginePtr &, const PrimitivePtr // Inputs: x, t return std::make_shared(!IsInDict(primitive, args_spec_list)); } +AbstractBasePtr InferImplIsConstant(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // statement: isconstant(x) + // Inputs: x + if (args_spec_list.size() != 1) { + MS_LOG(EXCEPTION) << "IsConstant requires args input size = 1"; + } + ValuePtr v = args_spec_list[0]->BuildValue(); + return std::make_shared(!v->isa()); +} } // namespace abstract } // namespace mindspore diff --git a/mindspore/ccsrc/operator/prim_structures.cc b/mindspore/ccsrc/operator/prim_structures.cc index 7b0bba98a5..33c7a1e209 100644 --- a/mindspore/ccsrc/operator/prim_structures.cc +++ b/mindspore/ccsrc/operator/prim_structures.cc @@ -205,13 +205,14 @@ AbstractBasePtr InferTupleOrListGetItem(const std::string &op_name, const Abstra ValuePtr index_value = index->BuildValue(); if (!index_value->isa()) { - MS_LOG(EXCEPTION) << op_name << " evaluator index should be an int32 number, but got " << index_value->ToString(); + MS_EXCEPTION(IndexError) << op_name << " evaluator index should be an int32 number, but got " + << index_value->ToString(); } int idx_v = GetValue(index_value); std::size_t nelems = queue->elements().size(); if (idx_v >= SizeToInt(nelems) || idx_v < 
-SizeToInt(nelems)) { - MS_LOG(EXCEPTION) << op_name << " evaluator index should be in range[-" << SizeToInt(nelems) << ", " - << SizeToInt(nelems) << "), but got " << idx_v << "."; + MS_EXCEPTION(IndexError) << op_name << " evaluator index should be in range[-" << SizeToInt(nelems) << ", " + << SizeToInt(nelems) << "), but got " << idx_v << "."; } std::size_t uidx_v = 0; @@ -232,18 +233,21 @@ AbstractBasePtr InferTupleOrListSetItem(const std::string &op_name, const Abstra ValuePtr index_value = index->BuildValue(); if (!index_value->isa()) { - MS_LOG(EXCEPTION) << op_name << " evaluator index should be an int32 number, but got " << index_value->ToString(); + MS_EXCEPTION(IndexError) << op_name << " evaluator index should be an int32 number, but got " + << index_value->ToString(); } int idx_v = GetValue(index_value); if (idx_v < 0) { - MS_LOG(EXCEPTION) << "The index of " << typeid(T).name() << " should be positive number, but got " << idx_v << "."; + MS_EXCEPTION(IndexError) << "The index of " << typeid(T).name() << " should be positive number, but got " << idx_v + << "."; } size_t uidx_v = IntToSize(idx_v); AbstractBasePtrList elements = queue->elements(); std::size_t nelems = elements.size(); if (uidx_v >= nelems) { - MS_LOG(EXCEPTION) << op_name << " evaluator the index: " << uidx_v << " to set out of range: " << nelems - 1 << "."; + MS_EXCEPTION(IndexError) << op_name << " evaluator the index: " << uidx_v << " to set out of range: " << nelems - 1 + << "."; } elements[uidx_v] = args_spec_list[2]; return std::make_shared(elements); diff --git a/mindspore/ccsrc/optimizer/ad/dfunctor.cc b/mindspore/ccsrc/optimizer/ad/dfunctor.cc index bdefcfeba1..e192f3912e 100644 --- a/mindspore/ccsrc/optimizer/ad/dfunctor.cc +++ b/mindspore/ccsrc/optimizer/ad/dfunctor.cc @@ -45,17 +45,26 @@ DFunctor::DFunctor(const FuncGraphPtr &primal_graph, const pipeline::ResourceBas : primal_graph_(primal_graph), resources_(resources), need_cut_(false), is_top_(false) { 
TraceManager::DebugTrace(std::make_shared(primal_graph->debug_info())); k_graph_ = std::make_shared(); + if (primal_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + std::string grad_op_name = GetValue(primal_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); + k_graph_->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, MakeValue(grad_op_name)); + } TraceManager::EndTrace(); TraceManager::DebugTrace(std::make_shared(primal_graph->debug_info())); tape_ = std::make_shared(); + // Add "_Grad" postfix + if (primal_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + std::string grad_op_name = GetValue(primal_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) + "_Grad"; + tape_->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, MakeValue(grad_op_name)); + } TraceManager::EndTrace(); dout_ = tape_->add_parameter(); } -void DFunctor::Init(const DFunctorPtr &functor, bool is_top) { - func_graph_to_functor_[primal_graph_] = functor; +void DFunctor::Init(bool is_top) { + func_graph_to_functor_[primal_graph_] = shared_from_this(); is_top_ = is_top; if (is_top) { scope_ = primal_graph_->scope(); @@ -368,10 +377,10 @@ FuncGraphPtr DFunctor::KUserDefined(const FuncGraphPtr &primal) { (void)primal->transforms().insert(std::make_pair("grad", FuncGraphTransform(fg))); (void)fg->transforms().insert(std::make_pair("primal", FuncGraphTransform(primal))); // Reset defer_inline to enable successive inlining - primal->set_flags(FUNC_GRAPH_FLAG_DEFER_INLINE, false); + primal->set_flag(FUNC_GRAPH_FLAG_DEFER_INLINE, false); auto functor = std::make_shared(primal, resources_); - functor->Init(functor); + functor->Init(); functor->k_graph_ = fg; return fg; @@ -394,7 +403,7 @@ AnfNodePtr DFunctor::MapToK(const FuncGraphPtr &primal) { } auto functor = std::make_shared(primal, resources_); - functor->Init(functor); + functor->Init(); functor->MapObject(); functor->MapMorphism(); @@ -551,6 +560,10 @@ AdjointPtr DFunctor::FindAdjoint(const AnfNodePtr &primal) { } void DFunctor::CallDoutHoleOnTape() { + if (!is_top_) { + 
return; + } + // Call dout hole of all adjoint. for (auto &f : func_graph_to_functor_) { for (auto &adjoint : f.second->anfnode_to_adjoin_) { diff --git a/mindspore/ccsrc/optimizer/ad/dfunctor.h b/mindspore/ccsrc/optimizer/ad/dfunctor.h index d11926b379..13a6d5388b 100644 --- a/mindspore/ccsrc/optimizer/ad/dfunctor.h +++ b/mindspore/ccsrc/optimizer/ad/dfunctor.h @@ -35,14 +35,40 @@ namespace mindspore { namespace ad { -using Registry = std::unordered_map; +struct PrimitiveTotalEqual { + bool operator()(PrimitivePtr const &t1, PrimitivePtr const &t2) const { + if (t1->name() != t2->name()) { + return false; + } + + auto const &attrs1 = t1->attrs(); + auto const &attrs2 = t2->attrs(); + if (attrs1.size() != attrs2.size()) { + return false; + } + + for (auto &attr : attrs1) { + if (!t2->HasAttr(attr.first)) { + return false; + } + + if (!(*(attr.second) == *(t2->GetAttr(attr.first)))) { + return false; + } + } + + return true; + } +}; + +using Registry = std::unordered_map; class KPrim; extern KPrim g_k_prims; class DFunctor; using DFunctorPtr = std::shared_ptr; // D Functor's rules to map closure object and morphisms. -class DFunctor { +class DFunctor : public std::enable_shared_from_this { public: DFunctor(const FuncGraphPtr &primal_graph, const pipeline::ResourceBasePtr &resources); ~DFunctor() = default; @@ -54,7 +80,9 @@ class DFunctor { // Construct user defined k object. FuncGraphPtr KUserDefined(const FuncGraphPtr &primal); // Register functor objects to form a global view. - void Init(const DFunctorPtr &functor, bool is_top = false); + void Init(bool is_top = false); + bool IsInScope(const AnfNodePtr &node); + // Clear resources. static void Clear(); @@ -62,7 +90,6 @@ class DFunctor { // Map one morphism. AdjointPtr MapMorphism(const AnfNodePtr &morph); bool IsFreeMorphism(const AnfNodePtr &node); - bool IsInScope(const AnfNodePtr &node); // Map morphism that's not attached to output. 
void MapFreeMorphism(); void BackPropagateFv(const AnfNodePtr &fv, const AnfNodePtr &din); diff --git a/mindspore/ccsrc/optimizer/ad/grad.cc b/mindspore/ccsrc/optimizer/ad/grad.cc index 7e1fdb842e..d141dc6eea 100644 --- a/mindspore/ccsrc/optimizer/ad/grad.cc +++ b/mindspore/ccsrc/optimizer/ad/grad.cc @@ -23,7 +23,7 @@ namespace mindspore { namespace ad { -FuncGraphPtr Grad(const FuncGraphPtr &func_graph, const pipeline::ResourceBasePtr &resources) { +FuncGraphPtr Grad(const FuncGraphPtr &func_graph, const pipeline::ResourceBasePtr &resources, bool is_top) { MS_EXCEPTION_IF_NULL(func_graph); auto gradkv = func_graph->transforms().find("grad"); if (gradkv != func_graph->transforms().end()) { @@ -37,7 +37,7 @@ FuncGraphPtr Grad(const FuncGraphPtr &func_graph, const pipeline::ResourceBasePt auto multi_graph_sink = [&func_graph](const FuncGraphPtr &f) { if (MsContext::GetInstance()->is_multi_graph_sink()) { if (func_graph->has_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES)) { - f->set_flags(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); + f->set_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); } } }; @@ -46,14 +46,18 @@ FuncGraphPtr Grad(const FuncGraphPtr &func_graph, const pipeline::ResourceBasePt auto user_defined = f->KUserDefined(func_graph); if (user_defined != nullptr) { multi_graph_sink(user_defined); - DFunctor::Clear(); + if (is_top) { + DFunctor::Clear(); + } return user_defined; } - f->Init(f, true); + f->Init(is_top); f->MapObject(); f->MapMorphism(); auto ret = f->k_graph(); - DFunctor::Clear(); + if (is_top) { + DFunctor::Clear(); + } multi_graph_sink(ret); return ret; @@ -71,5 +75,7 @@ MetaFuncGraphPtr Kmeta(const PrimitivePtr &prim, const pipeline::ResourceBasePtr MetaFuncGraphPtr fg = g_k_prims.KMetaFuncGraph(prim); return fg; } + +void CleanRes() { DFunctor::Clear(); } } // namespace ad } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/ad/grad.h b/mindspore/ccsrc/optimizer/ad/grad.h index 12826311dc..a878aa9df7 100644 --- a/mindspore/ccsrc/optimizer/ad/grad.h +++ 
b/mindspore/ccsrc/optimizer/ad/grad.h @@ -28,9 +28,10 @@ namespace mindspore { namespace ad { using ResourcePtr = std::shared_ptr; -FuncGraphPtr Grad(const FuncGraphPtr &func_graph, const pipeline::ResourceBasePtr &resources); +FuncGraphPtr Grad(const FuncGraphPtr &func_graph, const pipeline::ResourceBasePtr &resources, bool is_top = true); FuncGraphPtr Kprim(const ValueNodePtr &value_node, const pipeline::ResourceBasePtr &resources); MetaFuncGraphPtr Kmeta(const PrimitivePtr &prim, const pipeline::ResourceBasePtr &); +void CleanRes(); } // namespace ad } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/ad/kprim.cc b/mindspore/ccsrc/optimizer/ad/kprim.cc index 600c4f5cc9..a9883cbf63 100644 --- a/mindspore/ccsrc/optimizer/ad/kprim.cc +++ b/mindspore/ccsrc/optimizer/ad/kprim.cc @@ -82,7 +82,7 @@ MetaFuncGraphPtr KPrim::KMetaFuncGraph(const PrimitivePtr &prim) { return iter->second; } - if (prim->name() == "make_tuple") { + if (prim->Hash() == prim::kPrimMakeTuple->Hash() && prim->name() == prim::kPrimMakeTuple->name()) { MetaFuncGraphPtr meta = std::make_shared("make_tuple_gradient"); bprop_registry_meta_[prim::kPrimMakeTuple] = meta; return meta; @@ -111,7 +111,7 @@ FuncGraphPtr KPrim::KPrimitive(const ValueNodePtr &value_node, const pipeline::R return fprop; } - if (prim->name() == "make_tuple") { + if (prim->Hash() == prim::kPrimMakeTuple->Hash() && prim->name() == prim::kPrimMakeTuple->name()) { return nullptr; } @@ -238,8 +238,12 @@ FuncGraphPtr KPrim::BpropCut(const ValueNodePtr &value_node, const pipeline::Res auto func_graph = std::make_shared(); std::vector outputs; - auto bprop_cut = std::make_shared("bprop_cut"); - bprop_cut->set_hook(prim->hook()); + auto bprop_cut = std::make_shared("bprop_cut", py::object()); + if (!prim->is_base()) { + PrimitivePyPtr prim_py = dyn_cast(prim); + bprop_cut->set_hook(prim_py->hook()); + } + auto cell_id = GetValue(prim->GetAttr("cell_id")); if (cell_id != "") { (void)bprop_cut->AddAttr("cell_hook", 
MakeValue(true)); diff --git a/mindspore/ccsrc/optimizer/clean.cc b/mindspore/ccsrc/optimizer/clean.cc index fafe26e2ed..6a54597282 100644 --- a/mindspore/ccsrc/optimizer/clean.cc +++ b/mindspore/ccsrc/optimizer/clean.cc @@ -78,7 +78,10 @@ AnfNodePtr ConvertGetAttrToTupleGetItem(const CNodePtr &node) { MS_EXCEPTION_IF_NULL(cons); auto dt = data->abstract(); - MS_EXCEPTION_IF_NULL(dt); + if (dt == nullptr) { + return nullptr; + } + if (!dt->isa()) { MS_LOG(EXCEPTION) << "First parameter of getattr is not AbstractClass, but " << dt->type_name() << "."; } diff --git a/mindspore/ccsrc/optimizer/graph_kernel_reuse.cc b/mindspore/ccsrc/optimizer/graph_kernel_reuse.cc new file mode 100644 index 0000000000..dc20ad925e --- /dev/null +++ b/mindspore/ccsrc/optimizer/graph_kernel_reuse.cc @@ -0,0 +1,157 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "optimizer/graph_kernel_reuse.h" +#include +#include +#include +#include "./common.h" +#include "utils/graph_utils.h" + +namespace mindspore { +/* namespace to support opt */ +namespace opt { + +bool GraphKernelReuse::CompareNode(const AnfNodePtr a, const AnfNodePtr b) { + if (a->abstract() && b->abstract()) { + auto a_type = a->abstract()->GetTypeTrack(); + auto b_type = b->abstract()->GetTypeTrack(); + + if (a_type != b_type) { + return false; + } + + auto a_shape = a->abstract()->GetShapeTrack(); + auto b_shape = b->abstract()->GetShapeTrack(); + if (a_shape != nullptr && a_shape == b_shape) { + return true; + } + + if (a_shape != nullptr && b_shape != nullptr && a_shape->isa() && + b_shape->isa()) { + return a_shape->cast()->shape() == b_shape->cast()->shape(); + } + } + return false; +} + +bool GraphKernelReuse::DoReplace(const FuncGraphManagerPtr manager) { + bool changed = false; + auto fgs = manager->func_graphs(); + for (FuncGraphPtr &fg : fgs) { + if (!fg->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + continue; + } + std::string key = GetValue(fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); + if (graph_kernel_ops.find(key) != graph_kernel_ops.end()) { + if (find(graph_kernel_ops[key].begin(), graph_kernel_ops[key].end(), fg) == graph_kernel_ops[key].end()) { + FuncGraphPtr new_fg = nullptr; + for (auto &cfg : graph_kernel_ops[key]) { + // If two graphs have different size then continue + auto fg_topos = TopoSort(fg->get_return()); + auto cfg_topos = TopoSort(cfg->get_return()); + if (fg_topos.size() != cfg_topos.size()) { + continue; + } + + // Compare const tensor + bool has_same = true; + for (size_t i = 0; i < fg_topos.size(); ++i) { + if (IsValueNode(fg_topos[i])) { + if (!IsValueNode(cfg_topos[i])) { + has_same = false; + break; + } + + auto tensor1 = GetValueNode(fg_topos[i]); + auto tensor2 = GetValueNode(cfg_topos[i]); + if (!tensor1->ValueEqual(*tensor2)) { + has_same = false; + break; + } + } + } + + if (!has_same) { + continue; + } 
+ + auto fg_input = fg->parameters(); + auto cfg_input = cfg->parameters(); + if (fg_input.size() != cfg_input.size()) { + continue; + } + // Compare input + for (size_t i = 0; i < fg_input.size(); ++i) { + if (!CompareNode(fg_input[i], cfg_input[i])) { + has_same = false; + break; + } + } + if (!has_same) { + continue; + } + + // Compare output + if (!CompareNode(fg->output(), cfg->output())) { + continue; + } + + // Find reusable fg + new_fg = cfg; + break; + } + + if (new_fg != nullptr) { + // Replace current fg with existing fg + auto users = fg->func_graph_cnodes_index(); + for (auto &iter : users) { + auto cnode = iter.first->first->cast(); + auto new_input = cnode->inputs(); + auto main_graph = cnode->func_graph(); + MS_EXCEPTION_IF_NULL(main_graph); + if (IsPrimitiveCNode(cnode, prim::kPrimPartial)) { + new_input[1] = NewValueNode(new_fg); + } else { + new_input[0] = NewValueNode(new_fg); + } + auto new_cnode = main_graph->NewCNode(new_input); + manager->Replace(iter.first->first, new_cnode); + changed = true; + } + + } else { + // Add current fg to map + graph_kernel_ops[key].push_back(fg); + } + } + } else { + graph_kernel_ops[key] = {fg}; + } + } + + return changed; +} + +bool GraphKernelReuse::ReuseGraphKernel(const FuncGraphPtr root, const FuncGraphManagerPtr manager) { + MS_EXCEPTION_IF_NULL(manager); + manager->AddFuncGraph(root); + + return DoReplace(manager); +} + +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/graph_kernel_reuse.h b/mindspore/ccsrc/optimizer/graph_kernel_reuse.h new file mode 100644 index 0000000000..ed5cc93d18 --- /dev/null +++ b/mindspore/ccsrc/optimizer/graph_kernel_reuse.h @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_OPTIMIZER_GRAPH_KERNEL_OP_REUSE_H +#define MINDSPORE_CCSRC_OPTIMIZER_GRAPH_KERNEL_OP_REUSE_H + +#include +#include +#include +#include + +#include "optimizer/optimizer.h" + +namespace mindspore { +namespace opt { + +// Common subexpression elimination. +class GraphKernelReuse { + public: + GraphKernelReuse() : count(0) {} + virtual ~GraphKernelReuse() = default; + + bool operator()(const FuncGraphPtr &root, const OptimizerPtr &optimizer) { + bool chg = ReuseGraphKernel(root, optimizer->resource()->manager()); + return chg; + } + + bool CompareNode(const AnfNodePtr a, const AnfNodePtr other); + bool DoReplace(const FuncGraphManagerPtr manager); + + bool ReuseGraphKernel(const FuncGraphPtr root, const FuncGraphManagerPtr manager); + + private: + std::unordered_map> graph_kernel_ops; + int count; +}; + +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_OPTIMIZER_GRAPH_KERNEL_OP_REUSE_H diff --git a/mindspore/ccsrc/optimizer/irpass.cc b/mindspore/ccsrc/optimizer/irpass.cc index 107bf1eb57..72177ccb06 100644 --- a/mindspore/ccsrc/optimizer/irpass.cc +++ b/mindspore/ccsrc/optimizer/irpass.cc @@ -41,6 +41,8 @@ #include "optimizer/irpass/incorporate_call.h" #include "optimizer/irpass/grad_var_prepare.h" #include "optimizer/irpass/param_replace.h" +#include "optimizer/irpass/mark_interface_fusion.h" +#include "optimizer/opt.h" namespace mindspore { namespace opt { @@ -48,12 +50,13 @@ namespace irpass { OptimizeIRPassLib::OptimizeIRPassLib() { arithmetic_simplify_ = MakeSubstitution(ArithmeticSimplify(), 
"arithmetic_simplify", {prim::kPrimScalarAdd, prim::kPrimScalarMul, prim::kPrimTensorAdd, - prim::kPrimIdentity, prim::kPrimMomentum, prim::kPrimMul}); + prim::kPrimIdentity, prim::kPrimMomentum, prim::kPrimMul, prim::kPrimPow}); special_op_eliminate_ = MakeSubstitution(SpecialOpEliminater(), "special_op_eliminate", - {prim::kPrimInsertGradientOf, prim::kPrimHookBackward, prim::kPrimPrintShapeType, - prim::kPrimGetRefKey, prim::kPrimMirror, prim::kPrimVirtualDiv}); - zero_like_fill_zero_ = MakeSubstitution(ZeroLikeFillZero(), "zero_like_fill_zero", prim::kPrimZerosLikeTensor); + {prim::kPrimInsertGradientOf, prim::kPrimStopGradient, prim::kPrimHookBackward, + prim::kPrimPrintShapeType, prim::kPrimGetRefKey, prim::kPrimMirror, prim::kPrimVirtualDiv}); + zero_like_fill_zero_ = MakeSubstitution(ZeroLikeFillZero(), "zero_like_fill_zero", prim::kPrimZerosLike); + adjust_all_reduce_mul_add_ = MakeSubstitution(AdjustAllReduceMulAdd(), "adjust_all_reduce_mul_add", prim::kPrimAddN); // ops eliminate item_tuple_eliminate_ = @@ -69,11 +72,11 @@ OptimizeIRPassLib::OptimizeIRPassLib() { same_eliminate_ = MakeSubstitution(SameEliminater(), "same_eliminate", prim::kPrimSameTypeShape); check_bprop_eliminate_ = MakeSubstitution(CheckBpropEliminater(), "check_bprop_eliminate", prim::kPrimCheckBprop); reset_defer_inline_ = MakeSubstitution(ResetDeferInline(), "reset_defer_inline", IsValueNode); + depend_value_elim_ = MakeSubstitution(DependValueElim(), "depend_value_elim", prim::kPrimDepend); // Env Item Eliminate + env_get_item_eliminate_ = MakeSubstitution(EnvGetItemEliminater(), "env_get_item_eliminate", prim::kPrimEnvGetItem); new_env_get_item_ = MakeSubstitution(NewEnvGetItem(), "new_env_get_item", prim::kPrimEnvGetItem); - add_env_get_item_ = MakeSubstitution(AddEnvGetItem(), "add_env_get_item", prim::kPrimEnvGetItem); - env_get_set_item_ = MakeSubstitution(EnvGetSetItem(), "env_get_set_item", prim::kPrimEnvGetItem); incorporate_env_getitem_ = 
MakeSubstitution(IncorporateEnvGetitem(), "incorporate_env_get_item", prim::kPrimEnvGetItem); incorporate_env_getitem_switch_ = @@ -81,17 +84,16 @@ OptimizeIRPassLib::OptimizeIRPassLib() { // Ref eliminate make_ref_eliminate_ = MakeSubstitution(MakeRefEliminater(), "make_ref_eliminate", prim::kPrimMakeRef); + get_ref_param_eliminate_ = MakeSubstitution(GetRefParamEliminater(), "get_ref_param_eliminate", + {prim::kPrimGetRefValue, prim::kPrimGetRefOrigin}); get_make_ref_eliminate_ = MakeSubstitution(GetMakeRefEliminater(), "get_make_ref_eliminate", {prim::kPrimGetRefKey, prim::kPrimGetRefValue, prim::kPrimGetRefOrigin}); replace_refkey_by_param_ = MakeSubstitution(ReplaceRefkeyByParam(), "replace_refkey_by_param", IsValueNode, opt::FORCE_RENORM); replace_old_param_ = MakeSubstitution(ReplaceOldParam(), "replace_old_param", IsParam); - // Gradient transforms expand_jprim_ = MakeSubstitution(ExpandJPrim(), "expand_jprim", prim::kPrimJ); - stop_gradient_eliminate_ = - MakeSubstitution(StopGradientEliminater(), "stop_gradient_eliminate", prim::kPrimStopGradient); minmaximum_grad_ = MakeSubstitution(MinMaximumGrad(), "minmaximum_grad", prim::kPrimTupleGetItem); // branch culling @@ -112,9 +114,10 @@ OptimizeIRPassLib::OptimizeIRPassLib() { specialize_transform_ = MakeSubstitution(SpecializeOnGraphArguments(), "specialize_transform", IsCNodeGraph); // Incorporation - incorporate_getitem_ = MakeSubstitution(IncorporateGetitem(), "incorporate_getitem", prim::kPrimTupleGetItem); - incorporate_getitem_switch_ = - MakeSubstitution(IncorporateGetitemSwitch(), "incorporate_getitem_switch", prim::kPrimTupleGetItem); + incorporate_getitem_set_ = + MakeSubstitution(IncorporateGetitemSet(), "incorporate_getitem_set", prim::kPrimTupleGetItem); + incorporate_getitem_from_param_ = + MakeSubstitution(IncorporateGetitemFromParam(), "incorporate_getitem_from_param", IsCNodeGraphKernel); incorporate_call_ = MakeSubstitution(IncorporateCall(), "incorporate_call", IsCNodeDup); 
incorporate_call_switch_ = MakeSubstitution(IncorporateCallSwitch(), "incorporate_call_switch", IsCNodeDup); @@ -124,6 +127,17 @@ OptimizeIRPassLib::OptimizeIRPassLib() { // Convert print_tuple_wrapper_ = MakeSubstitution(PrintTupleWrapper(), "print_tuple_wrapper", prim::kPrimPrint); + + // Unused parameter eliminate + unused_parameter_eliminate_ = + MakeSubstitution(UnusedParasEliminater(), "unused_parameter_eliminate", IsCNodeGraphKernel); + unused_output_eliminate_ = MakeSubstitution(UnusedOutputEliminater(), "unused_output_eliminate", IsCNodeGraphKernel); + + // AddN eliminate + addn_eliminate_ = MakeSubstitution(AddNEliminater(), "addn_eliminate", IsCNodeGraphKernel); + + // Mark interface fusion + mark_interface_fusion_ = MakeSubstitution(MarkInterfaceFusion(), "mark_interface_fusion", prim::kPrimSelect); } ResolveIRPassLib::ResolveIRPassLib() { diff --git a/mindspore/ccsrc/optimizer/irpass.h b/mindspore/ccsrc/optimizer/irpass.h index 02bfee65d6..5e1550c883 100644 --- a/mindspore/ccsrc/optimizer/irpass.h +++ b/mindspore/ccsrc/optimizer/irpass.h @@ -35,6 +35,7 @@ class OptimizeIRPassLib { SubstitutionPtr arithmetic_simplify_; SubstitutionPtr special_op_eliminate_; SubstitutionPtr zero_like_fill_zero_; + SubstitutionPtr adjust_all_reduce_mul_add_; // ops eliminate SubstitutionPtr item_tuple_eliminate_; @@ -47,16 +48,17 @@ class OptimizeIRPassLib { SubstitutionPtr same_eliminate_; SubstitutionPtr check_bprop_eliminate_; SubstitutionPtr reset_defer_inline_; + SubstitutionPtr depend_value_elim_; // Env Item Eliminate + SubstitutionPtr env_get_item_eliminate_; SubstitutionPtr new_env_get_item_; - SubstitutionPtr add_env_get_item_; - SubstitutionPtr env_get_set_item_; SubstitutionPtr incorporate_env_getitem_; SubstitutionPtr incorporate_env_getitem_switch_; // Ref eliminate SubstitutionPtr make_ref_eliminate_; + SubstitutionPtr get_ref_param_eliminate_; SubstitutionPtr get_make_ref_eliminate_; SubstitutionPtr replace_refkey_by_param_; SubstitutionPtr 
replace_old_param_; @@ -73,7 +75,6 @@ class OptimizeIRPassLib { // Gradient irpasses SubstitutionPtr expand_jprim_; - SubstitutionPtr stop_gradient_eliminate_; SubstitutionPtr minmaximum_grad_; // inline @@ -82,8 +83,8 @@ class OptimizeIRPassLib { SubstitutionPtr specialize_transform_; // Incorporation - SubstitutionPtr incorporate_getitem_; - SubstitutionPtr incorporate_getitem_switch_; + SubstitutionPtr incorporate_getitem_set_; + SubstitutionPtr incorporate_getitem_from_param_; SubstitutionPtr incorporate_call_; SubstitutionPtr incorporate_call_switch_; @@ -92,6 +93,16 @@ class OptimizeIRPassLib { // Convert SubstitutionPtr print_tuple_wrapper_; + + // Unused parameter eliminate + SubstitutionPtr unused_parameter_eliminate_; + SubstitutionPtr unused_output_eliminate_; + + // AddN eliminate + SubstitutionPtr addn_eliminate_; + + // Fusion + SubstitutionPtr mark_interface_fusion_; }; // the collection of irpass for resolve action @@ -141,9 +152,23 @@ inline bool IsCNodeGraph(const AnfNodePtr &node) { return false; } + auto inp0 = node->cast()->input(0); + return IsValueNode(inp0); +} + +// Check if CNode Input 0 is Func Graph of graph kernel. 
+inline bool IsCNodeGraphKernel(const AnfNodePtr &node) { + if (node == nullptr || !node->isa()) { + return false; + } + auto inp0 = node->cast()->input(0); if (IsValueNode(inp0)) { - return true; + auto fg = GetValueNode(inp0); + if (fg == nullptr) { + return false; + } + return fg->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); } return false; } @@ -155,10 +180,7 @@ inline bool IsCNodeDup(const AnfNodePtr &node) { } auto inp0 = node->cast()->input(0); - if (inp0 != nullptr && inp0->isa()) { - return true; - } - return false; + return (inp0 != nullptr) && inp0->isa(); } } // namespace irpass } // namespace opt diff --git a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h b/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h index ab191aab20..1836a88dbc 100644 --- a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h +++ b/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h @@ -83,6 +83,216 @@ class MultiplyByZeroOrOne : public AnfVisitor { AnfNodePtr x_{nullptr}; }; +// Support class used for checking if all values of a Tensor are equal `check_value_` +// Supported data types: double, float/float32, int/int32 +class CheckTensorConstant { + public: + explicit CheckTensorConstant(int _check_value = 0) : check_value_(_check_value) {} + ~CheckTensorConstant() = default; + bool IsTensorConstant(const ValuePtr &value) { + if (!value->isa()) { + return false; + } + auto tensor_ptr = dyn_cast(value); + TypeId tensor_type = tensor_ptr->Dtype()->type_id(); + if ((tensor_type == TypeId::kNumberTypeFloat32) || (tensor_type == TypeId::kNumberTypeFloat)) { + float *data2 = reinterpret_cast(tensor_ptr->data_c()); + for (int i = 0; i < tensor_ptr->DataSize(); i++) { + if (fabs(data2[i] - check_value_) > FLT_EPSILON) { + return false; + } + } + return true; + } else if (tensor_type == TypeId::kNumberTypeFloat64) { + double *data2 = reinterpret_cast(tensor_ptr->data_c()); + for (int i = 0; i < tensor_ptr->DataSize(); i++) { + if (fabs(data2[i] - check_value_) > DBL_EPSILON) 
{ + return false; + } + } + return true; + } else if ((tensor_type == TypeId::kNumberTypeInt32) || (tensor_type == TypeId::kNumberTypeInt)) { + int *data2 = reinterpret_cast(tensor_ptr->data_c()); + for (int i = 0; i < tensor_ptr->DataSize(); i++) { + if (data2[i] != check_value_) { + return false; + } + } + return true; + } + // Un-support Data Types + return false; + } + + bool IsTensorScalarConstant(const ValuePtr &value) { + if (!value->isa()) { + return false; + } + auto tensor_ptr = dyn_cast(value); + if ((tensor_ptr->DataSize() > 1) || (tensor_ptr->DataDim() > 0)) { + return false; + } + return IsTensorConstant(value); + } + + private: + int check_value_; +}; + +// {prim::kPrimMul, 0, X}, {prim::kPrimMul, X, 0} +// {prim::kPrimMul, 1, X}, {prim::kPrimMul, X, 1} +class TensorMultiplyByZeroOrOne : public AnfVisitor { + public: + TensorMultiplyByZeroOrOne() : zero_(MakeValue(0)) {} + ~TensorMultiplyByZeroOrOne() override = default; + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + Reset(); + AnfVisitor::Match(prim::kPrimMul)(node); + + if (is_zero_) { + if (x_->func_graph() != node->func_graph()) { + return nullptr; + } + return NewTensorFilledWithData(node); + } + if (is_one_) { + return NewTensorFilledWithData(node, x_); + } + return nullptr; + } + + void Visit(const AnfNodePtr &node) override { + if (is_zero_ || is_one_) { + x_ = node; + return; + } + + if (IsParam(node)) { + x_ = node; + return; + } + + if (IsCNode(node)) { + CNodePtr cnode = node->cast(); + if (IsPrimitive(cnode->input(0), prim::kPrimZerosLike)) { + is_zero_ = true; + return; + } + x_ = node; + return; + } + auto value = node->cast()->value(); + if (CheckTensorConstant(0).IsTensorConstant(value)) { + is_zero_ = true; + return; + } else if (CheckTensorConstant(1).IsTensorConstant(value)) { + is_one_ = true; + return; + } + x_ = node; + } + + void Visit(const ValueNodePtr &vnode) override { + auto value = vnode->value(); + if 
(CheckTensorConstant(0).IsTensorConstant(value)) { + is_zero_ = true; + return; + } else if (CheckTensorConstant(1).IsTensorConstant(value)) { + is_one_ = true; + return; + } + x_ = vnode; + } + void Reset() { + x_ = nullptr; + is_one_ = false; + is_zero_ = false; + } + + void *GetPointerToTensorData(const AnfNodePtr &node, bool writable = false) { + if (!node->isa()) { + return nullptr; + } + + auto value = node->cast()->value(); + + if (!value->isa()) { + return nullptr; + } + + tensor::TensorPtr tensor_ptr = dyn_cast(value); + return tensor_ptr->data_c(writable); + } + + // Make a new tensor (when possible) with the same shape as of `node` + // If x is nullptr then fill new tensor will "0" + // If x is a tensor with empty shape then fill new tensor with the single value of x + // If x is a tensor with same shape as `node` then return x as result + AnfNodePtr NewTensorFilledWithData(const AnfNodePtr &node, const AnfNodePtr &x = nullptr) { + if ((node->abstract() == nullptr) || !node->abstract()->isa()) { + return nullptr; + } + + auto tensor_abstract = node->abstract()->cast(); + TypePtr tensor_type_ptr = tensor_abstract->element()->BuildType(); + std::vector tensor_shape = tensor_abstract->shape()->shape(); + + auto new_tensor_ptr = std::make_shared(tensor_type_ptr->type_id(), tensor_shape); + size_t mem_size = GetTypeByte(tensor_type_ptr) * IntToSize(new_tensor_ptr->ElementsNum()); + char *data = reinterpret_cast(new_tensor_ptr->data_c(true)); + + if (x == nullptr) { + std::memset(data, 0, mem_size); + auto new_vnode = NewValueNode(new_tensor_ptr); + new_vnode->set_abstract(new_tensor_ptr->ToAbstract()); + return new_vnode; + } + // x is not nullptr + if (x->isa()) { + if ((x->abstract() == nullptr) || !x->abstract()->isa()) { + return nullptr; + } + auto x_abstract = x->abstract()->cast(); + std::vector x_shape = x_abstract->shape()->shape(); + + if (x_shape != tensor_shape) { + return nullptr; + } + return x; + } + + if (!x->isa()) { + return nullptr; + } + 
auto x_value = x->cast()->value(); + if (!x_value->isa()) { + return nullptr; + } + + auto x_tensor_ptr = dyn_cast(x_value); + + if ((x_tensor_ptr->DataSize() > 1) && (x_tensor_ptr->DataSize() != new_tensor_ptr->DataSize())) { + return nullptr; + } + char *source_data = reinterpret_cast(GetPointerToTensorData(x)); + if (x_tensor_ptr->DataSize() == 1) { + for (int i = 0; i < new_tensor_ptr->ElementsNum(); i++) { + memcpy(source_data, data + i * GetTypeByte(tensor_type_ptr), GetTypeByte(tensor_type_ptr)); + } + } else { + memcpy(source_data, data, mem_size); + } + auto new_vnode = NewValueNode(new_tensor_ptr); + new_vnode->set_abstract(new_tensor_ptr->ToAbstract()); + return new_vnode; + } + + private: + bool is_zero_{false}, is_one_{false}; + ValuePtr zero_; + AnfNodePtr x_{nullptr}; +}; + // {prim::kPrimScalarAdd, X, 0} // {prim::kPrimScalarAdd, 0, X} class AddByZero : public AnfVisitor { @@ -101,7 +311,8 @@ class AddByZero : public AnfVisitor { } void Visit(const AnfNodePtr &node) override { - if (node->isa() && *GetValueNode(node) == *zero_) { + if (node->isa() && + ((*GetValueNode(node) == *zero_) || CheckTensorConstant(0).IsTensorScalarConstant(GetValueNode(node)))) { is_zero_ = true; return; } @@ -120,8 +331,8 @@ class AddByZero : public AnfVisitor { AnfNodePtr x_{nullptr}; }; -// {prim::kPrimTensorAdd, {PrimZerosLikeTensor, Y}, X}, -// {prim::kPrimTensorAdd, X, {PrimZerosLikeTensor, Y}} +// {prim::kPrimTensorAdd, {kPrimZerosLike, Y}, X}, +// {prim::kPrimTensorAdd, X, {kPrimZerosLike, Y}} class TensorAddByZero : public AnfVisitor { public: AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { @@ -135,7 +346,11 @@ class TensorAddByZero : public AnfVisitor { } void Visit(const AnfNodePtr &node) override { - if (IsPrimitive(node, prim::kPrimZerosLikeTensor)) { + if (IsPrimitive(node, prim::kPrimZerosLike)) { + is_zero_ = true; + return; + } + if (node->isa() && CheckTensorConstant(0).IsTensorScalarConstant(GetValueNode(node))) { is_zero_ 
= true; return; } @@ -143,6 +358,14 @@ class TensorAddByZero : public AnfVisitor { x_ = node; } + void Visit(const ValueNodePtr &vnode) override { + auto value = vnode->value(); + if (CheckTensorConstant(0).IsTensorConstant(value)) { + is_zero_ = true; + return; + } + } + void Reset() { x_ = nullptr; is_zero_ = false; @@ -153,7 +376,7 @@ class TensorAddByZero : public AnfVisitor { AnfNodePtr x_{nullptr}; }; -// {PrimMomentum, {PrimZerosLikeTensor, X}, Y, Z, Xs} -> {prim::kPrimMakeTuple, Z, Y} +// {PrimMomentum, {kPrimZerosLike, X}, Y, Z, Xs} -> {prim::kPrimMakeTuple, Z, Y} class OptUpdateZeroTensor : public AnfVisitor { public: AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { @@ -163,13 +386,13 @@ class OptUpdateZeroTensor : public AnfVisitor { // {PrimMomentum, {...}, Y, Z, Xs} auto &inputs = node->cast()->inputs(); - if (inputs.size() < 4 || !IsPrimitiveCNode(inputs[1], prim::kPrimZerosLikeTensor)) { + if (inputs.size() < 4 || !IsPrimitiveCNode(inputs[1], prim::kPrimZerosLike)) { return nullptr; } auto y = inputs[2]; auto z = inputs[3]; - // {PrimZerosLikeTensor, X} + // {kPrimZerosLike, X} if (inputs[1]->cast()->size() != 2) { return nullptr; } @@ -183,29 +406,143 @@ class OptUpdateZeroTensor : public AnfVisitor { // {prim::kPrimMul, {...}, {prim::kPrimMul, Tensor1, Tensor2}} class ConstantDuplicateMul : public AnfVisitor { public: + // Support function to multiply two constant tensors: partially support broadcasting shapes + template + void Multiply(void *in_data_1, int in_data_1_size, void *in_data_2, int in_data_2_size, void **out_data, + int out_data_size) { + T *data_1 = reinterpret_cast(in_data_1); + T *data_2 = reinterpret_cast(in_data_2); + T *data_out = new T[out_data_size]; + + if (in_data_1_size == 1) { + for (int i = 0; i < out_data_size; i++) { + data_out[i] = data_1[0]; + } + } else { + for (int i = 0; i < out_data_size; i++) { + data_out[i] = data_1[i]; + } + } + if (in_data_2_size == 1) { + for (int i = 0; i < 
out_data_size; i++) { + data_out[i] *= data_2[0]; + } + } else { + for (int i = 0; i < out_data_size; i++) { + data_out[i] *= data_2[i]; + } + } + *out_data = reinterpret_cast(data_out); + return; + } + + AnfNodePtr MulConstantTensors(const AnfNodePtr &vnode_1, const AnfNodePtr &vnode_2, const AnfNodePtr &node_3) { + if (!vnode_1->isa() || !vnode_2->isa() || (vnode_1->abstract() == nullptr) || + (vnode_2->abstract() == nullptr) || (node_3->abstract() == nullptr)) { + return nullptr; + } + + auto value_1 = GetValueNode(vnode_1); + auto value_2 = GetValueNode(vnode_2); + + if (!value_1->isa() || !value_2->isa()) { + return nullptr; + } + + auto tensor_ptr_1 = dyn_cast(value_1); + auto tensor_ptr_2 = dyn_cast(value_2); + + auto tensor_1_abstract = vnode_1->abstract()->cast(); + auto tensor_2_abstract = vnode_1->abstract()->cast(); + auto tensor_3_abstract = node_3->abstract()->cast(); + + TypePtr tensor_1_type_ptr = tensor_1_abstract->element()->BuildType(); + TypePtr tensor_2_type_ptr = tensor_2_abstract->element()->BuildType(); + TypePtr tensor_3_type_ptr = tensor_3_abstract->element()->BuildType(); + + if ((tensor_1_type_ptr->type_id() != tensor_3_type_ptr->type_id()) || + (tensor_2_type_ptr->type_id() != tensor_3_type_ptr->type_id())) { + return nullptr; + } + + std::vector tensor_out_shape = tensor_3_abstract->shape()->shape(); + + int data_out_size = 1; + for (auto it : tensor_out_shape) { + data_out_size *= it; + } + if ((tensor_ptr_1->DataSize() > 1) && (tensor_ptr_1->DataSize() != data_out_size)) { + return nullptr; + } + if ((tensor_ptr_2->DataSize() > 1) && (tensor_ptr_2->DataSize() != data_out_size)) { + return nullptr; + } + + void *data_out; + + if ((tensor_3_type_ptr->type_id() == TypeId::kNumberTypeFloat32) || + (tensor_3_type_ptr->type_id() == TypeId::kNumberTypeFloat)) { + Multiply(tensor_ptr_1->data_c(), tensor_ptr_1->DataSize(), tensor_ptr_2->data_c(), + tensor_ptr_2->DataSize(), &data_out, data_out_size); + } else { + if 
(tensor_3_type_ptr->type_id() == TypeId::kNumberTypeFloat64) { + Multiply(tensor_ptr_1->data_c(), tensor_ptr_1->DataSize(), tensor_ptr_2->data_c(), + tensor_ptr_2->DataSize(), &data_out, data_out_size); + } else { + if ((tensor_3_type_ptr->type_id() == TypeId::kNumberTypeInt32) || + (tensor_3_type_ptr->type_id() == TypeId::kNumberTypeInt)) { + Multiply(tensor_ptr_1->data_c(), tensor_ptr_1->DataSize(), tensor_ptr_2->data_c(), + tensor_ptr_2->DataSize(), &data_out, data_out_size); + } else { + // Un-support data types + return nullptr; + } + } + } + + auto new_tensor_ptr = std::make_shared(tensor_3_type_ptr->type_id(), tensor_out_shape); + size_t mem_size = GetTypeByte(tensor_3_type_ptr) * IntToSize(new_tensor_ptr->ElementsNum()); + char *data = reinterpret_cast(new_tensor_ptr->data_c(true)); + memcpy(data, data_out, mem_size); + + auto new_vnode = NewValueNode(new_tensor_ptr); + new_vnode->set_abstract(new_tensor_ptr->ToAbstract()); + return new_vnode; + } + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { Reset(); // {prim::kPrimMul, Tensor1, {...}} AnfVisitor::Match(prim::kPrimMul, {IsNode, IsNode})(node); - if (vnode_ == nullptr || cnode_ == nullptr) { + if (vnode_ == nullptr || c_p_node_ == nullptr) { + return nullptr; + } + + if (!IsCNode(c_p_node_)) { return nullptr; } + auto tensor1 = vnode_; - auto mul = cnode_; + auto mul = c_p_node_->cast(); Reset(); // {prim::kPrimMul, Tensor2, {...}} AnfVisitor::Match(prim::kPrimMul, {IsNode, IsNode})(mul); - if (vnode_ == nullptr || cnode_ == nullptr) { + if (vnode_ == nullptr || c_p_node_ == nullptr) { return nullptr; } auto tensor2 = vnode_; - auto cnode = cnode_; + auto c_p_node = c_p_node_; auto PrimMul = GetValueNode(mul->input(0)); auto fg = node->func_graph(); - auto ttmul = NewCNode({NewValueNode(PrimMul), tensor1, tensor2}, fg); - return NewCNode({NewValueNode(PrimMul), cnode, ttmul}, fg); + + auto new_mul_tensor = MulConstantTensors(tensor1, tensor2, c_p_node); + if 
(new_mul_tensor == nullptr) { + auto ttmul = NewCNode({NewValueNode(PrimMul), tensor1, tensor2}, fg); + return NewCNode({NewValueNode(PrimMul), c_p_node, ttmul}, fg); + } + return NewCNode({NewValueNode(PrimMul), c_p_node, new_mul_tensor}, fg); } void Visit(const AnfNodePtr &node) override { @@ -213,36 +550,170 @@ class ConstantDuplicateMul : public AnfVisitor { vnode_ = node; } - if (IsCNode(node)) { - cnode_ = node->cast(); + if (IsCNode(node) || IsParam(node)) { + c_p_node_ = node; } } void Reset() { vnode_ = nullptr; - cnode_ = nullptr; + c_p_node_ = nullptr; } private: AnfNodePtr vnode_; - CNodePtr cnode_; + AnfNodePtr c_p_node_; +}; + +class PowerOneEliminate : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + if (!IsPrimitiveCNode(node, prim::kPrimPow) || node->func_graph() == nullptr) { + return nullptr; + } + + auto &inputs = node->cast()->inputs(); + if (!IsValueNode(inputs[2])) { + return nullptr; + } + auto scalar = GetValueNode(inputs[2]); + if (scalar->isa() && GetValue(scalar) == 1.0) { + return inputs[1]; + } else if (scalar->isa() && GetValue(scalar) == 1) { + return inputs[1]; + } + return nullptr; + } +}; + +// grad = AllReduce(grad) / worker_number +// grad = grad + weight * decy +// -> +// grad = grad + weight * decy +// grad = AllReduce(grad) / worker_number + +// {prim::kPrimAddN, {prim::kPrimMakeTuple, {prim::kPrimMul, {prim::kPrimAllReduce, X}, Y}, Z}} -> +// {prim::kPrimMul, {prim::kPrimAllReduce, {prim::kPrimAddN,{prim::kPrimMakeTuple, Z, X}}}, Y} +class AdjustAllReduceMulAdd : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + Reset(); + // {prim::kPrimAddN, Zs} + if (!IsPrimitiveCNode(node, prim::kPrimAddN)) { + return nullptr; + } + auto addn = node->cast(); + if (addn->size() != 2) { + return nullptr; + } + AnfVisitor::Match(prim::kPrimMakeTuple, {IsNode, IsNode})(addn->input(1)); + if (x_ == nullptr || y_ == 
nullptr || z_ == nullptr || all_reduce_fg_ == nullptr) { + return nullptr; + } + auto addn_maketuple = addn->input(1); + + auto fg = all_reduce_fg_; + // addn inputs cross the graph, make the inputs same as allreduce node. + if (z_->isa() && fg != z_->func_graph()) { + auto cnode_z = z_->cast(); + z_ = NewCNode(cnode_z->inputs(), fg); + } + + auto addn_op_node = addn->input(0); + auto make_tuple_op_node = addn->input(1)->cast()->input(0); + + AnfNodePtr tuple = NewCNode({make_tuple_op_node, z_, x_}, fg); + AnfNodePtr add = NewCNode({addn_op_node, tuple}, fg); + AnfNodePtr all_reduce = NewCNode({all_reduce_, add}, fg); + AnfNodePtr mul = NewCNode({mul_, all_reduce, y_}, fg); + ProcessDependEdge(fg, addn_maketuple, all_reduce); + return mul; + } + void ProcessDependEdge(const FuncGraphPtr &fg, const AnfNodePtr &addn_maketuple, const AnfNodePtr &new_node) { + // If has dynamic loss scale. + auto &users_map = fg->manager()->node_users(); + auto it = users_map.find(mul_cnode_); + if (it != users_map.end()) { + auto users = it->second; + for (auto &user_pair : users) { + auto node = user_pair.first; + if (node != addn_maketuple) { + if (IsPrimitiveCNode(node, prim::kPrimMakeTuple)) { + fg->manager()->SetEdge(node, user_pair.second, new_node); + } + } + } + } + } + void Visit(const AnfNodePtr &node) override { + if (level_ == 0) { + level_ = 1; + is_reduce_match_ = false; + // {prim::kPrimMul, {prim::kPrimAllReduce, X}, Y} + AnfVisitor::Match(prim::kPrimMul)(node); + level_ = 0; + if (is_reduce_match_) { + mul_ = node->cast()->input(0); + mul_cnode_ = node->cast(); + y_ = tmp_; + } else { + z_ = node; + } + } + + if (level_ == 1) { + // {prim::kPrimAllReduce, X} + if (IsPrimitiveCNode(node, prim::kPrimAllReduce)) { + auto cnode = node->cast(); + if (cnode->size() > 1) { + all_reduce_ = cnode->input(0); + x_ = cnode->input(1); + is_reduce_match_ = true; + all_reduce_fg_ = cnode->func_graph(); + } + } else { + tmp_ = node; + } + } + } + + void Reset() { + level_ = 0; + 
is_reduce_match_ = false; + x_ = nullptr; + y_ = nullptr; + z_ = nullptr; + tmp_ = nullptr; + all_reduce_fg_ = nullptr; + } + + private: + int level_{0}; + bool is_reduce_match_{false}; + AnfNodePtr x_{nullptr}, y_{nullptr}, z_{nullptr}, tmp_{nullptr}; + AnfNodePtr all_reduce_{nullptr}, mul_{nullptr}, mul_cnode_{nullptr}; + FuncGraphPtr all_reduce_fg_{nullptr}; }; class ArithmeticSimplify { public: ArithmeticSimplify() : multiply_by_zero_or_one_(), + tensor_multiply_by_zero_or_one_(), add_by_zero_(), tensor_add_by_zero_(), identity_(prim::kPrimIdentity), opt_update_zero_tensor_(), - constant_duplicate_mul_() { + constant_duplicate_mul_(), + power_one_() { eliminaters_.emplace_back(multiply_by_zero_or_one_); + eliminaters_.emplace_back(tensor_multiply_by_zero_or_one_); eliminaters_.emplace_back(add_by_zero_); eliminaters_.emplace_back(tensor_add_by_zero_); eliminaters_.emplace_back(identity_); eliminaters_.emplace_back(opt_update_zero_tensor_); eliminaters_.emplace_back(constant_duplicate_mul_); + eliminaters_.emplace_back(power_one_); } ~ArithmeticSimplify() = default; @@ -259,11 +730,13 @@ class ArithmeticSimplify { private: MultiplyByZeroOrOne multiply_by_zero_or_one_; + TensorMultiplyByZeroOrOne tensor_multiply_by_zero_or_one_; AddByZero add_by_zero_; TensorAddByZero tensor_add_by_zero_; PrimEliminater identity_; OptUpdateZeroTensor opt_update_zero_tensor_; ConstantDuplicateMul constant_duplicate_mul_; + PowerOneEliminate power_one_; std::vector eliminaters_{}; }; } // namespace irpass diff --git a/mindspore/ccsrc/optimizer/irpass/branch_culling.h b/mindspore/ccsrc/optimizer/irpass/branch_culling.h index b2d6718857..2b5b30bdbf 100644 --- a/mindspore/ccsrc/optimizer/irpass/branch_culling.h +++ b/mindspore/ccsrc/optimizer/irpass/branch_culling.h @@ -20,147 +20,65 @@ #include #include -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" -#include "ir/visitor.h" #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" +#include 
"ir/optimizer_caller.h" +#include "ir/pattern_matcher.h" #include "operator/ops.h" +#include "optimizer/irpass.h" namespace mindspore { namespace opt { namespace irpass { // {prim::kPrimSwitch, true, X, Y} // {prim::kPrimSwitch, false, X, Y} -class SwitchSimplify : public AnfVisitor { +class SwitchSimplify : public OptimizerCaller { public: AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { - Reset(); - auto getx = [this](const AnfNodePtr &node) -> bool { - this->x_ = node; - return true; - }; - auto gety = [this](const AnfNodePtr &node) -> bool { - this->y_ = node; - return true; + PatternNode cond, true_br, false_br; + auto SwitchSimplLambda = [&node, &cond, &true_br, &false_br]() -> AnfNodePtr { + auto cond_value_ = GetValue(GetValueNode(cond.GetNode(node))); + if (cond_value_) { + return true_br.GetNode(node); + } + return false_br.GetNode(node); }; - AnfVisitor::Match(prim::kPrimSwitch, {IsValueNode, getx, gety})(node); - // simplify the switch - if (is_match_) { - if (cond_) { - return x_; - } - return y_; - } + MATCH_REPLACE_LAMBDA_IF(node, PPrimitive(prim::kPrimSwitch, cond, true_br, false_br), SwitchSimplLambda, + cond.CheckFunc(IsValueNode, node)); return nullptr; } - - void Visit(const AnfNodePtr &node) override { - if (!is_match_ && IsValueNode(node)) { - cond_ = GetValue(GetValueNode(node)); - is_match_ = true; - } - } - - void Reset() { - x_ = nullptr; - y_ = nullptr; - cond_ = false; - is_match_ = false; - } - - private: - bool is_match_{false}, cond_{false}; - AnfNodePtr x_{nullptr}, y_{nullptr}; }; // {prim::kPrimTupleGetItem, {prim::kPrimSwith, X0, X1, X2}, C} => // {prim::kPrimSwith, X0, {prim::kPrimTupleGetItem, X1, C}, {prim::kPrimTupleGetItem, X2, C}} -class FloatTupleGetItemSwitch : public AnfVisitor { +class FloatTupleGetItemSwitch : public OptimizerCaller { public: AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { - Reset(); - AnfVisitor::Match(prim::kPrimTupleGetItem, {IsCNode, 
IsVNode})(node); - - auto fg = node->func_graph(); - if (Xs_.empty() || c_ == nullptr || fg == nullptr) { - return nullptr; - } - - auto true_node = fg->NewCNode({NewValueNode(prim::kPrimTupleGetItem), Xs_[1], c_}); - auto false_node = fg->NewCNode({NewValueNode(prim::kPrimTupleGetItem), Xs_[2], c_}); - - return fg->NewCNode({NewValueNode(prim::kPrimSwitch), Xs_[0], true_node, false_node}); - } - - void Visit(const CNodePtr &cnode) override { - // {prim::kPrimSwith, X1, X2, X3} - if (!IsPrimitiveCNode(cnode, prim::kPrimSwitch) || cnode->size() != 4) { - return; - } - - // copy X1, X2, X3 - auto &inputs = cnode->inputs(); - (void)std::copy(inputs.begin() + 1, inputs.end(), std::back_inserter(Xs_)); - } - - void Visit(const ValueNodePtr &vnode) override { c_ = vnode; } - - void Reset() { - Xs_.clear(); - c_ = nullptr; + PatternNode cond, true_br, false_br, x; + MATCH_REPLACE_IF(node, + PPrimitive(prim::kPrimTupleGetItem, PPrimitive(prim::kPrimSwitch, cond, true_br, false_br), x), + PPrimitive(prim::kPrimSwitch, cond, PPrimitive(prim::kPrimTupleGetItem, true_br, x), + PPrimitive(prim::kPrimTupleGetItem, false_br, x)), + x.CheckFunc(IsVNode, node)); + return nullptr; } - - private: - AnfNodePtr c_{nullptr}; - std::vector Xs_{}; }; // {prim::kPrimEnvGetItem, {prim::kPrimSwitch, X1, X2, X3}, X4, X5} => // {prim::kPrimSwitch, X1, {prim::kPrimEnvGetItem, X2, X4, X5}, {prim::kPrimEnvGetItem, X3, X4, X5}} -class FloatEnvGetItemSwitch : public AnfVisitor { +class FloatEnvGetItemSwitch : public OptimizerCaller { public: AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { - is_match_ = false; - AnfVisitor::Match(prim::kPrimEnvGetItem, {IsCNode, IsNode, IsNode})(node); - if (!is_match_) { - return nullptr; - } - - // {prim::kPrimEnvGetItem, {...}, X4, X5} - auto cnode = node->cast(); - auto sw_node = cnode->input(1)->cast(); - auto x4 = cnode->input(2); - auto x5 = cnode->input(3); + PatternNode cond, true_br, false_br, x, x2; + MATCH_REPLACE(node, + 
PPrimitive(prim::kPrimEnvGetItem, PPrimitive(prim::kPrimSwitch, cond, true_br, false_br), x, x2), + PPrimitive(prim::kPrimSwitch, cond, PPrimitive(prim::kPrimEnvGetItem, true_br, x, x2), + PPrimitive(prim::kPrimEnvGetItem, false_br, x, x2))); - is_match_ = false; - AnfVisitor::Match(prim::kPrimSwitch, {IsNode, IsNode, IsNode})(sw_node); - if (!is_match_) { - return nullptr; - } - - // {prim::kPrimSwitch, X1, X2, X3} - auto x1 = sw_node->input(1); - auto x2 = sw_node->input(2); - auto x3 = sw_node->input(3); - - auto fg = node->func_graph(); - if (fg == nullptr) { - return nullptr; - } - - auto true_node = fg->NewCNode({NewValueNode(prim::kPrimEnvGetItem), x2, x4, x5}); - auto false_node = fg->NewCNode({NewValueNode(prim::kPrimEnvGetItem), x3, x4, x5}); - - return fg->NewCNode({NewValueNode(prim::kPrimSwitch), x1, true_node, false_node}); + return nullptr; } - - void Visit(const AnfNodePtr &) override { is_match_ = true; } - - private: - bool is_match_{false}; }; namespace internal { @@ -173,79 +91,64 @@ AnfNodePtr TransformMergeBranches(const AnfNodePtr &true_output_node, const AnfN } // namespace internal // {{prim::kPrimSwitch, X, G1, G2}, Xs} -class ConvertSwitchReplacement : public AnfVisitor { +class ConvertSwitchReplacement : public OptimizerCaller { public: AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { if (!node->isa() || node->func_graph() == nullptr) { return nullptr; } - Reset(); - auto cnode = node->cast(); - if (cnode->size() < 1) { + auto cnode_ = node->cast(); + if (cnode_->size() < 1) { return nullptr; } - // {prim::kPrimSwitch, X, G1, G2} - AnfVisitor::Match(prim::kPrimSwitch, {IsNode, IsValueNode, IsValueNode})(cnode->input(0)); - if (g2_ == nullptr || g1_->output() == nullptr || g2_->output() == nullptr) { - return nullptr; - } - // for switch replace method, only graphs without graph inside can be replaced - for (auto &item : g1_->value_nodes()) { - auto value_node = item.first; - if (IsValueNode(value_node)) { - 
return nullptr; + auto node_ = cnode_->input(0); + + PatternNode cond, true_br, false_br; + + auto ConvertSwitchLambda = [&node_, &cond, &true_br, &false_br]() -> AnfNodePtr { + auto g1_ = GetValueNode(true_br.GetNode(node_)); + auto g2_ = GetValueNode(false_br.GetNode(node_)); + auto x_ = cond.GetNode(node_); + + // for switch replace method, only graphs without graph inside can be replaced + for (auto &item : g1_->value_nodes()) { + auto value_node = item.first; + if (IsValueNode(value_node)) { + return nullptr; + } } - } - for (auto &item : g2_->value_nodes()) { - auto value_node = item.first; - if (IsValueNode(value_node)) { - return nullptr; + for (auto &item : g2_->value_nodes()) { + auto value_node = item.first; + if (IsValueNode(value_node)) { + return nullptr; + } } - } - auto true_output = g1_->output()->abstract(); - auto false_output = g2_->output()->abstract(); - auto trans_g1 = internal::TransformGraphCondTrueBranchNodes(g1_, x_); - auto trans_g2 = internal::TransformGraphCondFalseBranchNodes(g2_, x_); - - std::vector params; - auto fg = node->func_graph(); - auto cloned_g1 = InlineClone(trans_g1, fg, params); - auto cloned_g2 = InlineClone(trans_g2, fg, params); - auto nnode = internal::TransformMergeBranches(cloned_g1, cloned_g2, true_output, false_output, x_, fg); - return nnode; - } + auto true_output = g1_->output()->abstract(); + auto false_output = g2_->output()->abstract(); + auto trans_g1 = internal::TransformGraphCondTrueBranchNodes(g1_, x_); + auto trans_g2 = internal::TransformGraphCondFalseBranchNodes(g2_, x_); - void Visit(const AnfNodePtr &node) override { - if (x_ == nullptr) { - x_ = node; - return; - } - AnfVisitor::Visit(node); - } + std::vector params; + auto fg = node_->func_graph(); + auto cloned_g1 = InlineClone(trans_g1, fg, params); + auto cloned_g2 = InlineClone(trans_g2, fg, params); + auto nnode = internal::TransformMergeBranches(cloned_g1, cloned_g2, true_output, false_output, x_, fg); - void Visit(const ValueNodePtr 
&vnode) override { - auto g = GetValueNode(vnode); - if (g1_ == nullptr) { - g1_ = g; - } else { - g2_ = g; - } - } + return nnode; + }; - void Reset() { - x_ = nullptr; - g1_ = nullptr; - g2_ = nullptr; - } + MATCH_REPLACE_LAMBDA_IF( + node_, PPrimitive(prim::kPrimSwitch, cond, true_br, false_br), ConvertSwitchLambda, + true_br.CheckFunc(IsValueNode, node_) && false_br.CheckFunc(IsValueNode, node_)); - private: - AnfNodePtr x_{nullptr}; - FuncGraphPtr g1_{nullptr}, g2_{nullptr}; + return nullptr; + } }; + } // namespace irpass } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/irpass/env_item_eliminate.h b/mindspore/ccsrc/optimizer/irpass/env_item_eliminate.h index ce29b32d14..0f59c69fef 100644 --- a/mindspore/ccsrc/optimizer/irpass/env_item_eliminate.h +++ b/mindspore/ccsrc/optimizer/irpass/env_item_eliminate.h @@ -225,6 +225,33 @@ class EnvGetSetItem : public AnfVisitor { bool is_match_{false}; }; +class EnvGetItemEliminater { + public: + EnvGetItemEliminater() : new_env_get_item_(), add_env_get_item_(), env_get_set_item_() { + eliminaters_.emplace_back(new_env_get_item_); + eliminaters_.emplace_back(add_env_get_item_); + eliminaters_.emplace_back(env_get_set_item_); + } + ~EnvGetItemEliminater() = default; + + AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) { + AnfNodePtr new_node; + for (auto &eliminater : eliminaters_) { + new_node = eliminater(optimizer, node); + if (new_node != nullptr) { + return new_node; + } + } + return nullptr; + } + + private: + NewEnvGetItem new_env_get_item_; + AddEnvGetItem add_env_get_item_; + EnvGetSetItem env_get_set_item_; + std::vector eliminaters_{}; +}; + // {prim::kPrimEnvGetItem, {G, Xs}, C, Y} class IncorporateEnvGetitem : public AnfVisitor { public: diff --git a/mindspore/ccsrc/optimizer/irpass/gradient_eliminate.h b/mindspore/ccsrc/optimizer/irpass/gradient_eliminate.h index 651dc3a2f2..671d9bde49 100644 --- 
a/mindspore/ccsrc/optimizer/irpass/gradient_eliminate.h +++ b/mindspore/ccsrc/optimizer/irpass/gradient_eliminate.h @@ -55,21 +55,6 @@ class ExpandJPrim : public AnfVisitor { private: ValueNodePtr x_{nullptr}; }; - -// stop_gradient(x) ==> x -class StopGradientEliminater : public AnfVisitor { - public: - AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { - x_ = nullptr; - AnfVisitor::Match(prim::kPrimStopGradient)(node); - return x_; - } - - void Visit(const AnfNodePtr &node) override { x_ = node; } - - private: - AnfNodePtr x_{nullptr}; -}; } // namespace irpass } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h b/mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h index 77f3fa7b36..5afee45e95 100644 --- a/mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h +++ b/mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h @@ -21,6 +21,7 @@ #include #include #include +#include #include "optimizer/irpass.h" #include "optimizer/optimizer.h" @@ -28,7 +29,6 @@ #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" #include "operator/ops.h" - namespace mindspore { namespace opt { namespace irpass { @@ -81,13 +81,32 @@ class IncorporateGetitem : public AnfVisitor { AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { Reset(); AnfVisitor::Match(prim::kPrimTupleGetItem, {IsCNode, IsValueNode})(node); + if (node->func_graph() == nullptr || idx_ == -1 || fg_ == nullptr) { + return nullptr; + } - if (node->func_graph() != nullptr && idx_ >= 0 && fg_ != nullptr) { - auto new_fg = getitem_transform_(fg_, idx_); - (void)args_.insert(args_.begin(), NewValueNode(new_fg)); - return node->func_graph()->NewCNode(args_); + if (fg_->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + // If graph kernel has muti output, do not split. + // some graph kernel output has EnvInstance node or DeadCode node should split. 
+ auto output = fg_->output(); + if (IsPrimitiveCNode(output, prim::kPrimMakeTuple)) { + auto output_cnode = output->cast(); + auto outputs = output_cnode->inputs(); + int real_output_cnt = 0; + for (size_t i = 1; i < outputs.size(); ++i) { + if (IsCNode(outputs[i]) || IsValueNode(outputs[i]) || IsParam(outputs[i])) { + real_output_cnt++; + if (real_output_cnt > 1) { + return nullptr; + } + } + } + } } - return nullptr; + + auto new_fg = getitem_transform_(fg_, idx_); + (void)args_.insert(args_.begin(), NewValueNode(new_fg)); + return node->func_graph()->NewCNode(args_); } void Visit(const CNodePtr &cnode) override { @@ -115,6 +134,172 @@ class IncorporateGetitem : public AnfVisitor { internal::GetitemTransform getitem_transform_; }; +class IncorporateGetitemFromParam : public AnfVisitor { + public: + void Process(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const AnfNodePtr ¶m, size_t input_idx) { + auto mng = func_graph->manager(); + MS_EXCEPTION_IF_NULL(mng); + auto &node_users = mng->node_users(); + if (node_users.find(param) == node_users.end() || node_users[param].empty()) { + args_.push_back(cnode->input(input_idx + 1)); + return; + } + + for (auto &user : node_users[param]) { + if (!IsPrimitiveCNode(user.first, prim::kPrimTupleGetItem)) { + // we do not process this case. + args_.push_back(cnode->input(input_idx + 1)); + return; + } + } + + // update new args. 
+ if (IsPrimitiveCNode(cnode->input(input_idx + 1), prim::kPrimMakeTuple)) { + // case 1 + replace_parameters_[input_idx] = true; + need_update_ = true; + auto make_tuple_cnode = cnode->input(input_idx + 1)->cast(); + auto &make_tuple_cnode_inputs = make_tuple_cnode->inputs(); + inputs_num_[input_idx] = make_tuple_cnode_inputs.size() - 1; + args_.insert(args_.end(), make_tuple_cnode_inputs.begin() + 1, make_tuple_cnode_inputs.end()); + } else { + // case 2 + auto prev_cnode = cnode->input(input_idx + 1)->cast(); + auto prev_fg = GetValueNode(prev_cnode->input(0)); + auto fg_output = prev_fg->output(); + if (!IsPrimitiveCNode(fg_output, prim::kPrimMakeTuple)) { + MS_LOG(ERROR) << "The return of: " << prev_fg->ToString() + << " should be a make tuple, but got: " << fg_output->DebugString(); + return; + } + replace_parameters_[input_idx] = true; + need_update_ = true; + auto make_tuple_cnode = fg_output->cast(); + inputs_num_[input_idx] = make_tuple_cnode->inputs().size() - 1; + for (size_t output_i = 0; output_i < inputs_num_[input_idx]; ++output_i) { + auto new_getitem = + func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), prev_cnode, NewValueNode(SizeToInt(output_i))}); + auto aptr = std::make_shared(std::make_shared(SizeToInt(output_i))); + new_getitem->input(2)->set_abstract(aptr); + new_getitem->set_abstract(make_tuple_cnode->input(output_i + 1)->abstract()); + args_.push_back(new_getitem); + } + } + } + + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + if (node->func_graph() == nullptr) { + return nullptr; + } + + Reset(); + + auto cnode = node->cast(); + if (cnode == nullptr) { + return nullptr; + } + auto &inputs = cnode->inputs(); + auto fg = GetValueNode(inputs[0]); + if (fg == nullptr) { + return nullptr; + } + auto mng = fg->manager(); + MS_EXCEPTION_IF_NULL(mng); + auto parameters = fg->parameters(); + if (parameters.size() != inputs.size() - 1) { + return nullptr; + } + replace_parameters_ = 
std::vector(parameters.size(), false); + inputs_num_ = std::vector(parameters.size(), 1); + auto node_fg = node->func_graph(); + + for (size_t i = 1; i < inputs.size(); ++i) { + if (IsPrimitiveCNode(inputs[i], prim::kPrimMakeTuple) || IsCNodeGraphKernel(inputs[i])) { + Process(node_fg, cnode, parameters[i - 1], i - 1); + } else { + args_.push_back(inputs[i]); + } + } + + if (!need_update_) { + return nullptr; + } + + FuncGraphPtr new_fg = TransformableClone(fg, std::make_shared("sp")); + mng->AddFuncGraph(new_fg); + + auto node_users = mng->node_users(); + std::vector new_fg_parameters = new_fg->parameters(); + std::vector new_parameters; + size_t curr_input_idx{0}; + for (size_t param_i = 0; param_i < new_fg_parameters.size(); ++param_i) { + if (!replace_parameters_[param_i]) { + if (parameters[param_i]->abstract() != nullptr) { + new_fg_parameters[param_i]->set_abstract(parameters[param_i]->abstract()); + } + new_parameters.push_back(new_fg_parameters[param_i]); + curr_input_idx++; + continue; + } + + // make a new parameter. + for (size_t input_i = 0; input_i < inputs_num_[param_i]; ++input_i) { + auto new_param = std::make_shared(new_fg); + new_param->set_abstract(args_.at(curr_input_idx)->abstract()); + + // update users of new parameter. 
+ for (auto &user : node_users[new_fg_parameters[param_i]]) { + idx_ = -1; + AnfVisitor::Match(prim::kPrimTupleGetItem, {IsParam, IsValueNode})(user.first); + if (idx_ == -1) { + MS_LOG(ERROR) << "User of: " << new_fg_parameters[param_i]->DebugString() + << " must be tuple getitem here, but got: " << user.first->DebugString(); + return nullptr; + } + + if (input_i == IntToSize(idx_)) { + for (auto &sub_user : node_users[user.first]) { + auto sub_user_cnode = sub_user.first->cast(); + MS_EXCEPTION_IF_NULL(sub_user_cnode); + sub_user_cnode->set_input(sub_user.second, new_param); + (void)mng->Replace(sub_user.first, sub_user_cnode); + } + } + } + + // (void)mng->Replace(new_fg_parameters[param_i], new_param); + new_parameters.push_back(new_param); + curr_input_idx++; + } + } + + mng->SetParameters(new_fg, new_parameters); + (void)args_.insert(args_.begin(), NewValueNode(new_fg)); + auto new_call = node_fg->NewCNode(args_); + new_call->set_abstract(node->abstract()); + return new_call; + } + + void Visit(const ValueNodePtr &vnode) override { idx_ = GetValue(vnode->value()); } + + void Visit(const CNodePtr &cnode) override {} + + void Reset() { + replace_parameters_.clear(); + args_.clear(); + inputs_num_.clear(); + need_update_ = false; + idx_ = -1; + } + + private: + std::vector replace_parameters_{}; + std::vector args_{}; + std::vector inputs_num_{}; + bool need_update_{false}; + int idx_{-1}; +}; + // {prim::kPrimTupleGetItem, {{prim::kPrimSwitch, X, G1, G2}, Xs}, C} class IncorporateGetitemSwitch : public AnfVisitor { public: @@ -197,6 +382,31 @@ class IncorporateGetitemSwitch : public AnfVisitor { std::vector args_{}; internal::GetitemTransform getitem_transform_; }; + +class IncorporateGetitemSet { + public: + IncorporateGetitemSet() : incorporate_getitem_(), incorporate_getitem_switch_() { + eliminaters_.emplace_back(incorporate_getitem_); + eliminaters_.emplace_back(incorporate_getitem_switch_); + } + ~IncorporateGetitemSet() = default; + + AnfNodePtr 
operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) { + AnfNodePtr new_node; + for (auto &eliminater : eliminaters_) { + new_node = eliminater(optimizer, node); + if (new_node != nullptr) { + return new_node; + } + } + return nullptr; + } + + private: + IncorporateGetitem incorporate_getitem_; + IncorporateGetitemSwitch incorporate_getitem_switch_; + std::vector eliminaters_{}; +}; } // namespace irpass } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/irpass/inline.h b/mindspore/ccsrc/optimizer/irpass/inline.h index 8ebd0f6eb7..64f192347c 100644 --- a/mindspore/ccsrc/optimizer/irpass/inline.h +++ b/mindspore/ccsrc/optimizer/irpass/inline.h @@ -71,11 +71,7 @@ class ReplaceApplicator : public AnfVisitor { using CriterionFuncType = std::function; bool IsTrivial(const FuncGraphPtr &fg, AnfNodePtr) { - auto &s = fg->nodes(); - int n_cnode = std::count_if(s.begin(), s.end(), [](const AnfNodePtr &n) { - MS_EXCEPTION_IF_NULL(n); - return n->isa(); - }); + auto n_cnode = fg->nodes().size() - fg->parameters().size(); // There is at least one CNode(return, other_node). 
return n_cnode <= 2; } @@ -90,20 +86,10 @@ bool IsUniqueUse(const FuncGraphPtr &fg, AnfNodePtr) { bool IsInside(FuncGraphPtr, const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node->func_graph()); - auto &flags = node->func_graph()->flags(); - if (flags.find("inline_inside") != flags.end()) { - return flags["inline_inside"]; - } - return false; + return node->func_graph()->has_flag("inline_inside"); } -bool IsCore(const FuncGraphPtr &fg, AnfNodePtr) { - auto &flags = fg->flags(); - if (flags.find("core") != flags.end()) { - return flags["core"]; - } - return false; -} +bool IsCore(const FuncGraphPtr &fg, AnfNodePtr) { return fg->has_flag("core"); } bool NoCriterion(FuncGraphPtr, AnfNodePtr) { return true; } @@ -127,6 +113,13 @@ class InlinerBase : public AnfVisitor { if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE)) { return nullptr; } + // Do not inline GraphKernel to Cell. + if (fg->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL) && !node->func_graph()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + // If the GraphKernel only contains a return node, we make it inlined. + if (fg->nodes().size() - fg->parameters().size() > 1) { + return nullptr; + } + } Reset(); bool is_match = false; @@ -167,7 +160,8 @@ class InlinerBase : public AnfVisitor { auto params = fg->parameters(); auto old_size = params.size(); if (old_size != new_params.size()) { - MS_LOG(EXCEPTION) << "Parameter size not match."; + MS_LOG(EXCEPTION) << "Parameter size not match." 
<< old_size << " new " << new_params.size() + << fg->output()->DebugString(10); } for (size_t i = 0; i < old_size; i++) { (void)mng->Replace(params[i], new_params[i]); diff --git a/mindspore/ccsrc/optimizer/irpass/mark_interface_fusion.h b/mindspore/ccsrc/optimizer/irpass/mark_interface_fusion.h new file mode 100644 index 0000000000..6f2bcc187f --- /dev/null +++ b/mindspore/ccsrc/optimizer/irpass/mark_interface_fusion.h @@ -0,0 +1,86 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_OPTIMIZER_IRPASS_MARK_INTERFACE_FUSION_H +#define MINDSPORE_CCSRC_OPTIMIZER_IRPASS_MARK_INTERFACE_FUSION_H + +#include +#include +#include + +#include "session/anf_runtime_algorithm.h" +#include "optimizer/optimizer.h" +#include "optimizer/irpass.h" +#include "ir/visitor.h" +#include "operator/ops.h" +#include "utils/graph_utils.h" +#include "operator/composite/composite.h" + +namespace mindspore { +namespace opt { +namespace irpass { + +static int count = 0; + +std::string GetFusionNumber() { + std::stringstream ss; + ss << std::setw(4) << std::setfill('0') << count; + std::string num = ss.str(); + ++count; + + return "_" + num; +} + +// Mark CNodes which can be merged in kernel build +class MarkInterfaceFusion : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + if (node->func_graph()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL) && IsPrimitiveCNode(node, prim::kPrimSelect)) { + auto cnode = node->cast(); + auto condition = cnode->input(1); + std::string cmp; + std::unordered_map cmp_list = {{"GreaterEqual", "GE"}, {"Greater", "GT"}, + {"LessEqual", "LE"}, {"Less", "LT"}, + {"Equal", "EQ"}, {"NotEqual", "NE"}}; + if (IsPrimitiveCNode(condition)) { + auto prim_name = GetCNodeFuncName(condition->cast()); + if (cmp_list.count(prim_name) != 0) { + // Mark Select and compare node + cmp = cmp_list[prim_name]; + auto cnt = GetFusionNumber(); + AnfAlgo::SetNodeAttr("fusion", MakeValue("Select" + cmp + cnt), condition); + AnfAlgo::SetNodeAttr("fusion", MakeValue("Select" + cmp + cnt + "_end"), node); + for (size_t i = 1; i < cnode->inputs().size(); ++i) { + if (IsPrimitiveCNode(cnode->input(i), prim::kPrimZerosLike)) { + AnfAlgo::SetNodeAttr("fusion", MakeValue("Select" + cmp + cnt), cnode->input(i)); + } + } + } + } + } + return nullptr; + } + + void Visit(const AnfNodePtr &) override {} + + private: + AnfNodePtr y_{nullptr}; +}; + +} // namespace irpass +} // namespace opt +} // 
namespace mindspore +#endif // MINDSPORE_CCSRC_OPTIMIZER_IRPASS_MARK_INTERFACE_FUSION_H diff --git a/mindspore/ccsrc/optimizer/irpass/merge_addn.h b/mindspore/ccsrc/optimizer/irpass/merge_addn.h index 7a7c62f6f6..94f9e26c5b 100644 --- a/mindspore/ccsrc/optimizer/irpass/merge_addn.h +++ b/mindspore/ccsrc/optimizer/irpass/merge_addn.h @@ -19,6 +19,7 @@ #include #include +#include #include "optimizer/irpass.h" #include "optimizer/optimizer.h" @@ -177,7 +178,7 @@ class AddNZeroFilter : public AnfVisitor { // {kPrimMakeTuple, X1, X2, ...} filtered_Xs_.push_back(NewValueNode(prim::kPrimMakeTuple)); for (auto &x : Xs_) { - if (!IsPrimitiveCNode(x, prim::kPrimZerosLikeTensor)) { + if (!IsPrimitiveCNode(x, prim::kPrimZerosLike)) { filtered_Xs_.push_back(x); } else { has_zero_like_ = true; @@ -196,6 +197,131 @@ class AddNZeroFilter : public AnfVisitor { std::vector filtered_Xs_{}, Xs_{}; bool has_zero_like_{false}; }; + +// {PrimAddN, {kPrimMakeTuple, Xs}} +// Akg don't support AddN(ValueNode, Tensor, ...), converted to TensorAdd. 
+// case0: AddN(inputs)(inputs size < 2) -> error +// case1: AddN(inputs)(all inputs is ValueNode) -> error +// case2: AddN(inputs)(inputs size = 2) -> TensorAdd(Tensor, Tensor) +// case3: AddN(ValueNode, Tensor, Tensor, ...)(has one ValueNode input) +// -> TensorAdd(ValueNode, AddN(Tensor, Tensor, ...)) +class AddNEliminater : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + if (!node->isa() || node->func_graph() == nullptr) { + return nullptr; + } + + auto &inputs = node->cast()->inputs(); + auto fg = GetValueNode(inputs[0]); + MS_EXCEPTION_IF_NULL(fg); + auto mng = fg->manager(); + MS_EXCEPTION_IF_NULL(mng); + if (fg->recursive()) { + return nullptr; + } + + auto new_fg = TransformableClone(fg, std::make_shared("fg")); + mng->AddFuncGraph(new_fg); + need_update_ = false; + bool changed = false; + do { + changed = false; + changed |= Process(new_fg); + } while (changed); + + if (!need_update_) { + return nullptr; + } else { + auto new_sx = inputs; + new_sx[0] = NewValueNode(new_fg); + return node->func_graph()->NewCNode(new_sx); + } + } + + bool Process(const FuncGraphPtr &func_graph) { + auto mng = func_graph->manager(); + MS_EXCEPTION_IF_NULL(mng); + auto nodes = TopoSort(func_graph->output()); + bool changed = false; + + for (size_t i = 0; i < nodes.size(); ++i) { + auto node = nodes[i]; + if (!IsPrimitiveCNode(node, prim::kPrimAddN)) { + continue; + } + + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto &tuple_input = cnode->input(1); + MS_EXCEPTION_IF_NULL(tuple_input); + auto tuple_input_cnode = tuple_input->cast(); + MS_EXCEPTION_IF_NULL(tuple_input_cnode); + auto &tuple_inputs = tuple_input_cnode->inputs(); + if (tuple_inputs.size() < 3) { + // case0: inputs size < 2, error + MS_EXCEPTION(ArgumentError) << "Inputs size of AddN less than 2. 
" << cnode->DebugString(2); + } + + int valuenode_num = + std::accumulate(tuple_inputs.begin() + 1, tuple_inputs.end(), 0, [](int accumulator, const AnfNodePtr &node) { + if (IsValueNode(node)) { + return accumulator + 1; + } else { + return accumulator; + } + }); + if (IntToSize(valuenode_num) == tuple_inputs.size()) { + // case1: all inputs is ValueNode, error + MS_EXCEPTION(ArgumentError) << "All inputs of AddN is ValueNode. " << cnode->DebugString(2); + } + + if (tuple_inputs.size() == 3) { + // case2: inputs size = 2, -> TensorAdd(Tensor, Tensor) + MS_LOG(DEBUG) << "Replace AddN with two inputs with TensorAdd. " << cnode->DebugString(2); + ValuePtr prim_tensoradd = prim::GetPythonOps("TensorAdd", "mindspore.ops.operations"); + std::vector new_xs{func_graph->NewCNode({NewValueNode(prim_tensoradd)}), tuple_inputs[1], + tuple_inputs[2]}; + mng->Replace(node, func_graph->NewCNode(new_xs)); + changed = true; + continue; + } + + auto first_valuenode = std::find_if(tuple_inputs.begin() + 1, tuple_inputs.end(), + [](const AnfNodePtr &node) { return IsValueNode(node); }); + if (first_valuenode == tuple_inputs.end()) { + // no ValueNode input found. 
+ continue; + } else { + // case3: has one ValueNode input -> TensorAdd(ValueNode, AddN(Tensor, Tensor, ...)) + std::vector make_tuple_new_xs{ + NewValueNode(prim::kPrimMakeTuple), + }; + std::for_each(tuple_inputs.begin() + 1, tuple_inputs.end(), + [&make_tuple_new_xs, &first_valuenode](const AnfNodePtr &node) { + if (node != *first_valuenode) { + make_tuple_new_xs.push_back(node); + } + }); + ValuePtr prim_addn = prim::GetPythonOps("AddN", "mindspore.ops.operations"); + auto new_addn = func_graph->NewCNode( + {func_graph->NewCNode({NewValueNode(prim_addn)}), func_graph->NewCNode(make_tuple_new_xs)}); + ValuePtr prim_tensoradd = prim::GetPythonOps("TensorAdd", "mindspore.ops.operations"); + auto new_add = + func_graph->NewCNode({func_graph->NewCNode({NewValueNode(prim_tensoradd)}), *first_valuenode, new_addn}); + (void)mng->Replace(node, new_add); + changed = true; + continue; + } + } + + need_update_ |= changed; + return changed; + } + + private: + bool need_update_{false}; +}; } // namespace irpass } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h b/mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h index 73dbc152e5..d2e1d15f91 100644 --- a/mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h +++ b/mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h @@ -79,7 +79,7 @@ class ReduceOneEliminater : public AnfVisitor { } void Visit(const AnfNodePtr &node) override { - if (x_ == nullptr) { + if (!IsVNode(node) && x_ == nullptr) { if (IsValueNode(node)) { is_tensor_ = true; } diff --git a/mindspore/ccsrc/optimizer/irpass/ref_eliminate.h b/mindspore/ccsrc/optimizer/irpass/ref_eliminate.h index 201992ef13..599ee8c339 100644 --- a/mindspore/ccsrc/optimizer/irpass/ref_eliminate.h +++ b/mindspore/ccsrc/optimizer/irpass/ref_eliminate.h @@ -21,86 +21,69 @@ #include "optimizer/optimizer.h" #include "optimizer/irpass.h" -#include "ir/visitor.h" -#include "operator/ops.h" +#include "ir/pattern_matcher.h" namespace 
mindspore { namespace opt { namespace irpass { // {prim::kPrimMakeRef, X, Y, Z} -> Y -class MakeRefEliminater : public AnfVisitor { +class MakeRefEliminater : public OptimizerCaller { public: AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { - y_ = nullptr; - auto gety = [this](const AnfNodePtr &node) -> bool { - this->y_ = node; - return true; - }; - AnfVisitor::Match(prim::kPrimMakeRef, {IsNode, gety, IsNode})(node); - return y_; + PatternNode x, y, z; + MATCH_REPLACE(node, PPrimitive(prim::kPrimMakeRef, x, y, z), y); + return nullptr; } +}; - void Visit(const AnfNodePtr &) override {} - - private: - AnfNodePtr y_{nullptr}; +// {prim::kPrimGetRefValue, Parameter} -> Parameter +// {prim::kPrimGetRefOrigin, Parameter} -> Parameter +class GetRefParamEliminater : public OptimizerCaller { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + PatternNode x; + MATCH_REPLACE_IF(node, PPrimitive(prim::kPrimGetRefValue, x), x, x.CheckFunc(IsParam, node)); + MATCH_REPLACE_IF(node, PPrimitive(prim::kPrimGetRefOrigin, x), x, x.CheckFunc(IsParam, node)); + return nullptr; + } }; // {prim::kPrimGetRefKey, {prim::kPrimMakeRef, X, Y, Z}} -> X // {prim::kPrimGetRefValue, {prim::kPrimMakeRef, X, Y, Z}} -> Y // {prim::kPrimGetRefOrigin, {prim::kPrimMakeRef, X, Y, Z}} -> Z -class GetMakeRefEliminater : public AnfVisitor { +class GetMakeRefEliminater : public OptimizerCaller { public: AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { - auto cnode = node->cast(); - if (cnode == nullptr || cnode->size() != 2) { - return nullptr; - } - - // {prim::kPrimGetRefKey/Value, {...}} - auto ref = cnode->input(1)->cast(); - if (ref == nullptr || !ref->IsApply(prim::kPrimMakeRef) || ref->size() != 4) { - return nullptr; - } - - // {prim::kPrimMakeRef, X, Y, Z} - if (cnode->IsApply(prim::kPrimGetRefKey)) { - return ref->input(1); - } - - if (cnode->IsApply(prim::kPrimGetRefValue)) { - return 
ref->input(2); - } - - if (cnode->IsApply(prim::kPrimGetRefOrigin)) { - return ref->input(3); - } - + PatternNode x, y, z; + MATCH_REPLACE(node, PPrimitive(prim::kPrimGetRefKey, PPrimitive(prim::kPrimMakeRef, x, y, z)), x); + MATCH_REPLACE(node, PPrimitive(prim::kPrimGetRefValue, PPrimitive(prim::kPrimMakeRef, x, y, z)), y); + MATCH_REPLACE(node, PPrimitive(prim::kPrimGetRefOrigin, PPrimitive(prim::kPrimMakeRef, x, y, z)), z); return nullptr; } }; // IsValueNode -class ReplaceRefkeyByParam : public AnfVisitor { +class ReplaceRefkeyByParam : public OptimizerCaller { public: AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override { - if (!IsValueNode(node)) { - return nullptr; - } - - auto refkey = GetValueNode(node); - auto resource = std::dynamic_pointer_cast(optimizer->resource()); - MS_EXCEPTION_IF_NULL(resource); - - auto top_graph = resource->func_graph(); - MS_EXCEPTION_IF_NULL(top_graph); - - for (const auto &tnode : top_graph->parameters()) { - auto para = tnode->cast(); - if (para != nullptr && para->name() == refkey->tag()) { - return para; + auto RefKeyLambda = [&node, &optimizer]() -> AnfNodePtr { + auto refkey = GetValueNode(node); + auto resource = std::dynamic_pointer_cast(optimizer->resource()); + MS_EXCEPTION_IF_NULL(resource); + + auto top_graph = resource->func_graph(); + MS_EXCEPTION_IF_NULL(top_graph); + + for (const auto &tnode : top_graph->parameters()) { + auto para = tnode->cast(); + if (para != nullptr && para->name() == refkey->tag()) { + return para; + } } - } + return nullptr; + }; + PatternNode x; + MATCH_REPLACE_LAMBDA_IF(node, x, RefKeyLambda, x.CheckFunc(IsValueNode, node)); return nullptr; } }; diff --git a/mindspore/ccsrc/optimizer/irpass/reshape_eliminate.h b/mindspore/ccsrc/optimizer/irpass/reshape_eliminate.h index f1f73de4d9..fb43f6ffd8 100644 --- a/mindspore/ccsrc/optimizer/irpass/reshape_eliminate.h +++ b/mindspore/ccsrc/optimizer/irpass/reshape_eliminate.h @@ -50,11 +50,15 @@ class 
ReshapeSameShapeEliminater : public AnfVisitor { } auto src_shape = src_shape_abs->GetShapeTrack(); - auto tgt_shape = GetValueNode(shape_); - if (src_shape != nullptr && tgt_shape != nullptr && src_shape->isa()) { - auto elements = GetValue>(tgt_shape); + auto tgt_shape_abs = node->abstract(); + if (tgt_shape_abs == nullptr) { + return nullptr; + } + auto tgt_shape = tgt_shape_abs->GetShapeTrack(); + if (src_shape != nullptr && tgt_shape != nullptr && src_shape->isa() && tgt_shape->isa()) { + auto elements = tgt_shape->cast(); auto shape = src_shape->cast(); - if (shape->shape() == elements) { + if (shape->shape() == elements->shape()) { return x_; } } diff --git a/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h b/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h index aa23441bbb..1dc8fbb344 100644 --- a/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h +++ b/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h @@ -24,9 +24,11 @@ #include "optimizer/optimizer.h" #include "optimizer/irpass.h" +#include "ir/optimizer_caller.h" #include "optimizer/irpass/prim_eliminate.h" #include "ir/visitor.h" #include "operator/ops.h" +#include "ir/pattern_matcher.h" namespace mindspore { namespace opt { @@ -35,12 +37,14 @@ class SpecialOpEliminater { public: SpecialOpEliminater() : insert_gradient_of_(prim::kPrimInsertGradientOf), + stop_gradient_(prim::kPrimStopGradient), hook_backward_(prim::kPrimHookBackward), print_shape_type_(prim::kPrimPrintShapeType), get_ref_value_(prim::kPrimGetRefValue), mirror_(prim::kPrimMirror), virtual_div_(prim::kPrimVirtualDiv) { eliminaters_.emplace_back(insert_gradient_of_); + eliminaters_.emplace_back(stop_gradient_); eliminaters_.emplace_back(hook_backward_); eliminaters_.emplace_back(print_shape_type_); eliminaters_.emplace_back(get_ref_value_); @@ -61,7 +65,8 @@ class SpecialOpEliminater { } private: - PrimEliminater insert_gradient_of_, hook_backward_, print_shape_type_, get_ref_value_, mirror_, virtual_div_; + 
PrimEliminater insert_gradient_of_, stop_gradient_, hook_backward_, print_shape_type_, get_ref_value_, mirror_, + virtual_div_; std::vector eliminaters_{}; }; @@ -137,13 +142,13 @@ class ResetDeferInline : public AnfVisitor { AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { if (IsValueNode(node)) { auto fg = GetValueNode(node); - fg->set_flags(FUNC_GRAPH_FLAG_DEFER_INLINE, false); + fg->set_flag(FUNC_GRAPH_FLAG_DEFER_INLINE, false); } return nullptr; } }; -// {PrimZerosLikeTensor, Y} -> +// {PrimZerosLike, Y} -> // {PrimFill, {PrimDType, Y}, {PrimShape, Y}, 0} class ZeroLikeFillZero : public AnfVisitor { public: @@ -155,7 +160,7 @@ class ZeroLikeFillZero : public AnfVisitor { AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { y_ = nullptr; - AnfVisitor::Match(prim::kPrimZerosLikeTensor, {IsNode})(node); + AnfVisitor::Match(prim::kPrimZerosLike, {IsNode})(node); if (y_ == nullptr || node->func_graph() == nullptr) { return nullptr; } @@ -188,6 +193,17 @@ class ZeroLikeFillZero : public AnfVisitor { AnfNodePtr y_{nullptr}; PrimitivePtr PrimFill_, PrimShape_, PrimDType_; }; + +// {prim::kPrimDepend, X, ValueCond}->X +class DependValueElim : public OptimizerCaller { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + PatternNode x, cond; + MATCH_REPLACE_IF(node, PPrimitive(prim::kPrimDepend, x, cond), x, IsVNode(cond.GetNode(node))); + return nullptr; + } +}; + } // namespace irpass } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/irpass/specialize_transform.h b/mindspore/ccsrc/optimizer/irpass/specialize_transform.h index 905479df77..6ac4e40f5e 100644 --- a/mindspore/ccsrc/optimizer/irpass/specialize_transform.h +++ b/mindspore/ccsrc/optimizer/irpass/specialize_transform.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "optimizer/irpass.h" #include "optimizer/optimizer.h" @@ -41,7 +42,7 @@ class SpecializeTransform { 
~SpecializeTransform() = default; FuncGraphPtr operator()(const FuncGraphPtr &func_graph, std::vector graph_args, - std::vector prim_args) { + std::vector prim_args, std::vector value_args) { if (cache_.count(func_graph) == 0) { cache_[func_graph] = {}; } @@ -69,6 +70,13 @@ class SpecializeTransform { (void)mng->Replace(params[i], arg); continue; } + if (value_args[i] != nullptr) { + auto const_tensor = *value_args[i]; + auto const_tensor_ptr = std::make_shared(const_tensor); + AnfNodePtr arg = NewValueNode(const_tensor_ptr); + (void)mng->Replace(params[i], arg); + continue; + } new_params.push_back(params[i]); } @@ -108,6 +116,7 @@ class SpecializeOnGraphArguments : public AnfVisitor { std::vector graph_args; std::vector prim_args; + std::vector value_node_args; std::vector new_xs; bool hasVNode = false; for (size_t i = 1; i < inputs.size(); i++) { @@ -115,15 +124,24 @@ class SpecializeOnGraphArguments : public AnfVisitor { auto fg_vnode = GetValueNode(inputs[i]); graph_args.push_back(fg_vnode); prim_args.emplace_back(nullptr); + value_node_args.emplace_back(nullptr); hasVNode = true; } else if (IsValueNode(inputs[i])) { auto p_vnode = GetValueNode(inputs[i]); graph_args.emplace_back(nullptr); prim_args.push_back(p_vnode); + value_node_args.emplace_back(nullptr); + hasVNode = true; + } else if (IsValueNode(inputs[i])) { + tensor::TensorPtr t_vnode = GetValueNode(inputs[i]); + graph_args.emplace_back(nullptr); + prim_args.emplace_back(nullptr); + value_node_args.emplace_back(t_vnode); hasVNode = true; } else { graph_args.emplace_back(nullptr); prim_args.emplace_back(nullptr); + value_node_args.emplace_back(nullptr); new_xs.push_back(inputs[i]); } } @@ -132,7 +150,7 @@ class SpecializeOnGraphArguments : public AnfVisitor { return nullptr; } - auto new_fg = specialize_transform_(inp0_fg, graph_args, prim_args); + auto new_fg = specialize_transform_(inp0_fg, graph_args, prim_args, value_node_args); (void)new_xs.insert(new_xs.begin(), NewValueNode(new_fg)); return 
node->func_graph()->NewCNode(new_xs); @@ -141,6 +159,146 @@ class SpecializeOnGraphArguments : public AnfVisitor { private: internal::SpecializeTransform specialize_transform_; }; + +// Eliminate unused parameters. +// {G, Xs} +class UnusedParasEliminater : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + if (!node->isa() || node->func_graph() == nullptr) { + return nullptr; + } + + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto &inputs = cnode->inputs(); + auto fg = GetValueNode(inputs[0]); + MS_EXCEPTION_IF_NULL(fg); + + std::vector parameters = fg->parameters(); + size_t size = parameters.size(); + if (size != inputs.size() - 1) { + return nullptr; + } + + std::vector new_xs; + std::vector keep_parameters; + auto mng = fg->manager(); + MS_EXCEPTION_IF_NULL(mng); + auto &node_users = mng->node_users(); + bool has_unused_para = false; + for (size_t i = 0; i < size; ++i) { + auto iter = node_users.find(parameters[i]); + if (iter != node_users.end() && !iter->second.empty()) { + keep_parameters.push_back(true); + new_xs.push_back(inputs[i + 1]); + continue; + } + keep_parameters.push_back(false); + has_unused_para = true; + } + + if (!has_unused_para) { + return nullptr; + } + FuncGraphPtr new_fg = TransformableClone(fg, std::make_shared("sp")); + mng->AddFuncGraph(new_fg); + + std::vector new_fg_parameters = new_fg->parameters(); + std::vector new_parameters; + for (size_t i = 0; i < size; i++) { + if (keep_parameters[i]) { + if (parameters[i]->abstract() != nullptr) { + new_fg_parameters[i]->set_abstract(parameters[i]->abstract()); + } + new_parameters.push_back(new_fg_parameters[i]); + } + } + mng->SetParameters(new_fg, new_parameters); + + (void)new_xs.insert(new_xs.begin(), NewValueNode(new_fg)); + return node->func_graph()->NewCNode(new_xs); + } +}; + +// Eliminate unused outputs. 
+// {G, Xs} +class UnusedOutputEliminater : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + if (!node->isa() || node->func_graph() == nullptr) { + return nullptr; + } + + auto &inputs = node->cast()->inputs(); + auto fg = GetValueNode(inputs[0]); + MS_EXCEPTION_IF_NULL(fg); + auto mng = fg->manager(); + MS_EXCEPTION_IF_NULL(mng); + if (fg->recursive()) { + return nullptr; + } + + auto new_fg = TransformableClone(fg, std::make_shared("fg")); + mng->AddFuncGraph(new_fg); + auto new_fg_output = new_fg->output(); + if (!IsPrimitiveCNode(new_fg_output, prim::kPrimMakeTuple)) { + return nullptr; + } + + auto output_cnode = new_fg_output->cast(); + auto &node_users = mng->node_users(); + if (node_users.count(node) == 0 || node_users[node].empty()) { + return nullptr; + } + std::unordered_set used_output_idx; + std::vector> all_users; + for (auto &node_user : node_users[node]) { + if (!IsPrimitiveCNode(node_user.first, prim::kPrimTupleGetItem)) { + return nullptr; + } + auto user_cnode = node_user.first->cast(); + size_t used_idx = GetValue(user_cnode->input(2)->cast()->value()); + used_output_idx.insert(used_idx); + all_users.push_back(std::make_pair(node_user.first, used_idx)); + } + + if (used_output_idx.size() >= output_cnode->inputs().size() - 1) { + // all output has users. + return nullptr; + } + + if (used_output_idx.empty()) { + // we do not process this case. + return nullptr; + } else if (used_output_idx.size() == 1) { + // after eliminate, only one output left. + new_fg->set_output(output_cnode->input(*used_output_idx.begin() + 1)); + // update users. + for (auto &ret_user : all_users) { + (void)mng->Replace(ret_user.first, node); + } + } else { + // after eliminate, create new multi output. 
+ std::vector new_output_inputs{output_cnode->input(0)}; + std::unordered_map new_idx_map; + for (auto idx : used_output_idx) { + new_idx_map[idx] = SizeToInt(new_output_inputs.size() - 1); + new_output_inputs.push_back(output_cnode->input(idx + 1)); + } + new_fg->set_output(new_fg->NewCNode(new_output_inputs)); + // update users. + for (auto &ret_user : all_users) { + auto ret_user_cnode = ret_user.first->cast(); + ret_user_cnode->set_input(2, NewValueNode(new_idx_map[ret_user.second])); + } + } + + auto new_sx = inputs; + new_sx[0] = NewValueNode(new_fg); + return node->func_graph()->NewCNode(new_sx); + } +}; } // namespace irpass } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/opt.cc b/mindspore/ccsrc/optimizer/opt.cc index 96b9d28f9b..82fbcc2036 100644 --- a/mindspore/ccsrc/optimizer/opt.cc +++ b/mindspore/ccsrc/optimizer/opt.cc @@ -44,8 +44,17 @@ SubstitutionPtr MakeSubstitution(const TransformFuncType &transform, const std:: return false; } + auto cnode = node->cast(); + auto inp0 = cnode->input(0); + auto prim0 = GetValueNode(inp0); + if (prim0 == nullptr) { + return false; + } + + auto hash = prim0->Hash(); + auto const &name = prim0->name(); for (auto &prim : prims) { - if (IsPrimitiveCNode(node, prim)) { + if (hash == prim->Hash() && name == prim->name()) { return true; } } @@ -88,7 +97,7 @@ AnfNodePtr Substitution::operator()(const OptimizerPtr &optimizer, const AnfNode return result; } -inline bool isTraversable(const AnfNodePtr &node) { +static bool isTraversable(const AnfNodePtr &node) { if (node == nullptr) { return false; } @@ -110,6 +119,7 @@ bool SubstitutionList::ApplyTransform(const OptimizerPtr &optimizer, const AnfNo auto seen = NewSeenGeneration(); // 1024 is for the initial capacity of deque std::deque todo(1024); + todo.clear(); todo.push_back(root_node); bool changes = false; @@ -171,7 +181,7 @@ bool SubstitutionList::ApplyTransform(const OptimizerPtr &optimizer, const AnfNo } #ifdef ENABLE_PROFILE - 
MsProfile::StatTime("opt.transform", GetTime() - start); + MsProfile::StatTime("opt.transform." + optimizer->name(), GetTime() - start); #endif return changes; } diff --git a/mindspore/ccsrc/optimizer/optimizer.h b/mindspore/ccsrc/optimizer/optimizer.h index d5808b4818..3e77edc1e9 100644 --- a/mindspore/ccsrc/optimizer/optimizer.h +++ b/mindspore/ccsrc/optimizer/optimizer.h @@ -29,6 +29,7 @@ #include "debug/draw.h" #include "debug/anf_ir_dump.h" +#include "debug/anf_ir_utils.h" #include "debug/trace.h" #include "optimizer/opt.h" #include "pipeline/resource.h" @@ -88,7 +89,7 @@ using OptPassGroupMap = std::vector>; class Optimizer : public std::enable_shared_from_this { public: Optimizer(const std::string &name, const pipeline::ResourceBasePtr &resource_ptr) - : name_(name), resource_(resource_ptr), run_only_once_(false), is_watch_renormalize_(false) {} + : name_(name), resource_(resource_ptr), run_only_once_(false), is_watch_renormalize_(false), is_enable_(true) {} virtual ~Optimizer() = default; void Init(const OptPassGroupMap &passes, bool run_only_once) { @@ -131,6 +132,9 @@ class Optimizer : public std::enable_shared_from_this { } FuncGraphPtr step(FuncGraphPtr func_graph, bool use_profile = true) { + if (!is_enable_) { + return func_graph; + } // Optimizer step counter; int counter = -1; bool changes = true; @@ -170,11 +174,12 @@ class Optimizer : public std::enable_shared_from_this { }; use_profile ? 
(WITH(MsProfile::GetProfile()->Step(pass_names_[i])) opt_func) : opt_func(); if (IS_OUTPUT_ON(mindspore::DEBUG) && MsContext::GetInstance()->save_graphs_flag()) { - MS_LOG(DEBUG) << name_ << " round " << counter << " OptPass " << pass_names_[i] << " end."; + MS_LOG(DEBUG) << "The opt " << name_ << " round " << counter << " OptPass " << pass_names_[i] << " end."; auto fg_name = "opt_substep_" + name_ + "_r" + std::to_string(counter) + "_" + std::to_string(i) + "_" + pass_names_[i]; func_graph->DumpFuncGraph(fg_name); DumpIR(fg_name + ".ir", func_graph); + ExportIR(fg_name + ".dat", "", func_graph); MS_LOG(DEBUG) << "Dump " << pass_names_[i] << " func graph."; } } @@ -209,6 +214,7 @@ class Optimizer : public std::enable_shared_from_this { void enable_watch_renormalize() { is_watch_renormalize_ = true; } void disable_watch_renormalize() { is_watch_renormalize_ = false; } bool is_watch_renormalize() { return is_watch_renormalize_; } + void set_enable(bool enable) { is_enable_ = enable; } private: const std::string name_; @@ -218,6 +224,7 @@ class Optimizer : public std::enable_shared_from_this { bool run_only_once_; std::vector untyped_nodes_; bool is_watch_renormalize_; + bool is_enable_; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc b/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc index 687bc12f05..999c4a85a9 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc +++ b/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc @@ -64,7 +64,7 @@ bool StepAllreduceFusion(const FuncGraphPtr &root, const opt::OptimizerPtr &opti DumpGraph(root, std::string(ALLREDUCE_FUSION_END)); // allreduce fusion only run once - root->flags()[ALLREDUCE_FUSION_RUN_ONCE_ONLY] = true; + root->set_flag(ALLREDUCE_FUSION_RUN_ONCE_ONLY, true); res->results()[pipeline::kStepParallelGraph] = root; #if defined(_WIN32) || defined(_WIN64) auto end_time = 
std::chrono::steady_clock::now(); diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc index bb25246608..9fb79ceee4 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc @@ -28,7 +28,6 @@ namespace mindspore { namespace parallel { -#define DOUBLE_MAX (std::numeric_limits::max)() // Compute redistributed cost double CostRedis(const Graph::NodeType &node, @@ -621,75 +620,50 @@ StrategyRec CostCommon::ChoseStr(const std::vector &cost_op, StrategyRec break; default: - MS_LOG(EXCEPTION) << "Failure: CostBiasAdd failed."; + MS_LOG(EXCEPTION) << "Failure: Common failed."; } return str; } -// Get weight for BN -double CostBatchNorm::GetMinCostIn(const OperatorRec &op) { - int tensor = static_cast(op.arguments[0].tensor_shape.shape_h * op.arguments[0].tensor_str.str_h) * - static_cast(op.arguments[0].tensor_shape.shape_n * op.arguments[0].tensor_str.str_n) * - static_cast(op.arguments[0].tensor_shape.shape_w * op.arguments[0].tensor_str.str_w) * - static_cast(op.arguments[0].tensor_shape.shape_c * op.arguments[0].tensor_str.str_c); - - std::vector cost_in; - cost_in.push_back(StrDimB(tensor) * 1.2); - cost_in.push_back(DOUBLE_MAX); - cost_in.push_back(StrDimH(tensor) * 1.2); - cost_in.push_back(StrDimW(tensor) * 1.2); - - return *min_element(cost_in.begin(), cost_in.end()); -} - -// Get optimal strategy for BN -StrategyRec CostBatchNorm::GetOptimalStr(const Graph::NodeType &node, - const std::vector> &node_name_to_strategy, - const Graph &graph) { +// Get optimal strategy for BatchParallel OPs +StrategyRec CostBatchParallel::GetOptimalStr(const Graph::NodeType &node) { const OperatorRec &op = node.apply; - - int tensor_filter_n = static_cast(op.arguments[1].tensor_shape.shape_n * op.arguments[1].tensor_str.str_n); - int tensor_filter_c = static_cast(op.arguments[1].tensor_shape.shape_c * 
op.arguments[1].tensor_str.str_c); - int tensor_filter_h = static_cast(op.arguments[1].tensor_shape.shape_h * op.arguments[1].tensor_str.str_h); - int tensor_filter_w = static_cast(op.arguments[1].tensor_shape.shape_w * op.arguments[1].tensor_str.str_w); - - int tensor_filter = tensor_filter_h * tensor_filter_w * tensor_filter_n * tensor_filter_c; - - int output_tensor_h = static_cast(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h); - int output_tensor_w = static_cast(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w); - int output_tensor_n = static_cast(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n); + int tensor_n = static_cast(op.arguments[0].tensor_shape.shape_n * op.arguments[0].tensor_str.str_n); + int tensor_c = static_cast(op.arguments[0].tensor_shape.shape_c * op.arguments[0].tensor_str.str_c); + int tensor_h = static_cast(op.arguments[0].tensor_shape.shape_h * op.arguments[0].tensor_str.str_h); + int tensor_w = static_cast(op.arguments[0].tensor_shape.shape_w * op.arguments[0].tensor_str.str_w); std::vector cost_op; - std::vector> mode; - if (output_tensor_n < 2 || output_tensor_n % 2 != 0) { + if (tensor_n < 2 || tensor_n % 2 != 0) { cost_op.push_back(DOUBLE_MAX); } else { - cost_op.push_back(StrDimB(tensor_filter) + CostRedis(node, node_name_to_strategy, - mode = {{0.5, 1, 1, 1}, {1, 1, 1, 1}, {0.5, 1, 1, 1}}, graph)); + cost_op.push_back(cost_in_); } - cost_op.push_back(DOUBLE_MAX); + if (tensor_c < 2 || tensor_c % 2 != 0) { + cost_op.push_back(DOUBLE_MAX); + } else { + cost_op.push_back(cost_in_); + } - if (output_tensor_h < 2 || output_tensor_h % 2 != 0) { + if (tensor_h < 2 || tensor_h % 2 != 0) { cost_op.push_back(DOUBLE_MAX); } else { - cost_op.push_back(StrDimH(tensor_filter) + CostRedis(node, node_name_to_strategy, - mode = {{1, 1, 0.5, 1}, {1, 1, 1, 1}, {1, 1, 0.5, 1}}, graph)); + cost_op.push_back(cost_in_); } - if (output_tensor_w < 2 || output_tensor_w % 2 != 0) { 
+ if (tensor_w < 2 || tensor_w % 2 != 0) { cost_op.push_back(DOUBLE_MAX); } else { - cost_op.push_back(StrDimW(tensor_filter) + CostRedis(node, node_name_to_strategy, - mode = {{1, 1, 1, 0.5}, {1, 1, 1, 1}, {1, 1, 1, 0.5}}, graph)); + cost_op.push_back(cost_in_); } return ChoseStr(cost_op, node.apply.str); } -// Chose strategy for BatchNorm -StrategyRec CostBatchNorm::ChoseStr(const std::vector &cost_op, StrategyRec str) { +// Chose strategy for BatchParallel op +StrategyRec CostBatchParallel::ChoseStr(const std::vector &cost_op, StrategyRec str) { uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin(); if (cost_op[min_position] > (DOUBLE_MAX - 0.1)) { return str; @@ -700,36 +674,75 @@ StrategyRec CostBatchNorm::ChoseStr(const std::vector &cost_op, Strategy str.inputTensor[0].str_n /= 2.0; str.outputTensor.str_n /= 2.0; str.cut_counter += 1; - str.cost = str.cost + cost_in_b_; + str.cost = str.cost + cost_in_; break; case 1: str.inputTensor[0].str_c /= 2.0; - str.inputTensor[1].str_c /= 2.0; - str.inputTensor[2].str_c /= 2.0; - str.inputTensor[3].str_c /= 2.0; - str.inputTensor[4].str_c /= 2.0; str.outputTensor.str_c /= 2.0; str.cut_counter += 1; - str.cost = str.cost + cost_in_c_; + str.cost = str.cost + cost_in_; break; case 2: str.inputTensor[0].str_h /= 2.0; str.outputTensor.str_h /= 2.0; str.cut_counter += 1; - str.cost = str.cost + cost_in_h_; + str.cost = str.cost + cost_in_; break; case 3: str.inputTensor[0].str_w /= 2.0; str.outputTensor.str_w /= 2.0; str.cut_counter += 1; - str.cost = str.cost + cost_in_w_; + str.cost = str.cost + cost_in_; + break; + + default: + MS_LOG(EXCEPTION) << "Failure: CostBatchParallel failed."; + } + return str; +} + +// Chose strategy for CostSoftmaxCrossEntropyWithLogits +StrategyRec CostSoftmaxCrossEntropyWithLogits::ChoseStr(const std::vector &cost_op, StrategyRec str) { + uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin(); + if (cost_op[min_position] > 
(DOUBLE_MAX - 0.1)) { + return str; + } + + switch (min_position) { + case 0: + str.inputTensor[0].str_n /= 2.0; + str.inputTensor[1].str_n /= 2.0; + str.cut_counter += 1; + str.cost = str.cost + cost_in_; + break; + + case 1: + str.inputTensor[0].str_c /= 2.0; + str.inputTensor[1].str_c /= 2.0; + str.cut_counter += 1; + str.cost = str.cost + cost_in_; + break; + + case 2: + str.inputTensor[0].str_h /= 2.0; + str.inputTensor[1].str_h /= 2.0; + str.outputTensor.str_w /= 2.0; + str.cut_counter += 1; + str.cost = str.cost + cost_in_; + break; + + case 3: + str.inputTensor[0].str_w /= 2.0; + str.inputTensor[1].str_w /= 2.0; + str.cut_counter += 1; + str.cost = str.cost + cost_in_; break; default: - MS_LOG(EXCEPTION) << "Failure: CostBatchNorm failed."; + MS_LOG(EXCEPTION) << "Failure: CostSoftmax failed."; } return str; } diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h index c45c81aca0..fb4fc27164 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h @@ -28,6 +28,8 @@ namespace mindspore { namespace parallel { +#define DOUBLE_MAX (std::numeric_limits::max)() + double CostRedis(const Graph::NodeType &node, const std::vector> &node_name_to_strategy, const std::vector> &mode, const Graph &graph); @@ -195,7 +197,6 @@ class CostTensorAdd : public CostCommon { }; // all the following operation are element-wise and have the same cost -class CostOneHot : public CostCommon {}; class CostReLU : public CostCommon {}; class CostLog : public CostCommon {}; class CostExp : public CostCommon {}; @@ -206,50 +207,27 @@ class CostDiv : public CostCommon {}; class CostSqueeze : public CostCommon {}; class CostCast : public CostCommon {}; -// class BatchNorm is used to compute the cost of BatchNorm operator. -class CostBatchNorm { +// class BatchParallel is used to compute the cost of BatchParallel operator. 
+class CostBatchParallel { public: - StrategyRec GetOptimalStr(const Graph::NodeType &node, - const std::vector> &node_name_to_strategy, - const Graph &graph); + virtual StrategyRec GetOptimalStr(const Graph::NodeType &node); - double GetMinCostIn(const OperatorRec &op); - - private: - double StrDimB(int32_t Tensor) { - cost_in_b_ = (static_cast(Tensor) * 4.0) / 2.0; - - return cost_in_b_; - } - - double StrDimC() { - cost_in_c_ = 0.0; - - return cost_in_c_; - } + virtual double GetMaxCostIn() const { return DOUBLE_MAX; } - double StrDimH(int32_t Tensor) { - cost_in_h_ = (static_cast(Tensor) * 4.0) / 2.0; - - return cost_in_h_; - } + protected: + virtual StrategyRec ChoseStr(const std::vector &cost_op, StrategyRec str); - double StrDimW(int32_t Tensor) { - cost_in_w_ = (static_cast(Tensor) * 4.0) / 2.0; + double cost_in_ = 0; +}; // class BatchParallel is used to compute the cost of BatchParallel operator. - return cost_in_w_; - } +class CostBatchNorm : public CostBatchParallel {}; +class CostOneHot : public CostBatchParallel {}; +class CostPRelu : public CostBatchParallel {}; +class CostSoftmax : public CostBatchParallel {}; +class CostSoftmaxCrossEntropyWithLogits : public CostBatchParallel { StrategyRec ChoseStr(const std::vector &cost_op, StrategyRec str); - - double cost_in_b_ = 0; - - double cost_in_c_ = 0; - - double cost_in_h_ = 0; - - double cost_in_w_ = 0; -}; // class BatchNorm is used to compute the cost of BatchNorm operator. 
+}; } // namespace parallel } // namespace mindspore #endif // PARALLEL_AUTO_PARALLEL_REC_COST_H_ diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc index 5bccf73fc2..19e07aae02 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc @@ -37,19 +37,75 @@ void GenerateStrategy(std::shared_ptr graph, const std::vector> no_stra_op_list(new std::vector); - GenerateEliminatedOperatorStrategyForward(graph, ops, eli_list, input_tensor_names, index_list, no_stra_op_list); + for (size_t i = 0; i < eli_list->size(); i++) { + no_stra_op_list->push_back(eli_list->at(i)[0]); + } + GenerateEliminatedOperatorStrategyForward(graph, ops, input_tensor_names, index_list, no_stra_op_list); GenerateEliminatedOperatorStrategyBackward(ops, input_tensor_names, no_stra_op_list); + GenerateRemainingOperatorStrategy(graph, ops, input_tensor_names, index_list, no_stra_op_list); } std::vector> PrepareMatMul(const std::shared_ptr &graph, const std::vector> &ops, const size_t iter_graph, const size_t iter_ops) { std::vector> strategies; + auto attrs = ops[iter_ops]->attrs(); + bool transpose_a = attrs[TRANSPOSE_A]->cast()->value(); + bool transpose_b = attrs[TRANSPOSE_B]->cast()->value(); + + // HCCL does not support multi-dimension partition, and the hardware does not support excessive + // number of EVENT, so we temporarily disable matmul's multi-dimension partition function. 
+ const auto max_cut = 1.0 / g_device_manager->DeviceNum(); + if (graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h != max_cut && + graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w != max_cut) { + graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h = 1.0; + graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_w = 1.0; + graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_h = 1.0; + graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w = 1.0; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = 1.0; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = 1.0; + + auto shape_1 = ops[iter_ops]->inputs_tensor_info()[0].shape()[0]; + if (transpose_a) { + shape_1 = ops[iter_ops]->inputs_tensor_info()[0].shape()[1]; + } + auto shape_4 = ops[iter_ops]->inputs_tensor_info()[1].shape()[1]; + if (transpose_b) { + shape_4 = ops[iter_ops]->inputs_tensor_info()[1].shape()[0]; + } + + bool already_cut = false; + if (shape_1 >= shape_4) { + if (shape_1 % g_device_manager->DeviceNum() == 0) { + graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h = max_cut; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = max_cut; + already_cut = true; + } + if (!already_cut && shape_4 % g_device_manager->DeviceNum() == 0) { + graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w = max_cut; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = max_cut; + already_cut = true; + } + } else { + if (shape_4 % g_device_manager->DeviceNum() == 0) { + graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w = max_cut; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = max_cut; + already_cut = true; + } + if (!already_cut && shape_1 % g_device_manager->DeviceNum() == 0) { + graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h = max_cut; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = max_cut; + already_cut = true; + } + } + + if (!already_cut) { + MS_LOG(EXCEPTION) << "Failure: MatMul's shape 
is invalid."; + } + } + for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) { std::vector s; - auto attrs = ops[iter_ops]->attrs(); - bool transpose_a = attrs[TRANSPOSE_A]->cast()->value(); - bool transpose_b = attrs[TRANSPOSE_B]->cast()->value(); if (transpose_a && (iter_op_inputs == 0)) { s.push_back( static_cast(1.0 / graph->nodes[iter_graph].apply.arguments[iter_op_inputs].tensor_str.str_w)); @@ -71,52 +127,58 @@ std::vector> PrepareMatMul(const std::shared_ptr &gr return strategies; } -std::vector> PrepareVirtualDataset(const std::vector> &ops, - const size_t iter_ops) { - std::vector> strategies = MakeDataParallelStrategy(ops, iter_ops); - strategies[1][0] = strategies[0][0]; +std::vector> PrepareBiasAdd(const std::shared_ptr> &s) { + std::vector> strategies; + strategies.push_back(*s); + std::vector s_biasadd; + s_biasadd.push_back(s->at(1)); + strategies.push_back(s_biasadd); return strategies; } -std::vector> PrepareScalarInputOperator(const std::vector> &ops, - const size_t iter_ops, std::vector s) { +std::vector> PrepareOneHot(const std::shared_ptr &graph, + const std::vector> &ops, + const size_t iter_graph, const size_t iter_ops) { + std::vector> strategies = MakeRecSearchStrategy(graph, ops, iter_graph, iter_ops); + strategies[0][0] = strategies[0][1]; + strategies[0][1] = 1; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = graph->nodes[iter_graph].tensor_parm.tensor_str.str_w; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = 1.0; + std::vector s_empty = {}; + strategies.push_back(s_empty); + strategies.push_back(s_empty); + return strategies; +} + +std::vector> PrepareGatherV2(const std::shared_ptr> &s) { std::vector> strategies; + strategies.push_back(*s); + return strategies; +} - auto dev_num = g_device_manager->DeviceNum(); - size_t cut_num = 1; - for (size_t iter_s = 0; iter_s < s.size(); iter_s++) { - cut_num *= s[iter_s]; - } - if (cut_num != dev_num) { - std::vector 
s_max = s; - for (size_t dim = 0; dim < (size_t)ops[iter_ops]->inputs_tensor_info()[0].shape().size(); dim++) { - size_t shape = ops[iter_ops]->inputs_tensor_info()[0].shape()[dim] / s[dim]; - while (cut_num < dev_num && shape % 2 == 0) { - shape = shape / 2; - s_max[dim] = s_max[dim] * 2; - cut_num = cut_num * 2; - } - if (cut_num == dev_num) { - break; - } +std::vector> PrepareL2Normalize(const std::vector> &ops, + const size_t iter_ops, std::vector s) { + int32_t axis = 0; + auto iter = ops[iter_ops]->attrs().find(AXIS); + if (iter != ops[iter_ops]->attrs().end()) { + MS_EXCEPTION_IF_NULL(iter->second); + if (iter->second->isa()) { + axis = iter->second->cast()->value(); + } else { + MS_LOG(EXCEPTION) << ops[iter_ops]->name() << " : The value of axis is not int."; } - s = s_max; } - strategies.push_back(s); - std::vector s_biasadd; - s_biasadd.push_back(s[1]); - strategies.push_back(s_biasadd); + int32_t axis_index = axis; + if (axis < 0) { + size_t input_dim = ops[iter_ops]->inputs_tensor_info()[0].shape().size(); + axis_index = static_cast(input_dim) + axis; + } - return strategies; -} + s[IntToSize(axis_index)] = 1; -std::vector> PrepareOneHot(std::vector s) { std::vector> strategies; - std::vector s_empty = {}; strategies.push_back(s); - strategies.push_back(s_empty); - strategies.push_back(s_empty); return strategies; } @@ -131,16 +193,13 @@ std::vector> MakeRecSearchStrategy(const std::shared_ptrstrategy(); - std::vector> strategies; for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) { if (iter_op_inputs >= origin_strategy->GetInputDim().size()) { MS_LOG(EXCEPTION) << "Failure: Strategy's InputDim out of range."; } - // size_t output_size = ops[iter_ops]->outputs_tensor_info()[0].shape().size(); size_t output_size = origin_strategy->GetInputDim()[iter_op_inputs].size(); - std::vector s; if (output_size == 4) { s.push_back( @@ -164,14 +223,14 @@ std::vector> MakeRecSearchStrategy(const 
std::shared_ptr> MakeDataParallelStrategy(const std::vector> &ops, - const size_t iter_ops) { +std::vector> MakeDataParallelStrategy(const std::shared_ptr &graph, + const std::vector> &ops, + const size_t iter_graph, const size_t iter_ops) { if (ops.empty()) { MS_LOG(EXCEPTION) << "Failure: Operators is empty."; } @@ -180,8 +239,9 @@ std::vector> MakeDataParallelStrategy(const std::vectorstrategy(); - std::vector> strategies; + size_t max_device_num = g_device_manager->DeviceNum(); + size_t target_tensor_batch = ops[iter_ops]->outputs_tensor_info()[0].shape()[0]; for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) { if (iter_op_inputs >= origin_strategy->GetInputDim().size()) { MS_LOG(EXCEPTION) << "Failure: Strategy's InputDim out of range."; @@ -192,8 +252,6 @@ std::vector> MakeDataParallelStrategy(const std::vectorDeviceNum(); - size_t target_tensor_batch = ops[iter_ops]->outputs_tensor_info()[0].shape()[0]; s.push_back(std::min(max_device_num, target_tensor_batch)); } else { s.push_back(1); @@ -202,9 +260,21 @@ std::vector> MakeDataParallelStrategy(const std::vectornodes[iter_graph].tensor_parm.tensor_str.str_n = 1.0; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_c = 1.0; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = 1.0; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = 1.0; + if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 1) { + graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = 1.0 / std::min(max_device_num, target_tensor_batch); + } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 2) { + graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = 1.0 / std::min(max_device_num, target_tensor_batch); + } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 4) { + graph->nodes[iter_graph].tensor_parm.tensor_str.str_n = 1.0 / std::min(max_device_num, target_tensor_batch); + } + return strategies; } @@ -217,20 +287,18 @@ 
std::vector> PrepareStrategy(const std::shared_ptr & if (iter_ops >= ops.size()) { MS_LOG(EXCEPTION) << "Failure: Operators' elements out of range."; } + MS_EXCEPTION_IF_NULL(ops[iter_ops]); auto type = ops[iter_ops]->type(); - if (type == VIRTUAL_DATA_SET) { - return PrepareVirtualDataset(ops, iter_ops); - } auto idx = DictOpType.find(type); if (idx == DictOpType.end()) { - return MakeDataParallelStrategy(ops, iter_ops); + return MakeDataParallelStrategy(graph, ops, iter_graph, iter_ops); } if (type == MATMUL) { return PrepareMatMul(graph, ops, iter_graph, iter_ops); - } else if (type == RESHAPE) { - return MakeDataParallelStrategy(ops, iter_ops); + } else if (type == ONEHOT) { + return PrepareOneHot(graph, ops, iter_graph, iter_ops); } else { return MakeRecSearchStrategy(graph, ops, iter_graph, iter_ops); } @@ -242,28 +310,25 @@ void GeneratePartitionedOperatorStrategy(const std::shared_ptr graph, for (size_t iter_ops = 0; iter_ops < (size_t)index_list->size(); iter_ops++) { std::vector> strategies; size_t iter_graph = index_list->at(iter_ops); - if (iter_graph == SIZE_MAX) { - StrategyPtr sp = std::make_shared(0, strategies); - ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost()); - continue; + if (iter_graph != SIZE_MAX) { + strategies = PrepareStrategy(graph, ops, iter_graph, iter_ops); } - strategies = PrepareStrategy(graph, ops, iter_graph, iter_ops); StrategyPtr sp = std::make_shared(0, strategies); ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost()); } } -int FindIndexOfOperatorIncoming(const std::vector> &input_tensor_names, - const size_t iter_ops) { - int incoming_op_index = -1; - for (size_t i = 1; i < (size_t)input_tensor_names[iter_ops].size(); i++) { - for (size_t j = 0; j < (size_t)input_tensor_names.size(); j++) { +size_t FindIndexOfOperatorIncoming(const std::vector> &input_tensor_names, + const size_t iter_ops) { + size_t incoming_op_index = SIZE_MAX; + for (size_t i = 1; i < 
input_tensor_names[iter_ops].size(); i++) { + for (size_t j = 0; j < input_tensor_names.size(); j++) { if (input_tensor_names[iter_ops][i] == input_tensor_names[j][0]) { incoming_op_index = j; break; } } - if (incoming_op_index != -1) { + if (incoming_op_index != SIZE_MAX) { break; } } @@ -298,12 +363,16 @@ std::vector CopyIncomingOperatorOutputStrategy(const std::shared_ptr PrepareIncomingOperatorInputStrategy(const std::vector> &ops, - const int incoming_op_index) { + const size_t incoming_op_index) { std::vector s; + if (ops[incoming_op_index]->type() == RESHAPE || ops[incoming_op_index]->type() == GATHERV2) { + return s; + } auto strategy = ops[incoming_op_index]->selected_strategy(); if (strategy->GetInputNumber() == 0) { return s; } + for (size_t i = 0; i < (size_t)ops[incoming_op_index]->inputs_tensor_info().size(); i++) { if (ops[incoming_op_index]->inputs_tensor_info()[i].shape().size() == 0) { continue; @@ -327,6 +396,7 @@ std::vector GetAxisList(const std::vector } else { MS_LOG(EXCEPTION) << "Failure: Axis type is invalid, neither tuple nor list." << std::endl; } + for (auto &element : elements) { if (!element->isa()) { MS_LOG(EXCEPTION) << "Failure: Dimension indexes is not Int32." 
<< std::endl; @@ -338,12 +408,13 @@ std::vector GetAxisList(const std::vector } std::vector ModifyStrategyIfSqueezeIncoming(const std::vector> &ops, - const int incoming_op_index, std::vector s) { + const size_t incoming_op_index, std::vector s) { std::vector s_Squeeze; std::vector stra_dim_list; for (size_t i = 0; i < s.size(); i++) { stra_dim_list.push_back(i); } + auto axis_list = GetAxisList(ops, incoming_op_index); for (auto axis : axis_list) { auto it = find(stra_dim_list.begin(), stra_dim_list.end(), axis); @@ -355,6 +426,7 @@ std::vector ModifyStrategyIfSqueezeIncoming(const std::vector GetDimList(const std::vector> } std::vector ModifyStrategyIfReduceIncoming(const std::vector> &ops, - const int incoming_op_index, std::vector s) { + const size_t incoming_op_index, std::vector s) { std::vector s_Reduce; std::vector axis_list; for (size_t i = 0; i < s.size(); i++) { axis_list.push_back(i); } + auto dim_list = GetDimList(ops, incoming_op_index); for (auto axis : dim_list) { auto it = find(axis_list.begin(), axis_list.end(), axis); @@ -405,6 +478,7 @@ std::vector ModifyStrategyIfReduceIncoming(const std::vector ModifyStrategyIfReduceIncoming(const std::vector CopyIncomingOperatorInputStrategy(const std::vector> &ops, - const int incoming_op_index, const size_t iter_ops, - const std::shared_ptr> no_stra_op_list) { + const size_t iter_ops, const size_t incoming_op_index) { std::vector s; s = PrepareIncomingOperatorInputStrategy(ops, incoming_op_index); if (s.size() != 0) { @@ -429,27 +502,31 @@ std::vector CopyIncomingOperatorInputStrategy(const std::vector> GenerateStrategiesFromStrategy(const std::vector> &ops, - const size_t iter_ops, std::vector s) { + const size_t iter_ops, + std::vector basic_stra) { std::vector s_empty = {}; std::vector> stra; + MS_EXCEPTION_IF_NULL(ops[iter_ops]); - if (s.size() == 0) { + if (basic_stra.size() == 0) { for (size_t iter_op_inputs = 0; iter_op_inputs < (size_t)ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) { 
- stra.push_back(s); + stra.push_back(basic_stra); } return stra; } - MS_EXCEPTION_IF_NULL(ops[iter_ops]); - if (ops[iter_ops]->type() == BIAS_ADD || ops[iter_ops]->type() == PRELU) { - return PrepareScalarInputOperator(ops, iter_ops, s); + auto s_ptr = std::make_shared>(basic_stra); + if (ops[iter_ops]->type() == BIAS_ADD) { + return PrepareBiasAdd(s_ptr); } - if (ops[iter_ops]->type() == ONEHOT) { - return PrepareOneHot(s); + if (ops[iter_ops]->type() == GATHERV2) { + return PrepareGatherV2(s_ptr); + } + if (ops[iter_ops]->type() == L2_NORMALIZE) { + return PrepareL2Normalize(ops, iter_ops, basic_stra); } - auto dev_num = g_device_manager->DeviceNum(); for (size_t iter_op_inputs = 0; iter_op_inputs < (size_t)ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) { if (ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size() == 0) { @@ -457,67 +534,49 @@ std::vector> GenerateStrategiesFromStrategy(const std::vect continue; } - size_t cut_num = 1; - for (size_t iter_s = 0; iter_s < s.size(); iter_s++) { - cut_num *= s[iter_s]; - } - if (cut_num == dev_num) { - std::vector s_1 = s; - bool modified = false; - for (size_t j = 0; j < (size_t)ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size(); j++) { - if (ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape()[j] == 1) { - s_1[j] = 1; - modified = true; - } + std::vector tmp_stra = basic_stra; + bool modified = false; + for (size_t j = 0; j < (size_t)ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size(); j++) { + if (ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape()[j] == 1) { + tmp_stra[j] = 1; + modified = true; } - if (modified) { - stra.push_back(s_1); - } else { - stra.push_back(s); - } - continue; } - - std::vector s_max = s; - for (size_t dim = 0; dim < (size_t)ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size(); dim++) { - size_t shape = ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape()[dim] / s[dim]; - while (cut_num < dev_num 
&& shape % 2 == 0) { - shape = shape / 2; - s_max[dim] = s_max[dim] * 2; - cut_num = cut_num * 2; - } - if (cut_num == dev_num) { - break; - } + if (modified) { + stra.push_back(tmp_stra); + } else { + stra.push_back(basic_stra); } - - stra.push_back(s_max); } return stra; } void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr graph, const std::vector> &ops, - const std::shared_ptr>> eli_list, const std::vector> &input_tensor_names, const std::shared_ptr> index_list, const std::shared_ptr> no_stra_op_list) { - for (int eli_index = eli_list->size() - 1; eli_index >= 0; eli_index--) { - size_t iter_ops = eli_list->at(eli_index)[0]; + if (no_stra_op_list->size() == 0) { + return; + } + std::vector no_stra_op_list_bis; + + for (size_t iter_list = no_stra_op_list->size(); iter_list > 0; iter_list--) { + size_t iter_ops = no_stra_op_list->at(iter_list - 1); std::vector> stra; std::vector s; - int incoming_op_index = FindIndexOfOperatorIncoming(input_tensor_names, iter_ops); - if (incoming_op_index != -1) { + size_t incoming_op_index = FindIndexOfOperatorIncoming(input_tensor_names, iter_ops); + if (incoming_op_index != SIZE_MAX) { auto iter_graph = index_list->at(incoming_op_index); if (iter_graph != SIZE_MAX) { s = CopyIncomingOperatorOutputStrategy(graph, ops, iter_ops, iter_graph); } else { - s = CopyIncomingOperatorInputStrategy(ops, incoming_op_index, iter_ops, no_stra_op_list); + s = CopyIncomingOperatorInputStrategy(ops, iter_ops, incoming_op_index); } } if (s.size() == 0) { - no_stra_op_list->push_back(iter_ops); + no_stra_op_list_bis.push_back(iter_ops); } else { stra = GenerateStrategiesFromStrategy(ops, iter_ops, s); } @@ -525,6 +584,11 @@ void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr grap StrategyPtr sp = std::make_shared(0, stra); ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost()); } + + no_stra_op_list->clear(); + for (size_t i = 0; i < no_stra_op_list_bis.size(); i++) { + 
no_stra_op_list->push_back(no_stra_op_list_bis[i]); + } } std::vector ModifyStrategyIfSqueezeOutgoing(const std::vector> &ops, @@ -534,7 +598,7 @@ std::vector ModifyStrategyIfSqueezeOutgoing(const std::vector ModifyStrategyIfSqueezeOutgoing(const std::vector ModifyStrategyIfReduceOutgoing(const std::vector> &ops, - const size_t iter_ops, std::vector s) { - std::vector dim_list = GetDimList(ops, iter_ops); - if (dim_list.size() == 0) { - return s; + size_t cut = 1; + for (size_t i = 0; i < s_Squeeze.size(); i++) { + cut *= s_Squeeze[i]; } - std::vector s_Reduce; - size_t s_index = 0; - size_t dim_list_index = 0; - for (size_t i = 0; i < (size_t)(s.size() + dim_list.size()); i++) { - if (i == (size_t)dim_list[dim_list_index]) { - s_Reduce.push_back(1); - dim_list_index++; - } else { - s_Reduce.push_back(s[s_index]); - s_index++; - } + if (cut != g_device_manager->DeviceNum()) { + s_Squeeze.clear(); } - return s_Reduce; + + return s_Squeeze; } std::vector CopyOutgoingOperatorInputStrategy(const std::vector> &ops, const std::vector> &input_tensor_names, const size_t iter_ops) { std::vector s; + if (ops[iter_ops]->type() == REDUCE_MAX || ops[iter_ops]->type() == REDUCE_MIN || + ops[iter_ops]->type() == REDUCE_SUM || ops[iter_ops]->type() == REDUCE_MEAN || ops[iter_ops]->type() == RESHAPE || + ops[iter_ops]->type() == GATHERV2) { + return s; + } + bool found = false; - for (size_t i = 0; i < (size_t)input_tensor_names.size(); i++) { - for (size_t j = 1; j < (size_t)input_tensor_names[i].size(); j++) { - if (input_tensor_names[i][j] == input_tensor_names[iter_ops][0]) { - for (size_t k = 0; k < ops[i]->selected_strategy()->GetInputDim()[j - 1].size(); ++k) { - s.push_back(ops[i]->selected_strategy()->GetInputDim()[j - 1][k]); - } + size_t outgoing_op_index = SIZE_MAX; + size_t iter_op_inputs = SIZE_MAX; + for (size_t i = 0; i < input_tensor_names.size(); i++) { + for (size_t j = 1; j < input_tensor_names[i].size(); j++) { + if (input_tensor_names[i][j] == 
input_tensor_names[iter_ops][0] && + ops[i]->selected_strategy()->GetInputNumber() != 0) { + outgoing_op_index = i; + iter_op_inputs = j - 1; found = true; break; } } - if (found) break; + if (found) { + break; + } + } + + if (outgoing_op_index != SIZE_MAX && iter_op_inputs != SIZE_MAX) { + for (size_t k = 0; k < ops[iter_ops]->outputs_tensor_info()[0].shape().size(); ++k) { + s.push_back(ops[outgoing_op_index]->selected_strategy()->GetInputDim()[iter_op_inputs][k]); + } } return s; } @@ -589,23 +657,66 @@ std::vector CopyOutgoingOperatorInputStrategy(const std::vector> &ops, const std::vector> &input_tensor_names, const std::shared_ptr> no_stra_op_list) { - MS_EXCEPTION_IF_NULL(no_stra_op_list); - for (int iter_list = no_stra_op_list->size() - 1; iter_list >= 0; iter_list--) { - auto iter_ops = no_stra_op_list->at(iter_list); + if (no_stra_op_list->size() == 0) { + return; + } + std::vector no_stra_op_list_bis; + + for (size_t iter_list = no_stra_op_list->size(); iter_list > 0; iter_list--) { + auto iter_ops = no_stra_op_list->at(iter_list - 1); std::vector> stra; std::vector s = CopyOutgoingOperatorInputStrategy(ops, input_tensor_names, iter_ops); - if (s.size() == 0) { - for (size_t i = 0; i < ops[iter_ops]->inputs_tensor_info()[0].shape().size(); i++) { - s.push_back(1); - } - } - if (ops[iter_ops]->type() == SQUEEZE) { + + if (s.size() != 0 && ops[iter_ops]->type() == SQUEEZE) { s = ModifyStrategyIfSqueezeOutgoing(ops, iter_ops, s); } - if (ops[iter_ops]->type() == REDUCE_SUM || ops[iter_ops]->type() == REDUCE_MAX || - ops[iter_ops]->type() == REDUCE_MIN || ops[iter_ops]->type() == REDUCE_MEAN) { - s = ModifyStrategyIfReduceOutgoing(ops, iter_ops, s); + if (s.size() != 0) { + stra = GenerateStrategiesFromStrategy(ops, iter_ops, s); + } else { + no_stra_op_list_bis.push_back(iter_ops); + } + + StrategyPtr sp = std::make_shared(0, stra); + ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost()); + } + + no_stra_op_list->clear(); + for 
(size_t i = 0; i < no_stra_op_list_bis.size(); i++) { + no_stra_op_list->push_back(no_stra_op_list_bis[i]); + } +} + +void GenerateRemainingOperatorStrategy(const std::shared_ptr graph, + const std::vector> &ops, + const std::vector> &input_tensor_names, + const std::shared_ptr> index_list, + const std::shared_ptr> no_stra_op_list) { + if (no_stra_op_list->size() == 0) { + return; + } + + size_t no_stra_op_list_size; + do { + no_stra_op_list_size = no_stra_op_list->size(); + GenerateEliminatedOperatorStrategyForward(graph, ops, input_tensor_names, index_list, no_stra_op_list); + GenerateEliminatedOperatorStrategyBackward(ops, input_tensor_names, no_stra_op_list); + } while (no_stra_op_list_size > no_stra_op_list->size()); + + for (size_t iter_list = 0; iter_list < no_stra_op_list->size(); iter_list++) { + auto iter_ops = no_stra_op_list->at(iter_list); + std::vector> stra; + std::vector s; + + size_t max_dim_num = 0; + for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) { + if (ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size() > max_dim_num) { + max_dim_num = ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size(); + } + } + for (size_t i = 0; i < max_dim_num; i++) { + s.push_back(1); } + stra = GenerateStrategiesFromStrategy(ops, iter_ops, s); StrategyPtr sp = std::make_shared(0, stra); ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost()); diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h index db275dda10..c9604b449f 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h @@ -34,55 +34,61 @@ void GenerateStrategy(std::shared_ptr graph, const std::vector> PrepareMatMul(const std::shared_ptr &graph, const std::vector> &ops, const size_t 
iter_graph, const size_t iter_ops); -std::vector> PrepareVirtualDataset(const std::vector> &ops, - const size_t iter_ops); -std::vector> PrepareScalarInputOperator(const std::vector> &ops, - const size_t iter_ops, std::vector s); -std::vector> PrepareOneHot(std::vector s); +std::vector> PrepareBiasAdd(const std::shared_ptr> &s); +std::vector> PrepareOneHot(const std::shared_ptr &graph, + const std::vector> &ops, + const size_t iter_graph, const size_t iter_ops); +std::vector> PrepareGatherV2(const std::shared_ptr> &s); +std::vector> PrepareL2Normalize(const std::vector> &ops, + const size_t iter_ops, std::vector s); std::vector> MakeRecSearchStrategy(const std::shared_ptr &graph, const std::vector> &ops, const size_t iter_graph, const size_t iter_ops); -std::vector> MakeDataParallelStrategy(const std::vector> &ops, - const size_t iter_ops); +std::vector> MakeDataParallelStrategy(const std::shared_ptr &graph, + const std::vector> &ops, + const size_t iter_graph, const size_t iter_ops); std::vector> PrepareStrategy(const std::shared_ptr &graph, const std::vector> &ops, const size_t iter_graph, const size_t iter_ops); void GeneratePartitionedOperatorStrategy(const std::shared_ptr graph, const std::vector> &ops, const std::shared_ptr> index_list); -int FindIndexOfOperatorIncoming(const std::vector> &input_tensor_names, const size_t iter_ops); +size_t FindIndexOfOperatorIncoming(const std::vector> &input_tensor_names, + const size_t iter_ops); std::vector CopyIncomingOperatorOutputStrategy(const std::shared_ptr graph, const std::vector> &ops, const size_t iter_ops, const size_t iter_graph); std::vector PrepareIncomingOperatorInputStrategy(const std::vector> &ops, - const int incoming_op_index); + const size_t incoming_op_index); std::vector GetAxisList(const std::vector> &ops, const int iter_ops); std::vector ModifyStrategyIfSqueezeIncoming(const std::vector> &ops, - const int incoming_op_index, std::vector s); + const size_t incoming_op_index, std::vector s); 
std::vector GetDimList(const std::vector> &ops, const size_t iter_ops); std::vector ModifyStrategyIfReduceIncoming(const std::vector> &ops, - const int incoming_op_index, std::vector s); + const size_t incoming_op_index, std::vector s); std::vector CopyIncomingOperatorInputStrategy(const std::vector> &ops, - const int incoming_op_index, const size_t iter_ops, - const std::shared_ptr> no_stra_op_list); + const size_t iter_ops, const size_t incoming_op_index); std::vector> GenerateStrategiesFromStrategy(const std::vector> &ops, - const size_t iter_ops, std::vector s); + const size_t iter_ops, + std::vector basic_stra); void GenerateEliminatedOperatorStrategyForward(std::shared_ptr graph, const std::vector> &ops, - const std::shared_ptr>> eli_list, const std::vector> &input_tensor_names, const std::shared_ptr> index_list, const std::shared_ptr> no_stra_op_list); std::vector ModifyStrategyIfSqueezeOutgoing(const std::vector> &ops, const size_t iter_ops, std::vector s); -std::vector ModifyStrategyIfReduceOutgoing(const std::vector> &ops, - const size_t iter_ops, std::vector s); std::vector CopyOutgoingOperatorInputStrategy(const std::vector> &ops, const std::vector> &input_tensor_names, const size_t iter_ops); void GenerateEliminatedOperatorStrategyBackward(const std::vector> &ops, const std::vector> &input_tensor_names, const std::shared_ptr> no_stra_op_list); +void GenerateRemainingOperatorStrategy(const std::shared_ptr graph, + const std::vector> &ops, + const std::vector> &input_tensor_names, + const std::shared_ptr> index_list, + const std::shared_ptr> no_stra_op_list); } // namespace parallel } // namespace mindspore #endif // PARALLEL_AUTO_PARALLEL_REC_GENERATE_STRATEGY_H_ diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h index a7bc1ae86f..647b857e16 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h +++ 
b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h @@ -38,6 +38,7 @@ enum OperatorType { kRecBiasAdd, kRecSoftmax, kRecSparseSoftmaxCrossEntropyWithLogits, + kRecSoftmaxCrossEntropyWithLogits, kRecOneHot, kRecLog, kRecExp, @@ -47,7 +48,9 @@ enum OperatorType { kRecDiv, kRecSqueeze, kRecCast, - kRecReduce + kRecReduce, + kRecPReLU, + kRecGatherV2 }; enum InfoType { kApplication, kConstant }; @@ -67,6 +70,7 @@ class Graph { std::vector node_in; // Nodes that point from this node std::vector node_out; + std::vector node_in_aux; // Node Type Info: Application or Constant. Defined in enum . InfoType info; // Operator info. Defined in struct . diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc index 823b1dca08..3e4eafe0a4 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc @@ -171,21 +171,41 @@ void Eliminate_Aux(const size_t node_index, const std::shared_ptr graph, eli.push_back(graph->nodes[node_index].node_out[i]); } eli_list->push_back(eli); - for (auto input_index : graph->nodes[node_index].node_in) { - auto it = find(graph->nodes[input_index].node_out.begin(), graph->nodes[input_index].node_out.end(), node_index); - if (it != graph->nodes[input_index].node_out.end()) { - graph->nodes[input_index].node_out.erase(it); - for (auto output_index : graph->nodes[node_index].node_out) { - graph->nodes[input_index].node_out.push_back(output_index); - } + + for (size_t i = 0; i < graph->nodes[node_index].node_in.size(); i++) { + auto *incoming_outputs = &graph->nodes[graph->nodes[node_index].node_in[i]].node_out; + auto it = find(incoming_outputs->begin(), incoming_outputs->end(), node_index); + if (it != incoming_outputs->end()) { + it = incoming_outputs->erase(it); + incoming_outputs->insert(it, graph->nodes[node_index].node_out.begin(), 
graph->nodes[node_index].node_out.end()); + } + } + + for (size_t i = 0; i < graph->nodes[node_index].node_in_aux.size(); i++) { + auto *aux_incoming_outputs = &graph->nodes[graph->nodes[node_index].node_in_aux[i]].node_out; + auto it = find(aux_incoming_outputs->begin(), aux_incoming_outputs->end(), node_index); + if (it != aux_incoming_outputs->end()) { + it = aux_incoming_outputs->erase(it); + aux_incoming_outputs->insert(it, graph->nodes[node_index].node_out.begin(), + graph->nodes[node_index].node_out.end()); } } - for (auto output_index : graph->nodes[node_index].node_out) { - auto it = find(graph->nodes[output_index].node_in.begin(), graph->nodes[output_index].node_in.end(), node_index); - if (it != graph->nodes[output_index].node_in.end()) { - graph->nodes[output_index].node_in.erase(it); - for (auto input_index : graph->nodes[node_index].node_in) { - graph->nodes[output_index].node_in.push_back(input_index); + + for (size_t i = 0; i < graph->nodes[node_index].node_out.size(); i++) { + auto *outgoing_inputs = &graph->nodes[graph->nodes[node_index].node_out[i]].node_in; + auto it = find(outgoing_inputs->begin(), outgoing_inputs->end(), node_index); + if (it != outgoing_inputs->end()) { + if (graph->nodes[node_index].node_in.size() > 0) { + outgoing_inputs->at(std::distance(outgoing_inputs->begin(), it)) = graph->nodes[node_index].node_in[0]; + for (size_t j = 1; j < graph->nodes[node_index].node_in.size(); j++) { + graph->nodes[graph->nodes[node_index].node_out[i]].node_in_aux.push_back(graph->nodes[node_index].node_in[j]); + } + for (size_t j = 1; j < graph->nodes[node_index].node_in_aux.size(); j++) { + graph->nodes[graph->nodes[node_index].node_out[i]].node_in_aux.push_back( + graph->nodes[node_index].node_in_aux[j]); + } + } else { + outgoing_inputs->erase(it); } } } @@ -196,20 +216,22 @@ std::shared_ptr EliminateGraph(const std::shared_ptr graph, const std::shared_ptr> index_list) { MS_EXCEPTION_IF_NULL(graph); const std::set type_list = { - 
OperatorType::kRecOneHot, OperatorType::kRecReLU, OperatorType::kRecLog, OperatorType::kRecExp, - OperatorType::kRecAdd, OperatorType::kRecElmWiseOp, OperatorType::kRecBiasAdd, OperatorType::kRecSub, - OperatorType::kRecMul, OperatorType::kRecDiv, OperatorType::kRecSqueeze, OperatorType::kRecReduce, - OperatorType::kRecCast}; + OperatorType::kRecReLU, OperatorType::kRecLog, OperatorType::kRecExp, OperatorType::kRecAdd, + OperatorType::kRecElmWiseOp, OperatorType::kRecBiasAdd, OperatorType::kRecSub, OperatorType::kRecMul, + OperatorType::kRecDiv, OperatorType::kRecSqueeze, OperatorType::kRecReduce, OperatorType::kRecCast, + OperatorType::kRecReshape, OperatorType::kRecGatherV2}; for (size_t node_index = 0; node_index < (size_t)graph->nodes.size(); node_index++) { auto type = graph->nodes[node_index].apply.op_type; if (type_list.find(type) != type_list.end()) { Eliminate_Aux(node_index, graph, eli_list); } } + index_list->reserve(graph->nodes.size()); for (size_t i = 0; i < (size_t)graph->nodes.size(); i++) { index_list->push_back(i); } + for (size_t i = 0; i < (size_t)eli_list->size(); i++) { if (eli_list->at(i)[0] >= index_list->size()) { MS_LOG(EXCEPTION) << "Failure: Operators' elements out of range."; @@ -219,6 +241,7 @@ std::shared_ptr EliminateGraph(const std::shared_ptr graph, index_list->at(j)--; } } + std::shared_ptr new_graph(new Graph); for (size_t i = 0; i < graph->nodes.size(); i++) { if (index_list->at(i) > SIZE_MAX / 2) { @@ -226,11 +249,23 @@ std::shared_ptr EliminateGraph(const std::shared_ptr graph, } new_graph->nodes.push_back(graph->nodes[i]); - for (size_t j = 0; j < new_graph->nodes[index_list->at(i)].node_in.size(); j++) { - new_graph->nodes[index_list->at(i)].node_in[j] = index_list->at(new_graph->nodes[index_list->at(i)].node_in[j]); + auto *node_in = &new_graph->nodes[index_list->at(i)].node_in; + for (size_t j = node_in->size(); j > 0; j--) { + bool IsEliminated = (index_list->at(node_in->at(j - 1)) == SIZE_MAX); + if (IsEliminated) { + 
node_in->erase(node_in->begin() + j - 1); + } else { + node_in->at(j - 1) = index_list->at(node_in->at(j - 1)); + } } - for (size_t j = 0; j < new_graph->nodes[index_list->at(i)].node_out.size(); j++) { - new_graph->nodes[index_list->at(i)].node_out[j] = index_list->at(new_graph->nodes[index_list->at(i)].node_out[j]); + auto *node_out = &new_graph->nodes[index_list->at(i)].node_out; + for (size_t j = node_out->size(); j > 0; j--) { + bool IsEliminated = (index_list->at(node_out->at(j - 1)) == SIZE_MAX); + if (IsEliminated) { + node_out->erase(node_out->begin() + j - 1); + } else { + node_out->at(j - 1) = index_list->at(node_out->at(j - 1)); + } } } return new_graph; diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h index e6398b9556..536c04cd9f 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h @@ -46,6 +46,7 @@ const std::map DictOpType{ {REDUCE_MAX, OperatorType::kRecReduce}, {REDUCE_MIN, OperatorType::kRecReduce}, {REDUCE_MEAN, OperatorType::kRecReduce}, + {GATHERV2, OperatorType::kRecGatherV2}, {RELU, OperatorType::kRecReLU}, {"ReLU6", OperatorType::kRecReLU}, @@ -55,16 +56,18 @@ const std::map DictOpType{ {"HSigmoid", OperatorType::kRecReLU}, {GELU, OperatorType::kRecReLU}, {TANH, OperatorType::kRecReLU}, - {PRELU, OperatorType::kRecReLU}, + {PRELU, OperatorType::kRecPReLU}, + + {L2_NORMALIZE, OperatorType::kRecElmWiseOp}, {TENSOR_ADD, OperatorType::kRecElmWiseOp}, {SUB, OperatorType::kRecElmWiseOp}, {MUL, OperatorType::kRecElmWiseOp}, {DIV, OperatorType::kRecElmWiseOp}, {REAL_DIV, OperatorType::kRecElmWiseOp}, - {SOFTMAX, OperatorType::kRecElmWiseOp}, - {LOG_SOFTMAX, OperatorType::kRecElmWiseOp}, - {SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, OperatorType::kRecElmWiseOp}, + {SOFTMAX, OperatorType::kRecSoftmax}, + {LOG_SOFTMAX, OperatorType::kRecSoftmax}, + 
{SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, OperatorType::kRecSoftmaxCrossEntropyWithLogits}, {SQRT, OperatorType::kRecElmWiseOp}, {NEG, OperatorType::kRecElmWiseOp}, {POW, OperatorType::kRecElmWiseOp}, @@ -79,6 +82,7 @@ const std::map DictOpType{ {"Abs", OperatorType::kRecElmWiseOp}, {"Acosh", OperatorType::kRecElmWiseOp}, {"AddN", OperatorType::kRecElmWiseOp}, + {"AccumulateNV2", OperatorType::kRecElmWiseOp}, {"Atan2", OperatorType::kRecElmWiseOp}, {"Erf", OperatorType::kRecElmWiseOp}, {"Floor", OperatorType::kRecElmWiseOp}, diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc index 186987c0dd..0f6e736d52 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc @@ -53,9 +53,8 @@ double GetWeights(const Graph::NodeType &node) { auto cost_ptr = std::make_shared(); return cost_ptr->GetMinCostIn(); - } else if (op.op_type == OperatorType::kRecReLU || op.op_type == OperatorType::kRecSoftmax || - op.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) { - // For Activation and Softmax + } else if (op.op_type == OperatorType::kRecReLU) { + // For Activation auto cost_ptr = std::make_shared(); return cost_ptr->GetMinCostIn(); @@ -69,22 +68,24 @@ double GetWeights(const Graph::NodeType &node) { auto cost_ptr = std::make_shared(); return cost_ptr->GetMinCostIn(); - } else if (op.op_type == OperatorType::kRecBatchNorm) { - // For BatchNorm - auto cost_ptr = std::make_shared(); - - return cost_ptr->GetMinCostIn(op); - } else if (op.op_type == OperatorType::kRecOneHot || op.op_type == OperatorType::kRecLog || - op.op_type == OperatorType::kRecExp || op.op_type == OperatorType::kRecAdd || - op.op_type == OperatorType::kRecSub || op.op_type == OperatorType::kRecMul || - op.op_type == OperatorType::kRecDiv || op.op_type == OperatorType::kRecSqueeze || - op.op_type == OperatorType::kRecCast) { 
+ } else if (op.op_type == OperatorType::kRecLog || op.op_type == OperatorType::kRecExp || + op.op_type == OperatorType::kRecAdd || op.op_type == OperatorType::kRecSub || + op.op_type == OperatorType::kRecMul || op.op_type == OperatorType::kRecDiv || + op.op_type == OperatorType::kRecSqueeze || op.op_type == OperatorType::kRecCast) { // For element-wise op auto cost_ptr = std::make_shared(); return cost_ptr->GetMinCostIn(); + } else if (op.op_type == OperatorType::kRecBatchNorm || op.op_type == OperatorType::kRecOneHot || + op.op_type == OperatorType::kRecPReLU || op.op_type == OperatorType::kRecSoftmax || + op.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits || + op.op_type == OperatorType::kRecSoftmaxCrossEntropyWithLogits) { + // For BatchParallel op + auto cost_ptr = std::make_shared(); + + return cost_ptr->GetMaxCostIn(); } else if (op.op_type == OperatorType::kRecUnkownType) { - // For unknown type + // For Unkown type return 0.0; } else { MS_LOG(EXCEPTION) << "Failure: GetOperatorWeight failed."; @@ -147,9 +148,8 @@ StrategyRec PartitionNode(const Graph::NodeType &node, auto cost_ptr = std::make_shared(); return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); - } else if (node.apply.op_type == OperatorType::kRecReLU || node.apply.op_type == OperatorType::kRecSoftmax || - node.apply.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) { - // For Softmax & Activation + } else if (node.apply.op_type == OperatorType::kRecReLU) { + // For Activation auto cost_ptr = std::make_shared(); return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); @@ -163,22 +163,26 @@ StrategyRec PartitionNode(const Graph::NodeType &node, auto cost_ptr = std::make_shared(); return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); - } else if (node.apply.op_type == OperatorType::kRecBatchNorm) { - // For BatchNorm - auto cost_ptr = std::make_shared(); - - return cost_ptr->GetOptimalStr(node, node_name_to_strategy, 
*graph); - } else if (node.apply.op_type == OperatorType::kRecOneHot || node.apply.op_type == OperatorType::kRecLog || - node.apply.op_type == OperatorType::kRecExp || node.apply.op_type == OperatorType::kRecAdd || - node.apply.op_type == OperatorType::kRecSub || node.apply.op_type == OperatorType::kRecMul || - node.apply.op_type == OperatorType::kRecDiv || node.apply.op_type == OperatorType::kRecSqueeze || - node.apply.op_type == OperatorType::kRecCast) { + } else if (node.apply.op_type == OperatorType::kRecLog || node.apply.op_type == OperatorType::kRecExp || + node.apply.op_type == OperatorType::kRecAdd || node.apply.op_type == OperatorType::kRecSub || + node.apply.op_type == OperatorType::kRecMul || node.apply.op_type == OperatorType::kRecDiv || + node.apply.op_type == OperatorType::kRecSqueeze || node.apply.op_type == OperatorType::kRecCast) { // For element-wise op auto cost_ptr = std::make_shared(); return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); + } else if (node.apply.op_type == OperatorType::kRecBatchNorm || node.apply.op_type == OperatorType::kRecOneHot || + node.apply.op_type == OperatorType::kRecPReLU || node.apply.op_type == kRecSoftmax || + node.apply.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) { + // For BatchParallel type + auto cost_ptr = std::make_shared(); + return cost_ptr->GetOptimalStr(node); + } else if (node.apply.op_type == OperatorType::kRecSoftmaxCrossEntropyWithLogits) { + // For SoftmaxCrossEntropyWithLogits type + auto cost_ptr = std::make_shared(); + return cost_ptr->GetOptimalStr(node); } else if (node.apply.op_type == OperatorType::kRecUnkownType) { - // For unknown type + // For Unkown type StrategyRec default_strategy; return default_strategy; } else { diff --git a/mindspore/ccsrc/parallel/context.cc b/mindspore/ccsrc/parallel/context.cc index de92bba507..8957dc842c 100644 --- a/mindspore/ccsrc/parallel/context.cc +++ b/mindspore/ccsrc/parallel/context.cc @@ -48,6 +48,7 @@ 
ParallelContext::ParallelContext() { Reset(); } void ParallelContext::Reset() { mirror_mean_ = false; + full_batch_ = false; cast_before_mirror_ = true; loss_repeated_mean_ = true; device_num_ = 1; @@ -75,6 +76,8 @@ void ParallelContext::set_global_rank(int32_t global_rank) { void ParallelContext::set_mirror_mean(bool mirror_mean) { mirror_mean_ = mirror_mean; } +void ParallelContext::set_full_batch(bool full_batch) { full_batch_ = full_batch; } + void ParallelContext::set_cast_before_mirror(bool cast_before_mirror) { cast_before_mirror_ = cast_before_mirror; } void ParallelContext::set_loss_repeated_mean(bool loss_repeated_mean) { loss_repeated_mean_ = loss_repeated_mean; } @@ -155,8 +158,8 @@ void ParallelParameterContextRestoreInNoTraining(const FuncGraphPtr &func_graph, MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(param_node); MS_EXCEPTION_IF_NULL(ptr); - if (!func_graph->has_flag(AUTO_PARALLEL) || (func_graph->flags().count(TRAINING) == 0) || - func_graph->flags()[TRAINING]) { + if (!func_graph->has_flag(AUTO_PARALLEL) || (func_graph->attrs().count(TRAINING) == 0) || + func_graph->has_flag(TRAINING)) { return; } diff --git a/mindspore/ccsrc/parallel/context.h b/mindspore/ccsrc/parallel/context.h index 32f9838d6c..efa528d179 100644 --- a/mindspore/ccsrc/parallel/context.h +++ b/mindspore/ccsrc/parallel/context.h @@ -55,6 +55,9 @@ class ParallelContext { void set_mirror_mean(bool mirror_mean); bool mirror_mean() const { return mirror_mean_; } + void set_full_batch(bool full_batch); + bool full_batch() const { return full_batch_; } + void set_cast_before_mirror(bool cast_before_mirror); bool cast_before_mirror() const { return cast_before_mirror_; } @@ -103,6 +106,7 @@ class ParallelContext { ParallelContext(); static std::shared_ptr inst_context_; bool mirror_mean_; + bool full_batch_; bool cast_before_mirror_; bool loss_repeated_mean_; int32_t device_num_; diff --git a/mindspore/ccsrc/parallel/dynamic_creator.h 
b/mindspore/ccsrc/parallel/dynamic_creator.h index 4fd5f34cf2..f8e1d62d0a 100644 --- a/mindspore/ccsrc/parallel/dynamic_creator.h +++ b/mindspore/ccsrc/parallel/dynamic_creator.h @@ -121,6 +121,7 @@ REGISTER(SparseSoftmaxCrossEntropyWithLogitsInfo); REGISTER(AssignSubInfo); REGISTER(ReLUInfo); REGISTER(GatherV2Info); +REGISTER(SparseGatherV2Info); REGISTER(SqrtInfo); REGISTER(SigmoidInfo); REGISTER(GetNextInfo); diff --git a/mindspore/ccsrc/parallel/graph_util/generate_graph.cc b/mindspore/ccsrc/parallel/graph_util/generate_graph.cc index f5f0fe85cb..7bd2fa808d 100644 --- a/mindspore/ccsrc/parallel/graph_util/generate_graph.cc +++ b/mindspore/ccsrc/parallel/graph_util/generate_graph.cc @@ -28,9 +28,14 @@ namespace parallel { std::string GetOpPythonPath(const OperatorName &op_name) { // almost all ops are defined in two main paths const std::string ops_module = OP_PATH; + const std::string inner_ops_module = INNER_OP_PATH; py::module mod = py::module::import(common::SafeCStr(ops_module)); + py::module inner_mod = py::module::import(common::SafeCStr(inner_ops_module)); if (!py::hasattr(mod, common::SafeCStr(op_name))) { - MS_LOG(EXCEPTION) << ops_module << " don't have op:" << op_name; + if (!py::hasattr(inner_mod, common::SafeCStr(op_name))) { + MS_LOG(EXCEPTION) << ops_module << " or " << inner_ops_module << " don't have op:" << op_name; + } + return inner_ops_module; } return ops_module; } diff --git a/mindspore/ccsrc/parallel/node_check.cc b/mindspore/ccsrc/parallel/node_check.cc index 7fecd307c7..6f30a8ec1c 100644 --- a/mindspore/ccsrc/parallel/node_check.cc +++ b/mindspore/ccsrc/parallel/node_check.cc @@ -75,7 +75,7 @@ const std::set BLACK_LIST = {TUPLE_GETITEM, DROPOUT_GEN_MASK, EMBED, CREATINSTANCE, - ZEROSLIKETENSOR, + ZEROSLIKE, ASSIGN, REF_TO_EMBED, STOP_GRADIENT}; diff --git a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.cc b/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.cc index 87b8d15cca..e88868c772 100644 --- 
a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.cc @@ -204,7 +204,7 @@ Status DropoutDoMaskInfo::InitForCostModel(const StrategyPtr &strategy) { PrimitivePtr GetDropoutGenMaskPrim(const CNodePtr &cnode) { MS_EXCEPTION_IF_NULL(cnode); - if (cnode->inputs().size() != DROPOUT_DO_MASK_CNODE_INPUT_SIZE) { + if (cnode->size() != DROPOUT_DO_MASK_CNODE_INPUT_SIZE) { MS_LOG(EXCEPTION) << "The size of dropout do mask cnode's inputs must be " << DROPOUT_DO_MASK_CNODE_INPUT_SIZE; } @@ -215,8 +215,7 @@ PrimitivePtr GetDropoutGenMaskPrim(const CNodePtr &cnode) { } auto dropout_gen_mask_cnode = dropout_gen_mask->cast(); - MS_EXCEPTION_IF_NULL(dropout_gen_mask_cnode); - if (dropout_gen_mask_cnode->inputs().size() != DROPOUT_GEN_MASK_CNODE_INPUT_SIZE) { + if (dropout_gen_mask_cnode->size() != DROPOUT_GEN_MASK_CNODE_INPUT_SIZE) { MS_LOG(EXCEPTION) << "The size of dropout gen mask cnode's inputs must be " << DROPOUT_GEN_MASK_CNODE_INPUT_SIZE; } if (!IsValueNode(dropout_gen_mask_cnode->input(0))) { @@ -233,11 +232,45 @@ PrimitivePtr GetDropoutGenMaskPrim(const CNodePtr &cnode) { return prim; } +void SetGenMaskShape(const CNodePtr &cnode, const Shape &input_slice_shape) { + MS_EXCEPTION_IF_NULL(cnode); + if (cnode->size() != DROPOUT_DO_MASK_CNODE_INPUT_SIZE) { + MS_LOG(EXCEPTION) << "The size of dropout do mask cnode's inputs must be " << DROPOUT_DO_MASK_CNODE_INPUT_SIZE; + } + + AnfNodePtr dropout_gen_mask = cnode->input(DROPOUT_GEN_MASK_INDEX); + MS_EXCEPTION_IF_NULL(dropout_gen_mask); + if (!dropout_gen_mask->isa()) { + MS_LOG(EXCEPTION) << "The dropout do mask cnode's input[" << DROPOUT_GEN_MASK_INDEX << "] must be a cnode."; + } + + auto dropout_gen_mask_cnode = dropout_gen_mask->cast(); + if (dropout_gen_mask_cnode->size() != DROPOUT_GEN_MASK_CNODE_INPUT_SIZE) { + MS_LOG(EXCEPTION) << "The size of dropout gen mask cnode's inputs must be " << DROPOUT_GEN_MASK_CNODE_INPUT_SIZE; + } + + if 
(!IsValueNode(dropout_gen_mask_cnode->input(1))) { + MS_LOG(EXCEPTION) << "The input[1] of dropout gen mask cnode is not ValueTuple."; + } + + FuncGraphPtr func_graph = cnode->func_graph(); + MS_EXCEPTION_IF_NULL(func_graph); + FuncGraphManagerPtr manager = func_graph->manager(); + if (manager == nullptr) { + MS_LOG(EXCEPTION) << "Failure: AddNode error since manager is nullptr."; + } + + ValuePtr new_shape = MakeValue(input_slice_shape); + AnfNodePtr val = NewValueNode(new_shape); + (void)manager->Replace(dropout_gen_mask_cnode->input(1), val); +} + // DropoutDoMask needs to be used together with DropoutGenMask. Only the first input tensor of DropoutGenMask is // split. Find the DropoutGenMask node in the anf graph according to DropoutDoMask node, and modify the input shape // of DropoutGenMask according to the strategy of DropoutDoMask. When the DropoutDoMask performs repeated calculation // and both seeds of DropoutGenMask are 0, two new seeds are automatically generated for DropoutGenMask. 
-Operator DropoutDoMaskInfo::GetDropoutGenMaskReplaceOp(const CNodePtr &cnode) { +std::vector DropoutDoMaskInfo::GetDropoutGenMaskReplaceOp(const CNodePtr &cnode) { + std::vector replace_ops; MS_EXCEPTION_IF_NULL(cnode); PrimitivePtr prim = GetDropoutGenMaskPrim(cnode); MS_EXCEPTION_IF_NULL(prim); @@ -260,15 +293,20 @@ Operator DropoutDoMaskInfo::GetDropoutGenMaskReplaceOp(const CNodePtr &cnode) { if ((attr.find(SEED0) == attr.end()) || (attr.find(SEED1) == attr.end())) { MS_LOG(EXCEPTION) << "The attrs of dropout gen mask must be have seed0 and seed1"; } + + Shape input_slice_shape = inputs_tensor_info_[0].slice_shape(); int32_t seed_0 = GetValue(attr[SEED0]); int32_t seed_1 = GetValue(attr[SEED1]); if ((seed_0 == 0) && (seed_1 == 0) && (repeated_calc_num_ > 1)) { seed_0 = SEED_NUM; seed_1 = SEED_NUM; SEED_NUM++; + } else { + SetGenMaskShape(cnode, input_slice_shape); + MS_LOG(DEBUG) << "The input slice shape droupout is " << ShapeToString(input_slice_shape); + return replace_ops; } - Shape input_slice_shape = inputs_tensor_info_[0].slice_shape(); ValuePtr new_shape = MakeValue(input_slice_shape); Attr attr_0 = std::make_pair(SEED0, MakeValue(seed_0)); Attr attr_1 = std::make_pair(SEED1, MakeValue(seed_1)); @@ -278,7 +316,8 @@ Operator DropoutDoMaskInfo::GetDropoutGenMaskReplaceOp(const CNodePtr &cnode) { OperatorParams params = {std::make_pair(param_0, 1), std::make_pair(param_1, 2)}; OperatorArgs args = std::make_pair(attrs, params); Operator replace_op = {std::make_pair(DROPOUT_GEN_MASK, args)}; - return replace_op; + replace_ops.push_back(replace_op); + return replace_ops; } } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h b/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h index c0d112f52d..c51a0a9513 100644 --- a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h +++ b/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h @@ -41,7 +41,7 @@ class DropoutDoMaskInfo : public 
OperatorInfo { Status SetCostUnderStrategy(const StrategyPtr &strategy) override; Status InitForCostModel(const StrategyPtr &strategy) override; std::shared_ptr>> GenerateBatchStrategies() override; - Operator GetDropoutGenMaskReplaceOp(const CNodePtr &cnode); + std::vector GetDropoutGenMaskReplaceOp(const CNodePtr &cnode); protected: Status CheckStrategy(const StrategyPtr &strategy) override; diff --git a/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.cc b/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.cc index e2d01fb779..7a16aeafcb 100644 --- a/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.cc @@ -44,6 +44,24 @@ Status GatherV2PInfo::GetAttrs() { } axis_ = axis; + // get target + auto target_iter = attrs_.find(TARGET); + if (target_iter != attrs_.end()) { + MS_EXCEPTION_IF_NULL(target_iter->second); + if (target_iter->second->isa()) { + target_ = target_iter->second->cast()->value(); + } else { + MS_LOG(ERROR) << name_ << " : The value of target is not a string."; + return FAILED; + } + } + + // target=CPU, axis must be 0 + if (target_ == "CPU" && axis_ != 0) { + MS_LOG(ERROR) << name_ << ": target is CPU, axis must be 0, but got " << axis_; + return FAILED; + } + return SUCCESS; } @@ -61,8 +79,8 @@ Status GatherV2PInfo::CheckStrategy(const StrategyPtr &strategy) { auto param_shape = inputs_shape_.at(0); auto param_strategy = strategy->GetInputDim().at(0); auto slice_shape = param_shape.at(param_shape.size() - 1) / param_strategy.at(param_strategy.size() - 1); - if (slice_shape % 8 != 0) { - MS_LOG(ERROR) << name_ << ": Last dim of param slice shape need 32Byte aligned."; + if (slice_shape % 8 != 0 && slice_shape != 1) { + MS_LOG(DEBUG) << name_ << ": Last dim of param slice shape need 32Byte aligned."; return FAILED; } @@ -74,20 +92,20 @@ Status GatherV2PInfo::CheckStrategy(const StrategyPtr &strategy) { // don't support scalar index if (inputs_shape_.at(1).size() == 0) { - MS_LOG(ERROR) << 
name_ << ": Don't support scalar index."; + MS_LOG(DEBUG) << name_ << ": Don't support scalar index."; return FAILED; } // axis=0, index_shape(0)%param_strategy(0) must be 0 Shape index_shape = inputs_shape_.at(1); if ((axis_ == 0) && (index_shape.at(0) % param_strategy.at(0) != 0)) { - MS_LOG(ERROR) << name_ << ": index_shape(0) can't be divided by param_strategy(0)."; + MS_LOG(DEBUG) << name_ << ": index_shape(0) can't be divided by param_strategy(0)."; return FAILED; } // axis != 0, param_shape(0)%(param_strategy(0)*param_strategy(axis)) must be 0 if (axis_ != 0 && param_shape.at(0) % (param_strategy.at(0) * param_strategy.at(IntToSize(axis_))) != 0) { - MS_LOG(ERROR) << name_ << ": index_shape(0) can't be divided by (param_strategy(0)*param_strategy(axis))."; + MS_LOG(DEBUG) << name_ << ": index_shape(0) can't be divided by (param_strategy(0)*param_strategy(axis))."; return FAILED; } @@ -95,7 +113,7 @@ Status GatherV2PInfo::CheckStrategy(const StrategyPtr &strategy) { auto index_strategy = strategy->GetInputDim().at(1); auto product_i = std::accumulate(index_strategy.begin(), index_strategy.end(), 1, std::multiplies()); if ((param_strategy.at(IntToSize(axis_)) != 1) && (product_i != 1)) { - MS_LOG(ERROR) << name_ << ": param is splited at dim (axis)" << axis_ << " ,index can't be splited."; + MS_LOG(DEBUG) << name_ << ": param is splited at dim (axis)" << axis_ << " ,index can't be splited."; return FAILED; } @@ -104,7 +122,7 @@ Status GatherV2PInfo::CheckStrategy(const StrategyPtr &strategy) { size_t dev_num = g_device_manager->GetDeviceListByStageId(0).size(); auto product_p = std::accumulate(param_strategy.begin(), param_strategy.end(), 1, std::multiplies()); if (IntToSize(product_p) != dev_num && param_strategy.at(IntToSize(axis_)) != 1) { - MS_LOG(ERROR) << name_ << ": Invalid strategy. Don't support repeated calc."; + MS_LOG(DEBUG) << name_ << ": Invalid strategy. 
Don't support repeated calc."; return FAILED; } @@ -267,6 +285,11 @@ Status GatherV2PInfo::InferBias() { int32_t rank = g_device_manager->global_rank(); auto input_shape = inputs_shape_.at(0); auto params_strategy = strategy_->GetInputDim().at(0); + // axis don't split + if (params_strategy.at(axis_) == 1) { + bias_ = 0; + return SUCCESS; + } // params_size=1, axis=0 if ((input_shape.size() == 1) && (axis_ == 0)) { slice_size_ = input_shape.at(0) / params_strategy.at(0); @@ -290,18 +313,94 @@ Status GatherV2PInfo::InferBias() { } Status GatherV2PInfo::InferGroup() { - std::vector group_list; auto param_strategy = strategy_->GetInputDim().at(0); size_t dim = IntToSize(axis_); if (param_strategy.at(IntToSize(axis_)) != 1 && inputs_shape_.at(0).size() == 2) { dim = (axis_ + 1) % 2; } - if (CreateGroupByDim(dim, &group_list) != SUCCESS) { + CheckGlobalDeviceManager(); + MS_EXCEPTION_IF_NULL(g_device_manager); + int32_t rank = g_device_manager->global_rank(); + RankList dev_list = g_device_manager->GetDeviceListByStageId(0); + DeviceMatrix dev_matrix(rank, dev_list, dev_matrix_shape_); + RankList group_devices; + if (dev_matrix.GetDevicesAlongDim(SizeToUint(dim), &group_devices) != SUCCESS) { MS_LOG(ERROR) << name_ << ": Create group failed."; return FAILED; } + if (group_devices.size() == 1) { + MS_LOG(INFO) << "the group is empty"; + return SUCCESS; + } + + group_ = g_device_manager->CreateGroup(group_devices); + return SUCCESS; +} + +std::vector GetRankFromGroup(const Group &group) { + std::vector rank_list; + auto device_list = group.GetDevicesList(); + for (auto &device : device_list) { + rank_list.insert(rank_list.end(), device.rank() % 8); + } + return rank_list; +} + +Status GatherV2PInfo::InferForwardCommunication() { + forward_op_.clear(); + if (target_ != CPU) { + return SUCCESS; + } + auto param_strategy = strategy_->GetInputDim().at(0); + // don't split axis, no need forward communication + if (param_strategy.at(IntToSize(axis_)) == 1) { + return SUCCESS; + 
} + // split axis + OperatorName operator_name; + if (InferGroup() != SUCCESS) { + MS_LOG(ERROR) << name_ << ": Infer Group failed."; + return FAILED; + } + auto group_size = group_.GetDevNum(); + Attr attr_group; + if (host_reduce_scatter_) { + // group size <= 8 + std::vector rank_list; + if (group_size <= 8) { + reduce_scatter_flag_ = false; + operator_name = HOST_REDUCE_SCATTER; + rank_list = GetRankFromGroup(group_); + attr_group = std::make_pair(GROUP, MakeValue(rank_list)); + } else { + // group size > 8, don't support host reduce_scatter + reduce_scatter_flag_ = true; + split_num_ = SizeToInt(group_size / 8); + CheckGlobalDeviceManager(); + operator_name = REDUCE_SCATTER; + int32_t rank = g_device_manager->global_rank(); + size_t repeat = group_size / 8; + for (size_t i = 0; i < repeat; ++i) { + rank_list.push_back(rank + SizeToInt(i * 8)); + } + Group g = g_device_manager->CreateGroup(rank_list); + attr_group = std::make_pair(GROUP, MakeValue(g.name())); + } + } else { + operator_name = REDUCE_SCATTER; + if (InferGroup() != SUCCESS) { + MS_LOG(ERROR) << name_ << ": Infer Group failed."; + return FAILED; + } + attr_group = std::make_pair(GROUP, MakeValue(group_.name())); + } + Attr attr_op = std::make_pair(OP, MakeValue(REDUCE_OP_SUM)); + OperatorAttrs attrs = {attr_op, attr_group}; + OperatorParams params; + OperatorArgs args = std::make_pair(attrs, params); + Operator op = std::make_pair(operator_name, args); - group_ = group_list.at(0); + forward_op_.push_back(op); return SUCCESS; } @@ -320,7 +419,7 @@ Status GatherV2PInfo::ComputeReplaceGraph(const CNodePtr &cnode) { auto minimum = gen_g.PushBack({gen_g.NewOpInst(MINIMUM), relu, CreateInt32Tensor(slice_size_ - 1)}); auto equal = gen_g.PushBack({gen_g.NewOpInst(EQUAL), sub, minimum}); auto gather_v2 = - gen_g.PushBack({gen_g.NewOpInst(GATHERV2), gen_g.virtual_input_node(), minimum, CreatInt32Imm(axis_)}); + gen_g.PushBack({gen_g.NewOpInst(replace_op_name_), gen_g.virtual_input_node(), minimum, 
CreatInt32Imm(axis_)}); auto dtype = gen_g.PushBack({gen_g.NewOpInst(DTYPE), gather_v2}); auto cast = gen_g.PushBack({gen_g.NewOpInst(CAST), equal, dtype}); auto expand_dims = gen_g.PushBack({gen_g.NewOpInst(EXPAND_DIMS), cast, CreatInt32Imm(axis_ - 1)}); @@ -346,6 +445,10 @@ Status GatherV2PInfo::ComputeReplaceGraph(const CNodePtr &cnode) { ReplaceGraphPtr GatherV2PInfo::replace_graph(const CNodePtr &cnode) { auto param_strategy = strategy_->GetInputDim().at(0); + // target_ == CPU, no need to raplace graph + if (target_ == CPU) { + return nullptr; + } if (param_strategy.at(IntToSize(axis_)) != 1 && ComputeReplaceGraph(cnode) != SUCCESS) { MS_LOG(ERROR) << name_ << ": ComputeReplaceGraph failed."; return nullptr; @@ -353,11 +456,34 @@ ReplaceGraphPtr GatherV2PInfo::replace_graph(const CNodePtr &cnode) { return replace_graph_; } +Status GatherV2PInfo::ComputeReplaceOp() { + if (InferBias() != SUCCESS) { + MS_LOG(ERROR) << name_ << ": Infer offset failed."; + return FAILED; + } + OperatorName op_name = EMBEDDING_LOOKUP; + OperatorAttrs attrs; + Attr param_offset = std::make_pair("offset", MakeValue(bias_)); + Attr param_flag = std::make_pair("reduce_scatter_flag", MakeValue(reduce_scatter_flag_)); + Attr param_split_num = std::make_pair("split_num", MakeValue(split_num_)); + OperatorParams params = {std::make_pair(param_offset, 3), std::make_pair(param_flag, 4), + std::make_pair(param_split_num, 5)}; + OperatorArgs args = std::make_pair(attrs, params); + Operator op = std::make_pair(op_name, args); + replace_op_.push_back(op); + + return SUCCESS; +} + Status GatherV2PInfo::Init(const StrategyPtr &strategy) { if (InitWithAutoRepeatCalc(strategy) != SUCCESS) { MS_LOG(ERROR) << name_ << ": Init failed."; return FAILED; } + // only target_ == CPU, we need to replace op + if (target_ == CPU && ComputeReplaceOp() != SUCCESS) { + MS_LOG(ERROR) << name_ << ": ComputeReplaceOp failed."; + } MS_LOG(INFO) << name_ << ": Init success."; return SUCCESS; } diff --git 
a/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.h b/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.h index a87b9838c9..83868606d1 100644 --- a/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.h +++ b/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.h @@ -49,7 +49,7 @@ class GatherV2PInfo : public OperatorInfo { protected: Status CheckStrategy(const StrategyPtr &strategy) override; Status InferMirrorOps() override; - Status InferForwardCommunication() override { return SUCCESS; } + Status InferForwardCommunication() override; Status InferTensorInfo() override; Status InferDevMatrixShape() override; Status InferTensorMap() override; @@ -57,14 +57,31 @@ class GatherV2PInfo : public OperatorInfo { private: Status ComputeReplaceGraph(const CNodePtr &cnode); + Status ComputeReplaceOp(); Status InferBias(); Status InferGroup(); int32_t axis_; + std::string target_; + std::string replace_op_name_ = GATHERV2; int32_t bias_; int32_t slice_size_; Shape out_dev_matrix_shape_; Group group_; + bool reduce_scatter_flag_ = false; + int32_t split_num_ = 1; + bool host_reduce_scatter_ = false; +}; + +class SparseGatherV2Info : public GatherV2PInfo { + public: + SparseGatherV2Info(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, + const PrimitiveAttrs &attrs) + : GatherV2PInfo(name, inputs_shape, outputs_shape, attrs) {} + ~SparseGatherV2Info() override = default; + + private: + std::string replace_op_name_ = SPARSE_GATHERV2; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/ops_info/get_next_info.cc b/mindspore/ccsrc/parallel/ops_info/get_next_info.cc index 29d519fda8..0fb49364f0 100644 --- a/mindspore/ccsrc/parallel/ops_info/get_next_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/get_next_info.cc @@ -24,15 +24,23 @@ #include "ir/value.h" #include "parallel/device_matrix.h" #include "parallel/strategy.h" +#include "parallel/context.h" #include "parallel/tensor_layout/tensor_redistribution.h" 
namespace mindspore { namespace parallel { Status GetNextInfo::InferTensorMap() { + MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); + bool full_batch = ParallelContext::GetInstance()->full_batch(); + for (auto shp : shapes_) { TensorMap out_tensor_map; for (size_t i = 0; i < shp.size(); ++i) { - out_tensor_map.push_back(SizeToInt(dev_matrix_shape_.size() - i - 1)); + if (full_batch) { + out_tensor_map.push_back(MAP_NONE); + } else { + out_tensor_map.push_back(SizeToInt(dev_matrix_shape_.size() - i - 1)); + } } outputs_tensor_map_.push_back(out_tensor_map); } @@ -190,6 +198,9 @@ Status GetNextInfo::GetAttrs() { } Status GetNextInfo::InferReplaceOps(const StrategyPtr &) { + MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); + bool full_batch = ParallelContext::GetInstance()->full_batch(); + Shapes out_shapes = outputs_shape_; for (size_t i = 0; i < out_shapes.size(); ++i) { if (dev_num_ <= 0) { @@ -200,7 +211,9 @@ Status GetNextInfo::InferReplaceOps(const StrategyPtr &) { MS_LOG(ERROR) << name_ << " : batch num cannot floor div dev num."; return FAILED; } - out_shapes[i][0] = out_shapes[i][0] / dev_num_; + if (!full_batch) { + out_shapes[i][0] = out_shapes[i][0] / dev_num_; + } } ValuePtr new_shapes = MakeValue(out_shapes); Attr attr_types = std::make_pair(TYPES, attrs_[TYPES]); diff --git a/mindspore/ccsrc/parallel/ops_info/ops_utils.h b/mindspore/ccsrc/parallel/ops_info/ops_utils.h index 4da54a358d..4b8f61bb2e 100644 --- a/mindspore/ccsrc/parallel/ops_info/ops_utils.h +++ b/mindspore/ccsrc/parallel/ops_info/ops_utils.h @@ -55,6 +55,7 @@ constexpr char REDUCE_OP_SUM[] = "sum"; constexpr char REDUCE_OP_MAX[] = "max"; constexpr char REDUCE_OP_MIN[] = "min"; constexpr char OP_PATH[] = "mindspore.ops.operations"; +constexpr char INNER_OP_PATH[] = "mindspore.ops.operations._inner_ops"; constexpr char GET_OP_FUNCTION_PATH[] = "mindspore.parallel._utils"; constexpr char GET_OP_FUNCTION[] = "_get_python_op"; constexpr char KEEP_DIMS[] = "keep_dims"; @@ -72,10 
+73,12 @@ constexpr char OP[] = "op"; constexpr char IDENTITY_INFO[] = "identity_info"; constexpr char DIVISOR[] = "divisor"; constexpr char NONE[] = "None"; -constexpr char DEPEND[] = "depend"; +constexpr char DEPEND[] = "Depend"; constexpr char BATCH_PARALLEL[] = "BatchParallel"; constexpr char ACTIVATION_TYPE[] = "activation_type"; +constexpr char TARGET[] = "primitive_target"; +constexpr char CPU[] = "CPU"; constexpr char TRANSPOSE_A[] = "transpose_a"; constexpr char TRANSPOSE_B[] = "transpose_b"; constexpr char SHAPE[] = "shape"; @@ -127,6 +130,7 @@ constexpr char FORWARD_OP[] = "forward_op"; constexpr char REDISTRIBUTION_OP[] = "redistribution_op"; constexpr char DARA_PARALLEL[] = "data_parallel"; constexpr char FORWARD_REDUCE_SCATTER[] = "forward_reduce_scatter"; +constexpr char OPTIMIZER_SUB_STRING[] = "optimizer"; // Operator constexpr char VIRTUAL_DIV[] = "_VirtualDiv"; @@ -141,6 +145,8 @@ constexpr char MIRROR_OPERATOR[] = "_MirrorOperator"; constexpr char STRIDED_SLICE[] = "StridedSlice"; constexpr char ALL_GATHER[] = "AllGather"; constexpr char REDUCE_SCATTER[] = "ReduceScatter"; +constexpr char HOST_REDUCE_SCATTER[] = "HostReduceScatter"; +constexpr char EMBEDDING_LOOKUP[] = "EmbeddingLookup"; constexpr char CONCAT[] = "Concat"; constexpr char SOFTMAX_CROSS_ENTROPY_WITH_LOGITS[] = "SoftmaxCrossEntropyWithLogits"; constexpr char SIGMOID_CROSS_ENTROPY_WITH_LOGITS[] = "SigmoidCrossEntropyWithLogits"; @@ -201,6 +207,7 @@ constexpr char EQUAL[] = "Equal"; constexpr char NOT_EQUAL[] = "NotEqual"; constexpr char LOGICALNOT[] = "LogicalNot"; constexpr char GATHERV2[] = "GatherV2"; +constexpr char SPARSE_GATHERV2[] = "SparseGatherV2"; constexpr char STRIDEDSLICE[] = "StridedSlice"; constexpr char BROADCAST[] = "Broadcast"; constexpr char SQRT[] = "Sqrt"; @@ -211,6 +218,16 @@ constexpr char NEG[] = "Neg"; constexpr char BATCH_MATMUL[] = "BatchMatMul"; constexpr char EXPAND_DIMS[] = "ExpandDims"; constexpr char SQUARE[] = "Square"; +constexpr char BATCHMATMUL[] 
= "BatchMatMul"; +constexpr char TOPK[] = "TopK"; +constexpr char IN_TOPK[] = "InTopK"; +constexpr char PACK[] = "Pack"; +constexpr char GATHER_ND[] = "GatherNd"; +constexpr char UNSORTEF_SEGMENT_MIND[] = "UnsortedSegmentMinD"; +constexpr char UNSORTEF_SEGMENT_PRODD[] = "UnsortedSegmentProdD"; +constexpr char DEPTHWISE_CONV2D_NATIVE[] = "DepthwiseConv2dNative"; +constexpr char DEPTHWISE_CONV2D[] = "DepthwiseConv2D"; +constexpr char ADD[] = "Add"; // Parallel don't care constexpr char TUPLE_GETITEM[] = "tuple_getitem"; @@ -263,7 +280,7 @@ constexpr char COL2IMV1[] = "col2im_v1"; constexpr char RESOLVE[] = "resolve"; constexpr char EMBED[] = "embed"; constexpr char CREATINSTANCE[] = "create_instance"; -constexpr char ZEROSLIKETENSOR[] = "zeros_like_tensor"; +constexpr char ZEROSLIKE[] = "ZerosLike"; constexpr char REF_TO_EMBED[] = "RefToEmbed"; constexpr char STOP_GRADIENT[] = "stop_gradient"; diff --git a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.cc b/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.cc index 4b695ba62d..ce8b04d802 100644 --- a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.cc @@ -23,6 +23,7 @@ #include "parallel/device_manager.h" #include "parallel/device_matrix.h" #include "parallel/step_parallel.h" +#include "parallel/context.h" #include "utils/log_adapter.h" namespace mindspore { @@ -93,59 +94,21 @@ Status VirtualDatasetInfo::InferDevMatrixShape() { return SUCCESS; } -Status VirtualDatasetInfo::InferMirrorOps() { - mirror_ops_.clear(); - - int32_t stage = strategy_->GetInputStage(); - CheckGlobalDeviceManager(); - RankList dev_list = g_device_manager->GetDeviceListByStageId(stage); - if (dev_list.empty()) { - MS_LOG(ERROR) << name_ << ": The current stage is empty!"; - return Status::FAILED; - } - if (dev_list.size() == 1) { - MS_LOG(INFO) << name_ << ": No need mirror ops."; - return Status::SUCCESS; - } - - OperatorName operator_name = BROADCAST; - 
ValuePtr attr0_value = MakeValue(dev_list.front()); - std::vector group_list; - if (CreateGroupByDim(dev_matrix_shape_.size() - 1, &group_list) != SUCCESS) { - MS_LOG(ERROR) << name_ << ": Infer mirror ops, create group failed."; - return FAILED; - } else if (group_list.empty()) { - MS_LOG(INFO) << name_ << ": No need mirror ops."; - return SUCCESS; - } - std::string group = group_list[0].name(); - ValuePtr attr1_value = MakeValue(group); - - Attr attr0 = std::make_pair(SRC, attr0_value); - Attr attr1 = std::make_pair(GROUP, attr1_value); - - OperatorAttrs operator_attrs = {attr0, attr1}; - - OperatorParams operator_param; - OperatorArgs operator_args = std::make_pair(operator_attrs, operator_param); - - Operator op = std::make_pair(operator_name, operator_args); - OperatorVector op_vector = {op}; - - size_t size = inputs_shape_.size(); - for (size_t i = 0; i < size; ++i) { - mirror_ops_.push_back(op_vector); - } - mirror_ops_.clear(); - return SUCCESS; -} +Status VirtualDatasetInfo::InferMirrorOps() { return SUCCESS; } Status VirtualDatasetInfo::InferForwardCommunication() { return SUCCESS; } Status VirtualDatasetInfo::InferTensorMap() { + MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); + bool full_batch = ParallelContext::GetInstance()->full_batch(); + for (size_t i = 0; i < strategy_->GetInputNumber(); i++) { std::vector tensor_map_index; - tensor_map_index.push_back((int32_t)(LAST_INDEX(SizeToUint(dev_matrix_shape_.size())))); + if (full_batch) { + tensor_map_index.push_back(MAP_NONE); + } else { + tensor_map_index.push_back((int32_t)(LAST_INDEX(SizeToUint(dev_matrix_shape_.size())))); + } for (size_t j = 1; j < strategy_->GetInputDim()[i].size(); ++j) { tensor_map_index.push_back(MAP_NONE); } @@ -213,6 +176,10 @@ Status VirtualDatasetInfo::SetCostUnderStrategy(const StrategyPtr &strategy) { } Status VirtualDatasetInfo::GenerateStrategies(int32_t stage_id) { + MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); + bool full_batch = 
ParallelContext::GetInstance()->full_batch(); + size_t total_dev_num; + if (GetAttrs() != SUCCESS) { MS_LOG(ERROR) << name_ << ": GetAttrs failed"; return FAILED; @@ -220,7 +187,11 @@ Status VirtualDatasetInfo::GenerateStrategies(int32_t stage_id) { CheckGlobalDeviceManager(); is_auto_parallel_ = true; - size_t total_dev_num = g_device_manager->GetDeviceListByStageId(stage_id).size(); + if (full_batch) { + total_dev_num = 1; + } else { + total_dev_num = g_device_manager->GetDeviceListByStageId(stage_id).size(); + } StrategyPtr sp; std::vector strategy; for (auto &shape : inputs_shape_) { @@ -232,10 +203,18 @@ Status VirtualDatasetInfo::GenerateStrategies(int32_t stage_id) { sp = std::make_shared(stage_id, strategy); if (SetCostUnderStrategy(sp) == SUCCESS) { - MS_LOG(INFO) << name_ << ": Successfully generated batch-parallel-strategy."; + if (full_batch) { + MS_LOG(INFO) << name_ << ": Successfully generated full-batch-parallel-strategy."; + } else { + MS_LOG(INFO) << name_ << ": Successfully generated batch-parallel-strategy."; + } PrintStrategy(sp); } else { - MS_LOG(ERROR) << name_ << ": Generating batch-parallel-strategy failed."; + if (full_batch) { + MS_LOG(ERROR) << name_ << ": Generating full-batch-parallel-strategy failed."; + } else { + MS_LOG(ERROR) << name_ << ": Generating batch-parallel-strategy failed."; + } return FAILED; } return SUCCESS; diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc index fe77b6027b..894177df8d 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc @@ -107,7 +107,7 @@ bool StepAutoParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &) { time += static_cast(end_time.tv_usec - start_time.tv_usec); MS_LOG(INFO) << "Now leaving step auto parallel, used time: " << time << " us"; - root->flags()[AUTO_PARALLEL_RUN_ONCE_ONLY] = true; + root->set_flag(AUTO_PARALLEL_RUN_ONCE_ONLY, true); return changes; } @@ -261,7 
+261,7 @@ bool IsSplittableOperator(const std::string &op_name) { REDUCE_MAX, REDUCE_MIN, ARGMAXWITHVALUE, ARGMINWITHVALUE, REDUCE_SUM, CONV2D, FUSE_BATCH_NORM, POOLING, MAX_POOL_WITH_ARGMAX, SIMPLE_MEAN, FLATTEN, BATCH_NORM, LAYER_NORM, BIAS_ADD, ASSIGN_SUB, COS, ACOS, EXP, LOG, REDUCE_MEAN, REAL_DIV, SIGMOID, POW, MAXIMUM, MINIMUM, EQUAL, NOT_EQUAL, LOGICALNOT, GATHERV2, SQRT, - STRIDEDSLICE, GET_NEXT, CAST, NEG, SQUARE, BATCH_MATMUL, EXPAND_DIMS, SQUEEZE, + STRIDEDSLICE, GET_NEXT, CAST, NEG, SQUARE, BATCH_MATMUL, EXPAND_DIMS, SQUEEZE, SPARSE_GATHERV2, SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, SIGMOID_CROSS_ENTROPY_WITH_LOGITS, SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS}; // clang-format on @@ -283,6 +283,10 @@ bool IsAutoParallelCareNode(const CNodePtr &cnode) { if (bool_result) { MS_LOG(EXCEPTION) << "Should implementing OperatorInfo for: " << prim->name(); } else if (prim->name() == CAST) { + if (cnode->fullname_with_scope().find(OPTIMIZER_SUB_STRING) != std::string::npos) { + // Do not care CASTs from optimizer + return false; + } return true; } return IsParallelCareNode(cnode) && IsSplittableOperator(prim->name()); @@ -409,6 +413,13 @@ Status ConstructCostGraphNodesByUniqueId(const std::vector &all_node } ValueNodePtr prim_anf_node = cnode->input(0)->cast(); if (!IsAutoParallelCareNode(cnode)) { + // Needed by rec_parser + if (ParallelContext::GetInstance()->strategy_search_mode() == RECURSIVE_PROGRAMMING) { + auto prev_cnode = GetInternalOperatorInfo(cnode, prim_anf_node); + if (prev_cnode != nullptr) { + entire_costgraph->add_tuple_getitem(std::make_pair(cnode->UniqueId(), prev_cnode->UniqueId())); + } + } continue; } PrimitivePtr prim = GetValueNode(prim_anf_node); @@ -467,6 +478,13 @@ Status ConstructCostGraphNodesByUniqueIdTC(const std::vector &all_no } ValueNodePtr prim_anf_node = cnode->input(0)->cast(); if (!IsAutoParallelCareNode(cnode)) { + // Needed by rec_parser + if (ParallelContext::GetInstance()->strategy_search_mode() == RECURSIVE_PROGRAMMING) { + 
auto prev_cnode = GetInternalOperatorInfo(cnode, prim_anf_node); + if (prev_cnode != nullptr) { + entire_costgraph->add_tuple_getitem(std::make_pair(cnode->UniqueId(), prev_cnode->UniqueId())); + } + } continue; } PrimitivePtr prim = GetValueNode(prim_anf_node); @@ -1090,14 +1108,44 @@ std::vector> RecInputTensorNames(const std::map(prim_anf_node); + if (prim->name() == TUPLE_GETITEM || prim->name() == DEPEND) { + auto prev_cnode = cnode->input(1)->cast(); + if (prev_cnode == nullptr || !IsValueNode(prev_cnode->input(0))) { + return nullptr; + } + auto prev_prim = prev_cnode->input(0)->cast()->value()->cast(); + while (prev_prim->name() == TUPLE_GETITEM || prev_prim->name() == DEPEND) { + prev_cnode = prev_cnode->input(1)->cast(); + if (prev_cnode == nullptr || !IsValueNode(prev_cnode->input(0))) { + return nullptr; + } + prev_prim = prev_cnode->input(0)->cast()->value()->cast(); + } + return prev_cnode; + } + return nullptr; +} + Status ParallelStrategyRecSearch(const std::vector &all_nodes, const FuncGraphPtr &root) { - if (ConstructCostGraphNodesByUniqueId(all_nodes, root) == SUCCESS) { - MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are " << entire_costgraph->GetOperators().size() - << " operators."; + if (CostModelContext::GetInstance()->is_multi_subgraphs()) { + if (ConstructCostGraphNodesByUniqueIdTC(all_nodes, root) == SUCCESS) { + MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are " + << entire_costgraph->GetOperators().size() << " operators."; + } else { + MS_LOG(EXCEPTION) << "Constructing nodes for cost graph failed."; + } } else { - MS_LOG(ERROR) << "Constructing nodes for cost graph failed."; - return FAILED; + if (ConstructCostGraphNodesByUniqueId(all_nodes, root) == SUCCESS) { + MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. 
There are " + << entire_costgraph->GetOperators().size() << " operators."; + } else { + MS_LOG(EXCEPTION) << "Constructing nodes for cost graph failed."; + } } + ReshapeCostCompute(all_nodes); + auto ops = entire_costgraph->GetOperators(); std::vector> input_tensor_names = entire_costgraph->get_inputs_tensor_name_list(); auto tuple_getitem_list = entire_costgraph->get_tuple_getitem_list(); diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.h b/mindspore/ccsrc/parallel/step_auto_parallel.h index fff9dfa4c3..c923e5770f 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.h +++ b/mindspore/ccsrc/parallel/step_auto_parallel.h @@ -57,6 +57,8 @@ Status ParallelStrategyRecSearch(const std::vector &all_nodes, const std::vector> RecInputTensorNames(const std::map::iterator &it, std::vector> input_tensor_names); + +CNodePtr GetInternalOperatorInfo(const CNodePtr &cnode, const ValueNodePtr &prim_anf_node); } // namespace parallel } // namespace mindspore #endif // PARALLEL_STEP_AUTO_PARALLEL_H_ diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc index fd09b5e0b5..fc7b48d267 100644 --- a/mindspore/ccsrc/parallel/step_parallel.cc +++ b/mindspore/ccsrc/parallel/step_parallel.cc @@ -534,6 +534,10 @@ std::vector ReplaceOpInput(const Operator &replace_op, const std::st MS_LOG(EXCEPTION) << "Failure: " << node->ToString() << " size is smaller than 2"; } std::vector replace_input = {NewValueNode(pyop_instance), node->input(1)}; + auto prim = GetValueNode(node->input(0)); + if (prim->name() == GATHERV2 || prim->name() == SPARSE_GATHERV2) { + replace_input = {NewValueNode(pyop_instance), node->input(1), node->input(2)}; + } if (!params.empty()) { Param param_first = *(params.begin()); int32_t first_position = param_first.second; @@ -1371,11 +1375,19 @@ void SetClonedTensorShapeForOptimizer(const FuncGraphPtr &root) { void SetVirtualDatasetStrategy(const CNodePtr &node) { MS_EXCEPTION_IF_NULL(node); + 
MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); + bool full_batch = ParallelContext::GetInstance()->full_batch(); + PrimitivePtr prim = GetValueNode(node->input(0)); MS_EXCEPTION_IF_NULL(prim); if (prim->name() == VIRTUAL_DATA_SET) { CheckGlobalDeviceManager(); - int32_t dev_num = SizeToInt(g_device_manager->GetDeviceListByStageId(0).size()); + int32_t dev_num; + if (full_batch) { + dev_num = 1; + } else { + dev_num = SizeToInt(g_device_manager->GetDeviceListByStageId(0).size()); + } auto attrs_temp = prim->attrs(); std::vector shape_list = ExtractShape(node); if (shape_list.empty()) { @@ -1864,11 +1876,15 @@ void HandleDropoutNode(const OperatorInfoPtr &distribute_operator, const CNodePt DropoutDoMaskInfoPtr dropout_do_mask = std::dynamic_pointer_cast(distribute_operator); MS_EXCEPTION_IF_NULL(dropout_do_mask); - Operator replace_op = dropout_do_mask->GetDropoutGenMaskReplaceOp(cnode); + std::vector replace_op = dropout_do_mask->GetDropoutGenMaskReplaceOp(cnode); + if (replace_op.empty()) { + MS_LOG(DEBUG) << "No need to replace dropout_gen_mask"; + return; + } if (cnode->inputs().size() != DROPOUT_DO_MASK_CNODE_INPUT_SIZE) { MS_LOG(EXCEPTION) << "The size of drop out do mask cnode's input is not " << DROPOUT_DO_MASK_CNODE_INPUT_SIZE; } - ReplaceOneOp(replace_op, cnode->input(DROPOUT_GEN_MASK_INDEX)->cast()); + ReplaceOneOp(replace_op[0], cnode->input(DROPOUT_GEN_MASK_INDEX)->cast()); } void HandleSpecialNode(const OperatorInfoPtr &distribute_operator, const CNodePtr &cnode) { @@ -2254,10 +2270,10 @@ bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) (root->has_flag(SEMI_AUTO_PARALLEL_RUN_ONCE_ONLY))) { if (!root->has_flag(CHECK_SET_STRATEGY_VALID_ONCE_ONLY)) { if (HasStrategy(root)) { - MS_LOG(INFO) << "strategies ignored in " << parallel_mode + MS_LOG(INFO) << "Strategies ignored in " << parallel_mode << ", set_strategy() only valid in [semi_]auto_parallel."; } - root->flags()[CHECK_SET_STRATEGY_VALID_ONCE_ONLY] = true; + 
root->set_flag(CHECK_SET_STRATEGY_VALID_ONCE_ONLY, true); } return changes; @@ -2314,11 +2330,11 @@ bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) DumpGraph(root, std::string(STEP_PARALLEL_END)); // step parallel only run once - root->flags()[SEMI_AUTO_PARALLEL_RUN_ONCE_ONLY] = true; + root->set_flag(SEMI_AUTO_PARALLEL_RUN_ONCE_ONLY, true); res->results()[pipeline::kStepParallelGraph] = root; // in auto parallel mode, no need to check if stategies set - root->flags()[CHECK_SET_STRATEGY_VALID_ONCE_ONLY] = true; + root->set_flag(CHECK_SET_STRATEGY_VALID_ONCE_ONLY, true); (void)gettimeofday(&end_time, nullptr); uint64_t time = kUSecondInSecond * static_cast(end_time.tv_sec - start_time.tv_sec); diff --git a/mindspore/ccsrc/pipeline/action.cc b/mindspore/ccsrc/pipeline/action.cc index 3e87000be7..7d56551ff0 100644 --- a/mindspore/ccsrc/pipeline/action.cc +++ b/mindspore/ccsrc/pipeline/action.cc @@ -38,6 +38,7 @@ #include "pipeline/remove_value_node_dup.h" #include "optimizer/optimizer.h" #include "vm/transform.h" +#include "parse/python_adapter.h" namespace mindspore { namespace pipeline { @@ -228,6 +229,9 @@ bool AbstractSpecializeAction(const ResourcePtr &res) { if (param_node->has_default()) { auto param_value = std::dynamic_pointer_cast(param_node->default_param()); AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true); + auto sparse_grad = + py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad")); + ptr->set_sparse_grad(sparse_grad); parallel::ParallelParameterContextRestoreInNoTraining(func_graph, param_node, ptr); args_spec.push_back(ptr); @@ -276,8 +280,14 @@ bool GeOptimizeAction(const ResourcePtr &res) { return OptimizeAction(res, kGePa bool VmOptimizeAction(const ResourcePtr &res) { return OptimizeAction(res, kVmPasses); } +bool PynativeOptimizeAction(const ResourcePtr &res) { return OptimizeAction(res, kPynativePasses); } + static bool 
IsCtrlSink() { auto ms_ctx = MsContext::GetInstance(); + if (ms_ctx->execution_mode() != kGraphMode) { + return false; + } + std::string device_target = ms_ctx->device_target(); if (device_target != kAscendDevice) { return false; @@ -287,15 +297,9 @@ static bool IsCtrlSink() { return false; } - const char *enable_ctrl_sink = std::getenv("ENABLE_CTRL_SINK"); - if (enable_ctrl_sink == nullptr) { - return false; - } - std::string enable_ctrl_sink_str(enable_ctrl_sink); - if (enable_ctrl_sink_str == "0") { + if (!ms_ctx->is_multi_graph_sink()) { return false; } - return true; } @@ -305,12 +309,24 @@ bool TaskEmitAction(const ResourcePtr &res) { } FuncGraphPtr func_graph = res->func_graph(); auto bc_ptr = res->results()[kBackend].cast(); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (CompileGraphs::ContainMixedTarget(func_graph)) { + bc_ptr->set_is_multi_graph_sink(false); + context_ptr->set_is_multi_graph_sink(false); + context_ptr->set_loop_sink_flag(false); + } else if (context_ptr->execution_mode() != kPynativeMode) { + std::string device_target = context_ptr->device_target(); + if (device_target == kAscendDevice) { + bc_ptr->set_is_multi_graph_sink(true); + context_ptr->set_is_multi_graph_sink(true); + } + } if (IsCtrlSink()) { res->results()[kOutput] = bc_ptr->CompileGraph(NOT_NULL(func_graph)); return true; } - std::vector cut_list = compile::nonlinear_ops; if (bc_ptr->name() == kMsConvert) { cut_list = compile::GetMsNonlinearOps(); @@ -329,7 +345,6 @@ bool ExecuteAction(const ResourcePtr &res) { if (!res->results()[kOutput].is()) { MS_LOG(EXCEPTION) << "Execute args error"; } - auto graph_id = res->results()[kOutput].cast(); std::shared_ptr bc_ptr = res->results()[kBackend].cast>(); std::shared_ptr msbc_ptr = std::dynamic_pointer_cast(bc_ptr); diff --git a/mindspore/ccsrc/pipeline/action.h b/mindspore/ccsrc/pipeline/action.h index 8a651c0038..eed1307872 100644 --- a/mindspore/ccsrc/pipeline/action.h +++ 
b/mindspore/ccsrc/pipeline/action.h @@ -35,6 +35,7 @@ bool SymbolResolveAction(const ResourcePtr &res); bool AbstractSpecializeAction(const ResourcePtr &res); bool GeOptimizeAction(const ResourcePtr &res); bool VmOptimizeAction(const ResourcePtr &res); +bool PynativeOptimizeAction(const ResourcePtr &res); bool TaskEmitAction(const ResourcePtr &res); bool ExecuteAction(const ResourcePtr &res); diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/init.cc index 1b9666a400..7025447a29 100644 --- a/mindspore/ccsrc/pipeline/init.cc +++ b/mindspore/ccsrc/pipeline/init.cc @@ -17,6 +17,7 @@ #include #include #include "kernel/oplib/oplib.h" +#include "kernel/oplib/oploader.h" #include "pipeline/pipeline.h" #include "operator/composite/composite.h" #include "ir/signature.h" @@ -26,6 +27,7 @@ #include "pipeline/parse/python_adapter.h" #include "utils/summary/event_writer.h" #include "utils/config_manager.h" +#include "utils/mpi/mpi_config.h" #include "parallel/context.h" #include "parallel/device_manager.h" #include "parallel/costmodel_context.h" @@ -44,6 +46,7 @@ using PrimitivePy = mindspore::PrimitivePy; using MetaFuncGraph = mindspore::MetaFuncGraph; using EventWriter = mindspore::summary::EventWriter; using OpLib = mindspore::kernel::OpLib; +using OpInfoLoaderPy = mindspore::kernel::OpInfoLoaderPy; using ParallelContext = mindspore::parallel::ParallelContext; using CostModelContext = mindspore::parallel::CostModelContext; @@ -76,6 +79,8 @@ PYBIND11_MODULE(_c_expression, m) { "Get CNode Strategy Dictionary.") .def("get_allreduce_fusion", &ExecutorPy::GetAllreduceFusion, py::arg("phase") = py::str("train"), "Get Allreduce Fusion Dictionary.") + .def("fetch_info_for_quant_export", &ExecutorPy::FetchInfoForQuantExport, py::arg("phase") = py::str("train"), + "Fetch the inputs of Conv or Matmul for quant export.") .def("build_data_graph", &ExecutorPy::BuildGraph, py::arg("build_params"), py::arg("phase") = py::str("train"), py::arg("broadcast_params") = 
py::dict(), "Build data graph.") .def("has_compiled", &ExecutorPy::HasCompiled, py::arg("phase") = py::str(""), "get if cell compiled.") @@ -143,7 +148,18 @@ PYBIND11_MODULE(_c_expression, m) { .def("get_profiling_options", &mindspore::MsContext::profiling_options, "Get options to profiling.") .def("set_profiling_options", &mindspore::MsContext::set_profiling_options, "Set options to profiling.") .def("get_check_bprop_flag", &mindspore::MsContext::check_bprop_flag, "Get whether to check bprop.") - .def("set_check_bprop_flag", &mindspore::MsContext::set_check_bprop_flag, "Set whether to check bprop."); + .def("set_check_bprop_flag", &mindspore::MsContext::set_check_bprop_flag, "Set whether to check bprop.") + .def("get_max_device_memory", &mindspore::MsContext::max_device_memory, "Get deivce memory max size.") + .def("set_max_device_memory", &mindspore::MsContext::set_max_device_memory, "Set deivce memory max size.") + .def("set_print_file_path", &mindspore::MsContext::set_print_file_path, "Set path to print.") + .def("set_enable_graph_kernel", &mindspore::MsContext::set_enable_graph_kernel, + "Set the GraphKernel switch to on or off.") + .def("get_enable_graph_kernel", &mindspore::MsContext::enable_graph_kernel, "Get the value of GraphKernel switch."); + + (void)py::class_>(m, "MpiConfig") + .def_static("get_instance", &mindspore::MpiConfig::GetInstance, "Get mpi config instance.") + .def("get_enable_mpi", &mindspore::MpiConfig::enable_mpi, "Get whether enable mpi.") + .def("set_enable_mpi", &mindspore::MpiConfig::set_enable_mpi, "Set whether to enable mpi."); (void)py::class_>(m, "AutoParallelContext") .def_static("get_instance", &ParallelContext::GetInstance, "Get auto parallel context instance.") @@ -187,6 +203,8 @@ PYBIND11_MODULE(_c_expression, m) { "Set strategy checkpoint save file.") .def("get_strategy_ckpt_load_file", &ParallelContext::strategy_ckpt_load_file, "Get strategy checkpoint load file.") .def("get_strategy_ckpt_save_file", 
&ParallelContext::strategy_ckpt_save_file, "Get strategy checkpoint save file.") + .def("set_full_batch", &ParallelContext::set_full_batch, "Set whether load full batch on each device.") + .def("get_full_batch", &ParallelContext::full_batch, "Get whether load full batch on each device.") .def("reset", &ParallelContext::Reset, "Reset auto parallel context."); (void)py::class_>(m, "CostModelContext") @@ -312,4 +330,8 @@ PYBIND11_MODULE(_c_expression, m) { "Finalize gpu collective communication mode."); #endif + + (void)py::class_>(m, "OpInfoLoaderPy") + .def(py::init()) + .def("get_all_ops_info", &OpInfoLoaderPy::GetAllOpsInfo, "get all ops info."); } diff --git a/mindspore/ccsrc/pipeline/parse/data_converter.cc b/mindspore/ccsrc/pipeline/parse/data_converter.cc index 5dbb8bc453..330d03d11c 100644 --- a/mindspore/ccsrc/pipeline/parse/data_converter.cc +++ b/mindspore/ccsrc/pipeline/parse/data_converter.cc @@ -32,6 +32,7 @@ #include "utils/symbolic.h" #include "utils/context/ms_context.h" #include "debug/trace.h" +#include "optimizer/ad/grad.h" namespace mindspore { namespace parse { @@ -40,6 +41,35 @@ using TensorPtr = mindspore::tensor::TensorPtr; using MetaTensor = mindspore::tensor::MetaTensor; using MetaTensorPtr = mindspore::tensor::MetaTensorPtr; +FuncGraphPtr ConvertToBpropCut(const py::object &obj) { + std::vector results = data_converter::GetObjKey(obj); + std::string obj_key = results[0]; + py::function bprop_func = py::getattr(obj, CUSTOM_BPROP_NAME); + + auto bprop_graph = std::make_shared(); + std::vector outputs; + + auto fake_bprop = std::make_shared("bprop_cut", py::object()); + fake_bprop->set_hook(bprop_func); + (void)fake_bprop->AddAttr(CUSTOM_BPROP_NAME, MakeValue(true)); + outputs.push_back(NewValueNode(fake_bprop)); + + py::object code_obj = py::getattr(bprop_func, "__code__"); + size_t inputs_num = py::cast(py::getattr(code_obj, "co_argcount")) - 3; + for (size_t i = 0; i < inputs_num; ++i) { + auto param = bprop_graph->add_parameter(); + 
outputs.push_back(param); + } + auto p1 = bprop_graph->add_parameter(); + auto p2 = bprop_graph->add_parameter(); + outputs.push_back(p1); + outputs.push_back(p2); + + bprop_graph->set_output(bprop_graph->NewCNode(outputs)); + data_converter::SetObjGraphValue(obj_key, bprop_graph); + return bprop_graph; +} + namespace { bool ConvertTuple(const py::object &obj, ValuePtr *const data, bool use_signature) { MS_LOG(DEBUG) << "Converting python tuple"; @@ -208,33 +238,51 @@ bool ConvertTensor(const py::object &obj, ValuePtr *const data) { return true; } -FuncGraphPtr ConvertToBpropCut(py::object obj) { - std::vector results = data_converter::GetObjKey(obj); - std::string obj_key = results[0]; - py::function bprop_func = py::getattr(obj, "bprop"); - - FuncGraphPtr bprop_graph = std::make_shared(); - std::vector outputs; - - auto fake_bprop = std::make_shared("bprop_cut"); - fake_bprop->set_hook(bprop_func); - (void)fake_bprop->AddAttr("bprop", MakeValue(true)); - outputs.push_back(NewValueNode(fake_bprop)); +bool ConvertSlice(const py::object &obj, ValuePtr *const data) { + MS_LOG(DEBUG) << "Converting slice object"; + + py::slice slice_obj = obj.cast(); + auto convert_func = [obj](std::string attr) -> ValuePtr { + auto py_attr = py::getattr(obj, attr.c_str()); + if (py::isinstance(py_attr)) { + return kNone; + } else if (py::isinstance(py_attr)) { + int value = py::cast(py_attr); + return MakeValue(value); + } else { + MS_LOG(EXCEPTION) << "Slice should contain only int or none"; + } + }; + ValuePtr start = convert_func("start"); + ValuePtr stop = convert_func("stop"); + ValuePtr step = convert_func("step"); + *data = std::make_shared(start, stop, step); + return true; +} - py::object code_obj = py::getattr(bprop_func, "__code__"); - size_t inputs_num = py::cast(py::getattr(code_obj, "co_argcount")) - 3; - for (size_t i = 0; i < inputs_num; ++i) { - auto param = bprop_graph->add_parameter(); - outputs.push_back(param); +bool ConvertCellObjToFuncGraph(py::object obj, 
ValuePtr *const data) { + FuncGraphPtr func_graph = ConvertToFuncGraph(obj); + if (func_graph == nullptr) { + MS_LOG(ERROR) << "Parse resolve function error."; + return false; } - auto p1 = bprop_graph->add_parameter(); - auto p2 = bprop_graph->add_parameter(); - outputs.push_back(p1); - outputs.push_back(p2); - - bprop_graph->set_output(bprop_graph->NewCNode(outputs)); - data_converter::SetObjGraphValue(obj_key, bprop_graph); - return bprop_graph; + // if the cell object has specified bprop, it has user-defined bprop function parse and record it + if (py::hasattr(obj, CUSTOM_BPROP_NAME)) { + FuncGraphPtr bprop_graph = nullptr; + bool enable_bprop_debug = py::cast(py::getattr(obj, "bprop_debug")); + if (enable_bprop_debug) { + bprop_graph = ConvertToBpropCut(obj); + } else { + bprop_graph = ConvertToFuncGraph(obj, PYTHON_MOD_GET_BPROP_METHOD); + } + if (bprop_graph != nullptr) { + (void)func_graph->transforms().insert(std::make_pair(CUSTOM_BPROP_NAME, FuncGraphTransform(bprop_graph))); + (void)bprop_graph->transforms().insert(std::make_pair("primal", FuncGraphTransform(func_graph))); + func_graph->set_flag(FUNC_GRAPH_FLAG_DEFER_INLINE, true); + } + } + *data = func_graph; + return true; } bool ConvertOtherObj(py::object obj, ValuePtr *const data) { @@ -261,32 +309,12 @@ bool ConvertOtherObj(py::object obj, ValuePtr *const data) { // Create the namespace for common class instance // When the obj is Cell, default parse the 'construct' if (data_converter::IsCellInstance(obj)) { - FuncGraphPtr func_graph = ConvertToFuncGraph(obj); - if (func_graph == nullptr) { - MS_LOG(ERROR) << "Parse resolve function error."; - return false; - } - // if the cell object has specified bprop, it has user-defined bprop function parse and record it - if (py::hasattr(obj, "bprop")) { - FuncGraphPtr bprop_graph = nullptr; - bool enable_bprop_debug = py::cast(py::getattr(obj, "bprop_debug")); - if (enable_bprop_debug) { - bprop_graph = ConvertToBpropCut(obj); - } else { - bprop_graph = 
ConvertToFuncGraph(obj, PYTHON_MOD_GET_BPROP_METHOD); - } - if (bprop_graph != nullptr) { - (void)func_graph->transforms().insert(std::make_pair("bprop", FuncGraphTransform(bprop_graph))); - (void)bprop_graph->transforms().insert(std::make_pair("primal", FuncGraphTransform(func_graph))); - func_graph->set_flags(FUNC_GRAPH_FLAG_DEFER_INLINE, true); - } - } - *data = func_graph; - } else { - py::module mod = python_adapter::GetPyModule(PYTHON_MOD_PARSE_MODULE); - py::object namespace_var = python_adapter::CallPyModFn(mod, PYTHON_MOD_GET_MEMBER_NAMESPACE_SYMBOL, obj); - *data = std::make_shared(RESOLVE_NAMESPACE_NAME_CLASS_MEMBER, namespace_var); + return ConvertCellObjToFuncGraph(obj, data); } + + py::module mod = python_adapter::GetPyModule(PYTHON_MOD_PARSE_MODULE); + py::object namespace_var = python_adapter::CallPyModFn(mod, PYTHON_MOD_GET_MEMBER_NAMESPACE_SYMBOL, obj); + *data = std::make_shared(RESOLVE_NAMESPACE_NAME_CLASS_MEMBER, namespace_var); return true; } MS_LOG(ERROR) << "Resolve type is invalid " << ((std::string)py::str(obj)); @@ -315,6 +343,10 @@ bool ConvertData(const py::object &obj, ValuePtr *const data, bool use_signature converted = std::make_shared(py::cast(obj)); } else if (py::isinstance(obj)) { ret = ConvertDict(obj, &converted, use_signature); + } else if (py::isinstance(obj)) { + ret = ConvertSlice(obj, &converted); + } else if (py::isinstance(obj)) { + converted = kEllipsis; } else if (py::isinstance(obj)) { ret = ConvertTuple(obj, &converted, use_signature); } else if (py::hasattr(obj, PYTHON_CELL_AS_LIST)) { @@ -338,6 +370,9 @@ bool ConvertData(const py::object &obj, ValuePtr *const data, bool use_signature } else if (py::hasattr(obj, PYTHON_ENVINSTANCE_FLAG)) { std::shared_ptr env = obj.cast>(); converted = env; + } else if (py::hasattr(obj, "__parameter__")) { + auto to_convert = py::cast(python_adapter::GetPyObjAttr(obj, "default_input")); + ret = ConvertData(to_convert, &converted); } else { ret = ConvertOtherObj(obj, &converted); } 
diff --git a/mindspore/ccsrc/pipeline/parse/data_converter.h b/mindspore/ccsrc/pipeline/parse/data_converter.h index a8918fa60c..0165b55363 100644 --- a/mindspore/ccsrc/pipeline/parse/data_converter.h +++ b/mindspore/ccsrc/pipeline/parse/data_converter.h @@ -51,6 +51,7 @@ void ClearObjectCache(); } // namespace data_converter ClassPtr ParseDataClass(const py::object &cls_obj); +FuncGraphPtr ConvertToBpropCut(const py::object &obj); void CleanDataClassToClassMap(); diff --git a/mindspore/ccsrc/pipeline/parse/function_block.cc b/mindspore/ccsrc/pipeline/parse/function_block.cc index 66534390a0..fbeeba94a1 100644 --- a/mindspore/ccsrc/pipeline/parse/function_block.cc +++ b/mindspore/ccsrc/pipeline/parse/function_block.cc @@ -265,6 +265,13 @@ CNodePtr FunctionBlock::ForceToBoolNode(const AnfNodePtr &cond) { return op_apply_node; } +CNodePtr FunctionBlock::ForceToWhileCond(const AnfNodePtr &cond) { + TraceManager::DebugTrace(std::make_shared(cond->debug_info())); + CNodePtr op_apply_node = func_graph()->NewCNode({MakeResolveOperation("while_cond"), cond}); + TraceManager::EndTrace(); + return op_apply_node; +} + // Perform a jump from this block to target block void FunctionBlock::Jump(const FunctionBlockPtr &target_block, AnfNodePtr node) { if (func_graph()->get_return() != nullptr) { @@ -315,12 +322,10 @@ void FunctionBlock::InsertDependItemsBeforeReturn() { ValueNodePtr make_tuple_op = NewValueNode(prim::kPrimMakeTuple); ValueNodePtr depend_op = NewValueNode(prim::kPrimDepend); - ValueNodePtr get_ref_origin_op = NewValueNode(prim::kPrimGetRefOrigin); ValueNodePtr stop_gradient_op = NewValueNode(prim::kPrimStopGradient); const std::string primitive_name("assign"); const std::string module_name("mindspore.ops.functional"); - ValueNodePtr assign_op = NewValueNode(prim::GetPythonOps(primitive_name, module_name)); - + ValueNodePtr assign_op = NewValueNode(prim::GetPythonOps(primitive_name, module_name, true)); if (state_assign_.size() == 0 && auto_depends_.size() == 0) { 
return; } @@ -329,8 +334,7 @@ void FunctionBlock::InsertDependItemsBeforeReturn() { vec_states.emplace_back(make_tuple_op); for (auto &item : state_assign_) { auto source = ReadVariable(item.second); - auto origin = func_graph()->NewCNode({get_ref_origin_op, item.first}); - auto assign = func_graph()->NewCNode({assign_op, origin, source}); + auto assign = func_graph()->NewCNode({assign_op, item.first, source}); MS_LOG(INFO) << "SetState read " << item.first->ToString() << ", " << item.second; vec_states.emplace_back(assign); } diff --git a/mindspore/ccsrc/pipeline/parse/function_block.h b/mindspore/ccsrc/pipeline/parse/function_block.h index e7842903ee..346061430d 100644 --- a/mindspore/ccsrc/pipeline/parse/function_block.h +++ b/mindspore/ccsrc/pipeline/parse/function_block.h @@ -28,6 +28,7 @@ #include #include "pipeline/parse/parse_base.h" #include "utils/log_adapter.h" +#include "utils/ordered_map.h" namespace mindspore { namespace parse { @@ -55,6 +56,7 @@ class FunctionBlock : public std::enable_shared_from_this { // A block is matured if all its predecessors is generated void Mature(); CNodePtr ForceToBoolNode(const AnfNodePtr &cond); + CNodePtr ForceToWhileCond(const AnfNodePtr &cond); void Jump(const FunctionBlockPtr &block, AnfNodePtr node); AnfNodePtr SearchReplaceNode(const std::string &var, const ParameterPtr &phi); void ConditionalJump(AnfNodePtr condNode, const FunctionBlockPtr &trueBlock, const FunctionBlockPtr &falseBlock); @@ -99,7 +101,7 @@ class FunctionBlock : public std::enable_shared_from_this { std::unordered_map removable_phis_; // set state nodes need to insert before function return nodes. 
- std::unordered_map state_assign_; + OrderedMap state_assign_; // hold declared global variables in function std::set global_vars_; diff --git a/mindspore/ccsrc/pipeline/parse/parse.cc b/mindspore/ccsrc/pipeline/parse/parse.cc index c6e5d3713a..6d5c28c98c 100644 --- a/mindspore/ccsrc/pipeline/parse/parse.cc +++ b/mindspore/ccsrc/pipeline/parse/parse.cc @@ -67,7 +67,7 @@ AnfNodePtr GetMixedPrecisionCastHelp(const FuncGraphPtr &func_graph, const AnfNo } else { return param; } - auto cast_helper = prim::GetPythonOps("_mp_cast_helper", "mindspore.ops.composite.base"); + auto cast_helper = prim::kPrimMixedPrecisionCast; auto cast = func_graph->NewCNode({NewValueNode(cast_helper), NewValueNode(dst_type), param}); return cast; } @@ -967,6 +967,7 @@ FunctionBlockPtr Parser::ParseWhile(const FunctionBlockPtr &block, const py::obj py::object test_node = python_adapter::GetPyObjAttr(node, "test"); AnfNodePtr condition_node = ParseExprNode(header_block, test_node); + condition_node = header_block->ForceToWhileCond(condition_node); body_block->Mature(); header_block->ConditionalJump(condition_node, body_block, after_block); @@ -1175,11 +1176,11 @@ void Parser::HandleAssignClassMember(const FunctionBlockPtr &block, const py::ob auto filename = location[0].cast(); auto line_no = location[1].cast(); // Now only support the self.xxx = yyy, where self.xxx must be a defined Parameter type - if (!py::hasattr(ast()->obj(), attr_name.c_str())) { + if (!py::hasattr(ast()->obj(), common::SafeCStr(attr_name))) { MS_EXCEPTION(TypeError) << "'" << var_name << "' should be a Parameter, but not defined, at " << filename << ":" << line_no; } - auto obj = ast()->obj().attr(attr_name.c_str()); + auto obj = ast()->obj().attr(common::SafeCStr(attr_name)); auto obj_type = obj.attr("__class__").attr("__name__"); if (!py::hasattr(obj, "__parameter__")) { MS_EXCEPTION(TypeError) << "'" << var_name << "' should be a Parameter, but got '" @@ -1205,8 +1206,18 @@ void Parser::HandleAssignSubscript(const 
FunctionBlockPtr &block, const py::obje // getitem apply should return the sequence data structure itself std::string var_name = ""; if (ast_->IsClassMember(value_obj)) { - var_name = "self."; - (void)var_name.append(value_obj.attr("attr").cast()); + std::string attr_name = value_obj.attr("attr").cast(); + var_name = "self." + attr_name; + if (!py::hasattr(ast()->obj(), common::SafeCStr(attr_name))) { + MS_EXCEPTION(TypeError) << "'" << var_name << "' was not defined in the class '__init__' function."; + } + auto obj = ast()->obj().attr(common::SafeCStr(attr_name)); + auto obj_type = obj.attr("__class__").attr("__name__"); + if (!py::hasattr(obj, "__parameter__")) { + MS_EXCEPTION(TypeError) << "'" << var_name << "' should be a Parameter, but got '" + << py::str(obj).cast() << "' with type '" + << py::str(obj_type).cast() << "'."; + } } else { var_name = value_obj.attr("id").cast(); } @@ -1231,7 +1242,7 @@ void Parser::WriteAssignVars(const FunctionBlockPtr &block, const py::object &ta } } -// process a assign statement , such as a =b, a,b = tup +// process a assign statement, such as a =b, a,b = tup FunctionBlockPtr Parser::ParseAssign(const FunctionBlockPtr &block, const py::object &node) { MS_LOG(DEBUG) << "Process ast assgin"; py::object value_object = python_adapter::GetPyObjAttr(node, "value"); @@ -1437,15 +1448,23 @@ bool ParseAst::UpdateFuncGraphFlags(const FuncGraphPtr &func_graph) { } py::dict flags = python_adapter::GetPyObjAttr(obj_, PYTHON_EXTERN_MINDSPORE_FLAG); for (auto &item : flags) { - if (!py::isinstance(item.first) || !py::isinstance(item.second)) { + if (!py::isinstance(item.first)) { MS_LOG(ERROR) << "Type error in flags dict convert"; return false; } auto name = py::cast(item.first); - auto value = py::cast(item.second); - MS_LOG(DEBUG) << "Flag name: " << name << ". 
Value: " << value; - - func_graph->set_flags(name, value); + if (py::isinstance(item.second)) { + auto value = py::cast(item.second); + MS_LOG(DEBUG) << "Flag name: " << name << ". Value: " << value; + func_graph->set_flag(name, value); + } else if (py::isinstance(item.second)) { + auto value = py::cast(item.second); + MS_LOG(DEBUG) << "Flag name: " << name << ". Value: " << value; + func_graph->set_attr(name, MakeValue(value)); + } else { + MS_LOG(ERROR) << "Type error in flags/attrs dict convert"; + return false; + } } return true; diff --git a/mindspore/ccsrc/pipeline/parse/parse.h b/mindspore/ccsrc/pipeline/parse/parse.h index 969effbd18..0a56ccaed9 100644 --- a/mindspore/ccsrc/pipeline/parse/parse.h +++ b/mindspore/ccsrc/pipeline/parse/parse.h @@ -223,8 +223,8 @@ class Parser { FunctionBlockPtr block = std::make_shared(parse); // In order to keep effect order in the sub-graphs which generated by control flow. // We copy the flags from the top graph to the sub-graphs. - if (func_graph_ && !func_graph_->flags().empty()) { - block->func_graph()->set_flags(func_graph_->flags()); + if (func_graph_ && !func_graph_->attrs().empty()) { + block->func_graph()->set_attrs(func_graph_->attrs()); } func_block_list_.push_back(block); return block; diff --git a/mindspore/ccsrc/pipeline/parse/parse_base.h b/mindspore/ccsrc/pipeline/parse/parse_base.h index ef1aeef55c..4961ab78c0 100644 --- a/mindspore/ccsrc/pipeline/parse/parse_base.h +++ b/mindspore/ccsrc/pipeline/parse/parse_base.h @@ -60,6 +60,7 @@ const char PYTHON_MOD_RESOLVE_FUNCTION[] = "resolve_symbol"; const char PYTHON_MOD_RESOLVE_GET_OBJ_KEY[] = "get_object_key"; const char PYTHON_MOD_PARSE_CHECK_IS_CLASS_MEMBER[] = "is_class_member"; const char PYTHON_MOD_RESOLVE_GET_OBJ_TYPE[] = "get_obj_type"; +const char PYTHON_MOD_GET_OBJ_ID[] = "get_obj_id"; const char PYTHON_MOD_GET_CLASS_INSTANCE_TYPE[] = "get_class_instance_type"; const char PYTHON_MOD_CREATE_OBJ_INSTANCE[] = "create_obj_instance"; const char 
PYTHON_MOD_GET_DATACLASS_ATTRS[] = "get_dataclass_attributes"; @@ -83,6 +84,7 @@ const char PYTHON_PARSE_GET_SCOPE_NAME[] = "get_scope_name"; const char PYTHON_PARSE_CLASS_SLICE[] = "create_slice_obj"; const char PYTHON_PARSE_CLASS_ELLIPSIS[] = "create_ellipsis_obj"; +const char PYTHON_MOD_GET_DEFAULT_INPUT[] = "get_default_input"; // define the common name const char NAMED_PRIMITIVE_ITER[] = "iter"; @@ -107,6 +109,7 @@ const char PYTHON_EXTERN_MINDSPORE_FLAG[] = "_mindspore_flags"; // define the parse constant const int MAX_COMPARISON_OPS_SUPPORTED = 1; +const char CUSTOM_BPROP_NAME[] = "bprop"; // define the Namespace name const char RESOLVE_NAMESPACE_NAME_AST[] = "Ast"; // for ast type namespace diff --git a/mindspore/ccsrc/pipeline/pass.cc b/mindspore/ccsrc/pipeline/pass.cc index 0a5af9e3df..94063fb780 100644 --- a/mindspore/ccsrc/pipeline/pass.cc +++ b/mindspore/ccsrc/pipeline/pass.cc @@ -25,12 +25,14 @@ #include #include "ir/func_graph_cloner.h" +#include "debug/anf_ir_utils.h" #include "pipeline/parse/parse_base.h" #include "pipeline/parse/data_converter.h" #include "pipeline/resource.h" #include "pipeline/validator.h" #include "optimizer/optimizer.h" #include "optimizer/cse.h" +#include "optimizer/graph_kernel_reuse.h" #include "optimizer/clean.h" #include "optimizer/irpass.h" #include "optimizer/control_depend.h" @@ -38,6 +40,7 @@ #include "parallel/step_auto_parallel.h" #include "parallel/allreduce_fusion/step_allreduce_fusion.h" #include "utils/any.h" +#include "utils/log_adapter.h" namespace mindspore { namespace pipeline { @@ -79,15 +82,9 @@ OptPassGroupMap GetOptPassesA(const opt::irpass::OptimizeIRPassLib &irpass) { // Specialization irpass.specialize_transform_, - // Arithmetic simplifications - irpass.arithmetic_simplify_, - irpass.addn_zero_filter_, - // Miscellaneous irpass.item_tuple_eliminate_, - irpass.env_get_set_item_, - irpass.new_env_get_item_, - irpass.add_env_get_item_, + irpass.env_get_item_eliminate_, irpass.cast_eliminate_, 
irpass.reshape_eliminate_, irpass.reduce_eliminate_, @@ -95,18 +92,26 @@ OptPassGroupMap GetOptPassesA(const opt::irpass::OptimizeIRPassLib &irpass) { irpass.transpose_eliminate_, irpass.minmaximum_grad_, irpass.get_make_ref_eliminate_, + + // Arithmetic simplifications + irpass.arithmetic_simplify_, + irpass.addn_zero_filter_, + irpass.adjust_all_reduce_mul_add_, + + // Safe inlining + irpass.inline_, }); opt::OptPassConfig a_2 = opt::OptPassConfig({ irpass.merge_addn_, irpass.float_tuple_getitem_switch_, irpass.float_env_getitem_switch_, - irpass.incorporate_getitem_, - irpass.incorporate_getitem_switch_, + irpass.incorporate_getitem_set_, irpass.incorporate_call_, irpass.incorporate_call_switch_, irpass.incorporate_env_getitem_, irpass.incorporate_env_getitem_switch_, irpass.new_env_get_item_, + irpass.depend_value_elim_, }); opt::OptPassConfig a_3 = opt::OptPassConfig({ irpass.same_eliminate_, @@ -144,12 +149,12 @@ OptPassGroupMap GetOptPassesB(const opt::irpass::OptimizeIRPassLib &irpass) { irpass.reset_defer_inline_, irpass.inline_, irpass.special_op_eliminate_, - irpass.stop_gradient_eliminate_, irpass.get_make_ref_eliminate_, }); opt::OptPassConfig b_2 = opt::OptPassConfig({ irpass.replace_refkey_by_param_, irpass.make_ref_eliminate_, + irpass.get_ref_param_eliminate_, }); OptPassGroupMap map({ {"b_1", b_1}, @@ -160,6 +165,40 @@ OptPassGroupMap GetOptPassesB(const opt::irpass::OptimizeIRPassLib &irpass) { return map; } +OptPassGroupMap GetOptPassesGraphKernelA(const opt::irpass::OptimizeIRPassLib &irpass) { + opt::OptPassConfig interface_fusion = opt::OptPassConfig({ + irpass.mark_interface_fusion_, + }); + OptPassGroupMap map({ + {"graph_kernel_reuse", opt::OptPassConfig(opt::GraphKernelReuse())}, + {"interface_fusion", interface_fusion}, + {"renormalize", opt::OptPassConfig::Renormalize()}, + {"cse", opt::OptPassConfig(opt::CSE(false))}, + }); + return map; +} + +OptPassGroupMap GetOptPassesGraphKernelB(const opt::irpass::OptimizeIRPassLib &irpass) { + 
opt::OptPassConfig elim_1 = opt::OptPassConfig({ + irpass.addn_eliminate_, + irpass.incorporate_getitem_from_param_, + }); + opt::OptPassConfig elim_2 = opt::OptPassConfig({ + irpass.unused_parameter_eliminate_, + irpass.unused_output_eliminate_, + }); + OptPassGroupMap map({ + {"elim_1", elim_1}, + {"renormalize", opt::OptPassConfig::Renormalize()}, + {"elim_2", elim_2}, + }); + return map; +} + +OptPassGroupMap GetOptPassesC(const opt::irpass::OptimizeIRPassLib &irpass) { + return OptPassGroupMap({{"renormalize", opt::OptPassConfig::Renormalize()}}); +} + OptPassGroupMap GetControlPhases(const opt::irpass::OptimizeIRPassLib &irpass) { opt::OptPassConfig control_group = opt::OptPassConfig({irpass.convert_switch_replacement_}, true); OptPassGroupMap map({ @@ -189,8 +228,19 @@ void InitOpt(const ResourcePtr &res) { opt::irpass::OptimizeIRPassLib irpass; g_pass_opts["opt_a"] = Optimizer::MakeOptimizer("opt_a", res, GetOptPassesA(irpass)); g_pass_opts["opt_b"] = Optimizer::MakeOptimizer("opt_b", res, GetOptPassesB(irpass), false, true); + g_pass_opts["opt_graph_kernel_a"] = + Optimizer::MakeOptimizer("opt_graph_kernel_a", res, GetOptPassesGraphKernelA(irpass), true); + g_pass_opts["opt_graph_kernel_b"] = + Optimizer::MakeOptimizer("opt_graph_kernel_b", res, GetOptPassesGraphKernelB(irpass), false); + g_pass_opts["renormal"] = Optimizer::MakeOptimizer("renormal", res, GetOptPassesC(irpass)); g_pass_opts["opt_control"] = Optimizer::MakeOptimizer("opt_control", res, GetControlPhases(irpass), false, true); g_pass_opts["opt_prepare"] = Optimizer::MakeOptimizer("opt_prepare", res, GetPreparePhases(irpass)); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!(context_ptr->enable_graph_kernel())) { + g_pass_opts["opt_graph_kernel_a"]->set_enable(false); + g_pass_opts["opt_graph_kernel_b"]->set_enable(false); + } } } } // namespace @@ -222,9 +272,13 @@ bool OptPassGroup(const ResourcePtr &res, const std::string &name) { bool 
OptPassAGroup(const ResourcePtr &res) { return OptPassGroup(res, "opt_a"); } bool OptPassBGroup(const ResourcePtr &res) { return OptPassGroup(res, "opt_b"); } +bool OptPassGraphKernelGroupA(const ResourcePtr &res) { return OptPassGroup(res, "opt_graph_kernel_a"); } +bool OptPassGraphKernelGroupB(const ResourcePtr &res) { return OptPassGroup(res, "opt_graph_kernel_b"); } bool ControlGroup(const ResourcePtr &res) { return OptPassGroup(res, "opt_control"); } bool PrepareGroup(const ResourcePtr &res) { return OptPassGroup(res, "opt_prepare"); } +bool OptPassRNGroup(const ResourcePtr &res) { return OptPassGroup(res, "renormal"); } + bool AddControlDependPass(const ResourcePtr &res) { FuncGraphPtr func_graph = res->func_graph(); MS_EXCEPTION_IF_NULL(func_graph); @@ -268,8 +322,10 @@ bool InferenceOptPreparePass(const ResourcePtr &res) { std::vector kVmPasses = {{"simplify_data_structures", SimplifyDataStructuresPass}, {"opt_a", OptPassAGroup}, {"opt_b", OptPassBGroup}, - {"add_control_depend", AddControlDependPass}, - {"cconv", CconvPass}}; + {"cconv", CconvPass}, + {"opt_graph_kernel_a", OptPassGraphKernelGroupA}, + {"opt_graph_kernel_b", OptPassGraphKernelGroupB}, + {"add_control_depend", AddControlDependPass}}; std::vector kGePasses = {{"simplify_data_structures", SimplifyDataStructuresPass}, {"opt_a", OptPassAGroup}, @@ -278,5 +334,7 @@ std::vector kGePasses = {{"simplify_data_structures", SimplifyDataStru {"opt_control", ControlGroup}, {"opt_prepare", PrepareGroup}, {"cconv", CconvPass}}; + +std::vector kPynativePasses = {{"opt_a", OptPassAGroup}, {"opt_b", OptPassBGroup}, {"cconv", CconvPass}}; } // namespace pipeline } // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/pass.h b/mindspore/ccsrc/pipeline/pass.h index 2636879d01..9064df52ee 100644 --- a/mindspore/ccsrc/pipeline/pass.h +++ b/mindspore/ccsrc/pipeline/pass.h @@ -29,6 +29,7 @@ using PassItem = std::pair>; extern std::vector kGePasses; extern std::vector kVmPasses; +extern std::vector 
kPynativePasses; bool CconvPass(const ResourcePtr &res); bool ValidatePass(const ResourcePtr &res); diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc index 103477363f..517d4cc518 100644 --- a/mindspore/ccsrc/pipeline/pipeline.cc +++ b/mindspore/ccsrc/pipeline/pipeline.cc @@ -59,6 +59,7 @@ using mindspore::abstract::AbstractTuplePtr; const char IR_TYPE_ANF[] = "anf_ir"; const char IR_TYPE_ONNX[] = "onnx_ir"; +const char IR_TYPE_BINARY[] = "binary_ir"; ExecutorPyPtr ExecutorPy::executor_ = nullptr; std::mutex ExecutorPy::instance_lock_; @@ -212,6 +213,14 @@ py::bytes ExecutorPy::GetFuncGraphProto(const std::string &phase, const std::str return proto_str; } + if (ir_type == IR_TYPE_BINARY) { + std::string proto_str = GetBinaryProtoString(fg_ptr); + if (proto_str.empty()) { + MS_LOG(EXCEPTION) << "Graph proto is empty."; + } + return proto_str; + } + MS_LOG(EXCEPTION) << "Unknown ir type: " << ir_type; } @@ -236,9 +245,7 @@ py::dict ExecutorPy::GetAllreduceFusion(const std::string &phase) { } void ExecutorPy::DelNetRes(const std::string &id) { -#ifdef ENABLE_GE FinalizeBackend(); -#endif if (executor_ != nullptr) { bool flag = false; auto tmp_info = info_; @@ -272,6 +279,75 @@ ExecutorPy::~ExecutorPy() { ConfigManager::GetInstance().ResetConfig(); } +std::map> ExecutorPy::FetchInfoForQuantExport( + const std::string &phase_s) { + FuncGraphPtr func_graph = info_[phase_s]->resource->func_graph(); + MS_EXCEPTION_IF_NULL(func_graph); + MS_LOG(DEBUG) << "FetchInfoForQuantExport func graph(" << func_graph->ToString() << ") phase(" << phase_s << ")!"; + std::map> fake_quant_table; + auto filter = [](AnfNodePtr node) { + return !(IsPrimitiveCNode(node, prim::kPrimConv2D) || IsPrimitiveCNode(node, prim::kPrimMatMul)); + }; + std::vector nodes = DeepScopedGraphSearchWithFilter(func_graph->get_return(), AlwaysInclude, filter); + auto is_quant_cnode = [](AnfNodePtr node) { + return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) || + 
IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel); + }; + for (auto node : nodes) { + auto cnode = node->cast(); + if (cnode == nullptr || cnode->size() != 3) { + continue; + } + auto x = cnode->input(1); + auto weight = cnode->input(2); + if (!is_quant_cnode(weight)) { + continue; + } + // get parameter weight's name + cnode = weight->cast(); + auto weight_node = cnode->input(2); + if (!weight_node->isa()) { + continue; + } + auto weight_name = weight_node->cast()->name(); + // find the fakequant from input + int count = 0; + int max_depth = 5; + while (!is_quant_cnode(x)) { + if (count >= max_depth) { + break; + } + cnode = x->cast(); + if (cnode == nullptr || cnode->size() <= 1) { + break; + } + x = cnode->input(1); + count += 1; + } + // get the fakequant parameter minq's name + if (!is_quant_cnode(x)) { + continue; + } + cnode = x->cast(); + if (cnode == nullptr || cnode->size() != 4) { + continue; + } + auto fakequant_min_node = cnode->input(2); + if (!fakequant_min_node->isa()) { + continue; + } + auto fakequant_min_node_name = fakequant_min_node->cast()->name(); + auto quant_op_value = cnode->input(0)->cast()->value(); + if (!quant_op_value->isa()) { + continue; + } + auto quant_op = quant_op_value->cast(); + fake_quant_table[weight_name] = std::make_pair(quant_op, fakequant_min_node_name); + } + + return fake_quant_table; +} + void ExecutorPy::SaveCompiledGraph(const std::string &phase_s) { // save the graph to ExecutorPy FuncGraphPtr func_graph = info_[phase_s]->resource->func_graph(); @@ -462,6 +538,9 @@ bool ExecutorPy::Compile(const py::object &obj, const py::tuple &args, const py: } catch (const py::value_error &ex) { ReleaseResource(phase); throw py::value_error(ex); + } catch (const py::index_error &ex) { + ReleaseResource(phase); + throw py::index_error(ex); } catch (const std::exception &ex) { ReleaseResource(phase); // re-throw this exception to Python interpreter to handle it @@ -506,7 +585,6 @@ void RunPipelineAction(const ActionItem 
&action, pipeline::ResourcePtr resource, // when in loading anf ir mode, action `parse` do nothing if (action.first == "parse") { - parse::PythonAdapter::SetPythonEnvFlag(true); return; } @@ -566,6 +644,7 @@ void Pipeline::Run() { draw::Draw(base_name + ".dot", graph); // generate IR file in human readable format DumpIR(base_name + ".ir", graph); + // generate IR file in a heavily commented format, which can also be reloaded if (action.first != "parse") { ExportIR(base_name + ".dat", std::to_string(i), graph); @@ -608,24 +687,27 @@ void Pipeline::Run() { MS_LOG(INFO) << "End"; } -void ExecutorPy::ProcessVmArg(const py::tuple &args, const std::string &phase, VectorRef *arg_list) { +void ProcessVmArgInner(const py::tuple &args, const ResourcePtr &res, VectorRef *const arg_list) { std::size_t size = args.size(); for (std::size_t i = 0; i < size; i++) { py::object arg = args[i]; auto ms_context = MsContext::GetInstance(); if (ms_context->backend_policy() == kMsConvert && py::isinstance(arg)) { - MS_LOG(EXCEPTION) << "Args[" << i << "] is numpy array, not tensor"; + MS_LOG(EXCEPTION) << "The " << i << "th arg is numpy array, not tensor."; } ValuePtr converted = nullptr; bool succ = parse::ConvertData(arg, &converted); if (!succ) { - MS_LOG(EXCEPTION) << "Args convert error"; + MS_LOG(EXCEPTION) << "The " << i << "th arg convert failed."; + } + if (MsContext::GetInstance()->execution_mode() == 0 && !converted->isa()) { + MS_EXCEPTION(TypeError) << "For 'graph mode', the " << i << "th arg: " << converted->ToString() + << " is not tensor."; } arg_list->push_back(converted); } - ResourcePtr res = GetResource(phase); MS_EXCEPTION_IF_NULL(res); auto graph = res->func_graph(); MS_EXCEPTION_IF_NULL(graph); @@ -647,6 +729,10 @@ void ExecutorPy::ProcessVmArg(const py::tuple &args, const std::string &phase, V } } +void ExecutorPy::ProcessVmArg(const py::tuple &args, const std::string &phase, VectorRef *const arg_list) { + ProcessVmArgInner(args, GetResource(phase), arg_list); +} + 
py::object ExecutorPy::Run(const py::tuple &args, const py::object &phase) { std::size_t size = args.size(); if (!py::isinstance(phase)) { @@ -775,7 +861,7 @@ bool InitExecDatasetVm(const std::string &queue_name, int64_t size, int64_t batc MS_EXCEPTION_IF_NULL(convert_fn); // Convert CNodeList to LinConvertResult. ConfigManager::GetInstance().set_iter_num(1); - auto runner = convert_fn({app_init}); + auto runner = convert_fn({app_init}, ""); if (MsContext::GetInstance()->execution_mode() != kPynativeMode) { backend->Link(runner.graph_id); } @@ -874,6 +960,8 @@ void ClearResAtexit() { compile::ClearConvertCache(); pipeline::GetMethodMap().clear(); pipeline::ExecutorPy::ClearRes(); + pipeline::ReclaimOptimizer(); + pynative::PynativeExecutor::GetInstance()->ClearRes(); #ifdef ENABLE_GE transform::DfGraphManager::GetInstance().ClearGraph(); transform::DfGraphConvertor::get_adpt_map().clear(); diff --git a/mindspore/ccsrc/pipeline/pipeline.h b/mindspore/ccsrc/pipeline/pipeline.h index 81d0e1a9f4..3f1274c417 100644 --- a/mindspore/ccsrc/pipeline/pipeline.h +++ b/mindspore/ccsrc/pipeline/pipeline.h @@ -97,6 +97,8 @@ class ExecutorPy : public std::enable_shared_from_this { void ReleaseResource(const py::object &phase); static void ClearRes(); + std::map> FetchInfoForQuantExport(const std::string &phase_s); + private: ExecutorPy(); void ConvertObjectToTensors(const py::dict &dict, std::map *tensors); @@ -139,6 +141,8 @@ bool InitExecDatasetVm(const std::string &queue_name, int64_t size, int64_t batc const std::vector &types, const std::vector> &shapes, const std::vector &input_indexes, bool need_run); +void ProcessVmArgInner(const py::tuple &args, const ResourcePtr &res, VectorRef *const arg_list); + } // namespace pipeline } // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc index 309b482d62..ea0ca14c7a 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.cc +++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc @@ 
-460,12 +460,12 @@ void ProcessGeArg(const std::map &info, const py:: ValuePtr converted = nullptr; bool succ = parse::ConvertData(args[i], &converted); if (!succ) { - MS_LOG(EXCEPTION) << "Args convert error"; + MS_LOG(EXCEPTION) << "The " << i << "th arg convert failed."; } if (converted->isa()) { inputs->push_back(converted->cast()); } else { - MS_EXCEPTION(TypeError) << "Args " << converted->ToString() << " is not tensor"; + MS_EXCEPTION(TypeError) << "The " << i << "th arg: " << converted->ToString() << " is not tensor."; } } } @@ -488,7 +488,7 @@ py::object ExecDFGraph(const std::map &info, const #ifdef ENABLE_INFER // Now don't use the graph because the exec ge function don't take effect MS_EXCEPTION_IF_NULL(info.at(phase)->func_graph); - if (ENABLE_TRAIN != info.at(phase)->func_graph->flags()["training"]) { + if (ENABLE_TRAIN != info.at(phase)->func_graph->has_flag("training")) { MS_LOG(ERROR) << "Graph training mode mismatch mode of libraries"; ConfigManager::GetInstance().ResetConfig(); return py::none(); diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc b/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc index d4f0c6f8d4..f23c6e31c4 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc @@ -51,6 +51,7 @@ ValuePtr AbstractBase::BuildValue() const { AbstractBasePtr AbstractBase::Broaden() const { AbstractBasePtr clone = Clone(); clone->set_value(kAnyValue); + clone->set_sparse_grad(sparse_grad_); return clone; } @@ -63,7 +64,8 @@ std::string AbstractBase::ToString() const { MS_EXCEPTION_IF_NULL(type_); MS_EXCEPTION_IF_NULL(shape_); buffer << type_name() << "(" - << "Type: " << type_->ToString() << " Value: " << value << " Shape: " << shape_->ToString() << ")"; + << "Type: " << type_->ToString() << " Value: " << value << " Shape: " << shape_->ToString() + << " sparse_grad: " << sparse_grad_ << ")"; return buffer.str(); } @@ -72,16 +74,22 @@ 
AbstractBasePtr AbstractScalar::Broaden() const { return AbstractBase::Broaden() AbstractBasePtr AbstractScalar::Join(const AbstractBasePtr &other) { MS_EXCEPTION_IF_NULL(other); if (*this == *other) { - return shared_from_base(); + auto ret = shared_from_base(); + ret->set_sparse_grad(sparse_grad()); + return ret; } auto value_self = GetValueTrack(); MS_EXCEPTION_IF_NULL(value_self); ValuePtr res_value = ValueJoin(value_self, other->GetValueTrack()); TypePtr res_type = TypeJoin(GetTypeTrack(), other->GetTypeTrack()); if (res_value == value_self) { - return shared_from_base(); + auto ret = shared_from_base(); + ret->set_sparse_grad(sparse_grad()); + return ret; } - return std::make_shared(res_value, res_type); + auto ret = std::make_shared(res_value, res_type); + ret->set_sparse_grad(sparse_grad()); + return ret; } AbstractBasePtr AbstractType::Clone() const { @@ -423,7 +431,9 @@ AbstractBasePtr AbstractTensor::Join(const AbstractBasePtr &other) { } auto element = element_->Join(other_tensor->element_); auto shape = ShapeJoin(this->shape(), other_tensor->shape()); - return std::make_shared(element, shape); + auto ret = std::make_shared(element, shape); + ret->set_sparse_grad(sparse_grad()); + return ret; } bool AbstractTensor::operator==(const AbstractTensor &other) const { @@ -463,6 +473,7 @@ AbstractBasePtr AbstractTensor::Clone() const { ShapePtr shp = shape(); clone->set_shape(shp->Clone()); clone->set_value(GetValueTrack()); + clone->set_sparse_grad(sparse_grad()); return clone; } @@ -472,6 +483,7 @@ AbstractBasePtr AbstractTensor::Broaden() const { auto shp = shape(); broaden->set_shape(shp->Clone()); broaden->set_value(kAnyValue); + broaden->set_sparse_grad(sparse_grad()); return broaden; } @@ -482,6 +494,7 @@ AbstractBasePtr AbstractTensor::BroadenWithShape() const { shp->Broaden(); broaden->set_shape(shp); broaden->set_value(kAnyValue); + broaden->set_sparse_grad(sparse_grad()); return broaden; } @@ -502,7 +515,8 @@ std::string AbstractTensor::ToString() 
const { MS_EXCEPTION_IF_NULL(value_track); buffer << type_name() << "(" << "shape: " << shape_track->ToString() << ", element: " << element_->ToString() - << ", value_ptr: " << value_track << ", value: " << value_track->ToString() << ")"; + << ", value_ptr: " << value_track << ", value: " << value_track->ToString() << " sparse_grad " << sparse_grad() + << ")"; return buffer.str(); } diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.h b/mindspore/ccsrc/pipeline/static_analysis/abstract_value.h index 939976bb95..f3375d22d6 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.h +++ b/mindspore/ccsrc/pipeline/static_analysis/abstract_value.h @@ -44,7 +44,7 @@ class AbstractBase : public Base { public: explicit AbstractBase(const ValuePtr &value = nullptr, const TypePtr &type = kAnyType, const BaseShapePtr &shape = kNoShape) - : value_(value), type_(type), shape_(shape) {} + : value_(value), type_(type), shape_(shape), sparse_grad_("") {} ~AbstractBase() override = default; MS_DECLARE_PARENT(AbstractBase, Base) @@ -53,11 +53,13 @@ class AbstractBase : public Base { virtual bool operator==(const AbstractBase &other) const; void set_value(const ValuePtr &value) { value_ = value; } + void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; } void set_type(const TypePtr &type) { type_ = type; } void set_shape(const BaseShapePtr &shape) { shape_ = shape; } void set_value_desc(const std::string &desc) { value_desc_ = desc; } const std::string &value_desc() const { return value_desc_; } ValuePtr GetValueTrack() const { return value_; } + const std::string &sparse_grad() const { return sparse_grad_; } TypePtr GetTypeTrack() const { return type_; } BaseShapePtr GetShapeTrack() const { return shape_; } @@ -85,6 +87,7 @@ class AbstractBase : public Base { TypePtr type_; BaseShapePtr shape_; std::string value_desc_; // store initial value description for error report + std::string sparse_grad_; }; class AbstractScalar : 
public AbstractBase { diff --git a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc index 254fd43c0b..c9b1ce4f93 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc @@ -165,7 +165,7 @@ AbstractBasePtrList FuncGraphEvaluator::BroadenUndeterminedArgs(const AbstractBa MS_LOG(DEBUG) << "Joined args: " << ::mindspore::ToString(joined_args_spec_list); // If there is loop variant, all arguments need to be broaden to avoid wrong constant propagation. if (!(joined_args_spec_list == args_spec_list)) { - func_graph_->set_flags(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); + func_graph_->set_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); } return joined_args_spec_list; } @@ -178,7 +178,7 @@ AbstractBasePtrList FuncGraphEvaluator::BroadenUndeterminedArgs(const AbstractBa // If there is loop variant, all arguments need to be broaden to avoid wrong constant propagation. if (!(joined_args_spec_list == args_spec_list)) { trace_.push_back(joined_args_spec_list); - func_graph_->set_flags(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); + func_graph_->set_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); } MS_LOG(DEBUG) << "Joined eval args: " << ::mindspore::ToString(joined_args_spec_list); return joined_args_spec_list; diff --git a/mindspore/ccsrc/pipeline/static_analysis/prim.cc b/mindspore/ccsrc/pipeline/static_analysis/prim.cc index f2f85df430..82b8395933 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/prim.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/prim.cc @@ -55,6 +55,7 @@ PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() { {prim::kPrimIsNot, {InferImplIsNot, true}}, {prim::kPrimInDict, {InferImplInDict, true}}, {prim::kPrimNotInDict, {InferImplNotInDict, true}}, + {prim::kPrimIsConsant, {InferImplIsConstant, true}}, // Maths {prim::kPrimMaximumGrad, {InferImplMinOrMaxGrad, true}}, {prim::kPrimMinimumGrad, {InferImplMinOrMaxGrad, true}}, @@ -106,8 +107,8 @@ 
PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() { {prim::kPrimConv2DBackpropFilter, {InferImplConv2DBackpropFilter, true}}, {prim::kPrimBiasAddGrad, {InferImplBiasAddGrad, true}}, {prim::kPrimRelu, {InferImplRelu, true}}, - {prim::kPrimZerosLikeTensor, {InferImplZerosLikeTensor, true}}, {prim::kPrimFakeBprop, {InferImplFakeBprop, false}}, + {prim::kPrimZerosLike, {InferImplZerosLike, true}}, {prim::kPrimBpropCut, {InferImplBpropCut, true}}, {prim::kPrimLayerNorm, {InferImplLayerNorm, true}}, {prim::kPrimLayerNormGrad, {InferImplLayerNormGrad, true}}, @@ -147,9 +148,6 @@ EvalResultPtr StandardPrimEvaluator::EvalPrim(const AnalysisEnginePtr &engine, c EvalResultPtr DoSignatureEvaluator::Run(AnalysisEnginePtr engine, const ConfigPtrList &args_conf_list, AnfNodeConfigPtr out_conf) { AbstractBasePtrList args_spec_list; - if (!prim_->isa()) { - MS_LOG(EXCEPTION) << "Primitive should be DoSignature, but " << prim_->ToString(); - } if (out_conf->node() == nullptr || !out_conf->node()->isa()) { MS_LOG(EXCEPTION) << "Node of out_conf should be CNode"; } @@ -221,9 +219,6 @@ EvalResultPtr UnpackGraphEvaluator::Run(AnalysisEnginePtr engine, const ConfigPt if (out_conf->node() == nullptr || !out_conf->node()->isa()) { MS_LOG(EXCEPTION) << "Node of out_conf should be CNode"; } - if (!prim_->isa()) { - MS_LOG(EXCEPTION) << "Primitive should be UnpackGraphPrimitive, but got " << prim_->ToString(); - } auto unpack_graph = prim_->cast(); auto out_node = out_conf->node()->cast(); @@ -267,6 +262,80 @@ EvalResultPtr UnpackGraphEvaluator::Run(AnalysisEnginePtr engine, const ConfigPt return engine->ForwardConfig(out_conf, fn_conf); } +AnfNodePtr MixedPrecisionCastHelper(AnfNodePtr source_node, AbstractBasePtr node_type, AnfNodePtr target_type, + FuncGraphPtr func_graph) { + AnfNodePtr target_node = source_node; + if (node_type->isa()) { + auto x = node_type->cast(); + if (x->element()->BuildType()->isa()) { + auto cast = prim::GetPythonOps("cast", "mindspore.ops.functional"); + 
MS_EXCEPTION_IF_NULL(cast); + target_node = func_graph->NewCNode({NewValueNode(cast), source_node, target_type}); + } + } else if (node_type->isa()) { + auto x = node_type->cast(); + auto &items = x->elements(); + std::vector nodes; + nodes.emplace_back(NewValueNode(prim::kPrimMakeTuple)); + int idx = 0; + for (const auto &item : items) { + AnfNodePtr tuple_node = + func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), source_node, NewValueNode(idx)}); + AnfNodePtr node = MixedPrecisionCastHelper(tuple_node, item, target_type, func_graph); + nodes.emplace_back(node); + ++idx; + } + target_node = func_graph->NewCNode(nodes); + } else if (node_type->isa()) { + auto x = node_type->cast(); + auto &items = x->elements(); + std::vector dict_key_nodes; + std::vector dict_value_nodes; + dict_key_nodes.emplace_back(NewValueNode(prim::kPrimMakeTuple)); + dict_value_nodes.emplace_back(NewValueNode(prim::kPrimMakeTuple)); + for (const auto &item : items) { + AnfNodePtr dict_value_node = + func_graph->NewCNode({NewValueNode(prim::kPrimDictGetItem), source_node, NewValueNode(item.first)}); + AnfNodePtr node = MixedPrecisionCastHelper(dict_value_node, item.second, target_type, func_graph); + dict_key_nodes.emplace_back(NewValueNode(item.first)); + dict_value_nodes.emplace_back(node); + } + target_node = func_graph->NewCNode({NewValueNode(prim::kPrimMakeDict), func_graph->NewCNode(dict_key_nodes), + func_graph->NewCNode(dict_value_nodes)}); + } + return target_node; +} + +EvalResultPtr MixedPrecisionCastEvaluator::Run(AnalysisEnginePtr engine, const ConfigPtrList &args_conf_list, + AnfNodeConfigPtr out_conf) { + AbstractBasePtrList args_spec_list; + if (out_conf->node() == nullptr || !out_conf->node()->isa()) { + MS_LOG(EXCEPTION) << "Node of out_conf should be CNode"; + } + auto out_node = out_conf->node()->cast(); + const auto &out_node_inputs = out_node->inputs(); + if (out_node->inputs().size() == 0 || (out_node_inputs.size() - 1) != args_conf_list.size()) { + 
MS_LOG(EXCEPTION) << "MixedPrecisionCast" + << " args size should equal to inputs size minus 1, but args size " << args_conf_list.size() + << ", inputs size " << out_node_inputs.size(); + } + AnfNodePtrList args_inputs{out_node_inputs.begin() + 1, out_node_inputs.end()}; + (void)std::transform(args_conf_list.begin(), args_conf_list.end(), std::back_inserter(args_spec_list), + [](const ConfigPtr &ref) -> AbstractBasePtr { return ref->GetEvaluatedValue()->abstract(); }); + + ScopePtr scope = kDefaultScope; + if (out_conf != nullptr) { + scope = out_conf->node()->scope(); + } + ScopeGuard scope_guard(scope); + + FuncGraphPtr func_graph = out_conf->node()->func_graph(); + AnfNodePtr new_node = MixedPrecisionCastHelper(out_node_inputs[2], args_spec_list[1], out_node_inputs[1], func_graph); + AnfNodeConfigPtr fn_conf = engine->MakeConfig(new_node, out_conf->context()); + + return engine->ForwardConfig(out_conf, fn_conf); +} + namespace { py::object BuildValue(const ValuePtr &value_ptr) { if (value_ptr == nullptr) { @@ -300,11 +369,9 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) { auto value = abs_base->cast()->ref(); dic = ConvertAbstractToPython(value); } else if (abs_base->isa()) { - auto arg_slice = dyn_cast(abs_base); - std::vector shape; - dic["shape"] = shape; - dic["dtype"] = arg_slice->BuildType(); - dic["value"] = BuildValue(arg_slice->BuildValue()); + dic["shape"] = py::none(); + dic["dtype"] = py::ellipsis(); + dic["value"] = py::ellipsis(); } else if (abs_base->isa()) { auto arg_tuple = dyn_cast(abs_base); size_t len = arg_tuple->size(); @@ -798,7 +865,11 @@ class RefToEmbedEvaluator : public SymbolicPrimEvaluator { } auto refkey = key_value->cast(); if (refkey == nullptr) { - return std::make_shared(std::make_shared(type), std::make_shared()); + auto ret = std::make_shared(type); + auto ref_value = ref_abs->ref(); + MS_EXCEPTION_IF_NULL(ref_value); + ret->set_sparse_grad(ref_value->sparse_grad()); + return std::make_shared(ret, 
std::make_shared()); } std::string name = refkey->tag(); @@ -812,6 +883,7 @@ class RefToEmbedEvaluator : public SymbolicPrimEvaluator { x = SensitivityTransform(x); std::shared_ptr key = std::make_shared(node, x); std::shared_ptr abs_scalar = std::make_shared(key, type); + abs_scalar->set_sparse_grad(x->sparse_grad()); return std::make_shared(abs_scalar, std::make_shared()); } }; diff --git a/mindspore/ccsrc/pipeline/static_analysis/prim.h b/mindspore/ccsrc/pipeline/static_analysis/prim.h index 22418180f7..5b910f8194 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/prim.h +++ b/mindspore/ccsrc/pipeline/static_analysis/prim.h @@ -102,6 +102,22 @@ class UnpackGraphEvaluator : public Evaluator { PrimitivePtr prim_; }; +class MixedPrecisionCastEvaluator : public Evaluator { + public: + explicit MixedPrecisionCastEvaluator(const PrimitivePtr primitive) + : Evaluator("MixedPrecisionCastEvaluator"), prim_(primitive) {} + ~MixedPrecisionCastEvaluator() override = default; + EvalResultPtr Run(AnalysisEnginePtr engine, const ConfigPtrList &argrefs, + AnfNodeConfigPtr out_config = nullptr) override; + + EvalResultPtr Eval(AnalysisEnginePtr, const AbstractBasePtrList &) override { + MS_LOG(EXCEPTION) << "Eval() should not be called, Run() method should be called"; + } + + private: + PrimitivePtr prim_; +}; + bool IsInWhiteList(PrimitivePtr primitive); StandardPrimitiveEvalImpl GetPrimitiveInferImpl(const PrimitivePtr &primitive); @@ -184,6 +200,8 @@ AbstractBasePtr InferImplInDict(const AnalysisEnginePtr &, const PrimitivePtr &, const AbstractBasePtrList &args_spec_list); AbstractBasePtr InferImplNotInDict(const AnalysisEnginePtr &, const PrimitivePtr &, const AbstractBasePtrList &args_spec_list); +AbstractBasePtr InferImplIsConstant(const AnalysisEnginePtr &, const PrimitivePtr &, + const AbstractBasePtrList &args_spec_list); AbstractBasePtr InferImplPooling(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); 
AbstractBasePtr InferImplPoolingGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive, @@ -206,10 +224,10 @@ AbstractBasePtr InferImplGeluGrad(const AnalysisEnginePtr &, const PrimitivePtr const AbstractBasePtrList &args_spec_list); AbstractBasePtr InferImplRelu(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); -AbstractBasePtr InferImplZerosLikeTensor(const AnalysisEnginePtr &, const PrimitivePtr &primitive, - const AbstractBasePtrList &args_spec_list); AbstractBasePtr InferImplFakeBprop(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); +AbstractBasePtr InferImplZerosLike(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list); AbstractBasePtr InferImplBpropCut(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); AbstractBasePtr InferImplLayerNorm(const AnalysisEnginePtr &, const PrimitivePtr &primitive, diff --git a/mindspore/ccsrc/pipeline/static_analysis/program_specialize.cc b/mindspore/ccsrc/pipeline/static_analysis/program_specialize.cc index 2a03eb6d5c..e01b98841b 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/program_specialize.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/program_specialize.cc @@ -378,11 +378,7 @@ AnfNodePtr FuncGraphSpecializer::BuildSpecializedNodeInner(const AbstractBasePtr } auto real_eval = dyn_cast(eval); - if (func->context() != nullptr) { - if (!IsVisible(func_graph_, func->context()->func_graph())) { - MS_LOG(EXCEPTION) << "Func is not visible NodeInfo: " << trace::GetDebugInfo(func_graph_->debug_info()); - } - } else { + if (func->context() == nullptr) { MS_LOG(EXCEPTION) << "Func context is nullptr NodeInfo: " << trace::GetDebugInfo(func_graph_->debug_info()); } AnalysisContextPtr context = real_eval->MakeContext(engine_, argvals); @@ -507,9 +503,9 @@ void FuncGraphSpecializer::ProcessCNode(const CNodePtr 
&new_node) { // First element is partial, second is func so arg is start from 2 (void)args.insert(args.begin(), inputs.begin() + 2, inputs.end()); func = inputs[1]; - new_inputs = args; - (void)new_inputs.insert(new_inputs.begin(), func); } + new_inputs = args; + (void)new_inputs.insert(new_inputs.begin(), func); AbstractBasePtrList argvals; MS_EXCEPTION_IF_NULL(new_inputs[0]); @@ -524,9 +520,23 @@ void FuncGraphSpecializer::ProcessCNode(const CNodePtr &new_node) { << new_inputs[i]->DebugString() << ", abstract: " << new_inputs[i]->abstract()->ToString(); } - if (func->isa() && func->func_graph()->has_flag(FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER)) { - auto wrapped_node = BuildSpecializedParameterNode(new_node); - new_inputs[0] = wrapped_node; + if (!func->isa()) { + MS_LOG(DEBUG) << func->abstract()->type_name() << " | " << func->abstract()->ToString(); + if (func->abstract()->isa() && !func->abstract()->isa()) { + auto func_abs = func->abstract()->cast(); + EvaluatorPtr eval = engine_->GetEvaluatorFor(func_abs); + std::pair result; + AbstractBasePtrList empty_args; + auto status = FindUniqueArgvals(func_abs, eval, empty_args, &result); + MS_LOG(DEBUG) << "FindUniqueArgvals return status: " << status; + // if a node is a poly node, or an input parameter is a PartialAbstractClosure, expand it early + if (status == kSpecializeFindUniqueArgvalPoly || + (func->isa() && (func->func_graph()->has_flag(FUNC_GRAPH_FLAG_SPECIALIZE_PARAMETER) || + func->abstract()->isa()))) { + auto wrapped_node = BuildSpecializedParameterNode(new_node); + new_inputs[0] = wrapped_node; + } + } } if (CanSpecializeNode(func)) { diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc index b7520176ec..9da148d2a7 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc @@ -308,6 +308,10 @@ EvaluatorPtr GetPrimEvaluator(const PrimitivePtr 
&prim, const AnalysisEnginePtr evaluator = std::make_shared(prim); return evaluator; } + if (prim->Hash() == prim::kPrimMixedPrecisionCast->Hash() && prim->name() == prim::kPrimMixedPrecisionCast->name()) { + evaluator = std::make_shared(prim); + return evaluator; + } if (prim->HasPyEvaluator()) { auto prim_py = dyn_cast(prim); if (prim_py != nullptr) { @@ -464,6 +468,85 @@ EvalResultPtr AnalysisEngine::ExecuteEvaluators(const std::vector return ExecuteMultipleEvaluators(evaluators, out_conf, args_conf_list); } +void AnalysisEngine::SetUndeterminedFlag(const EvaluatorPtr &evaluator) { + auto fg_eval = evaluator->cast(); + if (fg_eval == nullptr) { + return; + } + auto fg = fg_eval->func_graph(); + MS_EXCEPTION_IF_NULL(fg); + auto undetermined_fgs = fg->recursive_graphs(); + if (undetermined_fgs) { + auto fg_parent = fg->parent(); + MS_EXCEPTION_IF_NULL(fg_parent); + fg_parent->set_flag(kFuncGraphFlagUndetermined, true); + MS_LOG(DEBUG) << "Set graph undetermined: " << fg_parent->ToString(); + } +} + +EvaluatorPtr AnalysisEngine::HandleNestedRecursion(const std::vector &evaluators, + const EvaluatorPtr &eval, const AbstractBasePtrList &args_spec_list, + const EvalTraceRevIter &it, bool *continue_flag) { + *continue_flag = false; + // Find latest entry function to handle nested recursion. + EvaluatorPtr latest_entry = eval; + auto latest_entry_iter = eval_trace_.rbegin(); + for (auto r_it = eval_trace_.rbegin(); *r_it != *it;) { + auto it_temp = std::find(evaluators.begin(), evaluators.end(), r_it->first); + if (it_temp != evaluators.end()) { + latest_entry = *it_temp; + latest_entry_iter = r_it; + break; + } + latest_entry_iter = ++r_it; + } + if (latest_entry != eval) { + MS_LOG(DEBUG) << "Continue Evaluator " << eval->ToString(); + *continue_flag = true; + return latest_entry; + } + + bool has_undetermined = false; + // Check whether sub loop has untraced undetermined evaluator. 
+ std::set> undetermined_evals; + for (auto r_it = eval_trace_.rbegin(); r_it != latest_entry_iter; r_it++) { + undetermined_evals.insert(*r_it); + } + MS_LOG(DEBUG) << "undetermined_evals size(): " << undetermined_evals.size(); + + for (auto u_eval : undetermined_evals) { + MS_LOG(DEBUG) << u_eval.first->ToString() << " check undetermined."; + if (!undetermined_evals.count(std::make_pair(multi_poss_[u_eval.first], args_spec_list))) { + MS_LOG(DEBUG) << u_eval.first->ToString() << " has undetermined."; + has_undetermined = true; + break; + } + } + if (has_undetermined == false) { + MS_LOG(DEBUG) << eval->ToString() << " has no undetermined."; + *continue_flag = true; + return latest_entry; + } + + return latest_entry; +} + +EvalResultPtr AnalysisEngine::ProcessEvalResults(const AbstractBasePtrList &out_specs) { + if (out_specs.size() == 0) { + MS_LOG(EXCEPTION) << "There is an endless loop for evaluator."; + } + + if (out_specs.size() == 1) { + MS_EXCEPTION_IF_NULL(out_specs[0]); + // If only one result derived, then broaden it to avoid wrong constant propagation. 
+ return std::make_shared(out_specs[0]->Broaden(), std::make_shared()); + } + auto joined_spec = AbstractJoin(out_specs); + MS_EXCEPTION_IF_NULL(joined_spec); + MS_LOG(DEBUG) << "Multiple evaluators joined: " << joined_spec->ToString(); + return std::make_shared(joined_spec, std::make_shared()); +} + EvalResultPtr AnalysisEngine::ExecuteMultipleEvaluators(const std::vector &evaluators, const AnfNodeConfigPtr &out_conf, const ConfigPtrList &args_conf_list) { @@ -479,18 +562,7 @@ EvalResultPtr AnalysisEngine::ExecuteMultipleEvaluators(const std::vectorGetEvaluatedValue()->abstract(); }); for (auto eval : evaluators) { - auto fg_eval = eval->cast(); - if (fg_eval) { - auto fg = fg_eval->func_graph(); - MS_EXCEPTION_IF_NULL(fg); - auto undetermined_fgs = fg->recursive_graphs(); - if (undetermined_fgs) { - auto fg_parent = fg->parent(); - MS_EXCEPTION_IF_NULL(fg_parent); - fg_parent->set_flags(kFuncGraphFlagUndetermined, true); - MS_LOG(DEBUG) << "Set graph undetermined: " << fg_parent->ToString(); - } - } + SetUndeterminedFlag(eval); auto current_inf = std::make_pair(eval, args_spec_list); MS_LOG(DEBUG) << "Check Evaluator " << eval->ToString(); @@ -510,40 +582,9 @@ EvalResultPtr AnalysisEngine::ExecuteMultipleEvaluators(const std::vectorfirst); - if (it_temp != evaluators.end()) { - latest_entry = *it_temp; - latest_entry_iter = r_it; - break; - } - latest_entry_iter = ++r_it; - } - if (latest_entry != eval) { - MS_LOG(DEBUG) << "Continue Evaluator " << eval->ToString(); - continue; - } - - bool has_undetermined = false; - // Check whether sub loop has untraced undetermined evaluator. 
- std::set> undetermined_evals; - for (auto r_it = eval_trace_.rbegin(); r_it != latest_entry_iter; r_it++) { - undetermined_evals.insert(*r_it); - } - MS_LOG(DEBUG) << "undetermined_evals size(): " << undetermined_evals.size(); - for (auto u_eval : undetermined_evals) { - MS_LOG(DEBUG) << u_eval.first->ToString() << " check undetermined."; - if (!undetermined_evals.count(std::make_pair(multi_poss_[u_eval.first], args_spec_list))) { - MS_LOG(DEBUG) << u_eval.first->ToString() << " has undetermined."; - has_undetermined = true; - break; - } - } - if (has_undetermined == false) { - MS_LOG(DEBUG) << eval->ToString() << " has no undetermined."; + bool continue_flag = false; + auto latest_entry = HandleNestedRecursion(evaluators, eval, args_spec_list, it, &continue_flag); + if (continue_flag) { continue; } @@ -558,19 +599,8 @@ EvalResultPtr AnalysisEngine::ExecuteMultipleEvaluators(const std::vector(out_specs[0]->Broaden(), std::make_shared()); - } - auto joined_spec = AbstractJoin(out_specs); - MS_EXCEPTION_IF_NULL(joined_spec); - MS_LOG(DEBUG) << "Multiple evaluators joined: " << joined_spec->ToString(); - return std::make_shared(joined_spec, std::make_shared()); + return ProcessEvalResults(out_specs); } EvalResultPtr AnfNodeConfig::GetEvaluatedValue() { diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h index 1e7a52fda9..a0b7ee5478 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h +++ b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h @@ -172,6 +172,8 @@ struct AnalysisResult { AnalysisContextPtr context; }; +using EvalTraceRevIter = std::list>::reverse_iterator; + class AnalysisEngine : public std::enable_shared_from_this { public: AnalysisEngine(const PrimEvaluatorMap &prim_evaluator_map, const FuncGraphManagerPtr &func_graph_manager) @@ -222,6 +224,12 @@ class AnalysisEngine : public std::enable_shared_from_this { std::unordered_map 
prim_py_evaluators_; private: + void SetUndeterminedFlag(const EvaluatorPtr &evaluator); + EvaluatorPtr HandleNestedRecursion(const std::vector &evaluators, const EvaluatorPtr &eval, + const AbstractBasePtrList &args_spec_list, const EvalTraceRevIter &it, + bool *continue_flag); + EvalResultPtr ProcessEvalResults(const AbstractBasePtrList &out_specs); + const PrimEvaluatorMap &prim_constructors_; FuncGraphManagerPtr func_graph_manager_; std::unordered_map constructors_; diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc index f01dd95f06..981e2255f3 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc @@ -16,11 +16,13 @@ #include "pre_activate/ascend/ascend_backend_optimization.h" #include #include +#include #include "pre_activate/common/optimizer.h" #include "pre_activate/ascend/ir_fission/bn_split.h" #include "pre_activate/ascend/ir_fission/bn_grad_split.h" #include "pre_activate/ascend/ir_fission/batch_norm_grad_split.h" #include "pre_activate/ascend/ir_fission/batch_norm_bert_fission.h" +#include "pre_activate/ascend/ir_fission/single_batch_norm_fission.h" #include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h" #include "pre_activate/ascend/ir_fission/layer_norm_grad_split.h" #include "pre_activate/pass/communication_op_fusion.h" @@ -54,6 +56,7 @@ #include "pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h" #include "pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h" #include "pre_activate/ascend/format_type/insert_trans_op.h" +#include "pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h" #include "pre_activate/pass/getitem_tuple.h" #include "pre_activate/pass/optimize_dependence.h" #include "pre_activate/pass/erase_visit_attr.h" @@ -61,10 +64,14 @@ #include "pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.h" 
#include "pre_activate/pass/eliminate_redundant_op.h" #include "pre_activate/pass/common_subexpression_elimination.h" +#include "pre_activate/pass/fuse_graph_kernel.h" +#include "pre_activate/pass/fuse_basic.h" +#include "pre_activate/pass/add_atomic_clean.h" #include "pre_activate/ascend/format_type/merge_cast_to_op.h" #include "pre_activate/ascend/format_type/check_consistency.h" #include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" #include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h" #include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" #include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" #include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" @@ -77,14 +84,16 @@ #include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" #include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" #include "pre_activate/ascend/format_type/deal_ref_trans_and_cast.h" -#include "pre_activate/ascend/enhancer/add_memcpy_async.h" +#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" #include "pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.h" -#include "pre_activate/ascend/format_type/insert_cast_for_runop.h" #include "pre_activate/ascend/format_type/insert_transdata_for_runop.h" #include "pre_activate/ascend/enhancer/getnext_memcpy_elimination.h" #include "pre_activate/ascend/ir_fission/addn_fission.h" #include "pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h" #include "pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h" +#include "pre_activate/ascend/ir_fission/split_fission.h" +#include "pre_activate/ascend/format_type/modify_ops_attrs.h" +#include "pre_activate/ascend/format_type/remove_no_use_reshape_op.h" #include "utils/context/ms_context.h" #include "utils/config_manager.h" #include "debug/anf_ir_dump.h" @@ -96,10 
+105,13 @@ namespace { void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) { MS_EXCEPTION_IF_NULL(ir_fusion_pm); ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); @@ -136,6 +148,8 @@ void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) { ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); } } // namespace @@ -143,7 +157,7 @@ void RunOpAscendDataLayout(const std::shared_ptr &kernel_g MS_EXCEPTION_IF_NULL(kernel_graph); auto optimizer = std::make_shared(); auto data_layout_pm = std::make_shared("pynative_transop_pm"); - data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); @@ -156,22 +170,15 @@ void RunOpAscendDataLayout(const std::shared_ptr &kernel_g kernel_graph->SetExecOrderByDefault(); } -void RunOpAscendMixPrecision(const std::shared_ptr &kernel_graph) { +void AscendGraphKernelCommonProcess(const std::shared_ptr &kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_graph); auto optimizer = std::make_shared(); - auto mixed_precision_pm = std::make_shared("pynative_transop_pm"); - mixed_precision_pm->AddPass(std::make_shared()); - mixed_precision_pm->AddPass(std::make_shared()); - mixed_precision_pm->AddPass(std::make_shared()); - mixed_precision_pm->AddPass(std::make_shared()); - 
mixed_precision_pm->AddPass(std::make_shared()); - mixed_precision_pm->AddPass(std::make_shared()); - mixed_precision_pm->AddPass(std::make_shared()); - mixed_precision_pm->AddPass(std::make_shared()); - mixed_precision_pm->AddPass(std::make_shared()); - mixed_precision_pm->AddPass(std::make_shared()); - mixed_precision_pm->AddPass(std::make_shared()); - optimizer->AddPassManager(mixed_precision_pm); + MS_EXCEPTION_IF_NULL(optimizer); + auto common_process = std::make_shared("graph_kernel_common_process"); + MS_EXCEPTION_IF_NULL(common_process); + common_process->AddPass(std::make_shared()); + common_process->AddPass(std::make_shared()); + optimizer->AddPassManager(common_process); (void)optimizer->Optimize(kernel_graph); kernel_graph->SetExecOrderByDefault(); } @@ -180,7 +187,7 @@ void AscendDataLayout(const std::shared_ptr &kernel_graph) MS_EXCEPTION_IF_NULL(kernel_graph); auto optimizer = std::make_shared(); auto data_layout_pm = std::make_shared("transop_pm"); - data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); @@ -236,10 +243,11 @@ void AscendBackendIRFusionOptimization(const std::shared_ptrAddPass(std::make_shared()); } else { ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); } - ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); if (context_ptr->ir_fusion_flag()) { AddAscendBackendOptionalIRFusion(ir_fusion_pm.get()); @@ -250,6 +258,7 @@ void AscendBackendIRFusionOptimization(const std::shared_ptrAddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); } + ir_fusion_pm->AddPass(std::make_shared()); 
optimizer->AddPassManager(ir_fusion_pm); (void)optimizer->Optimize(kernel_graph); kernel_graph->SetExecOrderByDefault(); @@ -279,6 +288,7 @@ void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr(); auto ir_fusion_pm = std::make_shared("ir_fusion_pm"); ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); @@ -318,22 +328,117 @@ void AscendBackendOptimization(const std::shared_ptr &kern other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); - other_pm->AddPass(std::make_shared()); - other_pm->AddPass(std::make_shared()); - if (context_ptr->enable_task_sink() && context_ptr->loop_sink_flag() && ConfigManager::GetInstance().iter_num() > 1) { - other_pm->AddPass(std::make_shared()); - } - other_pm->AddPass(std::make_shared()); optimizer->AddPassManager(other_pm); (void)optimizer->Optimize(kernel_graph); kernel_graph->SetExecOrderByDefault(); // buffer fusion AscendBackendUBFusionOptimization(kernel_graph); + + // other2 optimization + auto optimizer2 = std::make_shared(); + auto other2_pm = std::make_shared("other2_pm"); + other2_pm->AddPass(std::make_shared()); + other2_pm->AddPass(std::make_shared()); + if (context_ptr->enable_task_sink() && context_ptr->loop_sink_flag() && ConfigManager::GetInstance().iter_num() > 1) { + other2_pm->AddPass(std::make_shared()); + } + other2_pm->AddPass(std::make_shared()); + optimizer2->AddPassManager(other2_pm); + (void)optimizer2->Optimize(kernel_graph); + kernel_graph->SetExecOrderByDefault(); + + if (save_graphs) { + std::string file_path = + save_graphs_path + "/" + "hwopt_d_end" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; + DumpIR(file_path, kernel_graph, true); + DumpIRProto(kernel_graph, "after_hwopt"); + kernel_graph->DumpFuncGraph("hwopt_d_end"); + } +} + +void 
AscendBackendGraphKernelOpt(const std::shared_ptr &kernel_graph, + bool is_before_kernel_select) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!(context_ptr->enable_graph_kernel())) { + return; + } + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_graph_kernel_opt_before_graph_" + + std::to_string(!is_before_kernel_select) + "_" + std::to_string(kernel_graph->graph_id()) + + ".ir"; + DumpIR(file_path, kernel_graph); + } + + // Fuse graph kernels with basic ops + FuseGraphKernel(kernel_graph, is_before_kernel_select); + + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_graph_kernel_opt_end_graph_" + + std::to_string(!is_before_kernel_select) + "_" + std::to_string(kernel_graph->graph_id()) + + ".ir"; + DumpIR(file_path, kernel_graph, true); + } +} + +void AscendBackendFuseBasicOpt(const std::shared_ptr &kernel_graph, + bool is_before_kernel_select) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!(context_ptr->enable_graph_kernel())) { + return; + } + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_fuse_basic_opt_before_graph_" + + std::to_string(!is_before_kernel_select) + "_" + std::to_string(kernel_graph->graph_id()) + + ".ir"; + DumpIR(file_path, kernel_graph, true); + } + + // Fuse basic ops with basic ops + FuseBasic(kernel_graph, is_before_kernel_select); + + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_fuse_basic_opt_end_graph_" + + std::to_string(!is_before_kernel_select) + "_" + 
std::to_string(kernel_graph->graph_id()) + + ".ir"; + DumpIR(file_path, kernel_graph, true); + } +} + +void AscendBackendAddAtomicClean(const std::shared_ptr &kernel_graph) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!(context_ptr->enable_graph_kernel())) { + return; + } + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_add_atomic_clean_before" + "_graph_" + + std::to_string(kernel_graph->graph_id()) + ".ir"; + DumpIR(file_path, kernel_graph); + } + + AddAtomicClean(kernel_graph); + if (save_graphs) { std::string file_path = save_graphs_path + "/" + "hwopt_d_end" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; DumpIR(file_path, kernel_graph, true); - DumpIRProto(kernel_graph, "after_hwopt_" + std::to_string(kernel_graph->graph_id())); } } @@ -350,7 +455,8 @@ void AscendBackendUBFusionOptimization(const std::shared_ptrgraph_id()) + ".ir"; DumpIR(file_path, kernel_graph); } auto fusion_id_allocator = std::make_shared(); @@ -368,6 +474,7 @@ void AscendBackendUBFusionOptimization(const std::shared_ptrAddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared()); @@ -375,7 +482,8 @@ void AscendBackendUBFusionOptimization(const std::shared_ptrOptimize(kernel_graph); kernel_graph->SetExecOrderByDefault(); if (save_graphs) { - std::string file_path = save_graphs_path + "/" + "hwopt_d_ub_fusion_after.ir"; + std::string file_path = + save_graphs_path + 
"/hwopt_d_ub_fusion_after_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; DumpIR(file_path, kernel_graph); } } diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h index 914b4c053a..222c4b90b5 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h @@ -20,11 +20,16 @@ namespace mindspore { namespace opt { void RunOpAscendDataLayout(const std::shared_ptr &kernel_graph); -void RunOpAscendMixPrecision(const std::shared_ptr &kernel_graph); void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr &kernel_graph); void AscendDataLayout(const std::shared_ptr &kernel_graph); void AscendMixPrecision(const std::shared_ptr &kernel_graph); void AscendBackendOptimization(const std::shared_ptr &kernel_graph); +void AscendGraphKernelCommonProcess(const std::shared_ptr &kernel_graph); +void AscendBackendGraphKernelOpt(const std::shared_ptr &kernel_graph, + bool is_before_kernel_select = false); +void AscendBackendFuseBasicOpt(const std::shared_ptr &kernel_graph, + bool is_before_kernel_select = false); +void AscendBackendAddAtomicClean(const std::shared_ptr &kernel_graph); void AscendBackendIRFusionOptimization(const std::shared_ptr &kernel_graph); void AscendBackendUBFusionOptimization(const std::shared_ptr &kernel_graph); } // namespace opt diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc index b573cb33bb..9c498bd736 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc @@ -22,6 +22,7 @@ #include "utils/utils.h" #include "device/kernel_info.h" #include "kernel/oplib/oplib.h" +#include "kernel/common_utils.h" #include "operator/ops.h" #include "session/anf_runtime_algorithm.h" #include "session/kernel_graph.h" @@ -31,6 +32,7 @@ 
namespace mindspore { namespace opt { using KernelBuildInfoBuilder = kernel::KernelBuildInfo::KernelBuildInfoBuilder; namespace { +const std::set kCommonFormatSet = {kOpFormat_DEFAULT, kOpFormat_ND, kOpFormat_NCHW}; AnfNodePtr CreateReshapeNode(const FuncGraphPtr &func_graph, const AnfNodePtr &input_node, const KernelSelectPtr &kernel_select, const std::vector &dst_shape) { std::vector trans_inputs; @@ -53,7 +55,6 @@ AnfNodePtr AddTransOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePt CNodePtr trans_data = nullptr; std::string input_format = is_insert_input ? kOpFormat_DEFAULT : AnfAlgo::GetOutputFormat(node, 0); std::string dst_format = is_insert_input ? AnfAlgo::GetInputFormat(node, 0) : kOpFormat_DEFAULT; - TypeId dtype = AnfAlgo::GetOutputDeviceDataType(node, 0); std::vector padding_axis = AnfAlgo::GetOutputReshapeType(node, 0); MS_EXCEPTION_IF_NULL(node); // if insert transdata for input we need to change the input @@ -62,10 +63,9 @@ AnfNodePtr AddTransOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePt MS_LOG(EXCEPTION) << "cannot insert a transdata node to a node's input which the node is not a cnode"; } auto cnode = node->cast(); - dtype = AnfAlgo::GetInputDeviceDataType(cnode, insert_index); dst_format = AnfAlgo::GetInputFormat(cnode, insert_index); input_node = AnfAlgo::GetInputNode(cnode, insert_index); - padding_axis = AnfAlgo::GetInputReshapeType(node, 0); + padding_axis = AnfAlgo::GetInputReshapeType(node, insert_index); } bool need_padding = false; if (is_insert_input) { @@ -94,7 +94,7 @@ AnfNodePtr AddTransOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePt trans_node = reshape_node; } // refresh the transdata's format to ori format & dst format - RefreshKernelBuildInfo(input_format, dst_format, dtype, trans_data, padding_axis); + RefreshKernelBuildInfo(input_format, dst_format, trans_data, padding_axis); return trans_node; } @@ -110,13 +110,9 @@ AnfNodePtr GetTransInputNodePtr(const FuncGraphPtr &func_graph, const 
CNodePtr & MS_EXCEPTION_IF_NULL(input_node); AnfAlgo::SetNodeInput(node, input_node, index); } - if (AnfAlgo::GetInputFormat(node, index) == kOpFormat_NC1KHKWHWC0) { - MS_LOG(EXCEPTION) << "got the format " << AnfAlgo::GetInputFormat(node, index) - << "when inserting the transdata node " << node->DebugString(); - } std::vector origin_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, index); std::string dest_format = AnfAlgo::GetInputFormat(node, index); - if (kNeedTransFormatSet.find(dest_format) != kNeedTransFormatSet.end() && origin_shape.size() > 1) { + if (kCommonFormatSet.find(dest_format) == kCommonFormatSet.end() && origin_shape.size() > 1) { MS_LOG(DEBUG) << node->DebugString() << "Insert transdata " << AnfAlgo::GetInputFormat(node, index) << " To DefaultFormat , index: " << index; return AddTransOpNodeToGraph(func_graph, node, kernel_select, index, true); @@ -133,7 +129,7 @@ AnfNodePtr InsertTransOpForSingleOutput(const FuncGraphPtr &func_graph, const An MS_LOG(EXCEPTION) << "got the hw format " << output_format << "when insert the transdata node " << node->DebugString(); } - if (kNeedTransFormatSet.find(output_format) != kNeedTransFormatSet.end() && origin_shape.size() > 1) { + if (kCommonFormatSet.find(output_format) == kCommonFormatSet.end() && origin_shape.size() > 1) { MS_LOG(DEBUG) << "Inserted Transdata " << output_format << " To default , index :0"; return AddTransOpNodeToGraph(func_graph, node, kernel_select, 0, false); } @@ -154,7 +150,7 @@ AnfNodePtr InsertTransOpForMultipleOutput(const FuncGraphPtr &func_graph, const } auto tuple_getitem = CreatTupleGetItemNode(func_graph, node, output_idx); std::vector origin_shape = AnfAlgo::GetOutputInferShape(node, output_idx); - if (kNeedTransFormatSet.find(output_format) != kNeedTransFormatSet.end() && origin_shape.size() > 1) { + if (kCommonFormatSet.find(output_format) == kCommonFormatSet.end() && origin_shape.size() > 1) { make_tuple_inputs.emplace_back(AddTransOpNodeToGraph(func_graph, tuple_getitem, 
kernel_select, 0, false)); } else { // No need insert trans op. @@ -165,22 +161,17 @@ AnfNodePtr InsertTransOpForMultipleOutput(const FuncGraphPtr &func_graph, const return make_tuple; } } // namespace -void RefreshKernelBuildInfo(const std::string &input_format, const std::string &output_format, const TypeId device_type, +void RefreshKernelBuildInfo(const std::string &input_format, const std::string &output_format, const AnfNodePtr &trans_data, const std::vector &reshape_type) { MS_EXCEPTION_IF_NULL(trans_data); - MS_EXCEPTION_IF_NULL(trans_data->kernel_info()); - auto ori_build_info = trans_data->kernel_info()->select_kernel_build_info(); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({input_format}); - builder.SetInputReshapeType({reshape_type}); - builder.SetInputReshapeType({reshape_type}); - builder.SetOutputsFormat({output_format}); - builder.SetInputsDeviceType({device_type}); - builder.SetOutputsDeviceType({device_type}); - builder.SetKernelType(ori_build_info->kernel_type()); - builder.SetFusionType(ori_build_info->fusion_type()); - builder.SetProcessor(ori_build_info->processor()); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), trans_data.get()); + auto ori_build_info = AnfAlgo::GetSelectKernelBuildInfo(trans_data); + MS_EXCEPTION_IF_NULL(ori_build_info); + auto builder = std::make_shared(ori_build_info); + builder->SetInputsFormat({input_format}); + builder->SetInputReshapeType({reshape_type}); + builder->SetOutputReshapeType({reshape_type}); + builder->SetOutputsFormat({output_format}); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), trans_data.get()); } CNodePtr NewTransOpNode(const FuncGraphPtr &func_graph, const AnfNodePtr &input, const KernelSelectPtr &kernel_select, @@ -239,7 +230,7 @@ AnfNodePtr AddCastOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePtr if (kernel::OpLib::FindOp(prim::kPrimCast->name(), kernel::kTBE) != nullptr) { builder.SetKernelType(KernelType::TBE_KERNEL); } else { - 
builder.SetKernelType(KernelType::AUTO_DIFF_KERNEL); + builder.SetKernelType(KernelType::AKG_KERNEL); } // if kernel info is null , it remarks this function is running ut if (cast->kernel_info() == nullptr) { @@ -294,19 +285,17 @@ CNodePtr InsertCastForInput(const FuncGraphPtr &func_graph, const CNodePtr &cnod MS_EXCEPTION_IF_NULL(cnode); std::vector new_inputs = {AnfAlgo::GetCNodePrimitiveNode(cnode)}; for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); ++input_index) { - TypeId origin_type; + const auto infer_type = AnfAlgo::GetPrevNodeOutputInferDataType(cnode, input_index); + TypeId origin_type(kTypeUnknown); auto cur_input = AnfAlgo::GetInputNode(cnode, input_index); auto kernel_with_index = AnfAlgo::VisitKernel(cur_input, 0); - auto is_weight_boundary = [](const AnfNodePtr &node) -> bool { - if (node->isa() || node->isa()) { - return true; - } - return false; - }; auto real_input_node = kernel_with_index.first; - if (is_weight_boundary(real_input_node)) { + if (kernel::IsWeightBoundary(real_input_node) || func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { // weight - origin_type = AnfAlgo::GetPrevNodeOutputDeviceDataType(cnode, input_index); + origin_type = AnfAlgo::GetPrevNodeOutputPrecision(cnode, input_index); + if (origin_type == kTypeUnknown) { + origin_type = AnfAlgo::GetPrevNodeOutputDeviceDataType(cnode, input_index); + } } else { // feature map origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(cnode, input_index); @@ -314,9 +303,13 @@ CNodePtr InsertCastForInput(const FuncGraphPtr &func_graph, const CNodePtr &cnod const std::string dev_fmt = AnfAlgo::GetInputFormat(cnode, input_index); const std::vector origin_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, input_index); const TypeId device_type = AnfAlgo::GetInputDeviceDataType(cnode, input_index); - if (origin_type != device_type) { + // In graph kernel, we check parameter, + // the eliminate pass will not eliminate this case, so we just do not insert the noused 
cast. + if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL) && IsValueNode(cur_input)) { + new_inputs.push_back(cur_input); + } else if (origin_type != device_type) { auto cast = - AddCastOpNodeToGraph(func_graph, cur_input, dev_fmt, origin_type, device_type, origin_shape, origin_type); + AddCastOpNodeToGraph(func_graph, cur_input, dev_fmt, origin_type, device_type, origin_shape, infer_type); MS_EXCEPTION_IF_NULL(cast); cast->set_scope(cnode->scope()); AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), cast); diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h index 66e3f2ad33..ad48ca5291 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h @@ -21,7 +21,8 @@ #include #include "device/ascend/kernel_select_ascend.h" #include "kernel/kernel_query.h" -#include "kernel/tbe/tbe_kernel_select.h" +#include "kernel/oplib/oplib.h" +#include "session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { @@ -37,11 +38,11 @@ class SupportedChecker { public: SupportedChecker() = default; virtual ~SupportedChecker() = default; - virtual bool CheckAiCoreSupported(const AnfNodePtr &anf_node, + virtual bool CheckAICoreSupported(const AnfNodePtr &anf_node, const kernel::KernelBuildInfoPtr &select_kernel_build_info) { return kernel::IsSupportedByAICore(anf_node, select_kernel_build_info); } - virtual bool CheckAiCpuSupported(const AnfNodePtr &anf_node, + virtual bool CheckAICPUSupported(const AnfNodePtr &anf_node, const kernel::KernelBuildInfoPtr &select_kernel_build_info) { return kernel::IsSupportedByAICPU(anf_node, select_kernel_build_info); } @@ -56,9 +57,20 @@ class KernelQuery { std::vector> *kernel_info_list) { kernel::KernelQuery(kernel_node, kernel_info_list); } + virtual bool IsTbeRef(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + if (!node->isa()) { + return false; + } + auto op_info = 
mindspore::kernel::OpLib::FindOp(AnfAlgo::GetCNodeName(node), kernel::kTBE); + if (op_info != nullptr) { + return op_info->is_ref(); + } + return false; + } }; using KernelQueryPtr = std::shared_ptr; -void RefreshKernelBuildInfo(const std::string &input_format, const std::string &output_format, const TypeId device_type, +void RefreshKernelBuildInfo(const std::string &input_format, const std::string &output_format, const AnfNodePtr &trans_data, const std::vector &reshape_type = {}); CNodePtr NewTransOpNode(const FuncGraphPtr &func_graph, const AnfNodePtr &input, const KernelSelectPtr &kernel_select, diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc index 8c4b1dcc63..94318d63ca 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc @@ -34,16 +34,22 @@ void BnupdateEltwiseEltwiseFusionPass::MatchBnupdateAddRelu(const CNodePtr &cnod MS_EXCEPTION_IF_NULL(candidate_fusion); auto manager = kernel_graph.manager(); MS_EXCEPTION_IF_NULL(manager); + MS_EXCEPTION_IF_NULL(relu_input); auto add = relu_input->cast(); MS_EXCEPTION_IF_NULL(add); auto tuple_getitem = add->input(1); + MS_EXCEPTION_IF_NULL(tuple_getitem); if (tuple_getitem->isa() && AnfAlgo::GetCNodeName(tuple_getitem) == prim::kPrimTupleGetItem->name()) { auto getitem = tuple_getitem->cast(); + MS_EXCEPTION_IF_NULL(getitem); auto bnupdate = getitem->input(1); + MS_EXCEPTION_IF_NULL(bnupdate); if (bnupdate->isa() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { std::vector output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); for (auto out_getitem : manager->node_users()[bnupdate]) { + MS_EXCEPTION_IF_NULL(out_getitem.first); auto out_getitem_ptr = out_getitem.first->cast(); + 
MS_EXCEPTION_IF_NULL(out_getitem_ptr); auto input2 = out_getitem_ptr->input(2); auto output_idx = GetValue(GetValueNode(input2)); output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); @@ -70,10 +76,8 @@ void BnupdateEltwiseEltwiseFusionPass::MatchSingleFusionPattern(const session::K if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { auto eltwise_input = cnode->input(1); - if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { - if (eltwise_input->isa() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTensorAdd)) { - MatchBnupdateAddRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); - } + if (eltwise_input->isa() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTensorAdd)) { + MatchBnupdateAddRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); } } } diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc index 348504345a..1f7fef9e62 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc @@ -34,12 +34,17 @@ void BnupdateEltwiseFusionPass::MatchBnupdateRelu(const CNodePtr &cnode, const A MS_EXCEPTION_IF_NULL(candidate_fusion); auto manager = kernel_graph.manager(); MS_EXCEPTION_IF_NULL(manager); + MS_EXCEPTION_IF_NULL(relu_input); auto getitem = relu_input->cast(); + MS_EXCEPTION_IF_NULL(getitem); auto bnupdate = getitem->input(1); + MS_EXCEPTION_IF_NULL(bnupdate); if (bnupdate->isa() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { std::vector output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); for (auto out_getitem : manager->node_users()[bnupdate]) { + MS_EXCEPTION_IF_NULL(out_getitem.first); auto 
out_getitem_ptr = out_getitem.first->cast(); + MS_EXCEPTION_IF_NULL(out_getitem_ptr); auto input2 = out_getitem_ptr->input(2); auto output_idx = GetValue(GetValueNode(input2)); output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); @@ -65,10 +70,8 @@ void BnupdateEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGr if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { auto eltwise_input = cnode->input(1); - if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { - if (eltwise_input->isa() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTupleGetItem)) { - MatchBnupdateRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); - } + if (eltwise_input->isa() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTupleGetItem)) { + MatchBnupdateRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); } } } diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc index c90d2a17cd..6091eb572d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc @@ -35,6 +35,7 @@ void Conv2DBackpropEltwiseEltwiseFusionPass::MatchConv2DBackpropInputEltwiseEltw MS_EXCEPTION_IF_NULL(manager); std::unordered_set record{cnode}; auto eltwise_input = cnode->input(1); + MS_EXCEPTION_IF_NULL(eltwise_input); if (CheckDoubleInEltWiseNode(manager.get(), eltwise_input)) { (void)record.insert(eltwise_input); } else { @@ -43,6 +44,7 @@ void Conv2DBackpropEltwiseEltwiseFusionPass::MatchConv2DBackpropInputEltwiseEltw auto input_cnode = eltwise_input->cast(); MS_EXCEPTION_IF_NULL(input_cnode); auto 
double_in_eltwise_input = input_cnode->input(1); + MS_EXCEPTION_IF_NULL(double_in_eltwise_input); if (!double_in_eltwise_input->isa() || !AnfAlgo::IsRealCNodeKernel(double_in_eltwise_input) || fusion_id_allocator->HasFusionIdAttr(double_in_eltwise_input)) { return; diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc index a18d578f7f..963f1885fe 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc @@ -36,6 +36,7 @@ void Conv2DBackpropEltwiseFusionPass::MatchConv2DBackpropInputEltwise(const CNod MS_EXCEPTION_IF_NULL(manager); std::unordered_set record{cnode}; auto eltwise_input = cnode->input(1); + MS_EXCEPTION_IF_NULL(eltwise_input); if (!eltwise_input->isa() || !AnfAlgo::IsRealCNodeKernel(eltwise_input) || fusion_id_allocator->HasFusionIdAttr(eltwise_input)) { return; diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc index 2b243dbdac..63e7dcf6b8 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc @@ -35,6 +35,7 @@ void ConvBnReduceFusionPass::MatchConvBnreduce(const CNodePtr &cnode, const sess auto manager = kernel_graph.manager(); MS_EXCEPTION_IF_NULL(manager); auto conv = cnode->input(1); + MS_EXCEPTION_IF_NULL(conv); if (conv->isa() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) { std::vector output_used_num{SizeToInt(manager->node_users()[conv].size())}; AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), conv); diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc 
b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc index c4bfb96109..a126143811 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc @@ -35,6 +35,7 @@ void ConvDoubleInFusionPass::MatchConvDoubleInEltwise(const CNodePtr &cnode, con MS_EXCEPTION_IF_NULL(manager); std::unordered_set record{cnode}; auto eltwise_input = cnode->input(1); + MS_EXCEPTION_IF_NULL(eltwise_input); if (CheckDoubleInEltWiseNode(manager.get(), eltwise_input)) { (void)record.insert(eltwise_input); } else { @@ -43,6 +44,7 @@ void ConvDoubleInFusionPass::MatchConvDoubleInEltwise(const CNodePtr &cnode, con auto input_cnode = eltwise_input->cast(); MS_EXCEPTION_IF_NULL(input_cnode); auto double_in_eltwise_input = input_cnode->input(1); + MS_EXCEPTION_IF_NULL(double_in_eltwise_input); if (!double_in_eltwise_input->isa() || !AnfAlgo::IsRealCNodeKernel(double_in_eltwise_input) || fusion_id_allocator->HasFusionIdAttr(double_in_eltwise_input)) { return; diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc index c07c30f11c..d83b32a888 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc @@ -44,6 +44,7 @@ void ConvSingleInFusionPass::MatchConvSingleInEltwise(const CNodePtr &cnode, con break; } } + MS_EXCEPTION_IF_NULL(eltwise_input); if (!eltwise_input->isa() || !AnfAlgo::IsRealCNodeKernel(eltwise_input) || fusion_id_allocator->HasFusionIdAttr(eltwise_input)) { return; diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc index f485e901d8..98a6838bed 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc @@ -74,11 +74,8 @@ void DepthwiseConvEltwiseFusionPass::MatchSingleFusionPattern(const session::Ker if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { auto eltwise_input = cnode->input(1); - if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { - if (eltwise_input->isa() && - AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimDepthwiseConv2dNative)) { - MatchDepthwiseConvRelu(cnode, kernel_graph, candidate_fusion, true); - } + if (eltwise_input->isa() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimDepthwiseConv2dNative)) { + MatchDepthwiseConvRelu(cnode, kernel_graph, candidate_fusion, true); } } else if (AnfAlgo::GetCNodeName(cnode) == prim::kPrimDepthwiseConv2dNative->name()) { MatchDepthwiseConvRelu(cnode, kernel_graph, candidate_fusion, false); diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc index 42860de700..2f04e16692 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc @@ -35,6 +35,7 @@ void EltwiseFusionPass::MatchEltwise(const CNodePtr &cnode, const session::Kerne MS_EXCEPTION_IF_NULL(manager); std::unordered_set record{cnode}; auto eltwise_input = cnode->input(1); + MS_EXCEPTION_IF_NULL(eltwise_input); while (CheckEltWiseNode(manager.get(), eltwise_input)) { (void)record.insert(eltwise_input); if (record.size() == MAX_ELTWISE_SIZE) { @@ -55,7 +56,9 @@ void EltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &ker FusedNodeRecord *candidate_fusion) { MS_EXCEPTION_IF_NULL(candidate_fusion); 
std::vector node_list = TopoSort(kernel_graph.get_return()); + std::reverse(node_list.begin(), node_list.end()); for (auto &node : node_list) { + MS_EXCEPTION_IF_NULL(node); if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { continue; diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc index 3f5dd98112..a516f04442 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc @@ -25,6 +25,7 @@ namespace mindspore { namespace opt { bool FusionBasePass::CheckEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(manager); + MS_EXCEPTION_IF_NULL(node); if (!node->isa() || !AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node)) { return false; } @@ -38,6 +39,7 @@ bool FusionBasePass::CheckEltWiseNode(FuncGraphManager *manager, const AnfNodePt bool FusionBasePass::CheckDoubleInEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(manager); + MS_EXCEPTION_IF_NULL(node); if (!node->isa() || !AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node)) { return false; } @@ -49,6 +51,20 @@ bool FusionBasePass::CheckDoubleInEltWiseNode(FuncGraphManager *manager, const A cnode->inputs().size() == ELTWISE_DOUBLE_IN_INPUT_SIZE; } +bool FusionBasePass::CheckMultiOutputEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(manager); + MS_EXCEPTION_IF_NULL(node); + if (!node->isa() || !AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node)) { + return false; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto user_nodes = manager->node_users()[node]; + return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL && 
+ AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && user_nodes.size() == ELTWISE_MULTI_USE && + cnode->inputs().size() == ELTWISE_INPUT_SIZE; +} + void FusionBasePass::SetRecordFusionId(const std::unordered_set &record) { auto id = fusion_id_allocator->AllocateFusionId(); for (auto node : record) { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h index 421efa9716..8d6eca774c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h @@ -33,8 +33,12 @@ const int8_t MAX_ELTWISE_NUM = 3; const int8_t MIN_ELTWISE_SIZE = 2; const int8_t ELTWISE_INPUT_SIZE = 2; const int8_t ELTWISE_DOUBLE_IN_INPUT_SIZE = 3; +const int8_t CONV_DOUBLE_IN_INPUT_SIZE = 3; +const int8_t CONV_QUART_IN_INPUT_SIZE = 5; const int8_t ELTWISE_USE = 1; +const int8_t ELTWISE_MULTI_USE = 2; const int8_t MAX_ELTWISE_SIZE = 6; +const int8_t MULTI_ELTWISE_SIZE = 4; using FusedNodeRecord = std::vector>; struct BufferFusionInfo_t { @@ -58,6 +62,7 @@ class FusionBasePass : public Pass { void SetRecordFusionId(const std::unordered_set &record); bool CheckEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node); bool CheckDoubleInEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node); + bool CheckMultiOutputEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node); FusionIdAllocatorPtr fusion_id_allocator; }; } // namespace opt diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc index 41b17eba04..d1ef5dc83b 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc @@ -55,6 +55,7 @@ void 
MatmulEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGrap if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { auto eltwise_input = cnode->input(1); + MS_EXCEPTION_IF_NULL(eltwise_input); if (eltwise_input->isa() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimMatMul)) { MatchMatmulEltwise(cnode, eltwise_input, kernel_graph, candidate_fusion); } diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.cc new file mode 100644 index 0000000000..be4d2af1cb --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.cc @@ -0,0 +1,84 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h" +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void MultiOutputFusionPass::MatchMultiOutputEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + std::unordered_set record{cnode}; + auto eltwise_input = cnode->input(1); + MS_EXCEPTION_IF_NULL(eltwise_input); + if (CheckMultiOutputEltWiseNode(manager.get(), eltwise_input)) { + std::vector output_used_num{SizeToInt(manager->node_users()[eltwise_input].size())}; + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), eltwise_input); + (void)record.insert(eltwise_input); + auto input_cnode = eltwise_input->cast(); + MS_EXCEPTION_IF_NULL(input_cnode); + eltwise_input = input_cnode->input(1); + } else { + return; + } + while (CheckEltWiseNode(manager.get(), eltwise_input)) { + (void)record.insert(eltwise_input); + if (record.size() == MULTI_ELTWISE_SIZE) { + break; + } + auto input_cnode = eltwise_input->cast(); + MS_EXCEPTION_IF_NULL(input_cnode); + eltwise_input = input_cnode->input(1); + } + if (record.size() != MULTI_ELTWISE_SIZE) { + return; + } + candidate_fusion->push_back(record); + SetRecordFusionId(record); +} + +void MultiOutputFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + std::reverse(node_list.begin(), node_list.end()); + for (auto &node : node_list) { + if 
(!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { + MatchMultiOutputEltwise(cnode, kernel_graph, candidate_fusion); + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h new file mode 100644 index 0000000000..0e2510128a --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MULTI_OUTPUT_FUSION_PASS_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MULTI_OUTPUT_FUSION_PASS_H_ + +#include +#include + +#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +using FusedNodeRecord = std::vector>; + +class MultiOutputFusionPass : public FusionBasePass { + public: + explicit MultiOutputFusionPass(FusionIdAllocatorPtr idAllocator) + : FusionBasePass("MultiOutputFusionPass", idAllocator) {} + ~MultiOutputFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; + + private: + void MatchMultiOutputEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion); +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MULTI_OUTPUT_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc index 2293754106..623f0e3426 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc @@ -45,6 +45,7 @@ void ReduceEltwiseFusionPass::MatchReduceEltwise(const CNodePtr &cnode, const se break; } } + MS_EXCEPTION_IF_NULL(eltwise_input); if (!eltwise_input->isa() || !AnfAlgo::IsRealCNodeKernel(eltwise_input) || fusion_id_allocator->HasFusionIdAttr(eltwise_input)) { return; diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc 
b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc index 1926d64c61..0dcf2362bc 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc @@ -44,6 +44,7 @@ void SegmentEltwiseFusionPass::MatchSegmentEltwise(const CNodePtr &cnode, const break; } } + MS_EXCEPTION_IF_NULL(eltwise_input); if (!eltwise_input->isa() || !AnfAlgo::IsRealCNodeKernel(eltwise_input) || fusion_id_allocator->HasFusionIdAttr(eltwise_input)) { return; @@ -73,6 +74,7 @@ void SegmentEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGra FusedNodeRecord *candidate_fusion) { MS_EXCEPTION_IF_NULL(candidate_fusion); std::vector node_list = TopoSort(kernel_graph.get_return()); + std::reverse(node_list.begin(), node_list.end()); for (auto &node : node_list) { if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc new file mode 100644 index 0000000000..5bc0fdced7 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc @@ -0,0 +1,89 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h" + +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void StridedReadConvStridedWriteFusionPass::MatchStridedReadConvStridedWrite(const CNodePtr &cnode, + const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + std::unordered_set record{cnode}; + auto write_input = cnode->input(1); + if (CheckEltWiseNode(manager.get(), write_input)) { + (void)record.insert(write_input); + auto input_cnode = write_input->cast(); + MS_EXCEPTION_IF_NULL(input_cnode); + write_input = input_cnode->input(1); + } + MS_EXCEPTION_IF_NULL(write_input); + if (!write_input->isa() || !AnfAlgo::IsRealCNodeKernel(write_input) || + fusion_id_allocator->HasFusionIdAttr(write_input)) { + return; + } + auto conv_cnode = write_input->cast(); + MS_EXCEPTION_IF_NULL(conv_cnode); + if (AnfAlgo::GetKernelType(conv_cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(conv_cnode) == kernel::FusionType::CONVLUTION && + conv_cnode->inputs().size() >= CONV_DOUBLE_IN_INPUT_SIZE && + conv_cnode->inputs().size() <= CONV_QUART_IN_INPUT_SIZE) { + (void)record.insert(write_input); + auto conv_input = conv_cnode->input(1); + MS_EXCEPTION_IF_NULL(conv_input); + if (!conv_input->isa() || !AnfAlgo::IsRealCNodeKernel(conv_input) || + fusion_id_allocator->HasFusionIdAttr(conv_input)) { + return; + } + if (AnfAlgo::GetCNodeName(conv_input) == kStridedReadOpName) { + 
(void)record.insert(conv_input); + candidate_fusion->push_back(record); + SetRecordFusionId(record); + } + } +} + +void StridedReadConvStridedWriteFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + for (auto &node : node_list) { + if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetCNodeName(cnode) == kStridedWriteOpName) { + MatchStridedReadConvStridedWrite(cnode, kernel_graph, candidate_fusion); + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h new file mode 100644 index 0000000000..c6c5fe88dc --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_STRIDEDREAD_CONV_STRIDEDWRITE_FUSION_PASS_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_STRIDEDREAD_CONV_STRIDEDWRITE_FUSION_PASS_H_ + +#include +#include + +#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +using FusedNodeRecord = std::vector>; + +class StridedReadConvStridedWriteFusionPass : public FusionBasePass { + public: + explicit StridedReadConvStridedWriteFusionPass(FusionIdAllocatorPtr idAllocator) + : FusionBasePass("StridedReadConvStridedWriteFusionPass", idAllocator) {} + ~StridedReadConvStridedWriteFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; + + private: + void MatchStridedReadConvStridedWrite(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion); +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_STRIDEDREAD_CONV_STRIDEDWRITE_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc index af20c47996..faa5169c40 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc @@ -206,6 +206,7 @@ void ReplaceOldNode(std::unordered_map *buffer_fusi void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, std::unordered_map *buffer_fusion_infos) { MS_EXCEPTION_IF_NULL(buffer_fusion_infos); + MS_EXCEPTION_IF_NULL(kernel_graph); auto nodes = TopoSort(kernel_graph->get_return()); for 
(auto &node : nodes) { MS_EXCEPTION_IF_NULL(node); @@ -231,6 +232,7 @@ void GetFusionScopeInputNodeList(const session::KernelGraph &kernel_graph, auto fusion_info = buffer_fusion_info.second; for (const auto &node : fusion_info.anf_nodes) { auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) { auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0); if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) == @@ -253,6 +255,14 @@ bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) { auto getitem2 = node2->cast(); MS_EXCEPTION_IF_NULL(getitem1); MS_EXCEPTION_IF_NULL(getitem2); + if (getitem1->size() < kTupleGetItemInputSize) { + MS_LOG(EXCEPTION) << "node's input size less than " << kTupleGetItemInputSize << ", getitem1[" + << getitem1->DebugString() << "]"; + } + if (getitem2->size() < kTupleGetItemInputSize) { + MS_LOG(EXCEPTION) << "node's input size less than " << kTupleGetItemInputSize << ", getitem2[" + << getitem2->DebugString() << "]"; + } auto output_idx1 = GetValue(GetValueNode(getitem1->input(2))); auto output_idx2 = GetValue(GetValueNode(getitem2->input(2))); return output_idx1 < output_idx2; @@ -285,6 +295,7 @@ void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, [](const std::pair &use_node) { return use_node.first; }); std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare); for (auto getitem : tuple_getitem_nodes) { + MS_EXCEPTION_IF_NULL(getitem); auto getitem_ptr = getitem->cast(); auto input2 = getitem_ptr->input(2); auto output_idx = GetValue(GetValueNode(input2)); @@ -313,6 +324,7 @@ void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vectorisa() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { auto real_output = AnfAlgo::VisitKernel(output, 0); auto output_cnode = output->cast(); @@ -393,6 +405,7 @@ bool 
UbPatternFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph bool UbPatternFusion::ReplaceFusionOp(std::unordered_map *buffer_fusion_infos, int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const { + MS_EXCEPTION_IF_NULL(buffer_fusion_infos); auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list, buffer_fusion_info.anf_nodes, kernel_graph); diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.cc b/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.cc deleted file mode 100644 index 51f6732c66..0000000000 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.cc +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "pre_activate/ascend/enhancer/add_memcpy_async.h" -#include -#include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" -#include "pre_activate/ascend/ascend_helper.h" - -namespace mindspore { -namespace opt { -namespace { -bool InputIsParameterOrValueNode(const AnfNodePtr &node) { - MS_EXCEPTION_IF_NULL(node); - auto kernel_with_index = AnfAlgo::VisitKernelWithReturnType(node, 0, true); - return kernel_with_index.first->isa() || kernel_with_index.first->isa(); -} - -const AnfNodePtr AddMemcpyAsyncIfInputIsUsedByOthers(const FuncGraphPtr &graph, const CNodePtr &node) { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(node); - auto manager = graph->manager(); - MS_EXCEPTION_IF_NULL(manager); - const std::vector &inputs = node->inputs(); - bool replace = false; - if (inputs.empty()) { - MS_LOG(EXCEPTION) << "node[" + AnfAlgo::GetCNodeName(node) + "]'s inputs is empty"; - } - std::vector new_inputs = {inputs[0]}; - for (size_t i = 1; i < inputs.size(); ++i) { - auto input = node->input(i); - if (manager->node_users().find(input) == manager->node_users().end()) { - MS_LOG(EXCEPTION) << "node has no output in manager"; - } - // when input is used by others or is a parameter or is a value node, insert a memcpy_async - if (manager->node_users()[input].size() > 1 || InputIsParameterOrValueNode(input)) { - replace = true; - new_inputs.push_back(CreateMemcpyAsyncOp(graph, input)); - } else { - new_inputs.push_back(input); - } - } - - CNodePtr new_node = std::make_shared(*node); - new_node->set_inputs(new_inputs); - return replace ? 
new_node : nullptr; -} -} // namespace - -const AnfNodePtr AddMemcpyAsync::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, - const EquivPtr &) const { - if (func_graph == nullptr || node == nullptr || !node->isa()) { - return nullptr; - } - auto cnode = node->cast(); - if (!AnfAlgo::IsCommunicationOp(node)) { - return nullptr; - } - return AddMemcpyAsyncIfInputIsUsedByOthers(func_graph, cnode); -} -} // namespace opt -} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc b/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc new file mode 100644 index 0000000000..63ea59d744 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc @@ -0,0 +1,144 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" +#include +#include +#include +#include "utils/utils.h" +#include "session/anf_runtime_algorithm.h" +#include "optimizer/opt.h" +#include "pre_activate/ascend/ascend_helper.h" + +namespace mindspore { +namespace opt { +namespace { +// insert memcpy for some cnode even if not a Ref cnode +const std::set kNeedInsertMemcpyOpSet = {kLambNextMVOpName, kLambNextMVWithDecayOpName, + kLambUpdateWithLROpName}; + +bool IsParameterOrValueNode(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + auto kernel_with_index = AnfAlgo::VisitKernelWithReturnType(node, 0, true); + return kernel_with_index.first->isa() || kernel_with_index.first->isa(); +} + +void TransferControl(const CNodePtr &hccl_node, const AnfNodePtr &memcpy_async, const FuncGraphPtr &graph) { + MS_EXCEPTION_IF_NULL(hccl_node); + MS_EXCEPTION_IF_NULL(memcpy_async); + MS_EXCEPTION_IF_NULL(graph); + auto manager = graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + auto &node_users = manager->node_users(); + auto iter = node_users.find(hccl_node); + if (iter == node_users.end()) { + MS_LOG(EXCEPTION) << "node has no output in manager"; + } + // find hccl_node's output which is a control depend + for (const auto &node_index : iter->second) { + AnfNodePtr output = node_index.first; + int output_index = node_index.second; + if (AnfAlgo::CheckPrimitiveType(output, prim::kPrimControlDepend)) { + CNodePtr control_depend = output->cast(); + MS_EXCEPTION_IF_NULL(control_depend); + std::vector new_inputs; + for (size_t i = 0; i < control_depend->size(); ++i) { + if (i == IntToSize(output_index)) { + new_inputs.push_back(memcpy_async); + } else { + new_inputs.push_back(control_depend->input(i)); + } + } + control_depend->set_inputs(new_inputs); + } + } +} +} // namespace + +bool InsertMemcpyAsyncForHcclOp::NeedInsertMemcpy(const FuncGraphPtr &graph, const AnfNodePtr &input) const { + MS_EXCEPTION_IF_NULL(graph); + 
 MS_EXCEPTION_IF_NULL(input); + // when input is a parameter or is a value node + if (IsParameterOrValueNode(input)) { + return true; + } + + // when input is a Ref or some special cnodes + if (kernel_query_->IsTbeRef(input) || + kNeedInsertMemcpyOpSet.find(AnfAlgo::GetCNodeName(input)) != kNeedInsertMemcpyOpSet.end()) { + return true; + } + + auto manager = graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + auto &node_users = manager->node_users(); + auto iter = node_users.find(input); + if (iter == node_users.end()) { + MS_LOG(EXCEPTION) << "node has no output in manager"; + } + // when input is used by others + if (iter->second.size() > 1) { + return true; + } + return false; +} + +void InsertMemcpyAsyncForHcclOp::InsertMemcpyAsync(const FuncGraphPtr &graph, const CNodePtr &hccl_node) const { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(hccl_node); + bool has_insert_memcpy = false; + AnfNodePtr memcpy_async = nullptr; + std::vector new_inputs = {hccl_node->input(0)}; + for (size_t i = 1; i < hccl_node->size(); ++i) { + auto input = hccl_node->input(i); + if (NeedInsertMemcpy(graph, input)) { + memcpy_async = CreateMemcpyAsyncOp(graph, input); + has_insert_memcpy = true; + new_inputs.push_back(memcpy_async); + } else { + new_inputs.push_back(input); + } + } + + if (has_insert_memcpy) { + CNodePtr new_hccl_node = std::make_shared(*hccl_node); + new_hccl_node->set_inputs(new_inputs); + auto manager = graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + MS_LOG(DEBUG) << "start replace new_hccl_node to old hccl_node"; + (void)manager->Replace(hccl_node, new_hccl_node); + MS_LOG(DEBUG) << "end replace"; + + // transfer hccl op's control to the memcpy_async + if (hccl_node->size() == 2) { + TransferControl(new_hccl_node, memcpy_async, graph); + } + } +} + +const AnfNodePtr InsertMemcpyAsyncForHcclOp::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &) const { + if (func_graph == nullptr || node == nullptr || !node->isa()) { 
return nullptr; + } + auto cnode = node->cast(); + if (!AnfAlgo::IsCommunicationOp(node)) { + return nullptr; + } + InsertMemcpyAsync(func_graph, cnode); + return nullptr; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h b/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h new file mode 100644 index 0000000000..e2f3b781ed --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h @@ -0,0 +1,40 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_MEMCPY_ASYNC_FOR_HCCL_OP_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_MEMCPY_ASYNC_FOR_HCCL_OP_H_ + +#include +#include "pre_activate/common/optimizer.h" +#include "pre_activate/ascend/ascend_helper.h" + +namespace mindspore { +namespace opt { +class InsertMemcpyAsyncForHcclOp : public PatternProcessPass { + public: + explicit InsertMemcpyAsyncForHcclOp(bool multigraph = true) + : PatternProcessPass("insert_memcpy_async_for_hccl_op", multigraph), + kernel_query_(std::make_shared()) {} + ~InsertMemcpyAsyncForHcclOp() override = default; + const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; + + private: + void InsertMemcpyAsync(const FuncGraphPtr &graph, const CNodePtr &hccl_node) const; + bool NeedInsertMemcpy(const FuncGraphPtr &graph, const AnfNodePtr &input) const; + KernelQueryPtr kernel_query_; +}; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_MEMCPY_ASYNC_FOR_HCCL_OP_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc index d2557a4bb7..7c8fb70fda 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc @@ -17,9 +17,12 @@ #include #include +#include #include "utils/utils.h" #include "session/anf_runtime_algorithm.h" +#include "common/utils.h" +#include "kernel/common_utils.h" namespace mindspore { namespace opt { @@ -74,11 +77,21 @@ const AnfNodePtr CheckConsistency::Process(const FuncGraphPtr &, const AnfNodePt if (node == nullptr || !node->isa() || !AnfAlgo::IsRealKernel(node)) { return nullptr; } - CNodePtr cnode = node->cast(); - for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(cnode); i++) { - if (!CheckFormatForConsistency(cnode, i) || 
!CheckDataTypeForConsistency(cnode, i)) { - MS_LOG(EXCEPTION) << "Found inconsistent format or data type! Op: " << AnfAlgo::GetCNodeName(node) << "[" - << node->DebugString() << "]"; + + std::vector todos = {node}; + if (AnfAlgo::IsGraphKernel(node)) { + auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + kernel::GetValidKernelNodes(sub_graph, &todos); + } + + for (auto &t : todos) { + CNodePtr cnode = t->cast(); + for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(cnode); i++) { + if (!CheckFormatForConsistency(cnode, i) || !CheckDataTypeForConsistency(cnode, i)) { + MS_LOG(EXCEPTION) << "Found inconsistent format or data type! Op: " << AnfAlgo::GetCNodeName(cnode) << "[" + << cnode->DebugString() << "]"; + } } } return nullptr; diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc index 5b5bf7e4fc..c0f99ed415 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc @@ -34,13 +34,13 @@ const AnfNodePtr ConvertUnSupportNodeToAICPU::Process(const mindspore::FuncGraph return nullptr; } auto node_name = AnfAlgo::GetCNodeName(node); - if (node_name != prim::KPrimTransData->name() || node_name != prim::kPrimCast->name()) { + if (node_name != prim::KPrimTransData->name() && node_name != prim::kPrimCast->name()) { return nullptr; } auto kernel_builder_info = AnfAlgo::GetSelectKernelBuildInfo(node); - if (supported_checker_->CheckAiCoreSupported(node, kernel_builder_info)) { - return node; - } else if (supported_checker_->CheckAiCpuSupported(node, kernel_builder_info)) { + if (supported_checker_->CheckAICoreSupported(node, kernel_builder_info)) { + return nullptr; + } else if (supported_checker_->CheckAICPUSupported(node, kernel_builder_info)) { auto builder 
= std::make_shared(kernel_builder_info); builder->SetKernelType(AICPU_KERNEL); AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get()); @@ -49,7 +49,7 @@ const AnfNodePtr ConvertUnSupportNodeToAICPU::Process(const mindspore::FuncGraph MS_LOG(EXCEPTION) << " kernel " << kernel_builder_info->ToString() << "is not supported in AiCPU & AiCore : node [" << node->DebugString() << "]"; } - return node; + return nullptr; } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.cc index 43857dddfd..f909dae9e4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.cc +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.cc @@ -31,13 +31,14 @@ session::KernelWithIndex FindRefOriginNode(const AnfNodePtr &node) { session::KernelWithIndex kernel_with_index = AnfAlgo::VisitKernel(node, 0); AnfNodePtr cur_node = kernel_with_index.first; size_t cur_out_index = kernel_with_index.second; + MS_EXCEPTION_IF_NULL(cur_node); if (cur_node->isa()) { - auto cnode = node->cast(); + auto cnode = cur_node->cast(); MS_EXCEPTION_IF_NULL(cnode); std::string op_name = AnfAlgo::GetCNodeName(cnode); auto op_info = mindspore::kernel::OpLib::FindOp(op_name, kernel::kTBE); // deal ref op - if (op_info->is_ref()) { + if (op_info != nullptr && op_info->is_ref()) { auto ref_infos = op_info->ref_infos(); if (ref_infos.count(cur_out_index) != 0) { auto in_index = ref_infos.at(cur_out_index); @@ -88,7 +89,7 @@ AnfNodePtr AddAdditionalToRefOutput(const FuncGraphPtr &func_graph, const CNodeP size_t input_index, const AnfNodePtr &get_item) { AnfNodePtr final_node = (get_item == nullptr ? 
cnode : get_item); size_t final_index = output_index; - AnfNodePtr input_node = cnode->input(input_index + 1); + AnfNodePtr input_node = AnfAlgo::GetInputNode(cnode, input_index); session::KernelWithIndex origin_pair; origin_pair = FindRefOriginNode(input_node); MS_EXCEPTION_IF_NULL(origin_pair.first); @@ -106,7 +107,7 @@ AnfNodePtr AddAdditionalToRefOutput(const FuncGraphPtr &func_graph, const CNodeP if (origin_format != cur_format && cur_shape.size() > 1) { auto kernel_select = std::make_shared(); final_node = NewTransOpNode(func_graph, final_node, kernel_select, false, prim::KPrimTransData->name()); - RefreshKernelBuildInfo(cur_format, origin_format, origin_type, final_node); + RefreshKernelBuildInfo(cur_format, origin_format, final_node); final_index = 0; MS_EXCEPTION_IF_NULL(final_node); MS_LOG(INFO) << "DealRefTransAndCast add trans op, op debug info is " << final_node->DebugString(); @@ -133,6 +134,7 @@ AnfNodePtr AddAdditionalToRefOutput(const FuncGraphPtr &func_graph, const CNodeP } AnfNodePtr DealRefForMultipleOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const std::shared_ptr &op_info) { + MS_EXCEPTION_IF_NULL(op_info); auto ref_infos = op_info->ref_infos(); std::vector make_tuple_inputs; AbstractBasePtrList abstract_list; @@ -144,9 +146,11 @@ AnfNodePtr DealRefForMultipleOutput(const FuncGraphPtr &func_graph, const CNodeP auto input_index = ref_infos.at(output_index); final_node = AddAdditionalToRefOutput(func_graph, cnode, output_index, input_index, final_node); } + MS_EXCEPTION_IF_NULL(final_node); abstract_list.push_back(final_node->abstract()); make_tuple_inputs.push_back(final_node); } + MS_EXCEPTION_IF_NULL(func_graph); AnfNodePtr make_tuple = func_graph->NewCNode(make_tuple_inputs); MS_EXCEPTION_IF_NULL(make_tuple); make_tuple->set_abstract(std::make_shared(abstract_list)); @@ -155,6 +159,8 @@ AnfNodePtr DealRefForMultipleOutput(const FuncGraphPtr &func_graph, const CNodeP AnfNodePtr DealRefSigleOutput(const FuncGraphPtr 
&func_graph, const CNodePtr &cnode, const std::shared_ptr &op_info) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(op_info); auto ref_infos = op_info->ref_infos(); for (const auto &ref_info : ref_infos) { if (ref_info.second > cnode->inputs().size()) { @@ -206,7 +212,9 @@ const AnfNodePtr DealRefTransAndCast::Process(const FuncGraphPtr &graph, const A return nullptr; } if (op_info->is_ref()) { - if (!cnode->Type()->isa()) { + auto type = cnode->Type(); + MS_EXCEPTION_IF_NULL(type); + if (!type->isa()) { return DealRefSigleOutput(graph, cnode, op_info); } else { return DealRefForMultipleOutput(graph, cnode, op_info); diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc index 0fefab10d0..3d09233d99 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include "device/kernel_info.h" #include "pre_activate/ascend/ascend_helper.h" @@ -27,34 +28,45 @@ #include "session/anf_runtime_algorithm.h" #include "session/kernel_graph.h" #include "utils/utils.h" +#include "kernel/common_utils.h" namespace mindspore { namespace opt { namespace { -AnfNodePtr InsertCastForMultipleOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { +AnfNodePtr InsertCastForMultipleOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, + const std::vector &need_insert_cast) { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(cnode); std::vector make_tuple_inputs; AbstractBasePtrList abstract_list; make_tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple)); for (size_t output_idx = 0; output_idx < AnfAlgo::GetOutputTensorNum(cnode); ++output_idx) { - const std::string dev_fmt = AnfAlgo::GetOutputFormat(cnode, output_idx); - const std::vector origin_shape = AnfAlgo::GetOutputInferShape(cnode, output_idx); - const TypeId origin_type = 
AnfAlgo::GetOutputInferDataType(cnode, output_idx); - const TypeId device_type = AnfAlgo::GetOutputDeviceDataType(cnode, output_idx); + AnfNodePtr replace_node = nullptr; + const auto origin_shape = AnfAlgo::GetOutputInferShape(cnode, output_idx); + const auto infer_type = AnfAlgo::GetOutputInferDataType(cnode, output_idx); auto idx = NewValueNode(SizeToInt(output_idx)); MS_EXCEPTION_IF_NULL(idx); auto imm = std::make_shared(output_idx); idx->set_abstract(std::make_shared(imm)); auto getitem = func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), cnode, idx}); - AnfAlgo::SetOutputInferTypeAndShape({origin_type}, {origin_shape}, getitem.get()); - AnfNodePtr replace_node = nullptr; - if (origin_type != device_type) { - replace_node = - AddCastOpNodeToGraph(func_graph, getitem, dev_fmt, device_type, origin_type, origin_shape, origin_type); - MS_EXCEPTION_IF_NULL(replace_node); - replace_node->set_scope(cnode->scope()); - AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), replace_node); + AnfAlgo::SetOutputInferTypeAndShape({infer_type}, {origin_shape}, getitem.get()); + if (need_insert_cast[output_idx]) { + const auto dev_fmt = AnfAlgo::GetOutputFormat(cnode, output_idx); + TypeId origin_type(kTypeUnknown); + if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + origin_type = AnfAlgo::GetCNodeOutputPrecision(cnode); + } + origin_type = origin_type == kTypeUnknown ? 
infer_type : origin_type; + const auto device_type = AnfAlgo::GetOutputDeviceDataType(cnode, output_idx); + if (origin_type != device_type) { + replace_node = + AddCastOpNodeToGraph(func_graph, getitem, dev_fmt, device_type, origin_type, origin_shape, infer_type); + MS_EXCEPTION_IF_NULL(replace_node); + replace_node->set_scope(cnode->scope()); + AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), replace_node); + } else { + replace_node = getitem; + } } else { replace_node = getitem; } @@ -65,9 +77,10 @@ AnfNodePtr InsertCastForMultipleOutput(const FuncGraphPtr &func_graph, const CNo MS_EXCEPTION_IF_NULL(make_tuple); make_tuple->set_abstract(std::make_shared(abstract_list)); return make_tuple; -} +} // namespace -AnfNodePtr InsertCastForOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { +AnfNodePtr InsertCastForOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, + const std::vector &need_insert_cast) { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(cnode); if (AnfAlgo::GetOutputTensorNum(cnode) == 0) { @@ -76,14 +89,23 @@ AnfNodePtr InsertCastForOutput(const FuncGraphPtr &func_graph, const CNodePtr &c MS_EXCEPTION_IF_NULL(cnode->Type()); // Single output if (!cnode->Type()->isa()) { + if (!need_insert_cast[0]) { + return cnode; + } + const std::string dev_fmt = AnfAlgo::GetOutputFormat(cnode, 0); std::vector origin_shape = AnfAlgo::GetOutputInferShape(cnode, 0); - const TypeId origin_type = AnfAlgo::GetOutputInferDataType(cnode, 0); + const auto infer_type = AnfAlgo::GetOutputInferDataType(cnode, 0); + TypeId origin_type(kTypeUnknown); + if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + origin_type = AnfAlgo::GetCNodeOutputPrecision(cnode); + } + origin_type = origin_type == kTypeUnknown ? 
infer_type : origin_type; const TypeId device_type = AnfAlgo::GetOutputDeviceDataType(cnode, 0); AnfNodePtr replace_node = cnode; if (origin_type != device_type) { replace_node = - AddCastOpNodeToGraph(func_graph, cnode, dev_fmt, device_type, origin_type, origin_shape, origin_type); + AddCastOpNodeToGraph(func_graph, cnode, dev_fmt, device_type, origin_type, origin_shape, infer_type); MS_EXCEPTION_IF_NULL(replace_node); replace_node->set_scope(cnode->scope()); AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), replace_node); @@ -91,7 +113,57 @@ AnfNodePtr InsertCastForOutput(const FuncGraphPtr &func_graph, const CNodePtr &c return replace_node; } // Multiple output - return InsertCastForMultipleOutput(func_graph, cnode); + return InsertCastForMultipleOutput(func_graph, cnode, need_insert_cast); +} + +AnfNodePtr ProcessGraphKernelOp(const FuncGraphPtr &func_graph, const AnfNodePtr &node) { + // insert cast for ops in graph kernel. + auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + auto mng = sub_graph->manager(); + MS_EXCEPTION_IF_NULL(mng); + std::vector todo; + std::vector> graph_rets; + kernel::GetValidKernelNodes(sub_graph, &todo); + kernel::GetGraphRealOutput(sub_graph, &graph_rets); + for (auto &t : todo) { + AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), t); + // process input + CNodePtr t_cnode = t->cast(); + MS_EXCEPTION_IF_NULL(t_cnode); + auto t_new_node = InsertCastForInput(sub_graph, t_cnode); + AnfNodePtr t_new_node_1 = nullptr; + std::vector need_insert_cast(AnfAlgo::GetOutputTensorNum(t), true); + // process output + auto iter = std::find_if(graph_rets.begin(), graph_rets.end(), + [&t](const std::pair &ret) { return ret.first == t; }); + if (iter != graph_rets.end()) { + auto t_fix_output_type = AnfAlgo::GetCNodeOutputPrecision(t); + auto t_output_type = AnfAlgo::GetOutputDeviceDataType(t, iter->second); + auto graph_output_type = AnfAlgo::GetOutputDeviceDataType(node, iter - graph_rets.begin()); + if 
(t_fix_output_type == kTypeUnknown && t_output_type == graph_output_type) { + need_insert_cast[iter->second] = false; + } else if (t_fix_output_type == t_output_type && t_output_type == graph_output_type) { + need_insert_cast[iter->second] = false; + } + t_new_node_1 = InsertCastForOutput(sub_graph, t_new_node, need_insert_cast); + } else { + t_new_node_1 = InsertCastForOutput(sub_graph, t_new_node, need_insert_cast); + } + + if (t_new_node_1 != nullptr && t_new_node_1 != t) { + (void)mng->Replace(t, t_new_node_1); + } + } + + // insert cast for graph kernel. + AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); + // process input + CNodePtr cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto new_node = InsertCastForInput(func_graph, cnode); + // process output + return InsertCastForOutput(func_graph, new_node, std::vector(AnfAlgo::GetOutputTensorNum(new_node), true)); } } // namespace @@ -106,13 +178,27 @@ const AnfNodePtr InsertCast::Process(const FuncGraphPtr &func_graph, const AnfNo if (!AnfAlgo::IsRealCNodeKernel(node) || func_graph == nullptr) { return nullptr; } + + if (AnfAlgo::IsGraphKernel(node)) { + return ProcessGraphKernelOp(func_graph, node); + } else { + // insert cast for single op. + AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); + // process input + CNodePtr cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto new_node = InsertCastForInput(func_graph, cnode); + // process output + return InsertCastForOutput(func_graph, new_node, std::vector(AnfAlgo::GetOutputTensorNum(new_node), true)); + } + // insert cast for single op. 
AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); // process input CNodePtr cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); auto new_node = InsertCastForInput(func_graph, cnode); // process output - return InsertCastForOutput(func_graph, new_node); + return InsertCastForOutput(func_graph, new_node, std::vector(AnfAlgo::GetOutputTensorNum(new_node), true)); } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast_for_runop.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast_for_runop.cc deleted file mode 100644 index 7647b86c17..0000000000 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast_for_runop.cc +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "pre_activate/ascend/format_type/insert_cast_for_runop.h" - -#include - -#include "device/kernel_info.h" -#include "pre_activate/ascend/ascend_helper.h" -#include "pre_activate/common/helper.h" -#include "kernel/oplib/oplib.h" -#include "session/anf_runtime_algorithm.h" -#include "utils/utils.h" - -namespace mindspore { -namespace opt { -const BaseRef RunOpInsertCast::DefinePattern() const { - VarPtr V = std::make_shared(UnVisited); - VarPtr Xs = std::make_shared(); - return VectorRef({V, Xs}); -} - -const AnfNodePtr RunOpInsertCast::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, - const EquivPtr &) const { - MS_EXCEPTION_IF_NULL(node); - if (!AnfAlgo::IsRealCNodeKernel(node) || func_graph == nullptr) { - return nullptr; - } - AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); - // process input - CNodePtr cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - return InsertCastForInput(func_graph, cnode); -} -} // namespace opt -} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast_for_runop.h b/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast_for_runop.h deleted file mode 100644 index 4467cc5198..0000000000 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast_for_runop.h +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_INSERT_CAST_FOR_RUNOP_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_INSERT_CAST_FOR_RUNOP_H_ -#include - -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pattern_engine.h" -#include "ir/anf.h" -namespace mindspore { -namespace opt { -class RunOpInsertCast : public PatternProcessPass { - public: - explicit RunOpInsertCast(bool multigraph = true) : PatternProcessPass("insert_cast_for_runop", multigraph) {} - ~RunOpInsertCast() override = default; - const BaseRef DefinePattern() const override; - const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; -}; -} // namespace opt -} // namespace mindspore - -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_INSERT_CAST_FOR_RUNOP_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.cc index 97244e40c6..953f464431 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.cc +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.cc @@ -16,11 +16,13 @@ #include "pre_activate/ascend/format_type/insert_trans_op.h" #include +#include #include "utils/utils.h" #include "pre_activate/ascend/ascend_helper.h" #include "session/anf_runtime_algorithm.h" #include "device/kernel_info.h" #include "kernel/oplib/oplib.h" +#include "utils/context/ms_context.h" namespace mindspore { namespace opt { @@ -30,6 +32,15 @@ const BaseRef InsertTransOp::DefinePattern() const { return VectorRef({V, Xs}); } +bool IsGraphOutput(const AnfNodePtr &node, const std::vector &outputs) { + auto iter = std::find(outputs.begin(), outputs.end(), node); + if (iter != outputs.end()) { + return true; + } + + return false; +} + const AnfNodePtr InsertTransOp::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const { if (node == nullptr || 
!AnfAlgo::IsRealKernel(node)) { @@ -38,6 +49,13 @@ const AnfNodePtr InsertTransOp::Process(const FuncGraphPtr &func_graph, const An AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); MS_LOG(DEBUG) << "====process op: " << node->DebugString(); AnfNodePtr new_node = InsertTransOpForInput(func_graph, node, kernel_select_); + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + if (ms_context->execution_mode() == kPynativeMode && !ms_context->enable_pynative_hook()) { + if (IsGraphOutput(node, AnfAlgo::GetAllOutput(func_graph->output(), {prim::kPrimTupleGetItem}))) { + return new_node; + } + } return InsertTransOpForOutput(func_graph, new_node, kernel_select_); } } // namespace opt diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc index dc47757e5d..b1817cec3d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc @@ -61,16 +61,14 @@ bool AlternativeKernelInfoForInput(const CNodePtr &node, const TypeId dst_type, bool GetNextNodeAndCastIndex(const FuncGraphPtr &graph, const AnfNodePtr &node, AnfNodePtr *next_node, size_t *cast_index) { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(node); - // Check whether the cast node is used for input by only one another node. 
- auto manager = graph->manager(); - MS_EXCEPTION_IF_NULL(manager); - if (manager->node_users().find(node) == manager->node_users().end() || manager->node_users()[node].size() != 1) { + auto output_node_list = GetRealNodeUsedList(graph, node); + MS_EXCEPTION_IF_NULL(output_node_list); + if (output_node_list->size() != 1) { return false; } - *next_node = manager->node_users()[node].begin()->first; - *cast_index = IntToSize(manager->node_users()[node].begin()->second - 1); + auto node_pair = output_node_list->at(0); + *next_node = node_pair.first; + *cast_index = node_pair.second - 1; return true; } @@ -122,6 +120,24 @@ bool CheckIndexOutput(const CNodePtr &node, const std::shared_ptrGetOutputFormat(index); } +void ChangeNodeInferInfo(const CNodePtr &cnode, const CNodePtr &cast, const size_t cast_index) { + using Shape = std::vector; + auto cast_dtype = AnfAlgo::GetOutputInferDataType(cast, 0); + auto cast_shape = AnfAlgo::GetOutputInferShape(cast, 0); + std::vector shapes; + std::vector types; + for (size_t index = 0; index < AnfAlgo::GetOutputTensorNum(cnode); ++index) { + if (cast_index == index) { + shapes.emplace_back(cast_shape); + types.emplace_back(cast_dtype); + continue; + } + shapes.emplace_back(AnfAlgo::GetOutputInferShape(cnode, index)); + types.emplace_back(AnfAlgo::GetOutputInferDataType(cnode, index)); + } + AnfAlgo::SetOutputInferTypeAndShape(types, shapes, cnode.get()); +} + AnfNodePtr MergeCastToNextOp(const FuncGraphPtr &graph, const CNodePtr &node, const KernelQueryPtr kernel_query) { MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(kernel_query); @@ -135,6 +151,9 @@ AnfNodePtr MergeCastToNextOp(const FuncGraphPtr &graph, const CNodePtr &node, co return nullptr; } auto next_cnode = next_node->cast(); + if (AnfAlgo::IsGraphKernel(next_node)) { + return nullptr; + } auto next_op_name = AnfAlgo::GetCNodeName(next_node); std::vector> kernel_info_list; kernel_query->Query(next_cnode, &kernel_info_list); @@ -148,11 +167,14 @@ AnfNodePtr 
MergeCastToNextOp(const FuncGraphPtr &graph, const CNodePtr &node, co if (alternative_kernel_info == kernel_info_list.end()) { return nullptr; } - MS_LOG(INFO) << "Found alternative kernel info for current anf kernel " << next_op_name; + auto ori_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(next_node); + MS_LOG(INFO) << "Found alternative kernel info for current anf kernel " << next_cnode->DebugString() + << "ori kernel info" << ori_kernel_info->ToString() << "alternative kernel info" + << (*alternative_kernel_info)->ToString(); AnfAlgo::SetSelectKernelBuildInfo(*alternative_kernel_info, next_cnode.get()); + ChangeNodeInferInfo(next_cnode, node, cast_index); if (node->inputs().size() < kCastInputNum) { - auto op_name = AnfAlgo::GetCNodeName(node); - MS_LOG(EXCEPTION) << "op[" << op_name << "] has wrong input num:"; + MS_LOG(EXCEPTION) << "Op[" << node->DebugString() << "] has wrong input num:"; } return node->input(1); } @@ -205,6 +227,9 @@ AnfNodePtr MergeCastToPriorOp(const FuncGraphPtr &graph, const CNodePtr &cur_nod return nullptr; } MS_EXCEPTION_IF_NULL(prior_op); + if (AnfAlgo::IsGraphKernel(prior_op)) { + return nullptr; + } std::vector> kernel_info_list; kernel_query->Query(prior_op, &kernel_info_list); @@ -217,8 +242,16 @@ AnfNodePtr MergeCastToPriorOp(const FuncGraphPtr &graph, const CNodePtr &cur_nod if (kernel_info_it == kernel_info_list.end()) { return nullptr; } + auto ori_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(prior_op); + MS_LOG(INFO) << "Found alternative kernel info for current anf kernel " << prior_op->DebugString() + << "ori kernel info" << ori_kernel_info->ToString() << "alternative kernel info" + << (*kernel_info_it)->ToString(); AnfAlgo::SetSelectKernelBuildInfo(*kernel_info_it, prior_op.get()); - + ChangeNodeInferInfo(prior_op, cur_node, output_idx); + if (!single_output) { + MS_EXCEPTION_IF_NULL(x_node); + ChangeNodeInferInfo(x_node->cast(), cur_node, 0); + } auto prior_name = AnfAlgo::GetCNodeName(prior_op); if (prior_name == 
kFive2FourOpName) { AnfAlgo::CopyNodeAttr("dst_type", "dstType", cur_node, prior_op); diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.cc new file mode 100644 index 0000000000..42061957b9 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.cc @@ -0,0 +1,99 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "pre_activate/ascend/format_type/modify_ops_attrs.h" +#include +#include +#include "utils/utils.h" +#include "pre_activate/common/helper.h" +#include "kernel/common_utils.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" + +namespace mindspore { +namespace opt { +namespace { +AnfNodePtr ModifyReduceOpsAttrs(const CNodePtr &cnode) { + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0); + auto input_format = AnfAlgo::GetInputFormat(cnode, 0); + if (input_shape.size() == 5 || input_format != kOpFormat_NC1HWC0) { + return nullptr; + } + if (!AnfAlgo::HasNodeAttr(kAttrKeepDims, cnode)) { + return nullptr; + } + + AnfAlgo::SetNodeAttr(kAttrKeepDims, MakeValue(true), cnode); + return cnode; +} + +AnfNodePtr ModifyTileOpAttrs(const CNodePtr &cnode) { + auto input_shape = AnfAlgo::GetInputDeviceShape(cnode, 0); + if (input_shape.size() != 5) { + return nullptr; + } + if (!AnfAlgo::HasNodeAttr(kAttrMultiples, cnode)) { + return nullptr; + } + + auto multiples = AnfAlgo::GetNodeAttr>(cnode, kAttrMultiples); + if (multiples.size() == 4 && multiples[1] == 1) { + multiples.push_back(1); + AnfAlgo::SetNodeAttr(kAttrMultiples, MakeValue(multiples), cnode); + } + + return cnode; +} + +AnfNodePtr ModifyAttrs(const CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(cnode); + auto op_name = AnfAlgo::GetCNodeName(cnode); + if (op_name == prim::kPrimTile->name()) { + return ModifyTileOpAttrs(cnode); + } else if (op_name == prim::kPrimReduceSum->name()) { + // kPrimReduceMean + // kPrimReduceSum + // kPrimReduceAll + // kPrimReduceMax + // kPrimReduceMin + return ModifyReduceOpsAttrs(cnode); + } + return nullptr; +} +} // namespace + +const AnfNodePtr ModifyOpAttrs::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &) const { + if (node == nullptr || !node->isa() || !AnfAlgo::IsGraphKernel(node)) { + return nullptr; + } + MS_LOG(DEBUG) << "====Process op: " << AnfAlgo::GetCNodeName(node); + auto fg = 
AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(fg); + auto manager = fg->manager(); + MS_EXCEPTION_IF_NULL(manager); + std::vector todos; + kernel::GetValidKernelNodes(fg, &todos); + for (auto &t : todos) { + auto new_node = ModifyAttrs(t->cast()); + if (new_node != nullptr && new_node != t) { + (void)manager->Replace(t, new_node); + } + } + return node; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.h b/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.h similarity index 66% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.h rename to mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.h index 900b0fb46a..25ec94b6b4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.h +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.h @@ -13,19 +13,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_ADD_MEMCPY_ASYNC_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_ADD_MEMCPY_ASYNC_H_ -#include +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_MODIFY_OPS_ATTRS_H +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_MODIFY_OPS_ATTRS_H + #include "pre_activate/common/optimizer.h" + namespace mindspore { namespace opt { -class AddMemcpyAsync : public PatternProcessPass { +class ModifyOpAttrs : public PatternProcessPass { public: - explicit AddMemcpyAsync(bool multigraph = true) : PatternProcessPass("add_memcpy_async", multigraph) {} - ~AddMemcpyAsync() override = default; + explicit ModifyOpAttrs(bool multigraph = true) : PatternProcessPass("modify_ops_attrs", multigraph) {} + ~ModifyOpAttrs() override = default; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; }; } // namespace opt } // namespace mindspore -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_ADD_MEMCPY_ASYNC_H_ + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_MODIFY_OPS_ATTRS_H diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.cc new file mode 100644 index 0000000000..d81a8c90ce --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.cc @@ -0,0 +1,163 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h" + +#include +#include +#include +#include + +#include "session/anf_runtime_algorithm.h" +#include "kernel/kernel_build_info.h" +#include "utils/utils.h" +#include "kernel/common_utils.h" +#include "utils/context/ms_context.h" + +namespace mindspore { +namespace opt { +const BaseRef RectifyDoMaskKernelInfo::DefinePattern() const { + VarPtr X = std::make_shared(); + VarPtr Xs = std::make_shared(); + return VectorRef({X, Xs}); +} + +const AnfNodePtr RectifyDoMaskKernelInfo::Process(const FuncGraphPtr &graph, const AnfNodePtr &node, + const EquivPtr &) const { + if (node == nullptr || !node->isa()) { + return nullptr; + } + auto cnode = node->cast(); + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + if (ms_context->execution_mode() == kPynativeMode) { + return RectifyKernelInfoInPynativeProcess(node); + } + if (AnfAlgo::GetCNodeName(cnode) != prim::kPrimDropoutGenMask->name()) { + return nullptr; + } + std::vector do_mask_node_list; + auto manager = graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + auto node_map = manager->node_users(); + auto iter = node_map.find(node); + if (iter == node_map.end()) { + MS_LOG(EXCEPTION) << "Cannot find the node " << node->DebugString() << " in the graph manager!"; + } + auto gen_mask_output_nodes = iter->second; + for (const auto &output_node : gen_mask_output_nodes) { + if (AnfAlgo::GetCNodeName(output_node.first) == prim::kPrimDropoutDoMask->name()) { + auto output_cnode = output_node.first->cast(); + do_mask_node_list.push_back(output_cnode); + } + } + std::vector input_shape; + for (const auto &output_node : do_mask_node_list) { + if (input_shape.empty()) { + input_shape = AnfAlgo::GetPrevNodeOutputInferShape(output_node, 0); + continue; + } + auto shape = AnfAlgo::GetPrevNodeOutputInferShape(output_node, 0); 
+ if (!kernel::IsSameShape(shape, input_shape)) { + MS_LOG(EXCEPTION) << "The DropOutGenMask connected with same genmask's shape must be equal!" + << " GenMask " << node->DebugString(); + } + } + RectifyKernelInfo(do_mask_node_list); + return nullptr; +} + +void RectifyDoMaskKernelInfo::RectifyKernelInfo(const std::vector &do_mask_node_list) const { + std::map format_counter; + std::string special_format; + std::string convert_format; + for (const auto &do_mask : do_mask_node_list) { + auto do_mask_data_format = AnfAlgo::GetInputFormat(do_mask, 0); + if (special_format.empty() && kHWSpecialFormatSet.find(do_mask_data_format) != kHWSpecialFormatSet.end()) { + special_format = do_mask_data_format; + } + if (format_counter.find(do_mask_data_format) == format_counter.end()) { + format_counter[do_mask_data_format] = 1; + } else { + format_counter[do_mask_data_format] = format_counter[do_mask_data_format] + 1; + } + // if has two or more special format we need change all domask's format to default that can avoid insert more + // transdata + if (format_counter.size() > 2) { + convert_format = kOpFormat_DEFAULT; + break; + } + if (kHWSpecialFormatSet.find(do_mask_data_format) != kHWSpecialFormatSet.end() && + special_format != do_mask_data_format) { + convert_format = kOpFormat_DEFAULT; + break; + } + } + if (format_counter.size() == 1) { + return; + } + if (convert_format.empty()) { + convert_format = GetConvertFormat(format_counter); + } + RectifyDropOutDoMaskKernelInfo(do_mask_node_list, convert_format); +} + +std::string RectifyDoMaskKernelInfo::GetConvertFormat(const std::map &format_counter) const { + std::string convert_format; + const size_t counter = 0; + for (const auto &iter : format_counter) { + if (counter < iter.second) { + convert_format = iter.first; + } + if (counter == iter.second && kHWSpecialFormatSet.find(convert_format) == kHWSpecialFormatSet.end()) { + convert_format = iter.first; + } + } + return convert_format; +} + +void 
RectifyDoMaskKernelInfo::RectifyDropOutDoMaskKernelInfo(const std::vector &do_mask_node_list, + const std::string &format) const { + for (const auto &do_mask : do_mask_node_list) { + auto builder = + std::make_shared(AnfAlgo::GetSelectKernelBuildInfo(do_mask)); + builder->SetInputFormat(format, 0); + builder->SetOutputFormat(format, 0); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), do_mask.get()); + } +} + +AnfNodePtr RectifyDoMaskKernelInfo::RectifyKernelInfoInPynativeProcess(const AnfNodePtr &node) const { + MS_EXCEPTION_IF_NULL(node); + auto cnode = node->cast(); + if (cnode == nullptr) { + return nullptr; + } + if (AnfAlgo::GetCNodeName(cnode) != prim::kPrimDropoutDoMask->name()) { + return nullptr; + } + auto do_mask_input_format = AnfAlgo::GetInputFormat(node, 0); + if (do_mask_input_format != kOpFormat_DEFAULT) { + auto builder = + std::make_shared(AnfAlgo::GetSelectKernelBuildInfo(node)); + builder->SetInputFormat(kOpFormat_DEFAULT, 0); + builder->SetOutputFormat(kOpFormat_DEFAULT, 0); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get()); + } + return nullptr; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h b/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h new file mode 100644 index 0000000000..81bad4d8f8 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_RECTIFY_DO_MASK_KERNEL_INFO_H +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_RECTIFY_DO_MASK_KERNEL_INFO_H +#include +#include +#include + +#include "pre_activate/common/optimizer.h" +namespace mindspore { +namespace opt { +class RectifyDoMaskKernelInfo : public PatternProcessPass { + public: + explicit RectifyDoMaskKernelInfo(bool multigraph = true) + : PatternProcessPass("batch_norm_bert_fission", multigraph) {} + ~RectifyDoMaskKernelInfo() override = default; + const BaseRef DefinePattern() const override; + const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; + + private: + void RectifyKernelInfo(const std::vector &do_mask_node_list) const; + AnfNodePtr RectifyKernelInfoInPynativeProcess(const AnfNodePtr &node) const; + std::string GetConvertFormat(const std::map &format_counter) const; + void RectifyDropOutDoMaskKernelInfo(const std::vector &do_mask_node_list, const std::string &format) const; +}; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_RECTIFY_DO_MASK_KERNEL_INFO_H diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.cc b/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.cc new file mode 100644 index 0000000000..dde40a5090 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.cc @@ -0,0 +1,66 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pre_activate/ascend/format_type/remove_no_use_reshape_op.h" +#include +#include +#include "pre_activate/common/helper.h" +#include "kernel/common_utils.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" + +namespace mindspore { +namespace opt { +namespace { +AnfNodePtr RemoveReshapeOp(const CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(cnode); + auto op_name = AnfAlgo::GetCNodeName(cnode); + if (op_name != prim::kPrimReshape->name()) { + return nullptr; + } + + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0); + auto input_format = AnfAlgo::GetPrevNodeOutputFormat(cnode, 0); + if (input_shape.size() != 1 || input_format != kOpFormat_NC1HWC0) { + return nullptr; + } + + return cnode->input(1); +} +} // namespace + +const AnfNodePtr RemoveNoUseReshapeOp::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &) const { + if (node == nullptr || !node->isa() || !AnfAlgo::IsGraphKernel(node)) { + return nullptr; + } + MS_LOG(DEBUG) << "====process op: " << AnfAlgo::GetCNodeName(node); + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(fg); + auto manager = fg->manager(); + MS_EXCEPTION_IF_NULL(manager); + std::vector todos; + kernel::GetValidKernelNodes(fg, &todos); + for (auto &t : todos) { + auto new_node = RemoveReshapeOp(t->cast()); + if (new_node != nullptr && new_node != t) { + (void)manager->Replace(t, new_node); + } + } + 
return node; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_add_relu_fusion.h b/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.h similarity index 57% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_add_relu_fusion.h rename to mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.h index eb7cc730b5..4942c2fc08 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_add_relu_fusion.h +++ b/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,21 +14,20 @@ * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONV_BN_ADD_RELU_FUSION_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONV_BN_ADD_RELU_FUSION_H_ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_REMOVE_NO_USE_RESHAPE_OP_H +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_REMOVE_NO_USE_RESHAPE_OP_H #include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" namespace mindspore { namespace opt { -class ConvBnAddReluFusion : public PatternProcessPass { +class RemoveNoUseReshapeOp : public PatternProcessPass { public: - explicit ConvBnAddReluFusion(bool multigraph = true) : PatternProcessPass("conv_bn_add_relu_fusion", multigraph) {} - ~ConvBnAddReluFusion() override = default; - const BaseRef DefinePattern() const override; + explicit RemoveNoUseReshapeOp(bool multigraph = true) : PatternProcessPass("remove_no_use_reshape_op", multigraph) {} + ~RemoveNoUseReshapeOp() override = default; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; }; } // namespace opt } // 
namespace mindspore -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONV_BN_ADD_RELU_FUSION_H_ + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_REMOVE_NO_USE_RESHAPE_OP_H diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.cc index 640f84aa44..e6a8864e46 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.cc @@ -27,24 +27,6 @@ const std::vector kOutputIndex{0, 3, 4, 5}; constexpr size_t kBatchNormRealOutputNum = 3; constexpr size_t kBatchNormRealInputNum = 3; -bool CompareTupleGetitem(const AnfNodePtr &n1, const AnfNodePtr &n2) { - MS_EXCEPTION_IF_NULL(n1); - MS_EXCEPTION_IF_NULL(n2); - auto n1_cnode = n1->cast(); - auto n2_cnode = n2->cast(); - MS_EXCEPTION_IF_NULL(n1_cnode); - MS_EXCEPTION_IF_NULL(n2_cnode); - auto index_input1 = n1_cnode->input(kInputNodeOutputIndexInTupleGetItem); - MS_EXCEPTION_IF_NULL(index_input1); - auto value_node1 = index_input1->cast(); - MS_EXCEPTION_IF_NULL(value_node1); - auto index_input2 = n2_cnode->input(kInputNodeOutputIndexInTupleGetItem); - MS_EXCEPTION_IF_NULL(index_input2); - auto value_node2 = index_input2->cast(); - MS_EXCEPTION_IF_NULL(value_node2); - return GetValue(value_node1->value()) < GetValue(value_node2->value()); -} - bool GetBatchNormOutputs(const FuncGraphPtr &func_graph, const AnfNodePtr &bn, std::vector *bn_outputs) { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(bn_outputs); diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.cc index c8d92f7200..66ffa24bf1 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.cc @@ -28,14 +28,14 @@ namespace mindspore { namespace opt { namespace { -void 
CreateOutputsOfBNTrainingReduce(const FuncGraphPtr &graph, const CNodePtr &bn_cnode, +bool CreateOutputsOfBNTrainingReduce(const FuncGraphPtr &graph, const CNodePtr &bn_cnode, std::vector *bn_training_reduce_outputs) { MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(bn_cnode); if (bn_cnode->inputs().size() != kBnInputNum) { - MS_LOG(EXCEPTION) << "BN node has wrong input size"; + MS_LOG(INFO) << "FusedbatchNorm's input size less than " << kBnInputNum << ". " << bn_cnode->DebugString(); + return false; } - // All the inputs of BNTrainingReduce are from the inputs of BN std::vector bn_training_reduce_inputs = { NewValueNode(std::make_shared(kBNTrainingReduceOpName))}; bn_training_reduce_inputs.push_back(bn_cnode->input(1)); @@ -45,8 +45,9 @@ void CreateOutputsOfBNTrainingReduce(const FuncGraphPtr &graph, const CNodePtr & MS_EXCEPTION_IF_NULL(kernel_info); bn_training_reduce->set_kernel_info(kernel_info); std::vector bn_shape_i0 = AnfAlgo::GetPrevNodeOutputInferShape(bn_cnode, 0); - if (bn_shape_i0.size() != kShape4dDims) { - MS_LOG(EXCEPTION) << "Get shape of FusedBatchNorm fail"; + if (bn_shape_i0.size() < kShape2dDims) { + MS_LOG(INFO) << "The FusedBatchNorm's first input's shape dims less than " << kShape2dDims; + return false; } std::vector bn_training_reduce_shape = {bn_shape_i0[1]}; auto types = {kNumberTypeFloat32, kNumberTypeFloat32}; @@ -56,6 +57,7 @@ void CreateOutputsOfBNTrainingReduce(const FuncGraphPtr &graph, const CNodePtr & AnfAlgo::CopyNodeAttrs(bn_cnode, bn_training_reduce); CreateMultipleOutputsOfAnfNode(graph, bn_training_reduce, kBNTrainingReduceOutputNum, bn_training_reduce_outputs); + return true; } AnfNodePtr CreateOutputsOfBNTrainingUpdate(const FuncGraphPtr &graph, const CNodePtr &bn_cnode, @@ -99,11 +101,15 @@ AnfNodePtr SplitFusedBatchNormForTBE(const FuncGraphPtr &func_graph, const AnfNo auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); if (cnode->inputs().size() < kBnInputNum) { - MS_LOG(EXCEPTION) << "op[FusedBatchNorm] has less 
than " << kBnInputNum << " inputs."; + MS_LOG(INFO) << "op[FusedBatchNorm] has less than " << kBnInputNum << " inputs."; + return nullptr; } // Create BNTrainingReduce node and get outputs of BNTrainingReduce std::vector bn_training_reduce_outputs; - CreateOutputsOfBNTrainingReduce(func_graph, cnode, &bn_training_reduce_outputs); + if (!CreateOutputsOfBNTrainingReduce(func_graph, cnode, &bn_training_reduce_outputs)) { + MS_LOG(WARNING) << "Create BNTrainingReduce fail, quit split"; + return nullptr; + } if (bn_training_reduce_outputs.size() != kBN1OutputNum) { MS_LOG(EXCEPTION) << "make outputs of op BNTrainingReduce fail"; } diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc index cc1356c724..1a25d83650 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc @@ -32,7 +32,6 @@ void LayerNormGradSplit::CreateOutputsOfLayerNormXBackprop( std::vector *layer_norm_x_backprop_outputs) const { MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(layer_norm_grad); - MS_EXCEPTION_IF_NULL(kernel_select_); auto prim = std::make_shared(kLayerNormXBackpropOpName); std::vector layer_norm_x_backprop_inputs = {NewValueNode(prim)}; for (size_t i = 1; i < layer_norm_grad->inputs().size(); ++i) { @@ -46,7 +45,6 @@ void LayerNormGradSplit::CreateOutputsOfLayerNormXBackprop( auto shapes = {AnfAlgo::GetOutputInferShape(layer_norm_grad, 0)}; AnfAlgo::SetOutputInferTypeAndShape(types, shapes, layer_norm_x_backprop.get()); - kernel_select_->SelectKernel(layer_norm_x_backprop); (*layer_norm_x_backprop_outputs).push_back(layer_norm_x_backprop); } @@ -55,7 +53,6 @@ void LayerNormGradSplit::CreateOutputsOfLayerNormBetaGammaBackprop( std::vector *layer_norm_beta_gamma_backprop_outputs) const { MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(layer_norm_grad); - 
MS_EXCEPTION_IF_NULL(kernel_select_); auto prim = std::make_shared(kLayerNormBetaGammaBackpropOpName); std::vector layer_norm_beta_gamma_backprop_inputs = {NewValueNode(prim)}; for (size_t i = 1; i < layer_norm_grad->inputs().size() - 1; ++i) { @@ -73,10 +70,9 @@ void LayerNormGradSplit::CreateOutputsOfLayerNormBetaGammaBackprop( AnfAlgo::SetOutputInferTypeAndShape(types, shapes, layer_norm_beta_gamma_backprop.get()); // get device shape of LayerNormGrad's 5th Input, and convert it to attr - std::vector shape_gamma = AnfAlgo::GetInputDeviceShape(layer_norm_grad, 4); + std::vector shape_gamma = AnfAlgo::GetPrevNodeOutputInferShape(layer_norm_grad, 4); AnfAlgo::SetNodeAttr(kAttrShapeGamma, MakeValue(opt::Convert2Int(shape_gamma)), layer_norm_beta_gamma_backprop); - kernel_select_->SelectKernel(layer_norm_beta_gamma_backprop); CreateMultipleOutputsOfAnfNode(graph, layer_norm_beta_gamma_backprop, kLayerNormBetaGammaBackpropOutputNum, layer_norm_beta_gamma_backprop_outputs); } diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h b/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h index f25c2e9838..f442446b01 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h @@ -26,8 +26,7 @@ namespace mindspore { namespace opt { class LayerNormGradSplit : public PatternProcessPass { public: - explicit LayerNormGradSplit(bool multigraph = true) - : PatternProcessPass("layer_norm_grad_split", multigraph), kernel_select_(std::make_shared()) {} + explicit LayerNormGradSplit(bool multigraph = true) : PatternProcessPass("layer_norm_grad_split", multigraph) {} ~LayerNormGradSplit() override = default; const BaseRef DefinePattern() const override; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; @@ -37,7 +36,6 @@ class LayerNormGradSplit : public PatternProcessPass { std::vector 
*layer_norm_grad_outputs) const; void CreateOutputsOfLayerNormBetaGammaBackprop(const FuncGraphPtr &graph, const CNodePtr &layer_norm_grad, std::vector *layer_norm_beta_gamma_outputs) const; - KernelSelectPtr kernel_select_; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.cc new file mode 100644 index 0000000000..159be2ac3b --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.cc @@ -0,0 +1,117 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "pre_activate/ascend/ir_fission/single_batch_norm_fission.h" +#include +#include +#include +#include "session/anf_runtime_algorithm.h" +#include "pre_activate/common/helper.h" + +namespace mindspore { +namespace opt { +namespace { +constexpr size_t kBatchNormRealInputNum = 3; + +AnfNodePtr CreateBNTrainingReduce(const FuncGraphPtr &func_graph, const AnfNodePtr &bn) { + MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(bn); + auto bn_cnode = bn->cast(); + MS_EXCEPTION_IF_NULL(bn_cnode); + if (bn_cnode->inputs().size() < kBatchNormRealInputNum + 1) { + MS_LOG(EXCEPTION) << "The input size of node " + bn_cnode->DebugString() + " is less than " + << kBatchNormRealInputNum + 1; + } + std::vector bn_training_reduce_inputs = { + NewValueNode(std::make_shared(kBNTrainingReduceOpName)), bn_cnode->input(1)}; + auto bn_training_reduce = func_graph->NewCNode(bn_training_reduce_inputs); + MS_EXCEPTION_IF_NULL(bn_training_reduce); + + // set abstract + auto bn_input1 = bn_cnode->input(2); + MS_EXCEPTION_IF_NULL(bn_input1); + AbstractBasePtrList abstract_list{bn_input1->abstract(), bn_input1->abstract()}; + auto abstract_tuple = std::make_shared(abstract_list); + bn_training_reduce->set_abstract(abstract_tuple); + bn_training_reduce->set_scope(bn->scope()); + return bn_training_reduce; +} + +AnfNodePtr CreateBNTrainingUpdateV3(const FuncGraphPtr &func_graph, const AnfNodePtr &bn, + const std::vector &bn_training_reduce_outputs) { + MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(bn); + auto bn_cnode = bn->cast(); + MS_EXCEPTION_IF_NULL(bn_cnode); + if (bn_cnode->inputs().size() < kBatchNormRealInputNum + 1) { + MS_LOG(EXCEPTION) << "The input size of node " + bn_cnode->DebugString() + " is less than " + << kBatchNormRealInputNum + 1; + } + if (bn_training_reduce_outputs.size() != kBNTrainingReduceOutputNum) { + MS_LOG(EXCEPTION) << "The output size of node bn_training_reduce must be " << kBNTrainingReduceOutputNum + << ", but it is " << 
bn_training_reduce_outputs.size(); + } + std::vector bn_training_update_v3_inputs = { + NewValueNode(std::make_shared(kBNTrainingUpdateV3OpName)), + bn_cnode->input(1), + bn_training_reduce_outputs[0], + bn_training_reduce_outputs[1], + bn_cnode->input(2), + bn_cnode->input(3)}; + auto bn_training_update_v3 = func_graph->NewCNode(bn_training_update_v3_inputs); + MS_EXCEPTION_IF_NULL(bn_training_update_v3); + + auto bn_abstract_tuple = dyn_cast(bn->abstract()); + MS_EXCEPTION_IF_NULL(bn_abstract_tuple); + if (bn_abstract_tuple->elements().size() != kBatchNormOutputNum) { + MS_LOG(EXCEPTION) << "The abstract size of node bn must be " << kBatchNormOutputNum << ", but it is " + << bn_abstract_tuple->elements().size(); + } + bn_training_update_v3->set_abstract(bn->abstract()); + bn_training_update_v3->set_scope(bn->scope()); + AnfAlgo::CopyNodeAttr(kAttrEpsilon, bn_cnode, bn_training_update_v3); + return bn_training_update_v3; +} +} // namespace + +const BaseRef SingleBatchNormFission::DefinePattern() const { + VarPtr Xs = std::make_shared(); + return VectorRef({prim::kPrimBatchNorm, Xs}); +} + +const AnfNodePtr SingleBatchNormFission::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &) const { + MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(node); + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (cnode->size() < kBatchNormRealInputNum + 1) { + MS_LOG(INFO) << "The input num of BatchNorm less than" << kBatchNormRealInputNum + << ". 
The node should not be changed"; + return nullptr; + } + if (!GetBoolAttr(cnode, kAttrIsTraining)) { + MS_LOG(INFO) << "is training should be true if do fusion"; + return nullptr; + } + AnfNodePtr bn_training_reduce = CreateBNTrainingReduce(func_graph, node); + std::vector bn_training_reduce_outputs; + CreateMultipleOutputsOfAnfNode(func_graph, bn_training_reduce, kBNTrainingReduceOutputNum, + &bn_training_reduce_outputs); + + return CreateBNTrainingUpdateV3(func_graph, node, bn_training_reduce_outputs); +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_relu_fusion.h b/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.h similarity index 61% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_relu_fusion.h rename to mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.h index ea415564ae..145603132b 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_relu_fusion.h +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,21 +13,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONV_BN_RELU_FUSION_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONV_BN_RELU_FUSION_H_ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SINGLE_BATCH_NORM_FISSION_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SINGLE_BATCH_NORM_FISSION_H_ #include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" namespace mindspore { namespace opt { -class ConvBnReluFusion : public PatternProcessPass { +class SingleBatchNormFission : public PatternProcessPass { public: - explicit ConvBnReluFusion(bool multigraph = true) : PatternProcessPass("conv_bn_relu_fusion", multigraph) {} - ~ConvBnReluFusion() override = default; + explicit SingleBatchNormFission(bool multigraph = true) + : PatternProcessPass("single_batch_norm_fission", multigraph) {} + ~SingleBatchNormFission() override = default; const BaseRef DefinePattern() const override; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; }; } // namespace opt } // namespace mindspore -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONV_BN_RELU_FUSION_H_ +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SINGLE_BATCH_NORM_FISSION_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.cc new file mode 100644 index 0000000000..c39a5e01e6 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.cc @@ -0,0 +1,191 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/ir_fission/split_fission.h" +#include +#include +#include "session/anf_runtime_algorithm.h" + +namespace mindspore { +namespace opt { +namespace { +CNodePtr CreateSplitVNode(const FuncGraphPtr &func_graph, const AnfNodePtr &input_node) { + MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(input_node); + std::vector splitv_inputs{NewValueNode(std::make_shared(kSplitVOpName)), input_node}; + CNodePtr splitv = func_graph->NewCNode(splitv_inputs); + MS_EXCEPTION_IF_NULL(splitv); + splitv->set_scope(input_node->scope()); + return splitv; +} + +CNodePtr CreateBaseSplitVNode(const FuncGraphPtr &func_graph, const CNodePtr &origin_cnode) { + MS_EXCEPTION_IF_NULL(origin_cnode); + if (origin_cnode->inputs().size() < kSplitInputNum) { + MS_LOG(EXCEPTION) << "The input number of split: " << origin_cnode->DebugString() << " should be " + << kSplitInputNum - 1; + } + return CreateSplitVNode(func_graph, origin_cnode->input(1)); +} + +void SetAttrForSplitVNode(const AnfNodePtr &splitv, const std::vector &size_splits, int split_dim, int num_split) { + AnfAlgo::SetNodeAttr(kAttrSizeSplits, MakeValue(size_splits), splitv); + AnfAlgo::SetNodeAttr(kAttrSplitDim, MakeValue(split_dim), splitv); + AnfAlgo::SetNodeAttr(kAttrNumSplit, MakeValue(num_split), splitv); +} + +size_t GetSmallSplitSize(const AnfNodePtr &split_node, int split_dim, int num_split) { + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(split_node, 0); + if (split_dim < 0) { + split_dim += input_shape.size(); + } + if (IntToSize(split_dim) >= 
input_shape.size()) { + MS_LOG(EXCEPTION) << "The split_dim value should be less than the shape size of input 0"; + } + return input_shape[split_dim] / num_split; +} + +void AddNewOutputs(const FuncGraphPtr &func_graph, const AnfNodePtr &new_splitv, int outputs_num, + std::vector *inputs) { + MS_EXCEPTION_IF_NULL(inputs); + std::vector new_splitv_output; + CreateMultipleOutputsOfAnfNode(func_graph, new_splitv, outputs_num, &new_splitv_output); + inputs->insert(inputs->end(), new_splitv_output.begin(), new_splitv_output.end()); +} + +AnfNodePtr CreateTupleGetItem(const FuncGraphPtr &func_graph, const AnfNodePtr &input, size_t index) { + MS_EXCEPTION_IF_NULL(func_graph); + auto idx = NewValueNode(SizeToInt(index)); + MS_EXCEPTION_IF_NULL(idx); + auto imm = std::make_shared(SizeToInt(index)); + auto abstract_scalar = std::make_shared(imm); + idx->set_abstract(abstract_scalar); + auto tuple_getitem = func_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), input, idx}); + return tuple_getitem; +} + +void CreateOutputShapeAndTypeId(const CNodePtr &origin_cnode, int split_dim, int split_size, int num_split, + std::vector *new_type_ids, + std::vector> *new_output_shapes) { + MS_EXCEPTION_IF_NULL(new_type_ids); + MS_EXCEPTION_IF_NULL(new_output_shapes); + auto output_shape = AnfAlgo::GetOutputInferShape(origin_cnode, 0); + output_shape[split_dim] = split_size; + TypeId type_id = AnfAlgo::GetOutputInferDataType(origin_cnode, 0); + for (int i = 0; i < num_split; ++i) { + new_type_ids->emplace_back(type_id); + new_output_shapes->emplace_back(output_shape); + } +} + +void SetAttrAndAbstractForBaseSplitv(const CNodePtr &origin_cnode, const CNodePtr &base_splitv, + const std::vector &size_splits_base, int split_dim, int num_split) { + SetAttrForSplitVNode(base_splitv, size_splits_base, split_dim, num_split); + std::vector base_type_ids; + std::vector> base_output_shapes_base; + auto output_shape = AnfAlgo::GetOutputInferShape(origin_cnode, 0); + TypeId type_id = 
AnfAlgo::GetOutputInferDataType(origin_cnode, 0); + for (int i = 0; i < num_split; ++i) { + output_shape[split_dim] = size_splits_base[i]; + base_output_shapes_base.emplace_back(output_shape); + base_type_ids.emplace_back(type_id); + } + AnfAlgo::SetOutputInferTypeAndShape(base_type_ids, base_output_shapes_base, base_splitv.get()); +} + +AnfNodePtr DoFission(const FuncGraphPtr &func_graph, const CNodePtr &cnode, int num_split, int divisor) { + MS_EXCEPTION_IF_NULL(func_graph); + auto split_dim = AnfAlgo::GetNodeAttr(cnode, kAttrAxis); + CNodePtr base_splitv = CreateBaseSplitVNode(func_graph, cnode); + + // Create new size_splits for "size_splits" attr of each new Splitv node which has full inputs. + auto small_split_size = SizeToInt(GetSmallSplitSize(cnode, split_dim, num_split)); + std::vector size_splits_new; + for (int i = 0; i < divisor; ++i) { + size_splits_new.emplace_back(small_split_size); + } + // Create new output shape and new output type id for each new Splitv node which has full inputs. + std::vector new_type_ids; + std::vector> new_output_shapes; + CreateOutputShapeAndTypeId(cnode, split_dim, small_split_size, divisor, &new_type_ids, &new_output_shapes); + + // Create make_tuple input to create a make_tuple for replacing the old Split node. + std::vector make_tuple_inputs{NewValueNode(prim::kPrimMakeTuple)}; + // Start to divide the outputs of Split. 
+ std::vector size_splits_base; + const auto base_split_size = divisor * small_split_size; + int nodes_num = 0; + int cur_output_index = 0; + while (num_split - cur_output_index > divisor) { + CNodePtr new_splitv = CreateSplitVNode(func_graph, CreateTupleGetItem(func_graph, base_splitv, nodes_num)); + SetAttrForSplitVNode(new_splitv, size_splits_new, split_dim, divisor); + AnfAlgo::SetOutputInferTypeAndShape(new_type_ids, new_output_shapes, new_splitv.get()); + AddNewOutputs(func_graph, new_splitv, divisor, &make_tuple_inputs); + cur_output_index += divisor; + size_splits_base.emplace_back(base_split_size); + nodes_num++; + } + if (cur_output_index < num_split) { + auto last_node_num_split = num_split - cur_output_index; + if (last_node_num_split > 1) { + CNodePtr new_splitv = CreateSplitVNode(func_graph, CreateTupleGetItem(func_graph, base_splitv, nodes_num)); + std::vector size_splits_new_last; + for (int i = 0; i < last_node_num_split; ++i) { + size_splits_new_last.emplace_back(small_split_size); + } + SetAttrForSplitVNode(new_splitv, size_splits_new_last, split_dim, last_node_num_split); + // Create new output shape and new output type id for the last Splitv node + std::vector last_new_type_ids; + std::vector> last_new_output_shapes; + CreateOutputShapeAndTypeId(cnode, split_dim, small_split_size, last_node_num_split, &last_new_type_ids, + &last_new_output_shapes); + AnfAlgo::SetOutputInferTypeAndShape(last_new_type_ids, last_new_output_shapes, new_splitv.get()); + AddNewOutputs(func_graph, new_splitv, last_node_num_split, &make_tuple_inputs); + size_splits_base.emplace_back(last_node_num_split * small_split_size); + } else { + make_tuple_inputs.emplace_back(CreateTupleGetItem(func_graph, base_splitv, nodes_num)); + size_splits_base.emplace_back(small_split_size); + } + nodes_num++; + } + // Set Attr and abstract for the base splitv + SetAttrAndAbstractForBaseSplitv(cnode, base_splitv, size_splits_base, split_dim, nodes_num); + AnfNodePtr make_tuple = 
func_graph->NewCNode(make_tuple_inputs); + return make_tuple; +} +} // namespace + +const BaseRef SplitFission::DefinePattern() const { + VarPtr Xs = std::make_shared(); + auto split_prim = std::make_shared(kSplitOpName); + return VectorRef({split_prim, Xs}); +} + +const AnfNodePtr SplitFission::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const { + MS_EXCEPTION_IF_NULL(node); + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + // Check output num + if (!AnfAlgo::HasNodeAttr(kAttrOutputNum, cnode)) { + return nullptr; + } + auto num_split = AnfAlgo::GetNodeAttr(cnode, kAttrOutputNum); + if (num_split <= outputs_divisor_) { + return nullptr; + } + return DoFission(func_graph, cnode, num_split, outputs_divisor_); +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_fusion.h b/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.h similarity index 60% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_fusion.h rename to mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.h index 892e6053cf..c2763bb714 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_fusion.h +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,22 +13,25 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONV_BN_FUSION_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONV_BN_FUSION_H_ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SPLIT_FISSION_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SPLIT_FISSION_H_ #include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" namespace mindspore { namespace opt { -class ConvBnFusion : public PatternProcessPass { +constexpr int kSplitOutputsDivisor = 63; +class SplitFission : public PatternProcessPass { public: - explicit ConvBnFusion(bool multigraph = true) : PatternProcessPass("conv_bn_fusion", multigraph) {} - ~ConvBnFusion() override = default; + explicit SplitFission(bool multigraph = true) + : PatternProcessPass("split_fission", multigraph), outputs_divisor_(kSplitOutputsDivisor) {} + ~SplitFission() override = default; const BaseRef DefinePattern() const override; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; + + private: + int outputs_divisor_; }; } // namespace opt } // namespace mindspore -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONV_BN_FUSION_H_ +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SPLIT_FISSION_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc index 9abef8fa70..1cace41fc4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc @@ -91,6 +91,30 @@ kernel::KernelBuildInfoPtr CreateKernelBuildInfo() { builder.SetOutputsDeviceType({kNumberTypeFloat16, kNumberTypeInt32}); return builder.Build(); } + +bool CheckInputNamesSize(const CNodePtr &cnode) { + auto input_names_vec = AnfAlgo::GetNodeAttr>(cnode, kAttrInputNames); + if (input_names_vec.size() < kTopkIndexK + 1) { + MS_LOG(INFO) << "The input k of topk has been converted to attr"; + return 
false; + } + return true; +} + +bool CheckOutputShape(const AnfNodePtr &node) { + auto shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0); + if (shape.empty()) { + MS_LOG(INFO) << "The output shape of topk to split must not be empty"; + return false; + } + auto last_dim = shape[shape.size() - 1]; + const size_t kMaxFloat16 = 65500; + if (last_dim > kMaxFloat16) { + MS_LOG(INFO) << "The last dim is more than " << kMaxFloat16 << ", switch to aicpu ops."; + return false; + } + return true; +} } // namespace const BaseRef TopKSplit::DefinePattern() const { @@ -107,16 +131,10 @@ const AnfNodePtr TopKSplit::Process(const FuncGraphPtr &func_graph, const AnfNod // set value node as topk's input auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); - auto input_names_vec = AnfAlgo::GetNodeAttr>(cnode, kAttrInputNames); - if (input_names_vec.size() < kTopkIndexK + 1) { - MS_LOG(INFO) << "The input k of topk has been converted to attr"; + if (!CheckInputNamesSize(cnode)) { return nullptr; } - auto shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0); - auto last_dim = shape[shape.size() - 1]; - const size_t kMaxFloat16 = 65500; - if (last_dim > kMaxFloat16) { - MS_LOG(INFO) << "The last dim is more than 65500, switch to aicpu ops."; + if (!CheckOutputShape(cnode)) { return nullptr; } // Copy a new node to check supported. 
@@ -148,7 +166,7 @@ const AnfNodePtr TopKSplit::Process(const FuncGraphPtr &func_graph, const AnfNod auto indices_const = CreateValueNode(new_cnode); new_cnode->add_input(indices_const); MS_EXCEPTION_IF_NULL(supported_checker_); - if (!supported_checker_->CheckAiCoreSupported(new_cnode, CreateKernelBuildInfo())) { + if (!supported_checker_->CheckAICoreSupported(new_cnode, CreateKernelBuildInfo())) { MS_LOG(INFO) << "split topk failed, check to aicpu."; return nullptr; } diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc index 0305104f5b..bfb7e50486 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc @@ -69,13 +69,11 @@ bool TransDataSplit::DoSplit(const FuncGraphPtr &func_graph, const AnfNodePtr &n // trans input_format to hwcn new_transdata_node = NewTransOpNode(func_graph, AnfAlgo::GetInputNode(node->cast(), 0), kernel_select_, false, prim::KPrimTransData->name()); - RefreshKernelBuildInfo(input_format, kOpFormat_HWCN, AnfAlgo::GetOutputDeviceDataType(new_transdata_node, 0), - new_transdata_node); + RefreshKernelBuildInfo(input_format, kOpFormat_HWCN, new_transdata_node); // trans hwcn to default_format new_transpose_node = NewTransOpNode(func_graph, new_transdata_node, kernel_select_, false, prim::kPrimTranspose->name()); - RefreshKernelBuildInfo(kOpFormat_HWCN, output_format, AnfAlgo::GetOutputDeviceDataType(new_transpose_node, 0), - new_transpose_node); + RefreshKernelBuildInfo(kOpFormat_HWCN, output_format, new_transpose_node); AnfAlgo::SetNodeAttr(kAttrPerm, MakeValue(std::vector{3, 2, 0, 1}), new_transpose_node); new_replace_node = new_transpose_node; } else { @@ -83,14 +81,12 @@ bool TransDataSplit::DoSplit(const FuncGraphPtr &func_graph, const AnfNodePtr &n new_transpose_node = NewTransOpNode(func_graph, AnfAlgo::GetInputNode(node->cast(), 0), kernel_select_, 
false, prim::kPrimTranspose->name()); AnfAlgo::SetNodeAttr(kAttrPerm, MakeValue(std::vector{2, 3, 1, 0}), new_transpose_node); - RefreshKernelBuildInfo(input_format, kOpFormat_HWCN, AnfAlgo::GetOutputDeviceDataType(new_transpose_node, 0), - new_transpose_node); + RefreshKernelBuildInfo(input_format, kOpFormat_HWCN, new_transpose_node); // trans hwcn to output_format new_transdata_node = NewTransOpNode(func_graph, new_transpose_node, kernel_select_, false, prim::KPrimTransData->name()); - RefreshKernelBuildInfo(kOpFormat_HWCN, output_format, AnfAlgo::GetOutputDeviceDataType(new_transdata_node, 0), - new_transdata_node); + RefreshKernelBuildInfo(kOpFormat_HWCN, output_format, new_transdata_node); new_replace_node = new_transdata_node; } FuncGraphManagerPtr manager = func_graph->manager(); diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.cc index 4645167191..59be003b15 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.cc @@ -109,6 +109,9 @@ const AnfNodePtr AdamApplyOneFusion::Process(const FuncGraphPtr &func_graph, con const EquivPtr &equiv) const { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(node); + if (!CheckSupportDataType(node, kFloatDataTypeSet)) { + return nullptr; + } auto new_node = CreateAdamApplyOneNode(func_graph, equiv); MS_EXCEPTION_IF_NULL(new_node); new_node->set_scope(node->scope()); diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc index 7dc13ee7a7..f6077c95f2 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc @@ -146,7 +146,9 @@ const AnfNodePtr 
AdamApplyOneWithDecayRule::Process(const FuncGraphPtr &graph, c if (graph == nullptr || node == nullptr || equiv == nullptr) { return nullptr; } - + if (!CheckSupportDataType(node, kFloatDataTypeSet)) { + return nullptr; + } std::vector inputs = GetFusionNodeInputs(equiv); auto fusion_node = graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(fusion_node); diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc index a524d694e6..9e2c6374ce 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc @@ -25,29 +25,8 @@ namespace mindspore { namespace opt { -namespace { -void SetAttrsForFusionNode(const AnfNodePtr &sub_anf, const AnfNodePtr &fusion_node) { - MS_EXCEPTION_IF_NULL(sub_anf); - MS_EXCEPTION_IF_NULL(fusion_node); - auto sub = sub_anf->cast(); - MS_EXCEPTION_IF_NULL(sub); - if (sub->size() != kSubInputNum) { - MS_LOG(EXCEPTION) << "Sub's size is not equal with 3"; - } - auto reduce_sum_anf = sub->input(2); - MS_EXCEPTION_IF_NULL(reduce_sum_anf); - auto reduce_sum = reduce_sum_anf->cast(); - if (reduce_sum == nullptr) { - MS_LOG(EXCEPTION) << "Sub's second input is not a cnode"; - } - AnfAlgo::CopyNodeAttr(kAttrAxis, reduce_sum, fusion_node); - AnfAlgo::CopyNodeAttr(kAttrKeepDims, reduce_sum, fusion_node); -} -} // namespace - const BaseRef ConfusionSoftmaxGradRule::DefinePattern() const { - return VectorRef( - {prim::kPrimSub, input0_, VectorRef({prim::kPrimReduceSum, VectorRef({prim::kPrimMul, input1_, input0_})})}); + return VectorRef({prim::kPrimSub, input0_, VectorRef({reduce_sum_, VectorRef({prim::kPrimMul, input1_, input0_})})}); } const AnfNodePtr ConfusionSoftmaxGradRule::Process(const FuncGraphPtr &graph, const AnfNodePtr &node, @@ -55,22 +34,28 @@ const AnfNodePtr ConfusionSoftmaxGradRule::Process(const FuncGraphPtr &graph, co 
MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(equiv); - auto input0 = utils::cast((*equiv)[input0_]); - auto input1 = utils::cast((*equiv)[input1_]); - MS_EXCEPTION_IF_NULL(input0); - MS_EXCEPTION_IF_NULL(input1); + AnfNodePtr input0 = GetAnfNodeByVar(equiv, input0_); + AnfNodePtr input1 = GetAnfNodeByVar(equiv, input1_); + AnfNodePtr sum_anf = GetAnfNodeByVar(equiv, reduce_sum_); + if (sum_anf == nullptr || !sum_anf->isa()) { + MS_LOG(WARNING) << "Matched ReduceSum is not a CNode!"; + return nullptr; + } + if (!GetBoolAttr(sum_anf, kAttrKeepDims)) { + MS_LOG(INFO) << "ReduceSum's attr keep_dims should be true if do fusion. Otherwise the calculation will be wrong"; + return nullptr; + } auto prim = std::make_shared(kConfusionSoftmaxGradOpName); MS_EXCEPTION_IF_NULL(prim); std::vector inputs = {NewValueNode(prim), input0, input1}; - auto confusion_softmax_grad = graph->NewCNode(inputs); - MS_EXCEPTION_IF_NULL(confusion_softmax_grad); - auto types = {AnfAlgo::GetOutputInferDataType(node, 0)}; - auto shapes = {AnfAlgo::GetOutputInferShape(node, 0)}; - AnfAlgo::SetOutputInferTypeAndShape(types, shapes, confusion_softmax_grad.get()); - confusion_softmax_grad->set_scope(node->scope()); - SetAttrsForFusionNode(node, confusion_softmax_grad); - return confusion_softmax_grad; + auto fusion_node = graph->NewCNode(inputs); + MS_EXCEPTION_IF_NULL(fusion_node); + fusion_node->set_abstract(node->abstract()); + fusion_node->set_scope(node->scope()); + AnfAlgo::CopyNodeAttr(kAttrAxis, sum_anf, fusion_node); + AnfAlgo::CopyNodeAttr(kAttrKeepDims, sum_anf, fusion_node); + return fusion_node; } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h index 58722e586f..a4d0d1ce7a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h +++ 
b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h @@ -24,9 +24,11 @@ namespace opt { class ConfusionSoftmaxGradRule : public PatternProcessPass { public: explicit ConfusionSoftmaxGradRule(bool multigraph = true) - : PatternProcessPass("confusion_softmax_grad_rule", multigraph), - input0_(std::make_shared()), - input1_(std::make_shared()) {} + : PatternProcessPass("confusion_softmax_grad_rule", multigraph) { + input0_ = std::make_shared(); + input1_ = std::make_shared(); + reduce_sum_ = std::make_shared(std::make_shared(prim::kPrimReduceSum->name())); + } ~ConfusionSoftmaxGradRule() override = default; const BaseRef DefinePattern() const override; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; @@ -34,6 +36,7 @@ class ConfusionSoftmaxGradRule : public PatternProcessPass { private: VarPtr input0_; VarPtr input1_; + VarPtr reduce_sum_; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_add_relu_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_add_relu_fusion.cc deleted file mode 100644 index efee8c0eff..0000000000 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_add_relu_fusion.cc +++ /dev/null @@ -1,157 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "pre_activate/ascend/ir_fusion/conv_bn_add_relu_fusion.h" -#include -#include -#include -#include -#include -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" - -namespace mindspore { -namespace opt { -namespace { -constexpr size_t kBn2AddReluOutputNum = 4; -enum Bn2AddReluOutput { - kBn2AddReluOutput = 0, - kBn2AddReluRunningMean, - kBn2AddReluRunningVariance, - kBn2AddReluSaveInvVariance, -}; - -std::tuple GetUsedCNode(const AnfNodePtr &node) { - auto relu_cnode = CheckAnfNodeIfCNodeAndInputSize(node, kReluInputNum); - MS_EXCEPTION_IF_NULL(relu_cnode); - auto add_cnode = CheckAnfNodeIfCNodeAndInputSize(relu_cnode->input(1), kAddInputNum); - MS_EXCEPTION_IF_NULL(add_cnode); - auto add_input1_cnode = CheckAnfNodeIfCNodeAndInputSize(add_cnode->input(1), kTupleGetitemInputNum); - MS_EXCEPTION_IF_NULL(add_input1_cnode); - auto bn_cnode = CheckAnfNodeIfCNodeAndInputSize(add_input1_cnode->input(1), kBnInputNum); - MS_EXCEPTION_IF_NULL(bn_cnode); - auto conv_cnode = CheckAnfNodeIfCNodeAndInputSize(bn_cnode->input(kX), kConvInputNum); - - return std::make_tuple(conv_cnode, bn_cnode, add_cnode, relu_cnode); -} - -void CreateOutputsOfBn2AddRelu(const FuncGraphPtr &func_graph, const std::vector &conv_bn1_outputs, - const CNodePtr &bn_node, const CNodePtr &add_node, const CNodePtr &relu_node, - std::vector *bn2_add_relu_outputs) { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(add_node); - MS_EXCEPTION_IF_NULL(relu_node); - MS_EXCEPTION_IF_NULL(bn_node); - auto prim = std::make_shared(kBN2AddReluOpName); - std::vector bn2_add_relu_inputs = {NewValueNode(prim)}; - // The inputs of bn2_add_relu are from the outputs of conv_bn1, the 2nd input of add, and the 2nd to 5th inputs of bn - (void)std::copy(conv_bn1_outputs.begin(), conv_bn1_outputs.end(), std::back_inserter(bn2_add_relu_inputs)); - bn2_add_relu_inputs.push_back(add_node->input(2)); - for (size_t i = kX + 1; i <= kVariance; i++) { - 
bn2_add_relu_inputs.push_back(bn_node->input(i)); - } - auto bn2_add_relu_cnode = func_graph->NewCNode(bn2_add_relu_inputs); - MS_EXCEPTION_IF_NULL(bn2_add_relu_cnode); - auto kernel_info = std::make_shared(); - MS_EXCEPTION_IF_NULL(kernel_info); - bn2_add_relu_cnode->set_kernel_info(kernel_info); - - // Set attr for bn2_add_relu - AnfAlgo::CopyNodeAttrs(bn_node, bn2_add_relu_cnode); - AnfAlgo::CopyNodeAttr("epsilon", "eps", bn_node, bn2_add_relu_cnode); - - // Set abstract of bn2_add_relu - auto bn_abstract_tuple = dyn_cast(bn_node->abstract()); - MS_EXCEPTION_IF_NULL(bn_abstract_tuple); - if (bn_abstract_tuple->elements().size() != kBnOutputNum) { - MS_LOG(EXCEPTION) << "Abstract tuple size of FusedBatchNorm must be " << kBnOutputNum << ", but it is " - << bn_abstract_tuple->elements().size(); - } - auto relu_abstract = relu_node->abstract(); - MS_EXCEPTION_IF_NULL(relu_abstract); - // The abstracts of node bn2_add_relu are from the some abstracts of bn and relu nodes. - AbstractBasePtrList bn2_add_relu_abstract_list{relu_abstract, bn_abstract_tuple->elements()[kRunningMean], - bn_abstract_tuple->elements()[kRunningVariance], - bn_abstract_tuple->elements()[kSaveInvVariance]}; - auto abstract_tuple = std::make_shared(bn2_add_relu_abstract_list); - MS_EXCEPTION_IF_NULL(abstract_tuple); - bn2_add_relu_cnode->set_abstract(abstract_tuple); - - CreateMultipleOutputsOfAnfNode(func_graph, bn2_add_relu_cnode, kBn2AddReluOutputNum, bn2_add_relu_outputs); -} -} // namespace - -const BaseRef ConvBnAddReluFusion::DefinePattern() const { - VarPtr X = std::make_shared(); - MS_EXCEPTION_IF_NULL(X); - VarPtr W = std::make_shared(); - MS_EXCEPTION_IF_NULL(W); - VarPtr Ys = std::make_shared(); - MS_EXCEPTION_IF_NULL(Ys); - VarPtr Zs = std::make_shared(); - MS_EXCEPTION_IF_NULL(Zs); - - return VectorRef( - {prim::kPrimRelu, - PatternListType( - {prim::kPrimTensorAdd, - PatternListType({prim::kPrimTupleGetItem, - PatternListType({prim::kPrimFusedBatchNorm, 
PatternListType({prim::kPrimConv2D, Ys}), Zs}), - W}), - X})}); -} - -const AnfNodePtr ConvBnAddReluFusion::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, - const EquivPtr &) const { - MS_EXCEPTION_IF_NULL(func_graph); - auto manager = func_graph->manager(); - MS_EXCEPTION_IF_NULL(manager); - CNodePtr conv_cnode = nullptr; - CNodePtr bn_cnode = nullptr; - CNodePtr add_cnode = nullptr; - CNodePtr relu_cnode = nullptr; - std::tie(conv_cnode, bn_cnode, add_cnode, relu_cnode) = GetUsedCNode(node); - // Create conv_bn1 node and get outputs of conv_bn1 - std::vector conv_bn1_outputs; - CreateOutputsOfConvBn1(func_graph, conv_cnode, bn_cnode, &conv_bn1_outputs); - if (conv_bn1_outputs.size() != kConvBn1OutputNum) { - MS_LOG(EXCEPTION) << "The output size of node conv_bn1 must be " << kConvBn1OutputNum << ", but it is " - << conv_bn1_outputs.size(); - } - // Replace conv_node with the output 0 of conv_bn1 directly because the conv node may be used as input by others - (void)manager->Replace(conv_cnode, conv_bn1_outputs[kData]); - - // Create bn2_add_relu node and get outputs of bn2_add_relu - std::vector bn2_add_relu_outputs; - CreateOutputsOfBn2AddRelu(func_graph, conv_bn1_outputs, bn_cnode, add_cnode, relu_cnode, &bn2_add_relu_outputs); - if (bn2_add_relu_outputs.size() != kBn2AddReluOutputNum) { - MS_LOG(EXCEPTION) << "The output size of node bn2_add_relu must be " << kBn2AddReluOutputNum << ", but it is " - << bn2_add_relu_outputs.size(); - } - - // Create a make_tuple to replace the bn node here, the outputs are from node bn2_add_relu and conv_bn1. 
- std::vector make_tuple_inputs{NewValueNode(prim::kPrimMakeTuple), - bn2_add_relu_outputs[kBn2AddReluOutput], - bn2_add_relu_outputs[kBn2AddReluRunningMean], - bn2_add_relu_outputs[kBn2AddReluRunningVariance], - conv_bn1_outputs[kMean], - bn2_add_relu_outputs[kBn2AddReluSaveInvVariance]}; - auto make_tuple = func_graph->NewCNode(make_tuple_inputs); - (void)manager->Replace(bn_cnode, make_tuple); - return bn2_add_relu_outputs[kBn2AddReluOutput]; -} -} // namespace opt -} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_fusion.cc deleted file mode 100644 index 70a7b53809..0000000000 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_fusion.cc +++ /dev/null @@ -1,93 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "pre_activate/ascend/ir_fusion/conv_bn_fusion.h" -#include -#include -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" - -namespace mindspore { -namespace opt { -const BaseRef ConvBnFusion::DefinePattern() const { - VarPtr Xs = std::make_shared(); - MS_EXCEPTION_IF_NULL(Xs); - VarPtr Ys = std::make_shared(); - MS_EXCEPTION_IF_NULL(Ys); - return VectorRef({prim::kPrimFusedBatchNorm, PatternListType({prim::kPrimConv2D, Xs}), Ys}); -} - -const AnfNodePtr ConvBnFusion::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(node); - if (!node->isa()) { - MS_LOG(EXCEPTION) << "The bn node is expected to be a cnode"; - } - auto bn_cnode = node->cast(); - MS_EXCEPTION_IF_NULL(bn_cnode); - if (bn_cnode->inputs().size() < kVariance + 1) { - auto op_name = AnfAlgo::GetCNodeName(bn_cnode); - MS_LOG(EXCEPTION) << "op[" << op_name << "] has less than " << kVariance + 1 << " inputs."; - } - AnfNodePtr conv_node = bn_cnode->input(kX); - MS_EXCEPTION_IF_NULL(conv_node); - if (!conv_node->isa()) { - MS_LOG(EXCEPTION) << "The conv node is expected to be a cnode"; - } - auto conv_cnode = conv_node->cast(); - MS_EXCEPTION_IF_NULL(conv_cnode); - auto manager = func_graph->manager(); - MS_EXCEPTION_IF_NULL(manager); - // Create conv_bn1 node and get outputs of conv_bn1 - std::vector conv_bn1_outputs; - CreateOutputsOfConvBn1(func_graph, conv_cnode, bn_cnode, &conv_bn1_outputs); - if (conv_bn1_outputs.size() != kConvBn1OutputNum) { - MS_LOG(EXCEPTION) << "The output size of node conv_bn1 must be " << kConvBn1OutputNum << ", but it is " - << conv_bn1_outputs.size(); - } - // Replace conv_node with the output 0 of conv_bn1 directly because the conv node may be used as input by other - (void)manager->Replace(conv_node, conv_bn1_outputs[kData]); - - // Create bn2 node and get outputs of bn2 - std::vector bn2_outputs; - std::vector bn1_outputs = 
{conv_bn1_outputs[2], conv_bn1_outputs[1]}; - CreateOutputsOfFusedBn2(func_graph, bn1_outputs, bn_cnode, &bn2_outputs); - if (bn2_outputs.size() != kBN2OutputNum) { - MS_LOG(EXCEPTION) << "The output size of node fusedbn2 must be " << kBN2OutputNum << ", but it is " - << bn2_outputs.size(); - } - - // Create bn3 node and get outputs of bn3 - std::vector bn3_outputs; - CreateOutputsOfFusedBn3(func_graph, conv_bn1_outputs[0], bn1_outputs, bn2_outputs, bn_cnode, &bn3_outputs); - - if (bn3_outputs.size() != kBN3OutputNum) { - MS_LOG(EXCEPTION) << "The output size of node fusedbn3 must be " << kBN3OutputNum << ", but it is " - << bn3_outputs.size(); - } - - // Return a make_tuple to replace the bn node here, the outputs are from node bn2 and conv_bn1. - std::vector make_tuple_inputs{NewValueNode(prim::kPrimMakeTuple), - bn3_outputs[0], - bn2_outputs[1], - bn2_outputs[2], - conv_bn1_outputs[2], - bn2_outputs[0]}; - - return func_graph->NewCNode(make_tuple_inputs); -} -} // namespace opt -} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_relu_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_relu_fusion.cc deleted file mode 100644 index c5cea86b7f..0000000000 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/conv_bn_relu_fusion.cc +++ /dev/null @@ -1,140 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "pre_activate/ascend/ir_fusion/conv_bn_relu_fusion.h" - -#include -#include -#include -#include -#include - -#include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "common/utils.h" -#include "device/kernel_info.h" - -namespace mindspore { -namespace opt { -namespace { -std::tuple GetPrevNodes(const AnfNodePtr &node) { - MS_EXCEPTION_IF_NULL(node); - auto relu_node = node->cast(); - MS_EXCEPTION_IF_NULL(relu_node); - if (relu_node->inputs().size() < kReluInputNum) { - MS_LOG(EXCEPTION) << "relu has wrong input size"; - } - auto tuple_getitem_anf = relu_node->input(1); - MS_EXCEPTION_IF_NULL(tuple_getitem_anf); - auto tuple_getitem = tuple_getitem_anf->cast(); - MS_EXCEPTION_IF_NULL(tuple_getitem); - if (tuple_getitem->inputs().size() < kTupleGetitemInputNum) { - MS_LOG(EXCEPTION) << "tuple getitem has wrong input size"; - } - auto bn_node_anf = tuple_getitem->input(1); - MS_EXCEPTION_IF_NULL(bn_node_anf); - auto bn_node = bn_node_anf->cast(); - MS_EXCEPTION_IF_NULL(bn_node); - if (bn_node->inputs().size() < kBnInputNum) { - MS_LOG(EXCEPTION) << "bn_node has wrong input size"; - } - auto conv_node_anf = bn_node->input(1); - MS_EXCEPTION_IF_NULL(conv_node_anf); - CNodePtr conv_node = conv_node_anf->cast(); - MS_EXCEPTION_IF_NULL(conv_node); - return std::make_tuple(bn_node, bn_node, conv_node); -} - -void CreateOutputsOfBn2Relu(const FuncGraphPtr &func_graph, const std::vector &conv_bn1_outputs, - const CNodePtr &bn_node, const CNodePtr &relu_node, - std::vector *bn2_relu_outputs) { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(bn_node); - MS_EXCEPTION_IF_NULL(relu_node); - // The inputs of bn2_relu are from the outputs of conv_bn1 and the 2nd to 5th inputs of bn - std::vector bn2_relu_inputs = {NewValueNode(std::make_shared(kBN2ReLUOpName))}; - (void)std::copy(conv_bn1_outputs.begin(), conv_bn1_outputs.end(), std::back_inserter(bn2_relu_inputs)); - for (size_t i = 2; i <= 5; i++) { - 
bn2_relu_inputs.push_back(bn_node->input(i)); - } - auto bn2_relu = func_graph->NewCNode(bn2_relu_inputs); - MS_EXCEPTION_IF_NULL(bn2_relu); - auto kernel_info = std::make_shared(); - MS_EXCEPTION_IF_NULL(kernel_info); - bn2_relu->set_kernel_info(kernel_info); - auto types = {AnfAlgo::GetOutputInferDataType(relu_node, 0), AnfAlgo::GetOutputInferDataType(bn_node, 1), - AnfAlgo::GetOutputInferDataType(bn_node, 2), AnfAlgo::GetOutputInferDataType(bn_node, 4)}; - auto shapes = {AnfAlgo::GetOutputInferShape(relu_node, 0), AnfAlgo::GetOutputInferShape(bn_node, 1), - AnfAlgo::GetOutputInferShape(bn_node, 2), AnfAlgo::GetOutputInferShape(bn_node, 4)}; - AnfAlgo::SetOutputInferTypeAndShape(types, shapes, bn2_relu.get()); - // Set attr for bn2_add_relu - AnfAlgo::CopyNodeAttrs(bn_node, bn2_relu); - AnfAlgo::CopyNodeAttr("epsilon", "eps", bn_node, bn2_relu); - - CreateMultipleOutputsOfAnfNode(func_graph, bn2_relu, kBn2ReluOutputNum, bn2_relu_outputs); -} -} // namespace - -const BaseRef ConvBnReluFusion::DefinePattern() const { - VarPtr Xs = std::make_shared(); - VarPtr Ys = std::make_shared(); - VarPtr Z = std::make_shared(); - MS_EXCEPTION_IF_NULL(Xs); - MS_EXCEPTION_IF_NULL(Ys); - MS_EXCEPTION_IF_NULL(Z); - return VectorRef( - {prim::kPrimRelu, - PatternListType({prim::kPrimTupleGetItem, - PatternListType({prim::kPrimFusedBatchNorm, PatternListType({prim::kPrimConv2D, Xs}), Ys}), Z})}); -} - -const AnfNodePtr ConvBnReluFusion::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, - const EquivPtr &) const { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(node); - - CNodePtr relu_node = nullptr; - CNodePtr bn_node = nullptr; - CNodePtr conv_node = nullptr; - std::tie(relu_node, bn_node, conv_node) = GetPrevNodes(node); - - auto manager = func_graph->manager(); - MS_EXCEPTION_IF_NULL(manager); - - std::vector conv_bn1_outputs; - CreateOutputsOfConvBn1(func_graph, conv_node, bn_node, &conv_bn1_outputs); - if (conv_bn1_outputs.size() != kConvBn1OutputNum) 
{ - MS_LOG(EXCEPTION) << "conv_bn1 outputs has wrong size: " << conv_bn1_outputs.size(); - } - (void)manager->Replace(conv_node, conv_bn1_outputs[0]); - - std::vector bn2_relu_outputs; - CreateOutputsOfBn2Relu(func_graph, conv_bn1_outputs, bn_node, relu_node, &bn2_relu_outputs); - if (bn2_relu_outputs.size() != kBn2ReluOutputNum) { - MS_LOG(EXCEPTION) << "bn2_relu outputs has wrong size: " << bn2_relu_outputs.size(); - } - std::vector make_tuple_inputs{NewValueNode(prim::kPrimMakeTuple), - bn2_relu_outputs[0], - bn2_relu_outputs[1], - bn2_relu_outputs[2], - conv_bn1_outputs[2], - bn2_relu_outputs[3]}; - auto make_tuple = func_graph->NewCNode(make_tuple_inputs); - MS_EXCEPTION_IF_NULL(make_tuple); - (void)manager->Replace(bn_node, make_tuple); - return bn2_relu_outputs[0]; -} -} // namespace opt -} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc index 03428e6357..efc9ee7934 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc @@ -291,7 +291,7 @@ const AnfNodePtr FusedBatchNormFusion::Process(const FuncGraphPtr &func_graph, c return bn_training_update_outputs[0]; } -const BaseRef FusedBatchNormMixPrecisionFusion::DefinePattern() const { +const BaseRef FusedBatchNormMixPrecisionFusion0::DefinePattern() const { std::shared_ptr Xs = std::make_shared(); VarPtr index0 = std::make_shared(IsC); VarPtr index1 = std::make_shared(IsC); @@ -313,5 +313,28 @@ const BaseRef FusedBatchNormMixPrecisionFusion::DefinePattern() const { VectorRef depend0 = VectorRef({prim::kPrimDepend, tuple_getitem0, assign_sub0}); return VectorRef({prim::kPrimDepend, depend0, assign_sub1}); } + +const BaseRef FusedBatchNormMixPrecisionFusion1::DefinePattern() const { + std::shared_ptr Xs = std::make_shared(); + VarPtr index0 = std::make_shared(IsC); + VarPtr 
index1 = std::make_shared(IsC); + VarPtr index2 = std::make_shared(IsC); + VectorRef batch_norm = VectorRef({batch_norm_var_, data_input0_var_, data_input1_var_, data_input2_var_, Xs}); + VectorRef tuple_getitem0 = VectorRef({prim::kPrimTupleGetItem, batch_norm, index0}); + VectorRef tuple_getitem1 = VectorRef({prim::kPrimTupleGetItem, batch_norm, index1}); + VectorRef tuple_getitem2 = VectorRef({prim::kPrimTupleGetItem, batch_norm, index2}); + VectorRef cast_variable_input0 = VectorRef({prim::kPrimCast, variable_input0_var_}); + VectorRef cast_variable_input1 = VectorRef({prim::kPrimCast, variable_input1_var_}); + VectorRef sub0 = VectorRef({prim::kPrimSub, cast_variable_input0, tuple_getitem1}); + VectorRef sub1 = VectorRef({prim::kPrimSub, cast_variable_input1, tuple_getitem2}); + VectorRef cast0 = VectorRef({prim::kPrimCast, sub0}); + VectorRef cast1 = VectorRef({prim::kPrimCast, sub1}); + VectorRef mul0 = VectorRef({prim::kPrimMul, cast0, constant_input0_var_}); + VectorRef mul1 = VectorRef({prim::kPrimMul, cast1, constant_input1_var_}); + VectorRef assign_sub0 = VectorRef({prim::kPrimAssignSub, variable_input0_var_, mul0}); + VectorRef assign_sub1 = VectorRef({prim::kPrimAssignSub, variable_input1_var_, mul1}); + VectorRef depend0 = VectorRef({prim::kPrimDepend, tuple_getitem0, assign_sub0}); + return VectorRef({prim::kPrimDepend, depend0, assign_sub1}); +} } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h index e4b31ca5f4..f476e96062 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h @@ -61,12 +61,21 @@ class FusedBatchNormFusion : public PatternProcessPass { VarPtr batch_norm_var_; }; -class FusedBatchNormMixPrecisionFusion : public FusedBatchNormFusion { +class FusedBatchNormMixPrecisionFusion0 : 
public FusedBatchNormFusion { public: - explicit FusedBatchNormMixPrecisionFusion(bool multigraph = true) + explicit FusedBatchNormMixPrecisionFusion0(bool multigraph = true) : FusedBatchNormFusion("fused_batch_norm_mix_precision_fusion", multigraph) {} - ~FusedBatchNormMixPrecisionFusion() override = default; + ~FusedBatchNormMixPrecisionFusion0() override = default; + const BaseRef DefinePattern() const override; +}; + +class FusedBatchNormMixPrecisionFusion1 : public FusedBatchNormFusion { + public: + explicit FusedBatchNormMixPrecisionFusion1(bool multigraph = true) + : FusedBatchNormFusion("fused_batch_norm_mix_precision_fusion", multigraph) {} + + ~FusedBatchNormMixPrecisionFusion1() override = default; const BaseRef DefinePattern() const override; }; } // namespace opt diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.cc index 5f0b869644..42e37df3e4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.cc @@ -108,6 +108,9 @@ bool LambNextMVRule::IsShareNodes(const EquivPtr &equiv1, const EquivPtr &equiv2 const AnfNodePtr LambNextMVRule::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &equiv) const { + if (!CheckSupportDataType(node, kFloatDataTypeSet)) { + return nullptr; + } std::vector old_pattern_outputs; if (!IsRuleMatched(func_graph, node, equiv, &old_pattern_outputs)) { return nullptr; diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc index e0389309a1..0e3cd28a66 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc @@ -88,6 +88,9 @@ const AnfNodePtr LambNextMVWithDecayRule::Process(const 
FuncGraphPtr &func_graph const EquivPtr &equiv) const { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(node); + if (!CheckSupportDataType(node, kFloatDataTypeSet)) { + return nullptr; + } AnfNodePtr mul4 = GetAnfNodeByVar(equiv, mul4_var_); MS_EXCEPTION_IF_NULL(mul4); // Get add3 and match the add3 pattern diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc index 9efd503363..26828f2137 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc @@ -153,6 +153,9 @@ const AnfNodePtr LambNextMVWithDecayV1Rule::Process(const FuncGraphPtr &func_gra if (func_graph == nullptr || node == nullptr || equiv == nullptr) { return nullptr; } + if (!CheckSupportDataType(node, kFloatDataTypeSet)) { + return nullptr; + } AnfNodePtr mul4 = nullptr; AnfNodePtr real_div0 = nullptr; AnfNodePtr real_div1 = nullptr; diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.cc index 68baeeed99..5065c4c5ba 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.cc @@ -61,6 +61,9 @@ const AnfNodePtr LambNextRightRule::Process(const FuncGraphPtr &func_graph, cons const EquivPtr &equiv) const { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(node); + if (!CheckSupportDataType(node, kFloatDataTypeSet)) { + return nullptr; + } auto new_node = CreateLambNextRightNode(func_graph, equiv); MS_EXCEPTION_IF_NULL(new_node); // Set abstract of new node diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc index 16a43e2072..b5b6d2bb08 
100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc @@ -50,6 +50,9 @@ const AnfNodePtr LambUpdateWithLRRuleFusion::Process(const FuncGraphPtr &graph, MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(equiv); + if (!CheckSupportDataType(node, kFloatDataTypeSet)) { + return nullptr; + } auto input0 = utils::cast((*equiv)[input0_]); auto input1 = utils::cast((*equiv)[input1_]); auto input2 = utils::cast((*equiv)[input2_]); diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.cc index 069581b6e4..43e1872163 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.cc @@ -42,6 +42,9 @@ const AnfNodePtr LambUpdateWithLrV2::Process(const FuncGraphPtr &func_graph, con const EquivPtr &equiv) const { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(equiv); + if (!CheckSupportDataType(node, kFloatDataTypeSet)) { + return nullptr; + } auto prim = std::make_shared(kLambUpdateWithLrV2OpName); std::vector inputs = {NewValueNode(prim)}; (void)std::transform(input_varptr_.begin(), input_varptr_.end(), std::back_inserter(inputs), diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc index fba1ab40af..b16387d8f1 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc @@ -121,6 +121,9 @@ const AnfNodePtr LayerNormBetaGammaBackpropFusion::Process(const FuncGraphPtr &f if (node == nullptr || !node->isa()) { return nullptr; } + if 
(AnfAlgo::IsGraphKernel(node)) { + return nullptr; + } auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); std::vector cast_nodes; diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.cc index 6b751873d6..e7a73a9c7f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.cc @@ -32,19 +32,6 @@ bool CheckValueNodeInputOfMul(const AnfNodePtr &node) { std::vector mul_input_shape = AnfAlgo::GetOutputInferShape(node, 0); return mul_input_shape.empty() || (mul_input_shape.size() == 1 && mul_input_shape[0] == 1); } -void AddInputToOutput(const FuncGraphPtr &func_graph, const CNodePtr &old_cnode, const AnfNodePtr &new_node, - std::vector *new_outputs) { - MS_EXCEPTION_IF_NULL(old_cnode); - MS_EXCEPTION_IF_NULL(new_node); - MS_EXCEPTION_IF_NULL(new_outputs); - auto node_to_output = old_cnode->input(kAccumIndex + 1); - MS_EXCEPTION_IF_NULL(node_to_output); - AbstractBasePtrList abstract_list{old_cnode->abstract(), node_to_output->abstract()}; - auto abstract_tuple = std::make_shared(abstract_list); - new_node->set_abstract(abstract_tuple); - // Create Output - CreateMultipleOutputsOfAnfNode(func_graph, new_node, kFusedMulApplyMomentumOutputNum, new_outputs); -} } // namespace const BaseRef MomentumLossscaleFusion::DefinePattern() const { @@ -94,14 +81,9 @@ const AnfNodePtr MomentumLossscaleFusion::Process(const FuncGraphPtr &func_graph input_names_value[3] = "x1"; input_names_value.emplace_back("x2"); AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names_value), new_node); + new_node->set_abstract(node->abstract()); new_node->set_scope(node->scope()); - // Create Outputs - std::vector new_outputs; - AddInputToOutput(func_graph, cnode, new_node, &new_outputs); - if (new_outputs.size() != kFusedMulApplyMomentumOutputNum) { - MS_LOG(EXCEPTION) << "Failed 
to create outputs of " << new_node->DebugString(); - } - return new_outputs[0]; + return new_node; } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.cc index 5e265f2cf1..fa2815ff62 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.cc @@ -23,33 +23,33 @@ namespace mindspore { namespace opt { const BaseRef RemoveReshapePair::DefinePattern() const { - const auto prim_reshape = std::make_shared(prim::kPrimReshape->name()); - VectorRef reshape({prim_reshape, input_varptr_}); - - return VectorRef({prim::kPrimReshape, reshape}); + VarPtr X = std::make_shared(); + MS_EXCEPTION_IF_NULL(X); + return VectorRef({prim::kPrimReshape, VectorRef({prim::kPrimReshape, X})}); } const AnfNodePtr RemoveReshapePair::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &equiv) const { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(equiv); - auto manager = func_graph->manager(); - MS_EXCEPTION_IF_NULL(manager); auto reshape_op_1 = CheckAnfNodeIfCNodeAndInputSize(node, kBackendReshapeInputNum); MS_EXCEPTION_IF_NULL(reshape_op_1); // If reshape operator used by more than one other operators, reshape operator cant not be deleted directly - auto users = manager->node_users()[reshape_op_1]; - if (users.size() > 1) { + if (IsUsedByOthers(func_graph, reshape_op_1)) { return nullptr; } auto reshape_op_2 = CheckAnfNodeIfCNodeAndInputSize(reshape_op_1->input(1), kBackendReshapeInputNum); MS_EXCEPTION_IF_NULL(reshape_op_2); - users = manager->node_users()[reshape_op_2]; - if (users.size() > 1) { + if (IsUsedByOthers(func_graph, reshape_op_2)) { return nullptr; } - auto input_node = reshape_op_2->input(1); - return input_node; + auto output_shape = AnfAlgo::GetOutputDeviceShape(reshape_op_2, 0); + auto input_shape = 
AnfAlgo::GetInputDeviceShape(reshape_op_1, 0); + if (input_shape == output_shape) { + auto input_node = reshape_op_2->input(1); + return input_node; + } + return nullptr; } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.h b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.h index a284f4eaa9..ddb25df70c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.h +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.h @@ -28,15 +28,10 @@ namespace mindspore { namespace opt { class RemoveReshapePair : public PatternProcessPass { public: - explicit RemoveReshapePair(bool multigraph = true) : PatternProcessPass("remove_reshape_pair", multigraph) { - input_varptr_ = std::make_shared(); - } + explicit RemoveReshapePair(bool multigraph = true) : PatternProcessPass("remove_reshape_pair", multigraph) {} ~RemoveReshapePair() override = default; const BaseRef DefinePattern() const override; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; - - private: - VarPtr input_varptr_; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc index ccb0cbfcb8..f95406e5e1 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc @@ -31,6 +31,24 @@ const BaseRef SoftmaxGradExtFusion::DefinePattern() const { return mul_grad; } +const BaseRef SoftmaxGradExtFusionV2::DefinePattern() const { + VectorRef mul({prim::kPrimMul, input1_, input0_}); + VectorRef sum({sum_var_, mul}); + VectorRef sub({prim::kPrimSub, input0_, sum}); + VectorRef mul1({prim::kPrimMul, input1_, sub}); + VectorRef mul_grad({prim::kPrimMul, input2_, mul1}); + return mul_grad; +} + +const 
BaseRef SoftmaxGradExtFusionV3::DefinePattern() const { + VectorRef mul({prim::kPrimMul, input1_, input0_}); + VectorRef sum({sum_var_, mul}); + VectorRef sub({prim::kPrimSub, input0_, sum}); + VectorRef mul1({prim::kPrimMul, input1_, sub}); + VectorRef mul_grad({prim::kPrimMul, mul1, input2_}); + return mul_grad; +} + const AnfNodePtr SoftmaxGradExtFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node, const EquivPtr &equiv) const { MS_EXCEPTION_IF_NULL(graph); @@ -40,13 +58,17 @@ const AnfNodePtr SoftmaxGradExtFusion::Process(const FuncGraphPtr &graph, const auto input1 = GetAnfNodeByVar(equiv, input1_); auto input2 = GetAnfNodeByVar(equiv, input2_); auto sum = GetAnfNodeByVar(equiv, sum_var_); + if (!GetBoolAttr(sum, kAttrKeepDims)) { + MS_LOG(INFO) << "sum's attr keep_dims should be true if do fusion"; + return nullptr; + } auto prim = std::make_shared(kSoftmaxGradExtOpName); auto fusion_node = graph->NewCNode({NewValueNode(prim), input0, input1, input2}); MS_EXCEPTION_IF_NULL(fusion_node); fusion_node->set_scope(node->scope()); fusion_node->set_abstract(node->abstract()); - AnfAlgo::CopyNodeAttr(kAttrKeepDims, sum, fusion_node); + AnfAlgo::CopyNodeAttr(kAttrKeepDims, "keepdims", sum, fusion_node); AnfAlgo::CopyNodeAttr(kAttrAxis, sum, fusion_node); return fusion_node; } diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h index 70c5658e60..59032e6973 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h @@ -17,13 +17,15 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_SOFTMAX_GRAD_EXT_FUSION_H_ #include +#include #include "pre_activate/common/optimizer.h" namespace mindspore { namespace opt { class SoftmaxGradExtFusion : public PatternProcessPass { public: - explicit SoftmaxGradExtFusion(bool multigraph = true) : 
PatternProcessPass("softmax_grad_ext_fusion", multigraph) { + explicit SoftmaxGradExtFusion(const std::string &name = "softmax_grad_ext_fusion", bool multigraph = true) + : PatternProcessPass(name, multigraph) { input0_ = std::make_shared(); input1_ = std::make_shared(); input2_ = std::make_shared(); @@ -33,12 +35,28 @@ class SoftmaxGradExtFusion : public PatternProcessPass { const BaseRef DefinePattern() const override; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; - private: + protected: VarPtr input0_; VarPtr input1_; VarPtr input2_; VarPtr sum_var_; }; + +class SoftmaxGradExtFusionV2 : public SoftmaxGradExtFusion { + public: + explicit SoftmaxGradExtFusionV2(bool multigraph = true) + : SoftmaxGradExtFusion("softmax_grad_ext_fusion_v2", multigraph) {} + ~SoftmaxGradExtFusionV2() override = default; + const BaseRef DefinePattern() const override; +}; + +class SoftmaxGradExtFusionV3 : public SoftmaxGradExtFusion { + public: + explicit SoftmaxGradExtFusionV3(bool multigraph = true) + : SoftmaxGradExtFusion("softmax_grad_ext_fusion_v3", multigraph) {} + ~SoftmaxGradExtFusionV3() override = default; + const BaseRef DefinePattern() const override; +}; } // namespace opt } // namespace mindspore #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_SOFTMAX_GRAD_EXT_FUSION_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.cc index c3884ff70a..6261b63882 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include "session/anf_runtime_algorithm.h" #include "common/utils.h" @@ -50,6 +51,8 @@ CNodePtr GenerateSquareSumV1(const FuncGraphPtr &graph, const CNodePtr &square, square_sumv1->set_scope(sum->scope()); AnfAlgo::CopyNodeAttr(kAttrAxis, sum, square_sumv1); 
AnfAlgo::CopyNodeAttr(kAttrKeepDims, sum, square_sumv1); + auto names = MakeValue>({prim::kPrimSquare->name(), prim::kPrimReduceSum->name()}); + AnfAlgo::SetNodeAttr(kAttrDatadumpOriginalNames, names, square_sumv1); return square_sumv1; } @@ -71,6 +74,8 @@ CNodePtr GenerateSquareSumV2(const FuncGraphPtr &graph, const CNodePtr &square, square_sumv2->set_scope(sum->scope()); AnfAlgo::CopyNodeAttr(kAttrAxis, sum, square_sumv2); AnfAlgo::CopyNodeAttr(kAttrKeepDims, sum, square_sumv2); + auto names = MakeValue>({prim::kPrimSquare->name(), prim::kPrimReduceSum->name()}); + AnfAlgo::SetNodeAttr(kAttrDatadumpOriginalNames, names, square_sumv2); return square_sumv2; } diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.cc index 1651718703..e45fc2637f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.cc @@ -53,7 +53,7 @@ const AnfNodePtr TransposeTransDataFusion::Process(const FuncGraphPtr &func_grap new_transdata_builder->SetProcessor(transdata_kernel_build_info->processor()); auto new_fusion_transdata = std::make_shared(kTransDataOpName); - if (supported_checker_->CheckAiCoreSupported(transdata_cnode, new_transdata_builder->Build())) { + if (supported_checker_->CheckAICoreSupported(transdata_cnode, new_transdata_builder->Build())) { std::vector inputs = {NewValueNode(new_fusion_transdata), utils::cast((*equiv)[input_varptr_])}; auto new_node = func_graph->NewCNode(inputs); diff --git a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc b/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc index 7ba42a60a0..b930ac69c9 100644 --- a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc +++ b/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc @@ -28,6 +28,7 @@ namespace mindspore { namespace 
opt { void BackendCommonOptimization(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); MS_LOG(INFO) << "start common opt graph:" << kernel_graph->graph_id(); auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); @@ -37,7 +38,8 @@ void BackendCommonOptimization(const std::shared_ptr &kern save_graphs_path = "."; } if (save_graphs) { - std::string file_path = save_graphs_path + "/" + "hwopt_common_before.ir"; + std::string file_path = + save_graphs_path + "/hwopt_common_before_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; DumpIR(file_path, kernel_graph); } auto optimizer = std::make_shared(); @@ -51,7 +53,8 @@ void BackendCommonOptimization(const std::shared_ptr &kern (void)optimizer->Optimize(kernel_graph); kernel_graph->SetExecOrderByDefault(); if (save_graphs) { - std::string file_path = save_graphs_path + "/" + "hwopt_common_after.ir"; + std::string file_path = + save_graphs_path + "/hwopt_common_after_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; DumpIR(file_path, kernel_graph); } } diff --git a/mindspore/ccsrc/pre_activate/common/helper.cc b/mindspore/ccsrc/pre_activate/common/helper.cc index 9be537775e..1c2ade201c 100644 --- a/mindspore/ccsrc/pre_activate/common/helper.cc +++ b/mindspore/ccsrc/pre_activate/common/helper.cc @@ -16,6 +16,7 @@ #include "pre_activate/common/helper.h" #include +#include #include #include #include @@ -45,6 +46,7 @@ bool IsDepend(const FuncGraphPtr &graph, const AnfNodePtr &node1, const AnfNodeP std::vector node_list = TopoSort(graph->get_return()); std::map> control_depend_map; for (auto &nd : node_list) { + MS_EXCEPTION_IF_NULL(nd); if (AnfAlgo::CheckPrimitiveType(nd, prim::kPrimControlDepend)) { auto control_depend = nd->cast(); auto prior_node = control_depend->input(kControlDependPriorIndex); @@ -100,9 +102,12 @@ bool UnVisited(const BaseRef &n) { auto prim_py = value->cast(); MS_EXCEPTION_IF_NULL(prim_py); return !prim_py->HasAttr(kAttrVisited); - 
} else { - return false; + } else if (IsValueNode(in)) { + auto func_graph = GetValueNode(in); + MS_EXCEPTION_IF_NULL(func_graph); + return !func_graph->has_flag(kAttrVisited); } + return false; } return false; } @@ -157,6 +162,7 @@ const AnfNodePtr EliminateDependTransop(const FuncGraphPtr &func_graph, const An MS_EXCEPTION_IF_NULL(func_graph); auto transop_cnode = CheckAnfNodeIfCNodeAndInputSize(node, kTransOpInputNum); + MS_EXCEPTION_IF_NULL(transop_cnode); auto depend_cnode = CheckAnfNodeIfCNodeAndInputSize(transop_cnode->input(kCastInputNum - 1), kDependInputNum); auto prev_transop_cnode = CheckAnfNodeIfCNodeAndInputSize(depend_cnode->input(1), kTransOpInputNum); MS_EXCEPTION_IF_NULL(depend_cnode->input(kDependInputNum - 1)); @@ -185,9 +191,12 @@ bool Visited(const BaseRef &n) { auto prim_py = value->cast(); MS_EXCEPTION_IF_NULL(prim_py); return prim_py->HasAttr(kAttrVisited); - } else { - return false; + } else if (IsValueNode(in)) { + auto func_graph = GetValueNode(in); + MS_EXCEPTION_IF_NULL(func_graph); + return func_graph->has_flag(kAttrVisited); } + return false; } return false; } @@ -381,7 +390,7 @@ tensor::TensorPtr CreateTupleTensor(const ValueTuplePtr &value_tuple) { bool IsNopNode(const AnfNodePtr &node) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); - if (context_ptr->device_target() != kAscendDevice) { + if (context_ptr->device_target() != kAscendDevice && context_ptr->device_target() != kGPUDevice) { return false; } static std::unordered_set nop_nodes = {prim::kPrimReshape->name(), kExpandDimsOpName, @@ -473,15 +482,36 @@ void RemoveNopNode(session::KernelGraph *const graph) { } } -bool IsUsedByOthers(const FuncGraphPtr &graph, const AnfNodePtr &node) { +std::shared_ptr>> GetRealNodeUsedList(const FuncGraphPtr &graph, + const AnfNodePtr &node) { + auto output_node_list = std::make_shared>>(); MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(node); auto manager = graph->manager(); 
MS_EXCEPTION_IF_NULL(manager); - if (manager->node_users().find(node) == manager->node_users().end()) { + auto iter = manager->node_users().find(node); + if (iter == manager->node_users().end()) { MS_LOG(EXCEPTION) << "node has no output in manager"; } - return manager->node_users()[node].size() > 1; + auto output_info_list = iter->second; + for (const auto &output_info : output_info_list) { + if (AnfAlgo::GetCNodeName(output_info.first) == prim::kPrimControlDepend->name()) { + continue; + } + if (AnfAlgo::GetCNodeName(output_info.first) == prim::kPrimDepend->name() && + output_info.second == kDependAttachNodeIndex) { + continue; + } + output_node_list->push_back(output_info); + } + return output_node_list; +} + +bool IsUsedByOthers(const FuncGraphPtr &graph, const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(node); + auto output_node_list = GetRealNodeUsedList(graph, node); + MS_EXCEPTION_IF_NULL(output_node_list); + return output_node_list->size() > 1; } AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, size_t output_idx) { @@ -545,14 +575,22 @@ bool AnfEqual(const BaseRef &a, const BaseRef &b) { if (utils::isa(a) && utils::isa(b)) { auto a_node = utils::cast(a); auto b_node = utils::cast(b); + MS_EXCEPTION_IF_NULL(a_node); + MS_EXCEPTION_IF_NULL(b_node); if (IsValueNode(a_node) && IsValueNode(b_node)) { auto a_value_node = a_node->cast(); + MS_EXCEPTION_IF_NULL(a_value_node); auto a_value = a_value_node->value(); + MS_EXCEPTION_IF_NULL(a_value); auto a_prim = a_value->cast(); + MS_EXCEPTION_IF_NULL(a_prim); auto b_value_node = b_node->cast(); + MS_EXCEPTION_IF_NULL(b_value_node); auto b_value = b_value_node->value(); + MS_EXCEPTION_IF_NULL(b_value); auto b_prim = b_value->cast(); + MS_EXCEPTION_IF_NULL(b_prim); return a_prim->name() == b_prim->name(); } else if (a_node->isa() && b_node->isa()) { @@ -704,5 +742,44 @@ AnfNodePtr GetAnfNodeByVar(const EquivPtr &equiv, const VarPtr &var_node) { } 
return res; } + +bool CompareTupleGetitem(const AnfNodePtr &n1, const AnfNodePtr &n2) { + MS_EXCEPTION_IF_NULL(n1); + MS_EXCEPTION_IF_NULL(n2); + auto n1_cnode = n1->cast(); + auto n2_cnode = n2->cast(); + MS_EXCEPTION_IF_NULL(n1_cnode); + MS_EXCEPTION_IF_NULL(n2_cnode); + auto index_input1 = n1_cnode->input(kInputNodeOutputIndexInTupleGetItem); + MS_EXCEPTION_IF_NULL(index_input1); + auto value_node1 = index_input1->cast(); + MS_EXCEPTION_IF_NULL(value_node1); + auto index_input2 = n2_cnode->input(kInputNodeOutputIndexInTupleGetItem); + MS_EXCEPTION_IF_NULL(index_input2); + auto value_node2 = index_input2->cast(); + MS_EXCEPTION_IF_NULL(value_node2); + return GetValue(value_node1->value()) < GetValue(value_node2->value()); +} + +bool GetBoolAttr(const AnfNodePtr &node, const std::string &attr_name) { + MS_EXCEPTION_IF_NULL(node); + if (!node->isa()) { + MS_LOG(INFO) << "node is not a cnode"; + return false; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + return AnfAlgo::HasNodeAttr(attr_name, cnode) && AnfAlgo::GetNodeAttr(node, attr_name); +} + +bool CheckSupportDataType(const AnfNodePtr &node, const std::set &supported_data_type_set) { + MS_EXCEPTION_IF_NULL(node); + TypeId data_type = AnfAlgo::GetOutputInferDataType(node, 0); + if (supported_data_type_set.find(data_type) != supported_data_type_set.end()) { + return true; + } + MS_LOG(DEBUG) << "Not supported data type. 
Node:" << node->DebugString(); + return false; +} } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/common/helper.h b/mindspore/ccsrc/pre_activate/common/helper.h index d315f6b5d9..49a1d47d0c 100644 --- a/mindspore/ccsrc/pre_activate/common/helper.h +++ b/mindspore/ccsrc/pre_activate/common/helper.h @@ -18,7 +18,9 @@ #include #include +#include #include +#include #include #include "ir/func_graph.h" #include "session/kernel_graph.h" @@ -65,6 +67,7 @@ constexpr size_t kBNGrad3OutputNum = 1; constexpr size_t kBNTrainingReduceOutputNum = 2; constexpr size_t kBNTrainingUpdateOutputNum = 5; constexpr size_t kBNTrainingUpdateV2OutputNum = 3; +constexpr size_t kBNTrainingUpdateV3OutputNum = 5; constexpr size_t kBNTrainingUpdateGradOutputNum = 2; constexpr size_t kSingleOutputNum = 1; @@ -94,6 +97,7 @@ constexpr size_t kBiasAddInputNum = 3; constexpr size_t kTopkInputNum = 3; constexpr size_t kLarsV2InputNum = 5; constexpr size_t kFusedMulApplyMomentumOutputNum = 2; +constexpr size_t kSplitInputNum = 2; enum FusedBatchNormInput { kX = 1, @@ -152,6 +156,8 @@ tensor::TensorPtr CreateTensorWithValueTuple(const ValueTuplePtr &value_tuple_pt tensor::TensorPtr CreateTupleTensor(const ValueTuplePtr &value_tuple); +bool IsAllNopNode(const session::KernelGraph *const graph); + bool IsNopNode(const AnfNodePtr &node); void HideNopNode(session::KernelGraph *const graph); @@ -162,6 +168,9 @@ AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePt bool IsUsedByOthers(const FuncGraphPtr &graph, const AnfNodePtr &node); +std::shared_ptr>> GetRealNodeUsedList(const FuncGraphPtr &graph, + const AnfNodePtr &node); + void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set &input_attrs); bool AnfEqual(const BaseRef &a, const BaseRef &b); @@ -176,6 +185,15 @@ bool IsSameNode(const EquivPtr &equiv1, const EquivPtr &equiv2, const VarPtr &va // Get anf_node from equiv by var_node AnfNodePtr GetAnfNodeByVar(const EquivPtr 
&equiv, const VarPtr &var_node); + +// Compare tuple getitem's index, return bool[n1's index < n2's index] +bool CompareTupleGetitem(const AnfNodePtr &n1, const AnfNodePtr &n2); + +// Get attr which is bool from cnode +bool GetBoolAttr(const AnfNodePtr &node, const std::string &attr_name); + +// Check node's data type is in supported data type set +bool CheckSupportDataType(const AnfNodePtr &node, const std::set &supported_data_type_set); } // namespace opt } // namespace mindspore #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_HELPER_H_ diff --git a/mindspore/ccsrc/pre_activate/common/node_pass.cc b/mindspore/ccsrc/pre_activate/common/node_pass.cc index a6e93d2f07..876da8667b 100644 --- a/mindspore/ccsrc/pre_activate/common/node_pass.cc +++ b/mindspore/ccsrc/pre_activate/common/node_pass.cc @@ -22,6 +22,7 @@ #include "ir/anf.h" #include "ir/func_graph.h" #include "ir/manager.h" +#include "session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { @@ -52,8 +53,13 @@ bool NodePass::Run(const FuncGraphPtr &func_graph) { if (new_node && IsValueNode(new_node)) { auto const_func_graph = GetValueNode(new_node); MS_EXCEPTION_IF_NULL(const_func_graph); - todo.push_back(const_func_graph->output()); + if (!const_func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + todo.push_back(const_func_graph->output()); + } } else if (new_node && new_node->isa()) { + if (AnfAlgo::IsGraphKernel(new_node)) { + todo.push_back(new_node); + } auto cnode = new_node->cast(); MS_EXCEPTION_IF_NULL(cnode); auto inputs = cnode->inputs(); diff --git a/mindspore/ccsrc/pre_activate/common/optimizer.cc b/mindspore/ccsrc/pre_activate/common/optimizer.cc index fa51a0bd8c..71a523ea1d 100644 --- a/mindspore/ccsrc/pre_activate/common/optimizer.cc +++ b/mindspore/ccsrc/pre_activate/common/optimizer.cc @@ -86,11 +86,8 @@ void GraphOptimizer::AddPassManager(const PassManagerPtr &pass_manager) { FuncGraphPtr GraphOptimizer::Optimize(const FuncGraphPtr &func_graph, bool run_only_once) { 
MS_EXCEPTION_IF_NULL(func_graph); run_only_once_ = (pass_managers_.size() == 1) ? true : run_only_once; - auto manager = func_graph->manager(); - if (manager == nullptr) { - manager = Manage(func_graph, false); - func_graph->set_manager(manager); - } + // Performance risk by creating new manager each time + auto manager = Manage(func_graph, true); bool changed = true; while (changed) { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_copy_manager.h b/mindspore/ccsrc/pre_activate/mem_reuse/mem_copy_manager.h index 49d1884a48..ea9947b41b 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_copy_manager.h +++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_copy_manager.h @@ -83,9 +83,9 @@ class MemCopyManager { virtual DeviceAddressPtr UpdateSwapInQueue() { return nullptr; } - virtual bool AllocHostPinnedMem(size_t size, void **addr) { return true; } + virtual bool AllocHostPinnedMem(size_t size, void **addr) const { return true; } - virtual void FreeHostPinnedMem(void *addr) {} + virtual void FreeHostPinnedMem(void *addr) const {} virtual void ClearSwapQueue() {} }; diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc b/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc index a2dfce2241..095f8f6495 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc +++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc @@ -150,7 +150,7 @@ size_t DynamicMemPoolBestFit::CalMemBlockAllocSize(size_t size) { alloc_mem_size = alloc_mem_size * 2; } alloc_mem_size = std::min(alloc_mem_size, device_free_mem_size); - return AlignMemorySize(alloc_mem_size); + return alloc_mem_size; } bool DynamicMemPoolBestFit::IsDivide(size_t tensor_size, size_t mem_buf_size) const { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.cc b/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.cc index aaa0c155e4..2927b1204f 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.cc +++ 
b/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.cc @@ -18,6 +18,8 @@ #include #include #include "pre_activate/mem_reuse/mem_reuse_checker.h" +#include "pre_activate/common/helper.h" + namespace mindspore { namespace memreuse { bool MemReuseUtil::InitDynamicOutputKernelRef() { @@ -226,7 +228,11 @@ KernelRefCountPtr MemReuseUtil::GetKernelInputRef(const CNodePtr &kernel, size_t << AnfAlgo::GetInputTensorNum(kernel); } auto input_node = kernel->input(input_idx + 1); - auto kernel_input = AnfAlgo::VisitKernel(input_node, 0); + // Graph may be all nop nodes and not remove nop node, so this can not skip nop node. + auto kernel_input = AnfAlgo::VisitKernelWithReturnType(input_node, 0, false); + if (IsPrimitive(kernel_input.first, prim::kPrimMakeTuple)) { + MS_LOG(EXCEPTION) << "Input node [" << input_node->DebugString() << "]'s input " << input_idx << " is MakeTuple"; + } auto result = GetRef(kernel_input.first, SizeToInt(kernel_input.second)); return result; } @@ -252,6 +258,7 @@ void MemReuseUtil::SetKernelDefMap() { void MemReuseUtil::SetKernelDefInputs() { for (const auto &kernel : graph_->execution_order()) { + MS_EXCEPTION_IF_NULL(kernel); auto key = kernel.get(); // find kernel_def according to cnode addr auto iter = kernel_map_.find(key); @@ -264,7 +271,11 @@ void MemReuseUtil::SetKernelDefInputs() { if (ref_ptr != nullptr) { // set the inputs of this kernel_def auto input_node = AnfAlgo::GetInputNode(kernel, i); - auto input = AnfAlgo::VisitKernel(input_node, 0); + // Graph may be all nop nodes and not remove nop node, so this can not skip nop node. 
+ auto input = AnfAlgo::VisitKernelWithReturnType(input_node, 0, false); + if (IsPrimitive(input.first, prim::kPrimMakeTuple)) { + MS_LOG(EXCEPTION) << "Input node [" << input_node->DebugString() << "]'s input " << i << " is MakeTuple"; + } auto input_key = (input.first).get(); auto input_iter = kernel_map_.find(input_key); if (input_iter == kernel_map_.end()) { @@ -292,10 +303,47 @@ void MemReuseUtil::SetReuseRefCount() { } } +void MemReuseUtil::SetSummaryNodesRefCount() { + bool summary_exist = graph_->summary_node_exist(); + if (!summary_exist) { + return; + } + + auto summary_nodes = graph_->summary_nodes(); + if (summary_nodes.empty()) { + return; + } + + for (auto &node_item : summary_nodes) { + auto node = node_item.second.first; + size_t index = IntToSize(node_item.second.second); + MS_LOG(INFO) << "set summary node's ref count, node: " << node->fullname_with_scope() << " index: " << index; + if (kernel_output_refs_.find(node.get()) != kernel_output_refs_.end()) { + KernelRefCountPtr kernel_ref = kernel_output_refs_[node.get()][index]; + kernel_ref->ref_count_ = kMaxRefCount; + kernel_ref->ref_count_dynamic_use_ = kMaxRefCount; + } else { + MS_LOG(WARNING) << "can't find summary node's kernel_def " << node->fullname_with_scope(); + } + } +#ifdef MEM_REUSE_DEBUG + auto graph = *graph_; + MemReuseChecker::GetInstance().CheckMemReuseIR(total_refs_list_, kernel_def_ptr_list_, &graph); +#endif +} + void MemReuseUtil::SetGraphOutputRefCount() { + auto is_all_nop_node = opt::IsAllNopNode(graph_); auto nodes = AnfAlgo::GetAllOutput(graph_->output(), {prim::kPrimTupleGetItem}); for (const auto &node : nodes) { - auto kernel_input = AnfAlgo::VisitKernelWithReturnType(node, 0); + session::KernelWithIndex kernel_input; + if (is_all_nop_node) { + // The graph does not remove the nop node. + kernel_input = AnfAlgo::VisitKernelWithReturnType(node, 0, false); + } else { + // The graph removes the nop node. 
+ kernel_input = AnfAlgo::VisitKernelWithReturnType(node, 0, true); + } MS_EXCEPTION_IF_NULL(kernel_input.first); if (!kernel_input.first->isa() || !AnfAlgo::IsRealKernel(kernel_input.first)) { continue; @@ -319,6 +367,7 @@ void MemReuseUtil::SetGraphOutputRefCount() { void MemReuseUtil::ResetDynamicUsedRefCount() { for (auto iter = kernel_output_refs_.begin(); iter != kernel_output_refs_.end(); ++iter) { for (auto &ref_count : iter->second) { + MS_EXCEPTION_IF_NULL(ref_count); ref_count->ref_count_dynamic_use_ = ref_count->ref_count_; } } @@ -330,6 +379,7 @@ void MemReuseUtil::SetAllInfo(KernelGraph *graph) { } SetKernelDefMap(); SetReuseRefCount(); + SetSummaryNodesRefCount(); SetWorkSpaceList(); #ifdef MEM_REUSE_DEBUG MemReuseChecker::GetInstance().CheckMemReuseIR(total_refs_list_, kernel_def_ptr_list_, graph); diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.h b/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.h index 08029f231a..c7a129f1e9 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.h +++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.h @@ -63,6 +63,7 @@ class MemReuseUtil { void SetWkMap(const CNodePtr &kernel, KernelDef *kernel_def_ptr); void SetKernelDefInputs(); void SetReuseRefCount(); + void SetSummaryNodesRefCount(); // Set the reference count of graph output specially. void SetGraphOutputRefCount(); // Reset the dynamic used reference count by ref_count_. 
diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.cc b/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.cc index cf92679187..5cd6a5f50e 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.cc +++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.cc @@ -48,7 +48,8 @@ void MemReuseChecker::CheckOutRef(const KernelRefs &kernel_refs, const CNodePtr auto iter = kernel_refs.find(key); auto node_name = AnfAlgo::GetCNodeName(c_node); if (iter == kernel_refs.end()) { - MS_LOG(EXCEPTION) << "kernel [" << node_name << "] has no output tensor"; + MS_LOG(EXCEPTION) << "kernel [" << node_name << "] has no output tensor, node: " << c_node->DebugString() + << " output index: " << output_idx; } if (output_idx >= iter->second.size()) { MS_LOG(INFO) << "invalid cnode: " << c_node->fullname_with_scope().c_str(); diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc b/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc index c41eacc334..d81364edfb 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc +++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc @@ -28,7 +28,7 @@ void MemSwapManager::Init(const mindspore::session::KernelGraph *kernel_graph) { size_t kernel_index = 0; for (const auto &kernel : execution_order_) { // parse topo order of kernel - kernel_execution_info_.emplace(kernel.get(), kernel_index++); + (void)kernel_execution_info_.emplace(kernel.get(), kernel_index++); // parse tensor info auto kernel_mod = AnfAlgo::GetKernelMod(kernel); MS_EXCEPTION_IF_NULL(kernel_mod); @@ -144,7 +144,7 @@ void MemSwapManager::AddSwapInfo() { } void MemSwapManager::AddMemSwapTask(SwapKind swap_kind, const DeviceAddressPtr &device_address, - const HostAddress &host_address) { + const HostAddress &host_address) const { if (swap_kind == SwapKind::kDeviceToHost) { mem_copy_manager_->AddMemSwapOutTask(device_address, host_address); } else if (swap_kind == SwapKind::kHostToDevice) { @@ 
-152,9 +152,11 @@ void MemSwapManager::AddMemSwapTask(SwapKind swap_kind, const DeviceAddressPtr & } } -bool MemSwapManager::SyncMemCopyStream(SwapKind swap_kind) { return mem_copy_manager_->SyncMemCopyStream(swap_kind); } +bool MemSwapManager::SyncMemCopyStream(SwapKind swap_kind) const { + return mem_copy_manager_->SyncMemCopyStream(swap_kind); +} -DeviceAddressPtr MemSwapManager::UpdateSwapQueue(SwapKind swap_kind) { +DeviceAddressPtr MemSwapManager::UpdateSwapQueue(SwapKind swap_kind) const { if (swap_kind == SwapKind::kDeviceToHost) { return mem_copy_manager_->UpdateSwapOutQueue(); } else { @@ -298,7 +300,7 @@ void MemSwapManager::ReleaseHostPinnedMem() { host_addrs_list_.clear(); } -void MemSwapManager::ClearSwapQueue() { mem_copy_manager_->ClearSwapQueue(); } +void MemSwapManager::ClearSwapQueue() const { mem_copy_manager_->ClearSwapQueue(); } void MemSwapManager::ResetSwapInfo() { ClearSwapQueue(); diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h b/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h index c19930000e..7e2823d27c 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h +++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h @@ -44,11 +44,12 @@ class MemSwapManager { void Init(const mindspore::session::KernelGraph *kernel_graph); - void AddMemSwapTask(SwapKind swap_kind, const DeviceAddressPtr &device_address, const HostAddress &host_address); + void AddMemSwapTask(SwapKind swap_kind, const DeviceAddressPtr &device_address, + const HostAddress &host_address) const; - bool SyncMemCopyStream(SwapKind swap_kind); + bool SyncMemCopyStream(SwapKind swap_kind) const; - DeviceAddressPtr UpdateSwapQueue(SwapKind swap_kind); + DeviceAddressPtr UpdateSwapQueue(SwapKind swap_kind) const; // retreat to find a workable swap scheme bool RetreatSwapInfo(); @@ -83,7 +84,7 @@ class MemSwapManager { void ReleaseHostPinnedMem(); - void ClearSwapQueue(); + void ClearSwapQueue() const; private: void AddSwapInfo(); 
diff --git a/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.cc b/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.cc new file mode 100644 index 0000000000..9df34a1c59 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.cc @@ -0,0 +1,122 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pre_activate/pass/add_atomic_clean.h" +#include +#include +#include +#include "operator/ops.h" +#include "utils/utils.h" +#include "utils/graph_utils.h" +#include "utils/log_adapter.h" +#include "session/anf_runtime_algorithm.h" +#include "session/kernel_graph.h" +#include "debug/anf_ir_dump.h" + +namespace mindspore { +namespace opt { +namespace { + +static std::vector g_output_idx; + +bool HasAtomic(const AnfNodePtr &input) { + if (IsPrimitiveCNode(input)) { + const auto &cnode = input->cast(); + const auto &prim = GetValueNode(cnode->input(0)); + return prim->HasAttr("atomic_add"); + } + return false; +} + +std::vector CalCleanSize(const CNodePtr &pre_node) { + MS_EXCEPTION_IF_NULL(pre_node); + std::vector clean_size_list; + // clean output + for (auto &index : g_output_idx) { + TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(pre_node, index); + size_t type_size = GetTypeByte(TypeIdToType(output_type_id)); + std::vector shape = AnfAlgo::GetOutputDeviceShape(pre_node, index); + auto size = std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies()); + 
clean_size_list.push_back((size + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize); + } + MS_LOG(DEBUG) << "Clear output size: " << clean_size_list.size() << ", pre_node: " << pre_node->fullname_with_scope(); + return clean_size_list; +} + +CNodePtr CreateTbeAtomicCleanNode(const std::shared_ptr &kernel_graph, + const mindspore::CNodePtr &pre_node) { + MS_EXCEPTION_IF_NULL(kernel_graph); + MS_EXCEPTION_IF_NULL(pre_node); + auto clean_zero_prim = std::make_shared(kAtomicAddrCleanOpName); + auto new_value_node = NewValueNode(clean_zero_prim); + std::vector inputs = {new_value_node}; + CNodePtr clean_zero = kernel_graph->NewCNode(inputs); + AbstractBasePtr abstract = std::make_shared(); + clean_zero->set_abstract(abstract); + auto builder = std::make_shared(); + builder->SetKernelType(KernelType::TBE_KERNEL); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), clean_zero.get()); + auto clean_size = CalCleanSize(pre_node); + AnfAlgo::SetNodeAttr(kAttrAtomicAddMemSize, MakeValue(clean_size), clean_zero); + AnfAlgo::SetNodeAttr(kAttrAtomicOutputIndexs, MakeValue(g_output_idx), clean_zero); + AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(pre_node.get()), clean_zero.get()); + return clean_zero; +} +} // namespace + +void AddAtomicClean(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto mng = kernel_graph->manager(); + if (mng == nullptr) { + mng = Manage(kernel_graph, true); + kernel_graph->set_manager(mng); + } + auto &todos = kernel_graph->execution_order(); + for (auto iter = todos.cbegin(); iter != todos.end(); ++iter) { + auto node = *iter; + if (AnfAlgo::IsGraphKernel(node) && kernel_graph->nodes().contains(node)) { + auto fg = GetValueNode(node->input(kAnfPrimitiveIndex)); + MS_EXCEPTION_IF_NULL(fg); + auto input = fg->get_return()->input(1); + if (IsPrimitiveCNode(input, prim::kPrimMakeTuple)) { + const auto &cnode = input->cast(); + for (size_t i = 0; i < cnode->inputs().size(); ++i) { + if 
(HasAtomic(cnode->input(i))) { + g_output_idx.push_back(i - 1); + } + } + } else if (HasAtomic(input)) { + g_output_idx.push_back(0); + } + + if (!g_output_idx.empty()) { + auto zero_node = CreateTbeAtomicCleanNode(kernel_graph, node); + auto depend = kernel_graph->NewCNode({NewValueNode(prim::kPrimDepend), node->input(1), zero_node}); + std::vector new_input = node->inputs(); + new_input[1] = depend; + auto new_cnode = std::make_shared(new_input, kernel_graph); + // Set abstract + new_cnode->set_abstract(node->abstract()); + // Set kernel info + new_cnode->set_kernel_info(node->kernel_info_ptr()); + mng->Replace(node, new_cnode); + g_output_idx.clear(); + } + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.h b/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.h new file mode 100644 index 0000000000..bb1edb0e35 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.h @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ADD_ATOMIC_CLEAN_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ADD_ATOMIC_CLEAN_H_ + +#include +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +void AddAtomicClean(const std::shared_ptr &kernel_graph); +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ADD_ATOMIC_CLEAN_H diff --git a/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc b/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc index f8604d7638..9af50eac33 100644 --- a/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc +++ b/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc @@ -45,6 +45,8 @@ bool BackendCSE::CheckReplace(const AnfNodePtr &main, const AnfNodePtr &node) co auto node_value = GetValueNode(node); if (main_value->isa() && node_value->isa()) { replace = false; + } else if (main_value->isa() && node_value->isa()) { + replace = (AbsOf(main) == AbsOf(node)) && CheckEqualKernelBuildInfo(main, node); } else { replace = (AbsOf(main) == AbsOf(node)) && (*main_value == *node_value); } diff --git a/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc b/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc index fc878dd881..aa4690abcb 100644 --- a/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc +++ b/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc @@ -253,6 +253,13 @@ bool CommunicationOpFusion::Run(const FuncGraphPtr &func_graph) { if (it.second.communication_op_nodes.size() <= 1) { continue; } + auto first_node = it.second.communication_op_nodes[0]; + if (AnfAlgo::HasNodeAttr(kAttrIndex, first_node) && AnfAlgo::GetNodeAttr(first_node, kAttrIndex) > 0) { + std::stable_sort(it.second.communication_op_nodes.begin(), it.second.communication_op_nodes.end(), + [](const CNodePtr &a, const CNodePtr &b) { + return AnfAlgo::GetNodeAttr(a, kAttrIndex) < AnfAlgo::GetNodeAttr(b, 
kAttrIndex); + }); + } size_t segment_num = 0; std::vector segment_index; if (GetSplitSegments(it.second, &segment_num, &segment_index, it.first)) { diff --git a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc b/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc index cc8a1341be..6a557388ad 100644 --- a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc +++ b/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc @@ -36,6 +36,9 @@ ConstInputToAttrInfoRegistry::ConstInputToAttrInfoRegistry() { Register(prim::kPrimReduceSum->name(), {1}); Register(prim::kPrimReduceMean->name(), {1}); Register(prim::kPrimGatherV2->name(), {2}); + Register(prim::kPrimEmbeddingLookup->name(), {2, 3, 4, 5}); + Register(prim::kPrimEmbeddingLookupCommGrad->name(), {1}); + Register(prim::kPrimSubscalar->name(), {1}); Register(prim::kPrimTranspose->name(), {1}); Register(prim::kPrimUnsortedSegmentSum->name(), {2}); Register(prim::kPrimOneHot->name(), {1}); @@ -44,6 +47,7 @@ ConstInputToAttrInfoRegistry::ConstInputToAttrInfoRegistry() { Register(prim::kPrimCumProd->name(), {1}); Register(prim::kPrimReduceAll->name(), {1}); Register(prim::kPrimUnsortedSegmentMin->name(), {2}); + Register(kSparseGatherV2, {2}); Register(kUnsortedSegmentProdOpName, {2}); Register(kSimpleMeanGradOpName, {1}); Register(kMeanGradOpName, {1}); diff --git a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc b/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc index 1f9e2712a6..38d629c415 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc +++ b/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc @@ -26,6 +26,7 @@ #include "utils/context/ms_context.h" #include "operator/ops.h" #include "session/anf_runtime_algorithm.h" +#include "kernel/common_utils.h" namespace mindspore { namespace opt { @@ -34,14 +35,24 @@ const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const An if 
(node == nullptr || !AnfAlgo::IsRealCNodeKernel(node)) { return nullptr; } - CNodePtr cnode = node->cast(); + std::vector todos; + if (AnfAlgo::IsGraphKernel(node)) { + auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + kernel::GetValidKernelNodes(sub_graph, &todos); + } else { + todos.push_back(node); + } - ConstInputToAttrInfoRegister reg; - if (!ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(AnfAlgo::GetCNodeName(cnode), ®)) { - return nullptr; + for (auto &t : todos) { + CNodePtr cnode = t->cast(); + ConstInputToAttrInfoRegister reg; + if (!ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(AnfAlgo::GetCNodeName(cnode), ®)) { + continue; + } + ConstInputToAttr(cnode, reg.GetConstInputAttrInfo()); } - ConstInputToAttr(cnode, reg.GetConstInputAttrInfo()); - return cnode; + return node; } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc b/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc index 56be2e273d..b4f98cc6d7 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc +++ b/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc @@ -17,15 +17,39 @@ #include #include +#include #include "utils/graph_utils.h" #include "pre_activate/common/helper.h" #include "session/anf_runtime_algorithm.h" #include "session/kernel_graph.h" +#include "kernel/common_utils.h" +#include "device/kernel_info.h" namespace mindspore { namespace opt { namespace { +ValueNodePtr MakeValueNode(const ValueNodePtr &value_node) { + MS_EXCEPTION_IF_NULL(value_node); + ValueNodePtr new_value_node = std::make_shared(value_node->value()); + new_value_node->set_abstract(value_node->abstract()); + // create kernel_info fo new value node + auto kernel_info = std::make_shared(); + new_value_node->set_kernel_info(kernel_info); + // create kernel_build_info for new value node + auto 
kernel_build_info_builder = std::make_shared(); + // set the format of value_node to DEFAULT_FORMAT + kernel_build_info_builder->SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); + // set value node initial device data type = infer data type + std::vector types; + for (size_t index = 0; index < AnfAlgo::GetOutputTensorNum(value_node); ++index) { + types.push_back(kTypeUnknown); + } + kernel_build_info_builder->SetOutputsDeviceType(types); + AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), new_value_node.get()); + return new_value_node; +} + AnfNodePtr CreateTensorInput(const KernelGraphPtr &kernel_graph, const AnfNodePtr &input_node) { MS_EXCEPTION_IF_NULL(input_node); auto value_node = input_node->cast(); @@ -50,6 +74,8 @@ AnfNodePtr CreateTensorInput(const KernelGraphPtr &kernel_graph, const AnfNodePt if (kernel_graph != nullptr) { tensor_input = kernel_graph->NewValueNode(tensor_input); kernel_graph->AddValueNodeToGraph(tensor_input); + } else { + tensor_input = MakeValueNode(tensor_input); } tensor_input->set_scope(input_node->scope()); return tensor_input; @@ -89,6 +115,26 @@ AnfNodePtr ConstInputToTensorInput(const FuncGraphPtr &func_graph, const CNodePt } return nullptr; } + +AnfNodePtr ProcessGraphKernelOp(const AnfNodePtr &node) { + auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + auto mng = sub_graph->manager(); + MS_EXCEPTION_IF_NULL(mng); + std::vector todo; + std::vector> graph_rets; + kernel::GetValidKernelNodes(sub_graph, &todo); + kernel::GetGraphRealOutput(sub_graph, &graph_rets); + + for (auto &t : todo) { + auto t_new_node = ConstInputToTensorInput(sub_graph, t->cast()); + if (t_new_node != nullptr && t_new_node != t) { + (void)mng->Replace(t, t_new_node); + } + } + + return node; +} } // namespace const AnfNodePtr ConvertConstInputToTensorInput::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, @@ -96,8 +142,11 @@ const AnfNodePtr 
ConvertConstInputToTensorInput::Process(const FuncGraphPtr &fun if (node == nullptr || func_graph == nullptr || !AnfAlgo::IsRealCNodeKernel(node)) { return nullptr; } - CNodePtr cnode = node->cast(); - return ConstInputToTensorInput(func_graph, cnode); + if (AnfAlgo::IsGraphKernel(node)) { + return ProcessGraphKernelOp(node); + } else { + return ConstInputToTensorInput(func_graph, node->cast()); + } } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc b/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc index ccc4fd5265..a03087c1a4 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc +++ b/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc @@ -21,10 +21,37 @@ #include "session/anf_runtime_algorithm.h" #include "pre_activate/common/helper.h" #include "session/kernel_graph.h" +#include "kernel/common_utils.h" +#include "device/kernel_info.h" namespace mindspore { namespace opt { namespace { +bool MakeValueNode(const AnfNodePtr &node) { + auto value_node = node->cast(); + if (value_node == nullptr) { + return false; + } + + // create kernel_info fo new value node + auto kernel_info = std::make_shared(); + value_node->set_kernel_info(kernel_info); + // create kernel_build_info for new value node + auto kernel_build_info_builder = std::make_shared(); + // set the format of value_node to DEFAULT_FORMAT + kernel_build_info_builder->SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); + // set value node initial device data type = infer data type + TypeId infer_data_type; + if (AnfAlgo::GetOutputTensorNum(value_node) == 0) { + infer_data_type = kTypeUnknown; + } else { + infer_data_type = AnfAlgo::GetOutputInferDataType(value_node, 0); + } + kernel_build_info_builder->SetOutputsDeviceType(std::vector{infer_data_type}); + AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), value_node.get()); + return 
true; +} + void ConvertTupleOuputToPlantInputs(const FuncGraphPtr &graph, const AnfNodePtr &input_node, std::vector *plant_inputs, std::vector *dyn_input_sizes) { MS_EXCEPTION_IF_NULL(plant_inputs); @@ -50,12 +77,12 @@ void ConvertTupleOuputToPlantInputs(const FuncGraphPtr &graph, const AnfNodePtr (void)std::copy(convert_inputs.begin(), convert_inputs.end(), std::back_inserter(*plant_inputs)); } -CNodePtr ConvertMakeTupleInputToPlantInputs(const FuncGraphPtr &graph, const CNodePtr &cnode_ptr) { +void ConvertMakeTupleInputToPlantInputs(const FuncGraphPtr &graph, const CNodePtr &cnode_ptr) { MS_EXCEPTION_IF_NULL(cnode_ptr); MS_EXCEPTION_IF_NULL(graph); auto &ori_args = cnode_ptr->inputs(); if (ori_args.size() < 1) { - return nullptr; + return; } std::vector plant_inputs; std::vector dyn_input_sizes; @@ -68,8 +95,17 @@ CNodePtr ConvertMakeTupleInputToPlantInputs(const FuncGraphPtr &graph, const CNo auto cnode = input_node->cast(); MS_EXCEPTION_IF_NULL(cnode); auto inputs = cnode->inputs(); - (void)std::copy(inputs.begin() + 1, inputs.end(), std::back_inserter(plant_inputs)); - } else if (AnfAlgo::IsTupleOutput(input_node)) { + for (size_t j = 1; j < inputs.size(); ++j) { + MS_EXCEPTION_IF_NULL(inputs[j]); + if (IsValueNode(inputs[j])) { + auto success = MakeValueNode(inputs[j]); + if (!success) { + MS_LOG(WARNING) << "Make value node failed, " << inputs[j]->DebugString(); + } + } + plant_inputs.push_back(inputs[j]); + } + } else if (input_node->Type() != nullptr && AnfAlgo::IsTupleOutput(input_node)) { ConvertTupleOuputToPlantInputs(graph, input_node, &plant_inputs, &dyn_input_sizes); } else { dyn_input_sizes.push_back(-1); @@ -81,7 +117,6 @@ CNodePtr ConvertMakeTupleInputToPlantInputs(const FuncGraphPtr &graph, const CNo AnfAlgo::SetNodeAttr(kAttrDynInputSizes, MakeValue(dyn_input_sizes), cnode_ptr); cnode_ptr->set_inputs(plant_inputs); } - return cnode_ptr; } } // namespace @@ -96,7 +131,18 @@ const AnfNodePtr ConvertTupleInputToDynamicInput::Process(const 
FuncGraphPtr &fu if (node == nullptr || !node->isa() || !AnfAlgo::IsRealKernel(node)) { return nullptr; } - return ConvertMakeTupleInputToPlantInputs(func_graph, node->cast()); + if (AnfAlgo::IsGraphKernel(node)) { + auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + std::vector todos; + kernel::GetValidKernelNodes(sub_graph, &todos); + for (auto &t : todos) { + ConvertMakeTupleInputToPlantInputs(sub_graph, t->cast()); + } + } else { + ConvertMakeTupleInputToPlantInputs(func_graph, node->cast()); + } + return node; } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.cc b/mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.cc index 66b3dc1d88..a5e51411bc 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.cc +++ b/mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.cc @@ -47,8 +47,7 @@ CNodePtr ConvertTupleInputToMakeTuple(const FuncGraphPtr &graph, const CNodePtr convert_inputs.push_back(input_node); } } - cnode_ptr->set_inputs(convert_inputs); - return cnode_ptr; + return graph->NewCNode(convert_inputs); } } // namespace @@ -68,8 +67,9 @@ const AnfNodePtr ConvertTupleOutputToMaketuple::Process(const FuncGraphPtr &func if (IsPrimitiveCNode(cnode, prim::kPrimTupleGetItem) || IsPrimitiveCNode(cnode, prim::kPrimControlDepend)) { return nullptr; } - if (std::any_of(cnode->inputs().begin() + 1, cnode->inputs().end(), - [](const AnfNodePtr &node) { return AnfAlgo::IsRealKernel(node) && AnfAlgo::IsTupleOutput(node); })) { + if (std::any_of(cnode->inputs().begin() + 1, cnode->inputs().end(), [](const AnfNodePtr &node) { + return node->Type() != nullptr && AnfAlgo::IsRealKernel(node) && AnfAlgo::IsTupleOutput(node); + })) { return ConvertTupleInputToMakeTuple(func_graph, cnode); } return nullptr; diff --git a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc 
b/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc index 2fc971881d..4d3dcfccc0 100644 --- a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc +++ b/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc @@ -18,10 +18,12 @@ #include #include #include +#include #include "session/anf_runtime_algorithm.h" #include "utils/utils.h" #include "pre_activate/common/helper.h" #include "operator/ops.h" +#include "kernel/common_utils.h" namespace mindspore { namespace opt { @@ -125,13 +127,7 @@ void EliminateRedundantOp::Init() { kTransDataOpName, std::pair(kTransDataOpName, TransDataOpEliminateCondition))); } -const AnfNodePtr EliminateRedundantOp::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, - const EquivPtr &) const { - MS_EXCEPTION_IF_NULL(node); - auto cnode = node->cast(); - if (cnode == nullptr || func_graph == nullptr) { - return nullptr; - } +const AnfNodePtr EliminateRedundantOp::DoEliminate(const FuncGraphPtr &func_graph, const CNodePtr &cnode) const { // match the first name auto name1 = AnfAlgo::GetCNodeName(cnode); auto it = redundant_process_map_.find(name1); @@ -160,5 +156,35 @@ const AnfNodePtr EliminateRedundantOp::Process(const FuncGraphPtr &func_graph, c return ProcessMatchedNodes(func_graph, cnode, prev_cnode, &pass_vector); } + +const AnfNodePtr EliminateRedundantOp::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &) const { + MS_EXCEPTION_IF_NULL(node); + auto cnode = node->cast(); + if (cnode == nullptr || func_graph == nullptr) { + return nullptr; + } + + if (AnfAlgo::IsGraphKernel(node)) { + // do eliminate for ops in graph kernel. 
+ auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + auto mng = sub_graph->manager(); + MS_EXCEPTION_IF_NULL(mng); + std::vector todo; + kernel::GetValidKernelNodes(sub_graph, &todo); + for (auto &t : todo) { + CNodePtr t_cnode = t->cast(); + MS_EXCEPTION_IF_NULL(t_cnode); + auto t_new_node = DoEliminate(sub_graph, t_cnode); + if (t_new_node != nullptr && t_new_node != t) { + (void)mng->Replace(t, t_new_node); + } + } + return node; + } + // do eliminate for single op. + return DoEliminate(func_graph, cnode); +} } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h b/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h index 9e0dacecb1..c44190f645 100644 --- a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h +++ b/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h @@ -40,6 +40,7 @@ class EliminateRedundantOp : public PatternProcessPass { private: void Init(); + const AnfNodePtr DoEliminate(const FuncGraphPtr &func_graph, const CNodePtr &cnode) const; std::unordered_map redundant_process_map_; }; } // namespace opt diff --git a/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc b/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc index 4ea817df85..3b566b4f7c 100644 --- a/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc +++ b/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc @@ -16,6 +16,8 @@ #include "pre_activate/pass/erase_visit_attr.h" #include +#include +#include "kernel/common_utils.h" #include "session/anf_runtime_algorithm.h" #include "pre_activate/common/helper.h" @@ -28,7 +30,20 @@ const BaseRef EraseVisitAttr::DefinePattern() const { } const AnfNodePtr EraseVisitAttr::Process(const FuncGraphPtr &, const AnfNodePtr &node, const EquivPtr &) const { - AnfAlgo::EraseNodeAttr(kAttrVisited, node); + if (node != nullptr && AnfAlgo::IsRealCNodeKernel(node)) { + if (AnfAlgo::IsGraphKernel(node)) { + auto fg = 
AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(fg); + std::vector todos; + kernel::GetValidKernelNodes(fg, &todos); + for (auto &t : todos) { + AnfAlgo::EraseNodeAttr(kAttrVisited, t); + } + } + AnfAlgo::EraseNodeAttr(kAttrVisited, node); + } else { + AnfAlgo::EraseNodeAttr(kAttrVisited, node); + } return nullptr; } } // namespace opt diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_basic.cc b/mindspore/ccsrc/pre_activate/pass/fuse_basic.cc new file mode 100644 index 0000000000..84edd5c5e2 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/pass/fuse_basic.cc @@ -0,0 +1,222 @@ + +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "pre_activate/pass/fuse_basic.h" +#include "pre_activate/pass/fuse_graph_kernel.h" + +#include +#include +#include +#include +#include +#include + +#include "operator/ops.h" +#include "utils/utils.h" +#include "utils/graph_utils.h" +#include "pre_activate/common/helper.h" +#include "session/anf_runtime_algorithm.h" +#include "vm/segment_runner.h" +#include "debug/draw.h" +#include "debug/anf_ir_dump.h" +#include "ir/func_graph_cloner.h" + +namespace mindspore { +namespace opt { +namespace { +std::vector get_fusable_basic_ops(bool is_before_kernel_select) { + std::vector fusable_basic_ops = {prim::kPrimTensorAdd, prim::kPrimMul, prim::kPrimSub, + prim::kPrimExpandDims}; + if (!is_before_kernel_select) { + fusable_basic_ops.push_back(prim::kPrimCast); + } + return fusable_basic_ops; +} + +IncludeType IncludeFusedBasicOpForward(const AnfNodePtr &cur_node, const GraphKernelInfo &info, + const AnfNodePtr &node) { + if (cur_node == node) { + return FOLLOW; + } + if (!IsPrimitiveCNode(node)) { + return EXCLUDE; + } + + auto fusable_basic_ops = get_fusable_basic_ops(info.is_before_kernel_select); + bool is_fusable = std::any_of(fusable_basic_ops.begin(), fusable_basic_ops.end(), + [&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); }); + + return is_fusable ? FOLLOW : EXCLUDE; +} + +std::vector FindFuseCNodes(const CNodePtr &cnode, bool is_before_kernel_select) { + GraphKernelInfo info; + info.is_before_kernel_select = is_before_kernel_select; + // Search fusable nodes according input direction. 
+ auto include_func_forward = std::bind(IncludeFusedBasicOpForward, cnode, info, std::placeholders::_1); + auto used_nodes = DeepLinkedGraphSearch(cnode, include_func_forward); + if (used_nodes.size() > 1) { + used_nodes = RemoveCircle(used_nodes, false); + } + TopoSortForNodeList(&used_nodes); + return used_nodes; +} + +void RemoveControlDependOut(const FuncGraphPtr &fg, AnfNodePtrList *outputs, const FuncGraphManagerPtr &mng) { + AnfNodeSet outputs_set; + for (auto out : *outputs) { + outputs_set.insert(out); + } + + AnfNodePtrList vir_outputs; + std::unordered_map eqv; + auto fg_outputs = fg->output(); + if (IsPrimitiveCNode(fg_outputs, prim::kPrimMakeTuple)) { + auto cnode = fg_outputs->cast(); + for (size_t i = 1; i < cnode->size(); ++i) { + vir_outputs.push_back(cnode->input(i)); + } + } else { + vir_outputs.push_back(fg_outputs); + } + + if (vir_outputs.size() != outputs->size()) { + MS_LOG(EXCEPTION) << "The size of virtual output of the fg is not the same with the real output"; + } + bool has_erase_outs = false; + size_t index = -1; + for (auto it = outputs->begin(); it != outputs->end();) { + index++; + auto out = *it; + eqv[out] = vir_outputs[index]; + auto users = mng->node_users()[out]; + bool is_only_control_depend_use = true; + std::vector control_depend_use_index; + std::vector control_depend_nodes; + AnfNodePtr use_out = nullptr; + for (auto &user : users) { + auto use_node = user.first; + if (outputs_set.count(use_node) == 0 && !(IsPrimitiveCNode(use_node, prim::kPrimControlDepend))) { + is_only_control_depend_use = false; + continue; + } + if (outputs_set.count(use_node) != 0) { + use_out = use_node; + } + + if (IsPrimitiveCNode(use_node, prim::kPrimControlDepend)) { + control_depend_nodes.push_back(use_node->cast()); + control_depend_use_index.push_back(user.second); + } + } + + if (is_only_control_depend_use && !control_depend_nodes.empty()) { + MS_EXCEPTION_IF_NULL(use_out); + it = outputs->erase(it); + for (size_t i = 0; i < 
control_depend_nodes.size(); ++i) { + auto control_depend_node = control_depend_nodes[i]; + std::vector new_control_depend_inputs; + for (size_t j = 0; j < control_depend_node->size(); ++j) { + if (j == control_depend_use_index[i]) { + new_control_depend_inputs.push_back(use_out); + } else { + new_control_depend_inputs.push_back(control_depend_node->input(j)); + } + } + auto new_control_depend = control_depend_node->func_graph()->NewCNode(new_control_depend_inputs); + mng->Replace(control_depend_node, new_control_depend); + has_erase_outs = true; + } + } else { + it++; + } + } + + if (!has_erase_outs) { + return; + } + + AnfNodePtr fg_new_output; + if (outputs->size() > 1) { + std::vector output_args; + output_args.push_back(NewValueNode(prim::kPrimMakeTuple)); + (void)std::transform(std::begin(*outputs), std::end(*outputs), std::back_inserter(output_args), + [&eqv](const AnfNodePtr &o) -> AnfNodePtr { return eqv[o]; }); + // Set output for AnfGraph + fg_new_output = fg->NewCNode(output_args); + } else { + fg_new_output = eqv[(*outputs)[0]]; + } + fg->set_output(fg_new_output, true); +} + +void FuseBasic(const std::shared_ptr &kernel_graph, const std::vector &todos, + std::unordered_set *fused_ops, bool is_before_kernel_select) { + auto mng = kernel_graph->manager(); + for (auto iter = todos.cbegin(); iter != todos.cend(); ++iter) { + auto node = (*iter)->cast(); + if (node == nullptr) { + continue; + } + if (fused_ops->count(node)) { + continue; + } + auto fusable_basic_ops = get_fusable_basic_ops(is_before_kernel_select); + bool is_basic_op = std::any_of(fusable_basic_ops.begin(), fusable_basic_ops.end(), + [&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); }); + if (!is_basic_op || !kernel_graph->nodes().contains(node)) { + continue; + } + + auto fuse_nodes = FindFuseCNodes(node, is_before_kernel_select); + if (fuse_nodes.size() <= 1) { + continue; + } + + FuncGraphPtr fg; + AnfNodePtrList inputs; + AnfNodePtrList outputs; + std::tie(fg, 
inputs, outputs) = compile::TransformSegmentToAnfGraph(fuse_nodes); + RemoveControlDependOut(fg, &outputs, mng); + auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, outputs, is_before_kernel_select); + + ReplaceNewFuseCNode(kernel_graph, fuse_new_node, outputs); + + // Set graph kernel attr + std::string fuse_op_name = ""; + for (auto &fuse_node : fuse_nodes) { + fuse_op_name += AnfAlgo::GetCNodePrimitive(fuse_node)->name() + "_"; + } + fused_ops->insert(fuse_nodes.begin(), fuse_nodes.end()); + fg->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, MakeValue(fuse_op_name)); + } +} +} // namespace + +void FuseBasic(const std::shared_ptr &kernel_graph, bool is_before_kernel_select) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto mng = kernel_graph->manager(); + if (mng == nullptr) { + mng = Manage(kernel_graph, true); + kernel_graph->set_manager(mng); + } + std::unordered_set fused_ops; + auto todos = TopoSort(kernel_graph->get_return()); + std::reverse(todos.begin(), todos.end()); + FuseBasic(kernel_graph, todos, &fused_ops, is_before_kernel_select); +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_basic.h b/mindspore/ccsrc/pre_activate/pass/fuse_basic.h new file mode 100644 index 0000000000..fbbf5d9937 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/pass/fuse_basic.h @@ -0,0 +1,29 @@ + +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_BASIC_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_BASIC_H_ + +#include +#include "pre_activate/common/optimizer.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +void FuseBasic(const std::shared_ptr &kernel_graph, bool is_before_kernel_select); +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_BASIC_H_ diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.cc b/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.cc new file mode 100644 index 0000000000..591b210335 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.cc @@ -0,0 +1,562 @@ + +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "pre_activate/pass/fuse_graph_kernel.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "operator/ops.h" +#include "utils/utils.h" +#include "utils/graph_utils.h" +#include "pre_activate/common/helper.h" +#include "session/anf_runtime_algorithm.h" +#include "vm/segment_runner.h" +#include "debug/draw.h" +#include "debug/anf_ir_dump.h" +#include "ir/func_graph_cloner.h" + +namespace mindspore { +namespace opt { +std::vector get_fusable_basic_ops(bool is_before_kernel_select) { + std::vector fusable_basic_ops = { + prim::kPrimAddN, prim::kPrimTensorAdd, prim::kPrimMul, prim::kPrimSub, prim::kPrimMaximum, + prim::kPrimMinimum, prim::kPrimNeg, prim::kPrimRealDiv, prim::kPrimPow, prim::kPrimSqrt, + prim::kPrimReciprocal, prim::kPrimExpandDims, prim::kPrimLessEqual}; + if (!is_before_kernel_select) { + fusable_basic_ops.push_back(prim::kPrimCast); + } + return fusable_basic_ops; +} + +std::vector get_fusable_basic_ops_with_reduce(bool is_before_kernel_select) { + std::vector fusable_basic_ops_with_reduce; + if (!is_before_kernel_select) { + fusable_basic_ops_with_reduce.push_back(prim::kPrimCast); + } + return fusable_basic_ops_with_reduce; +} + +std::vector get_reduce_ops() { + std::vector reduce_ops = {prim::kPrimReduceSum, prim::kPrimReduceMean, prim::kPrimReduceMin, + prim::kPrimReduceMax, prim::kPrimReduceAll}; + return reduce_ops; +} + +void GetGraphKernelInfo(const FuncGraphPtr fg, GraphKernelInfo *info) { + MS_EXCEPTION_IF_NULL(fg); + auto reduce_ops = get_reduce_ops(); + const auto &nodes = fg->nodes(); + info->op_type = ELEWISE; + info->cal_step = -1; + info->reduce_op_num = 0; + for (auto node : nodes) { + auto cnode = node->cast(); + if (cnode == nullptr) { + continue; + } + info->cal_step++; + auto prim = GetValueNode(cnode->input(0)); + if (prim != nullptr) { + bool is_reudce = std::any_of(reduce_ops.begin(), reduce_ops.end(), [&prim](const PrimitivePtr &op) { + return op->hash() == prim->hash() 
&& op->name() == prim->name(); + }); + if (is_reudce) { + info->op_type = REDUCE; + info->reduce_op_num++; + } + } + } +} + +bool IsFuse(const GraphKernelInfo &info, const AnfNodePtr &node) { + auto fusable_basic_ops = get_fusable_basic_ops(info.is_before_kernel_select); + auto fusable_basic_ops_with_reduce = get_fusable_basic_ops_with_reduce(info.is_before_kernel_select); + bool is_fusable = false; + if (info.op_type == REDUCE && + (info.cal_step >= MAX_REDUCE_OP_FUSION_CAL_STEP || info.reduce_op_num >= MAX_REDUCE_OP_FUSION_REDUCE_NUM)) { + is_fusable = std::any_of(fusable_basic_ops_with_reduce.begin(), fusable_basic_ops_with_reduce.end(), + [&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); }); + } else { + is_fusable = std::any_of(fusable_basic_ops.begin(), fusable_basic_ops.end(), + [&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); }); + } + + return is_fusable; +} + +IncludeType IncludeFusedBasicOpForward(const AnfNodePtr &cur_node, const GraphKernelInfo &info, + const AnfNodePtr &node) { + if (cur_node == node) { + return FOLLOW; + } + if (!IsPrimitiveCNode(node)) { + return EXCLUDE; + } + + bool is_fusable = IsFuse(info, node); + return is_fusable ? FOLLOW : EXCLUDE; +} + +IncludeType IncludeFusedBasicOpBackward(const AnfNodePtr &cur_node, const GraphKernelInfo &info, + const AnfNodePtr &node) { + if (cur_node == node) { + return FOLLOW; + } + if (AnfAlgo::IsGraphKernel(node)) { + auto cnode = node->cast(); + auto fg = GetValueNode(cnode->input(kAnfPrimitiveIndex)); + auto fg_attr_val = fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); + MS_EXCEPTION_IF_NULL(fg_attr_val); + auto fg_attr = GetValue(fg_attr_val); + if (fg_attr == kApplyMomentumOpName) { + return FOLLOW; + } + return EXCLUDE; + } + if (!IsPrimitiveCNode(node)) { + return EXCLUDE; + } + + bool is_fusable = IsFuse(info, node); + return is_fusable ? 
FOLLOW : EXCLUDE; +} + +bool CheckCircle(const std::set &fused_op_set, const AnfNodePtr &check_node, + std::set *cached_unconnected_set) { + if (!check_node->isa() || AnfAlgo::IsGraphKernel(check_node)) { + return false; + } + + auto cnode = check_node->cast(); + const auto &inputs = cnode->inputs(); + // there is a input not in fused_op_set, but the input depends on the fused_op_set + bool has_circle = false; + for (auto input : inputs) { + if (input->isa() && !fused_op_set.count(input)) { + std::set done; + std::vector todos = {input}; + while (!todos.empty()) { + auto node = todos.back(); + todos.pop_back(); + if (done.count(node) || cached_unconnected_set->count(node)) { + continue; + } + + done.insert(node); + if (fused_op_set.count(node)) { + has_circle = true; + break; + } + + if (node->isa()) { + auto cnode_ptr = node->cast(); + for (auto it : cnode_ptr->inputs()) { + if (it->isa()) { + todos.push_back(it); + } + } + } + } + + if (has_circle) { + return true; + } + cached_unconnected_set->insert(done.begin(), done.end()); + } + } + + return false; +} + +bool IsMakeTupleOut(const AnfNodePtr &out, AnfNodePtrList *real_outs) { + if (IsPrimitiveCNode(out, prim::kPrimMakeTuple)) { + auto &inputs = out->cast()->inputs(); + for (size_t i = 1; i < inputs.size(); ++i) { + real_outs->push_back(inputs[i]); + } + return true; + } + + if (AnfAlgo::GetCNodeFuncGraphPtr(out) != nullptr) { + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(out); + auto fg_out = fg->output(); + if (IsPrimitiveCNode(fg_out, prim::kPrimMakeTuple)) { + auto inputs = fg_out->cast()->inputs(); + for (size_t i = 1; i < inputs.size(); ++i) { + real_outs->push_back(inputs[i]); + } + return true; + } + } + return false; +} + +std::vector RemoveCircle(const std::vector &fused_op, bool is_backward) { + std::set cached_unconnected_set; + std::set fused_op_set(fused_op.begin(), fused_op.end()); + auto include = [&fused_op_set](const AnfNodePtr &node) { + if (fused_op_set.count(node)) { + return FOLLOW; + } + 
return EXCLUDE; + }; + for (auto iter = fused_op.rbegin(); iter != fused_op.rend(); ++iter) { + bool has_circle = CheckCircle(fused_op_set, *iter, &cached_unconnected_set); + // delete the circle node and the node which depend on the circle node in fused op + if (has_circle) { + auto mng = (*iter)->func_graph()->manager(); + std::vector erase_nodes; + if (is_backward) { + erase_nodes = DeepUsersSearch(*iter, include, mng); + } else { + erase_nodes = DeepLinkedGraphSearch(*iter, include); + } + for (auto erase_node : erase_nodes) { + fused_op_set.erase(erase_node); + } + } + } + + std::vector res; + for (auto node : fused_op) { + if (fused_op_set.count(node)) { + res.push_back(node); + } + } + return res; +} + +void TopoSortForNodeList(std::vector *lst) { + if (lst->size() < 2) { + return; + } + + std::vector res; + std::set node_sets(lst->begin(), lst->end()); + std::map> ins; + std::map> outs; + std::queue q; + for (auto node : *lst) { + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + for (auto input : cnode->inputs()) { + if (!node_sets.count(input)) { + continue; + } + // out_degree + outs[input].insert(node); + // in_degree + ins[node].insert(input); + } + if (!ins.count(node)) { + ins[node] = {}; + } + } + + for (auto p : ins) { + if (p.second.size() == 0) { + q.push(p.first); + } + } + + while (!q.empty()) { + auto node = q.front(); + q.pop(); + res.push_back(node); + if (!outs.count(node)) { + continue; + } + for (auto out : outs[node]) { + if (!ins.count(out)) { + continue; + } + ins[out].erase(node); + if (ins[out].size() == 0) { + q.push(out); + } + } + } + + lst->assign(res.begin(), res.end()); +} + +std::vector FindFuseCNodes(const CNodePtr &cnode, bool is_before_kernel_select) { + auto func_graph = cnode->func_graph(); + auto graph_kernel_g = GetValueNode(cnode->input(0)); + GraphKernelInfo info; + info.is_before_kernel_select = is_before_kernel_select; + GetGraphKernelInfo(graph_kernel_g, &info); + auto mng = func_graph->manager(); + // 
Search fusable nodes according input direction. + auto include_func_forward = std::bind(IncludeFusedBasicOpForward, cnode, info, std::placeholders::_1); + auto used_nodes = DeepLinkedGraphSearch(cnode, include_func_forward); + std::reverse(used_nodes.begin(), used_nodes.end()); + // Search fusable nodes according output direction. + auto include_func_backward = std::bind(IncludeFusedBasicOpBackward, cnode, info, std::placeholders::_1); + auto user_nodes = DeepUsersSearch(cnode, include_func_backward, mng); + + used_nodes.insert(used_nodes.end(), user_nodes.begin() + 1, user_nodes.end()); + if (used_nodes.size() > 1) { + used_nodes = RemoveCircle(used_nodes); + } + TopoSortForNodeList(&used_nodes); + return used_nodes; +} + +AbstractBasePtr GetOutputAbstract(const AnfNodePtr &node, size_t output_idx) { + auto out_spec = node->abstract(); + if (out_spec->isa()) { + return out_spec->cast()->elements()[output_idx]; + } + return out_spec; +} + +AnfNodePtr CreateNewFuseCNode(const std::shared_ptr &kernel_graph, const FuncGraphPtr &fg, + const AnfNodePtrList &inputs, const AnfNodePtrList &outputs, + bool is_before_kernel_select) { + auto func_node = NewValueNode(fg); + std::vector fn_inputs; + fn_inputs.push_back(func_node); + fn_inputs.insert(fn_inputs.end(), inputs.begin(), inputs.end()); + auto fuse_cnode = kernel_graph->NewCNode(fn_inputs); + // Set output abstract + if (outputs.size() > 1) { + std::vector out_specs; + for (size_t i = 0; i < outputs.size(); ++i) { + out_specs.push_back(outputs[i]->abstract()); + } + auto out_spec = std::make_shared(out_specs); + fuse_cnode->set_abstract(out_spec); + } else { + fuse_cnode->set_abstract(outputs[0]->abstract()); + } + // Set parameter abstract. 
+ for (size_t i = 0; i < inputs.size(); ++i) { + auto kernel_with_index = AnfAlgo::VisitKernel(inputs[i], 0); + auto input_abs = GetOutputAbstract(kernel_with_index.first, kernel_with_index.second); + fg->parameters()[i]->set_abstract(input_abs); + if (is_before_kernel_select) { + fg->parameters()[i]->set_kernel_info(std::make_shared()); + } + } + // Set kernel info. + if (!is_before_kernel_select) { + std::vector graph_input_format; + std::vector graph_input_type; + std::vector graph_output_format; + std::vector graph_output_type; + for (size_t i = 0; i < inputs.size(); ++i) { + auto kernel_with_index = AnfAlgo::VisitKernel(inputs[i], 0); + auto input_format = AnfAlgo::GetOutputFormat(kernel_with_index.first, kernel_with_index.second); + graph_input_format.push_back(input_format); + auto input_type = AnfAlgo::GetOutputDeviceDataType(kernel_with_index.first, kernel_with_index.second); + graph_input_type.push_back(input_type); + auto input_abs = GetOutputAbstract(kernel_with_index.first, kernel_with_index.second); + fg->parameters()[i]->set_abstract(input_abs); + } + auto new_outputs = outputs; + if (outputs.size() == 1 && AnfAlgo::IsGraphKernel(outputs[0])) { + std::vector real_outs; + if (IsMakeTupleOut(outputs[0], &real_outs)) { + new_outputs = real_outs; + } + } + for (size_t i = 0; i < new_outputs.size(); ++i) { + auto kernel_with_index = AnfAlgo::VisitKernel(new_outputs[i], 0); + auto output_format = AnfAlgo::GetOutputFormat(kernel_with_index.first, kernel_with_index.second); + auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel_with_index.first, kernel_with_index.second); + graph_output_format.push_back(output_format); + graph_output_type.push_back(output_type); + } + kernel::KernelBuildInfo::KernelBuildInfoBuilder graph_info_builder; + graph_info_builder.SetInputsFormat(graph_input_format); + graph_info_builder.SetInputsDeviceType(graph_input_type); + graph_info_builder.SetOutputsFormat(graph_output_format); + 
graph_info_builder.SetOutputsDeviceType(graph_output_type); + graph_info_builder.SetProcessor(kernel::Processor::AICORE); + graph_info_builder.SetKernelType(KernelType::AKG_KERNEL); + graph_info_builder.SetFusionType(kernel::FusionType::OPAQUE); + auto graph_selected_info = graph_info_builder.Build(); + AnfAlgo::SetSelectKernelBuildInfo(graph_selected_info, fuse_cnode.get()); + } + return fuse_cnode; +} + +void ReplaceNewFuseCNode(const std::shared_ptr &kernel_graph, const AnfNodePtr &new_fuse_cnode, + const AnfNodePtrList &outputs) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto mng = kernel_graph->manager(); + MS_EXCEPTION_IF_NULL(mng); + // single out + if (outputs.size() == 1) { + mng->Replace(outputs[0], new_fuse_cnode); + return; + } + + std::vector fn_inputs; + for (size_t out_idx = 0; out_idx < outputs.size(); out_idx++) { + AnfNodePtrList real_outs; + // not make tuple out, replace + if (!IsMakeTupleOut(outputs[out_idx], &real_outs)) { + fn_inputs.clear(); + fn_inputs.push_back(NewValueNode(prim::kPrimTupleGetItem)); + fn_inputs.push_back(new_fuse_cnode); + fn_inputs.push_back(NewValueNode(MakeValue(SizeToInt(out_idx)))); + auto new_out = kernel_graph->NewCNode(fn_inputs); + new_out->set_abstract(outputs[out_idx]->abstract()); + mng->Replace(outputs[out_idx], new_out); + continue; + } + + // the out is make tuple , modify the get_item node's value + auto users = mng->node_users()[outputs[out_idx]]; + for (auto &user : users) { + auto use_node = user.first; + if (use_node->isa() && (IsPrimitiveCNode(use_node, prim::kPrimTupleGetItem))) { + auto get_item_cnode = use_node->cast(); + auto value_input = get_item_cnode->input(kInputNodeOutputIndexInTupleGetItem); + MS_EXCEPTION_IF_NULL(value_input); + auto value_node = value_input->cast(); + MS_EXCEPTION_IF_NULL(value_node); + int item_idx = GetValue(value_node->value()); + int new_item_idx = SizeToInt(out_idx) + item_idx; + fn_inputs.clear(); + fn_inputs.push_back(NewValueNode(prim::kPrimTupleGetItem)); + 
fn_inputs.push_back(new_fuse_cnode); + fn_inputs.push_back(NewValueNode(new_item_idx)); + auto new_out = kernel_graph->NewCNode(fn_inputs); + new_out->set_abstract(get_item_cnode->abstract()); + mng->Replace(get_item_cnode, new_out); + } + } + } +} + +AnfNodePtrList EliminateMakeTuple(FuncGraphPtr *fg, FuncGraphManagerPtr *mng) { + AnfNodePtrList outs; + auto out_node = (*fg)->output(); + if (IsPrimitiveCNode(out_node, prim::kPrimMakeTuple)) { + std::vector output_args; + auto out_cnode = out_node->cast(); + for (auto out : out_cnode->inputs()) { + if (IsPrimitiveCNode(out, prim::kPrimMakeTuple)) { + auto inputs = out->cast()->inputs(); + for (size_t i = 1; i < inputs.size(); ++i) { + output_args.push_back(inputs[i]); + } + } else { + output_args.push_back(out); + } + } + if (output_args.size() != out_cnode->inputs().size()) { + auto new_out = (*fg)->NewCNode(output_args); + (*mng)->Replace(out_node, new_out); + } + + for (size_t i = 1; i < output_args.size(); ++i) { + outs.push_back(output_args[i]); + } + return outs; + } + + outs.push_back(out_node); + return outs; +} + +AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs) { + AnfNodePtrList res; + if (outs.size() <= 1) { + return outs; + } + + for (auto out : outs) { + AnfNodePtrList real_outs; + if (IsMakeTupleOut(out, &real_outs)) { + res.insert(res.end(), real_outs.begin(), real_outs.end()); + continue; + } + res.push_back(out); + } + return res; +} + +void FuseGraphKernel(const std::shared_ptr &kernel_graph, bool is_before_kernel_select) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto mng = kernel_graph->manager(); + if (mng == nullptr) { + mng = Manage(kernel_graph, true); + kernel_graph->set_manager(mng); + } + auto &todos = kernel_graph->execution_order(); + for (auto iter = todos.cbegin(); iter != todos.cend(); ++iter) { + auto node = *iter; + if (!AnfAlgo::IsGraphKernel(node) || !kernel_graph->nodes().contains(node)) { + continue; + } + + auto origin_fg = AnfAlgo::GetCNodeFuncGraphPtr(node); + auto 
fg_attr = origin_fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); + if (fg_attr != nullptr) { + auto fg_name = GetValue(fg_attr); + if (graph_kernel_black_list.count(fg_name) != 0) { + continue; + } + } + + auto fuse_nodes = FindFuseCNodes(node, is_before_kernel_select); + if (fuse_nodes.size() <= 1) { + continue; + } + + FuncGraphPtr fg; + AnfNodePtrList inputs; + AnfNodePtrList outputs; + std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(fuse_nodes); + + // Remove nest make tuple in outs + auto expand_out = GetExpandOuts(outputs); + auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, expand_out, is_before_kernel_select); + + ReplaceNewFuseCNode(kernel_graph, fuse_new_node, outputs); + + // Inline origin graphkernel + auto cnodes = fg->GetOrderedCnodes(); + for (const auto &n : cnodes) { + if (!AnfAlgo::IsGraphKernel(n)) { + continue; + } + auto graph_kernel_g = GetValueNode(n->input(0)); + AnfNodePtrList ins; + ins.insert(ins.end(), n->inputs().begin() + 1, n->inputs().end()); + auto out = InlineClone(graph_kernel_g, fg, ins, n->input(0)->scope()); + mng->Replace(n, out); + } + + EliminateMakeTuple(&fg, &mng); + // Set graphkernel flag + auto ori_fg = GetValueNode(node->input(kAnfPrimitiveIndex)); + fg->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, ori_fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.h b/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.h new file mode 100644 index 0000000000..a5a26765a3 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.h @@ -0,0 +1,63 @@ + +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_GRAPH_KERNEL_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_GRAPH_KERNEL_H_ + +#include +#include +#include +#include +#include "pre_activate/common/optimizer.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +enum GraphKernelType { + ELEWISE = 0, // only contain elewise basic ops + REDUCE, // contain reduce ops + CUBE, // contain cube ops +}; +struct GraphKernelInfo { + GraphKernelType op_type = ELEWISE; + bool is_before_kernel_select = false; + int reduce_op_num = 0; + int cal_step = 0; +}; + +// when reduce graph kernel's cal step is greater than this number, not fuse +const int MAX_REDUCE_OP_FUSION_CAL_STEP = 5; +// when reduce graph kernel contain reduce op num is greater than this number, not fuse +const int MAX_REDUCE_OP_FUSION_REDUCE_NUM = 2; + +const std::set graph_kernel_black_list = {"BNTrainingUpdateSum", "ApplyMomentum", "LayerNormForward", + "LambNextMV", "LambUpdateWithLR"}; + +std::vector RemoveCircle(const std::vector &fused_op, bool is_backward = true); + +void TopoSortForNodeList(std::vector *lst); + +AnfNodePtr CreateNewFuseCNode(const std::shared_ptr &kernel_graph, const FuncGraphPtr &fg, + const AnfNodePtrList &inputs, const AnfNodePtrList &outputs, + bool is_before_kernel_select); + +void ReplaceNewFuseCNode(const std::shared_ptr &kernel_graph, const AnfNodePtr &new_fuse_cnode, + const AnfNodePtrList &outputs); + +void FuseGraphKernel(const std::shared_ptr &kernel_graph, bool is_before_kernel_select = false); +} // namespace opt +} // 
namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_GRAPH_KERNEL_H_ diff --git a/mindspore/ccsrc/pre_activate/pass/optimize_dependence.cc b/mindspore/ccsrc/pre_activate/pass/optimize_dependence.cc index 86a90a4dfe..1d5f909e7d 100644 --- a/mindspore/ccsrc/pre_activate/pass/optimize_dependence.cc +++ b/mindspore/ccsrc/pre_activate/pass/optimize_dependence.cc @@ -44,11 +44,11 @@ AnfNodePtr GetReplaceNode(const AnfNodePtr &node) { return cnode->input(kSingleInputIndex); } -bool ReplaceMakeTuple(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { +AnfNodePtr ReplaceMakeTuple(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(cnode); if (AnfAlgo::GetCNodeName(cnode) != prim::kPrimMakeTuple->name()) { - return false; + return nullptr; } std::vector new_make_tuple_inputs; bool need_update = false; @@ -75,17 +75,16 @@ bool ReplaceMakeTuple(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { auto manager = func_graph->manager(); MS_EXCEPTION_IF_NULL(manager); manager->Replace(cnode, new_make_tuple); + return new_make_tuple; } - return true; + return nullptr; } } // namespace const BaseRef OptimizeDependence::DefinePattern() const { - VarPtr X = std::make_shared("X"); - MS_EXCEPTION_IF_NULL(X); - VarPtr Y = std::make_shared("Y"); - MS_EXCEPTION_IF_NULL(Y); - return VectorRef({prim::kPrimDepend, X, Y}); + VarPtr X = std::make_shared(); + VarPtr Xs = std::make_shared(); + return VectorRef({X, Xs}); } const AnfNodePtr OptimizeDependence::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, @@ -95,29 +94,31 @@ const AnfNodePtr OptimizeDependence::Process(const FuncGraphPtr &func_graph, con if (!node->isa()) { return nullptr; } + auto node_name = AnfAlgo::GetCNodeName(node); + if (node_name != prim::kPrimControlDepend->name() && node_name != prim::kPrimDepend->name()) { + return nullptr; + } + size_t index = 0; auto depend_cnode = node->cast(); MS_EXCEPTION_IF_NULL(depend_cnode); - 
CheckCNodeInputSize(depend_cnode, kDependInputNum); - auto replacing_node = depend_cnode->input(kDependInputNum - 1); - MS_EXCEPTION_IF_NULL(replacing_node); - if (!replacing_node->isa()) { - return nullptr; + std::vector new_depend_inputs = {depend_cnode->input(kAnfPrimitiveIndex)}; + if (node_name == prim::kPrimDepend->name()) { + index = 1; + new_depend_inputs.push_back(depend_cnode->input(kRealInputIndexInDepend)); } - auto replacing_cnode = replacing_node->cast(); - MS_EXCEPTION_IF_NULL(replacing_cnode); - // Deal with the make_tuple with TransData or Cast inputs. - if (ReplaceMakeTuple(func_graph, replacing_cnode)) { - return nullptr; + if (AnfAlgo::GetInputTensorNum(depend_cnode) < 2) { + MS_LOG(EXCEPTION) << "The depend node input size is at less size 2,but got " + << AnfAlgo::GetInputTensorNum(depend_cnode) << depend_cnode->DebugString(); } - AnfNodePtr replace_node = GetReplaceNode(replacing_cnode); - if (replace_node == nullptr) { - MS_LOG(DEBUG) << "Can not find the TransData or Cast with single output node. 
Depend node: " << node->DebugString(); - return nullptr; + auto input_num = AnfAlgo::GetInputTensorNum(depend_cnode); + while (index < input_num) { + auto replace_node = GetConvertNode(func_graph, node, index); + MS_EXCEPTION_IF_NULL(replace_node); + new_depend_inputs.push_back(replace_node); + ++index; } - std::vector new_depend_inputs = {depend_cnode->input(kAnfPrimitiveIndex), - depend_cnode->input(kRealInputIndexInDepend), replace_node}; auto kernel_graph = func_graph->cast>(); - CNodePtr new_depend; + CNodePtr new_depend = nullptr; if (kernel_graph == nullptr) { new_depend = func_graph->NewCNode(new_depend_inputs); MS_EXCEPTION_IF_NULL(new_depend); @@ -130,5 +131,31 @@ const AnfNodePtr OptimizeDependence::Process(const FuncGraphPtr &func_graph, con } return new_depend; } + +const AnfNodePtr OptimizeDependence::GetConvertNode(const FuncGraphPtr &graph, const AnfNodePtr &node, + const size_t index) const { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(node); + auto depend_cnode = node->cast(); + auto replacing_node = AnfAlgo::GetInputNode(depend_cnode, index); + MS_EXCEPTION_IF_NULL(replacing_node); + if (!replacing_node->isa()) { + return replacing_node; + } + auto replacing_cnode = replacing_node->cast(); + MS_EXCEPTION_IF_NULL(replacing_cnode); + // Deal with the make_tuple with TransData or Cast inputs. + auto make_tuple_replace_node = ReplaceMakeTuple(graph, replacing_cnode); + if (make_tuple_replace_node != nullptr) { + return make_tuple_replace_node; + } + AnfNodePtr replace_node = GetReplaceNode(replacing_cnode); + if (replace_node == nullptr) { + MS_LOG(DEBUG) << "Can not find the TransData or Cast with single output node. 
Depend node: " << node->DebugString(); + return replacing_node; + } + return replace_node; +} + } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/optimize_dependence.h b/mindspore/ccsrc/pre_activate/pass/optimize_dependence.h index d2995cdd30..30027b790a 100644 --- a/mindspore/ccsrc/pre_activate/pass/optimize_dependence.h +++ b/mindspore/ccsrc/pre_activate/pass/optimize_dependence.h @@ -27,6 +27,7 @@ class OptimizeDependence : public PatternProcessPass { ~OptimizeDependence() override = default; const BaseRef DefinePattern() const override; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; + const AnfNodePtr GetConvertNode(const FuncGraphPtr &graph, const AnfNodePtr &node, const size_t index) const; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/predict/converter/attr_utils/convert_util.h b/mindspore/ccsrc/predict/converter/attr_utils/convert_util.h index d29e5e532e..5c7551a190 100644 --- a/mindspore/ccsrc/predict/converter/attr_utils/convert_util.h +++ b/mindspore/ccsrc/predict/converter/attr_utils/convert_util.h @@ -36,7 +36,6 @@ using GraphDefT = mindspore::predict::GraphDefT; using TensorDefT = mindspore::predict::TensorDefT; using SubGraphDefT = mindspore::predict::SubGraphDefT; using SubGraphPtr = std::unique_ptr; -using NodeDef = mindspore::predict::NodeDefT; using MsDataType = mindspore::predict::DataType; using MsFormat = mindspore::predict::Format; using MsKernelKey = void *; diff --git a/mindspore/ccsrc/predict/converter/kernel2ms.cc b/mindspore/ccsrc/predict/converter/kernel2ms.cc index 32cdee1350..902efac720 100644 --- a/mindspore/ccsrc/predict/converter/kernel2ms.cc +++ b/mindspore/ccsrc/predict/converter/kernel2ms.cc @@ -108,8 +108,7 @@ bool Kernel2Ms::SetGraphOutputIdx(const KernelGraphPtr &kernel_graph_ptr, const } bool Kernel2Ms::SetOpOutputIdx(const CNodePtr &c_node_ptr, const TensorPtr &output_tensor, - const TensorCachePtr 
&tensor_cache, int ref_count, size_t order_index, - NodeDef *ms_node) { + const TensorCachePtr &tensor_cache, int ref_count, size_t order_index, OpDefT *ms_node) { MS_EXCEPTION_IF_NULL(c_node_ptr); MS_EXCEPTION_IF_NULL(output_tensor); MS_EXCEPTION_IF_NULL(ms_node); @@ -123,7 +122,7 @@ bool Kernel2Ms::SetOpOutputIdx(const CNodePtr &c_node_ptr, const TensorPtr &outp std::vector tensor_shape; (void)std::transform(host_shape.begin(), host_shape.end(), std::back_inserter(tensor_shape), SizeToInt); int outputIndex = tensor_cache->addExTensor(tensor_key, output_tensor, ref_count, tensor_shape, KERNEL); - ms_node->opDef->outputIndex.push_back(outputIndex); + ms_node->outputIndex.push_back(outputIndex); return true; } @@ -164,7 +163,7 @@ void Kernel2Ms::GetRealInpoutsPtr(const AnfNodePtr &node, std::vectoropDef->inputIndex.push_back(ex_tensor_ptr->index_); + ms_node->inputIndex.push_back(ex_tensor_ptr->index_); } } return true; @@ -397,19 +396,17 @@ bool Kernel2Ms::SetGraphOpTensors(const KernelGraphPtr &kernel_graph_ptr, const return false; } auto kernel_key = node_indexs_[kernel.get()]; - std::unique_ptr ms_node(new NodeDef); + std::unique_ptr ms_node(new OpDefT); + ms_node->name = kernel->fullname_with_scope(); ms_node->fmkType = mindspore::predict::FmkType_CAFFE; - std::unique_ptr ms_op(new OpDefT()); auto c_name = AnfAlgo::GetCNodeName(kernel); auto fun = predict::convert::OpAttrFactory::GetInstance()->GetPackFun(c_name); if (fun == nullptr) { - MS_LOG(ERROR) << "get node [" << kernel->fullname_with_scope() << "] attr failed."; - return false; - } else if (!fun(kernel, ms_op.get())) { + MS_LOG(WARNING) << "get node [" << kernel->fullname_with_scope() << "] attr failed."; + } else if (!fun(kernel, ms_node.get())) { MS_LOG(ERROR) << "set node [" << kernel->fullname_with_scope() << "] attr failed."; return false; } - ms_node->opDef = std::move(ms_op); auto output_size = AnfAlgo::GetOutputTensorNum(kernel); int nodeRefCount = SizeToInt(output_size); for (size_t j = 0; j < 
output_size; ++j) { @@ -466,7 +463,7 @@ bool Kernel2Ms::KernelGraph2MsGraph(const KernelGraphPtr &kernel_graph_ptr) { if (!SetOpInputIdx(kernels[i], tensor_cache_ptr_, ms_node)) { return false; } - std::unique_ptr ms_node_tmp(ms_node); + std::unique_ptr ms_node_tmp(ms_node); sub_ms_graph->nodes.emplace_back(std::move(ms_node_tmp)); } if (!SetAllTensors(tensor_cache_ptr_, sub_ms_graph.get())) { diff --git a/mindspore/ccsrc/predict/converter/kernel2ms.h b/mindspore/ccsrc/predict/converter/kernel2ms.h index f991ecc94a..7013f88107 100644 --- a/mindspore/ccsrc/predict/converter/kernel2ms.h +++ b/mindspore/ccsrc/predict/converter/kernel2ms.h @@ -64,10 +64,10 @@ class Kernel2Ms { bool SetAllTensors(const TensorCachePtr &tensor_cache, SubGraphDefT *sub_graph_def_t); - bool SetOpInputIdx(const CNodePtr &c_node_ptr, const TensorCachePtr &tensor_cache, NodeDef *ms_node); + bool SetOpInputIdx(const CNodePtr &c_node_ptr, const TensorCachePtr &tensor_cache, OpDefT *ms_node); bool SetOpOutputIdx(const CNodePtr &c_node_ptr, const TensorPtr &output_tensor, const TensorCachePtr &tensor_cache, - int ref_count, size_t order_index, NodeDef *ms_node); + int ref_count, size_t order_index, OpDefT *ms_node); bool SetGraphOutputIdx(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache, SubGraphDefT *sub_graph_def_t, AllOutputTensors *all_output_tensors); @@ -102,7 +102,7 @@ class Kernel2Ms { bool SetMemResue() const; SubGraphPtr sub_ms_graph_; AllOutputTensors all_output_tensors_; - std::vector tmp_op_nodes_; + std::vector tmp_op_nodes_; std::unordered_map node_indexs_; std::unordered_map index_nodes_; int graph_index_ = 0; diff --git a/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.cc index e6fec3d540..52648812be 100644 --- a/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.cc +++ b/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.cc @@ -33,6 +33,14 @@ bool 
CastPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op); bool MeanPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op); bool SoftmaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op); bool ScalePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op); +bool AddFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op); +bool ArgMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op); +bool BatchNormFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op); +bool FakeQuantWithMinMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op); +bool FakeQuantWithMinMaxPerChannelPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op); +bool MulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op); +bool MulFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op); +bool SqueezePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op); OpAttrFactory::OpAttrFactory() { pack_funs_ = {{"Conv2D", Conv2dPacker}, @@ -60,23 +68,31 @@ OpAttrFactory::OpAttrFactory() { {"TensorAdd", AddPacker}, {"SoftMax", SoftmaxPacker}, {"SimpleMean", MeanPacker}, - {"Scale", ScalePacker}}; + {"ReduceMean", MeanPacker}, + {"AddFold", AddFoldPacker}, + {"ArgMax", ArgMaxPacker}, + {"BatchNorm", BatchNormFoldPacker}, + {"FakeQuantPerLayer", FakeQuantWithMinMaxPacker}, + {"FakeQuantPerChannel", FakeQuantWithMinMaxPerChannelPacker}, + {"Mul", MulPacker}, + {"MulFold", MulFoldPacker}, + {"Squeeze", SqueezePacker}}; } OpAttrPackFun OpAttrFactory::GetPackFun(const std::string &opType) { if (pack_funs_.find(opType) == pack_funs_.end()) { - MS_LOG(ERROR) << "Op Attr pack fun [\" << opType << \"] not found."; + MS_LOG(WARNING) << "Op Attr pack fun [" << opType << "] not found."; return nullptr; } return pack_funs_[opType]; } -mindspore::predict::DataFormatType GetAttrFormat(const std::string &format) { +mindspore::predict::Format GetAttrFormat(const std::string &format) { if (format == kOpFormat_NCHW) { - return predict::DataFormatType::DataFormatType_NCHW; + return predict::Format::Format_NCHW; } else if (format == kOpFormat_NHWC) { - return 
predict::DataFormatType::DataFormatType_NHWC; + return predict::Format::Format_NHWC; } else { - return predict::DataFormatType::DataFormatType_UNKNOW; + return predict::Format::Format_NUM_OF_FORMAT; } } diff --git a/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h b/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h index 83d0f9287b..89e38d1871 100644 --- a/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h +++ b/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h @@ -48,7 +48,7 @@ class OpAttrFactory { std::unordered_map pack_funs_; }; -mindspore::predict::DataFormatType GetAttrFormat(const std::string &format); +mindspore::predict::Format GetAttrFormat(const std::string &format); mindspore::predict::PadMode GetAttrPadMode(const std::string &pad_mode); } // namespace convert diff --git a/mindspore/ccsrc/predict/converter/lite_model/operations/add_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/operations/add_packer.cc index 81a2d3a9af..02a9bda65e 100644 --- a/mindspore/ccsrc/predict/converter/lite_model/operations/add_packer.cc +++ b/mindspore/ccsrc/predict/converter/lite_model/operations/add_packer.cc @@ -25,7 +25,6 @@ bool AddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { } std::unique_ptr attr(new AddT()); MS_EXCEPTION_IF_NULL(attr); - attr->format = predict::DataFormatType::DataFormatType_NCHW; ms_op->name = c_node_ptr->fullname_with_scope(); ms_op->attr.type = OpT_Add; ms_op->attr.value = attr.release(); diff --git a/mindspore/ccsrc/predict/converter/lite_model/operations/addfold_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/operations/addfold_packer.cc new file mode 100644 index 0000000000..b6affd5001 --- /dev/null +++ b/mindspore/ccsrc/predict/converter/lite_model/operations/addfold_packer.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "predict/converter/lite_model/op_attr_packer.h" + +namespace mindspore { +namespace predict { +namespace convert { +bool AddFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { + if (c_node_ptr == nullptr || ms_op == nullptr) { + return false; + } + std::unique_ptr attr(new AddFoldT()); + MS_EXCEPTION_IF_NULL(attr); + ms_op->attr.type = OpT_AddFold; + ms_op->attr.value = attr.release(); + return true; +} +} // namespace convert +} // namespace predict +} // namespace mindspore diff --git a/mindspore/ccsrc/predict/converter/lite_model/operations/argmax_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/operations/argmax_packer.cc new file mode 100644 index 0000000000..4df643704c --- /dev/null +++ b/mindspore/ccsrc/predict/converter/lite_model/operations/argmax_packer.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "predict/converter/lite_model/op_attr_packer.h" + +namespace mindspore { +namespace predict { +namespace convert { +bool ArgMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { + if (c_node_ptr == nullptr || ms_op == nullptr) { + return false; + } + std::unique_ptr attr(new ArgMaxT()); + MS_EXCEPTION_IF_NULL(attr); + ms_op->attr.type = OpT_ArgMax; + ms_op->attr.value = attr.release(); + return true; +} +} // namespace convert +} // namespace predict +} // namespace mindspore diff --git a/mindspore/ccsrc/predict/converter/lite_model/operations/batchnormfold_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/operations/batchnormfold_packer.cc new file mode 100644 index 0000000000..f05f3894be --- /dev/null +++ b/mindspore/ccsrc/predict/converter/lite_model/operations/batchnormfold_packer.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "predict/converter/lite_model/op_attr_packer.h" + +namespace mindspore { +namespace predict { +namespace convert { +bool BatchNormFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { + if (c_node_ptr == nullptr || ms_op == nullptr) { + return false; + } + std::unique_ptr attr(new BatchNormFoldT()); + MS_EXCEPTION_IF_NULL(attr); + ms_op->attr.type = OpT_BatchNormFold; + ms_op->attr.value = attr.release(); + return true; +} +} // namespace convert +} // namespace predict +} // namespace mindspore diff --git a/mindspore/ccsrc/predict/converter/lite_model/operations/fakequantwithminmax_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/operations/fakequantwithminmax_packer.cc new file mode 100644 index 0000000000..195a4fde9f --- /dev/null +++ b/mindspore/ccsrc/predict/converter/lite_model/operations/fakequantwithminmax_packer.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "predict/converter/lite_model/op_attr_packer.h" + +namespace mindspore { +namespace predict { +namespace convert { +bool FakeQuantWithMinMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { + if (c_node_ptr == nullptr || ms_op == nullptr) { + return false; + } + std::unique_ptr attr(new FakeQuantWithMinMaxT()); + MS_EXCEPTION_IF_NULL(attr); + ms_op->attr.type = OpT_FakeQuantWithMinMax; + ms_op->attr.value = attr.release(); + return true; +} +} // namespace convert +} // namespace predict +} // namespace mindspore diff --git a/mindspore/ccsrc/predict/converter/lite_model/operations/fakequantwithminmaxperchannel_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/operations/fakequantwithminmaxperchannel_packer.cc new file mode 100644 index 0000000000..0074c87646 --- /dev/null +++ b/mindspore/ccsrc/predict/converter/lite_model/operations/fakequantwithminmaxperchannel_packer.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "predict/converter/lite_model/op_attr_packer.h" + +namespace mindspore { +namespace predict { +namespace convert { +bool FakeQuantWithMinMaxPerChannelPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { + if (c_node_ptr == nullptr || ms_op == nullptr) { + return false; + } + std::unique_ptr attr(new FakeQuantWithMinMaxPerChannelT()); + MS_EXCEPTION_IF_NULL(attr); + ms_op->attr.type = OpT_FakeQuantWithMinMaxPerChannel; + ms_op->attr.value = attr.release(); + return true; +} +} // namespace convert +} // namespace predict +} // namespace mindspore diff --git a/mindspore/ccsrc/predict/converter/lite_model/operations/mul_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/operations/mul_packer.cc new file mode 100644 index 0000000000..6c430e79e7 --- /dev/null +++ b/mindspore/ccsrc/predict/converter/lite_model/operations/mul_packer.cc @@ -0,0 +1,34 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "predict/converter/lite_model/op_attr_packer.h" + +namespace mindspore { +namespace predict { +namespace convert { +bool MulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { + if (c_node_ptr == nullptr || ms_op == nullptr) { + return false; + } + std::unique_ptr attr(new MulT()); + MS_EXCEPTION_IF_NULL(attr); + ms_op->attr.type = OpT_Mul; + ms_op->attr.value = attr.release(); + return true; +} +} // namespace convert +} // namespace predict +} // namespace mindspore diff --git a/mindspore/ccsrc/predict/converter/lite_model/operations/mulflod_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/operations/mulflod_packer.cc new file mode 100644 index 0000000000..1df7204875 --- /dev/null +++ b/mindspore/ccsrc/predict/converter/lite_model/operations/mulflod_packer.cc @@ -0,0 +1,35 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "predict/converter/lite_model/op_attr_packer.h" + +namespace mindspore { +namespace predict { +namespace convert { +bool MulFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { + if (c_node_ptr == nullptr || ms_op == nullptr) { + return false; + } + std::unique_ptr attr(new MulFoldT()); + MS_EXCEPTION_IF_NULL(attr); + ms_op->name = c_node_ptr->fullname_with_scope(); + ms_op->attr.type = OpT_MulFold; + ms_op->attr.value = attr.release(); + return true; +} +} // namespace convert +} // namespace predict +} // namespace mindspore diff --git a/mindspore/ccsrc/predict/converter/lite_model/operations/pooling_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/operations/pooling_packer.cc index 4eeb643817..edfdcda040 100644 --- a/mindspore/ccsrc/predict/converter/lite_model/operations/pooling_packer.cc +++ b/mindspore/ccsrc/predict/converter/lite_model/operations/pooling_packer.cc @@ -36,7 +36,6 @@ bool PoolingPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { attr->poolingMode = mindspore::predict::PoolMode::PoolMode_MEAN_POOLING; } else if (c_name == "GlobalPool") { ms_op->name = c_node_ptr->fullname_with_scope(); - attr->poolingMode = mindspore::predict::PoolMode::PoolMode_GLOBAL_POOING; } else { MS_LOG(ERROR) << "unknowed pooling type."; return false; @@ -53,7 +52,6 @@ bool PoolingPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { attr->padDown = 0; attr->padLeft = 0; attr->padRight = 0; - attr->caffeMode = false; ms_op->attr.type = OpT_Pooling; ms_op->attr.value = attr.release(); return true; diff --git a/mindspore/ccsrc/predict/converter/lite_model/operations/reshape_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/operations/reshape_packer.cc index cd8b72a8ac..a0a263631d 100644 --- a/mindspore/ccsrc/predict/converter/lite_model/operations/reshape_packer.cc +++ b/mindspore/ccsrc/predict/converter/lite_model/operations/reshape_packer.cc @@ -25,7 +25,7 @@ bool ReshapePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { } 
std::unique_ptr attr(new ReshapeT()); MS_EXCEPTION_IF_NULL(attr); - attr->format = predict::DataFormatType::DataFormatType_NCHW; + attr->format = predict::Format::Format_NCHW; ms_op->name = c_node_ptr->fullname_with_scope(); ms_op->attr.type = OpT_Reshape; ms_op->attr.value = attr.release(); diff --git a/mindspore/ccsrc/predict/converter/lite_model/operations/scale_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/operations/scale_packer.cc index 7b4f6f6283..356775247d 100644 --- a/mindspore/ccsrc/predict/converter/lite_model/operations/scale_packer.cc +++ b/mindspore/ccsrc/predict/converter/lite_model/operations/scale_packer.cc @@ -25,7 +25,7 @@ bool ScalePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { } std::unique_ptr attr(new ScaleT()); MS_EXCEPTION_IF_NULL(attr); - attr->format = predict::DataFormatType::DataFormatType_NCHW; + attr->format = predict::Format::Format_NCHW; ms_op->name = c_node_ptr->fullname_with_scope(); ms_op->attr.type = OpT_Scale; ms_op->attr.value = attr.release(); diff --git a/mindspore/ccsrc/predict/converter/lite_model/operations/squeeze_packer.cc b/mindspore/ccsrc/predict/converter/lite_model/operations/squeeze_packer.cc new file mode 100644 index 0000000000..7e836fe021 --- /dev/null +++ b/mindspore/ccsrc/predict/converter/lite_model/operations/squeeze_packer.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "predict/converter/lite_model/op_attr_packer.h" + +namespace mindspore { +namespace predict { +namespace convert { +bool SqueezePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { + if (c_node_ptr == nullptr || ms_op == nullptr) { + return false; + } + std::unique_ptr attr(new SqueezeT()); + MS_EXCEPTION_IF_NULL(attr); + + std::vector kernel_axis_value = AnfAlgo::GetNodeAttr>(c_node_ptr, "axis"); + attr->axis = kernel_axis_value; + + ms_op->attr.type = OpT_Squeeze; + ms_op->attr.value = attr.release(); + return true; +} +} // namespace convert +} // namespace predict +} // namespace mindspore diff --git a/mindspore/ccsrc/predict/predict.cc b/mindspore/ccsrc/predict/predict.cc index d81dcd3321..bbb12c3787 100644 --- a/mindspore/ccsrc/predict/predict.cc +++ b/mindspore/ccsrc/predict/predict.cc @@ -22,12 +22,15 @@ namespace mindspore { namespace predictmodel { -void StepConvertGraph(const KernelGraphPtrNew &kernel_graph_ptr) { +void StepConvertGraph(const KernelGraphPtr &kernel_graph_ptr) { MS_LOG(INFO) << "start convert_graph step"; // get kernel_graph. 
this graph can be origin or device, depends on which steps to persistence MS_EXCEPTION_IF_NULL(kernel_graph_ptr); bool save_ms_model = MsContext::GetInstance()->save_ms_model_flag(); if (save_ms_model) { + if (kernel_graph_ptr->inputs().empty()) { + return; + } // set convert_mode: convert cpu info or convert Davnici executor::Kernel2Ms::GetInstance().set_convert_mode(executor::kConvertCpuMode); // convert kernel_graph to sub_ms_graph @@ -46,6 +49,9 @@ void StepConvertWeight(const std::vector &inputs) { bool save_ms_model = MsContext::GetInstance()->save_ms_model_flag(); std::string save_path = MsContext::GetInstance()->save_ms_model_path(); if (save_ms_model) { + if (inputs.empty()) { + return; + } MS_LOG(INFO) << "save ms model is true to path " << save_path; if (!executor::Kernel2Ms::GetInstance().KernelInput2MS(inputs)) { MS_LOG(WARNING) << "convert mindspore kernel input failed"; @@ -59,15 +65,5 @@ void StepConvertWeight(const std::vector &inputs) { } } } - -executor::TargetMode GetDeviceTarget(const std::string &device_target) { - if (device_target == "GPU") { - return executor::kGPUTarget; - } else if (device_target == "Ascend") { - return executor::kCPUTarget; - } else { - return executor::kUnknowTarget; - } -} } // namespace predictmodel } // namespace mindspore diff --git a/mindspore/ccsrc/predict/predict.h b/mindspore/ccsrc/predict/predict.h index 04184fe77c..7c65f16619 100644 --- a/mindspore/ccsrc/predict/predict.h +++ b/mindspore/ccsrc/predict/predict.h @@ -19,16 +19,14 @@ #include #include -#include #include "session/session_basic.h" #include "predict/converter/kernel2ms.h" namespace mindspore { namespace predictmodel { -using KernelGraphPtrNew = std::shared_ptr; -void StepConvertGraph(const KernelGraphPtrNew &kernel_graph_ptr); +using KernelGraphPtr = std::shared_ptr; +void StepConvertGraph(const KernelGraphPtr &kernel_graph_ptr); void StepConvertWeight(const std::vector &inputs); -executor::TargetMode GetDeviceTarget(const std::string 
&device_target); } // namespace predictmodel } // namespace mindspore #endif // MINDSPORE_MINDSPORE_CCSRC_PREDICT_H_ diff --git a/mindspore/ccsrc/predict/schema/ms.fbs b/mindspore/ccsrc/predict/schema/ms.fbs index a114fc444e..7c3dcfb498 100644 --- a/mindspore/ccsrc/predict/schema/ms.fbs +++ b/mindspore/ccsrc/predict/schema/ms.fbs @@ -13,42 +13,26 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + include "op.fbs"; namespace mindspore.predict; -enum DataType : int { - DT_FLOAT = 0, - DT_FLOAT16 = 1, - DT_INT8 = 2, - DT_INT32 = 3, - DT_UINT8 = 4, - DT_UINT32 = 8, - DT_UNDEFINED = 16 -} - -enum Format : int { - NCHW = 0, - NHWC, - NC4HW4 = 100, - NUM_OF_FORMAT -} - -enum MSConst: int { +enum MSCONST: int { WEIGHT_REFCOUNT = 999 } -table QuantizationDef { - // Quantized value q, corresponding float value r: - // r = scale * (q - zero_point), where scale = (rmax - rmin) / (qmax - qmin) - min: [float]; - max: [float]; - scale: [float]; - zero_point: [long]; +table QuantParam { + scale: double; + zeroPoint: int; + min: double = 0; + max: double = 0; + narrowRange: bool = true; + numBits: int = 8; +} - // Tensor shape of the specifies dimension. 
- dimension: int; +table QuantParamArray { + param: [QuantParam]; //pre-channel } table TensorDef { @@ -60,7 +44,6 @@ table TensorDef { refCount: int; offset: int; data: [ubyte]; - quantization: QuantizationDef; } union OpT { @@ -70,7 +53,6 @@ union OpT { Conv2D, FusedBatchNorm, CaffeBatchNorm, - Squeeze, BiasAdd, Pooling, DepthwiseConv2D, @@ -85,57 +67,134 @@ union OpT { Eltwise, NetOutput, Add, + Sub, MatMul, StridedSlice, Power, Slice, Stack, Mul, + RealDiv, Pad, Maximum, + Minimum, CaffePReLU, + LeakyReLU, ArgMax, + ArgMin, Exp, CaffeCrop, Range, + Rsqrt, ExpandDims, Tile, - Cast -// Split + Cast, + Shape, + Nchw2Nhwc, + Nhwc2Nchw, + QuantDTypeCast, + Split, + Permute, + FakeQuantWithMinMaxVars, + Equal, + Less, + Greater, + Min, + Floor, + Abs, + Neg, + Cos, + Sin, + Sqrt, + Square, + Constant, + Log, + Tan, + Atan, + Asin, + Clip, + Transpose, + Squeeze, + Unsqueeze, + Upsample, + Dropout, + Broadcast, + Lrn, + Prelu, + ZerosLike, + TopK, + SpaceToDepth, + SpaceToBatch, + SparseToDense, + ReverseSequence, + Rank, + Gather, + GatherNd, + Fill, + Elu, + DepthToSpace, + BatchToSpace, + AddN, + Ceil, + EmbeddingLookup, + EmbeddingLookupSparse, + FloorDiv, + FloorMod, + L2Norm, + LocalResponseNormalization, + MatrixDiag, + Reduce, + Reverse, + Round, + Select, + Scatter, + Unique, + Unstack, + LogicalAnd, + LogicalOr, + LogicalXor, + LogicalNot, + OnnxInt8Quantize, + OnnxInt8Dequantize, + FakeQuantWithMinMax, + FakeQuantWithMinMaxPerChannel, + BatchNormFold, + MulFold, + AddFold, + SquaredDifference } enum QuantType: int { QUANT_NONE, - QUANT_INT8 + AwareTrainning, + WeightQuant, + PostTraining +} + +enum FmkType: int { + TF, + CAFFE, + ONNX, + MS, + TFLITE } table OpDef { name: string; + fmkType: FmkType; attr: OpT; inputIndex: [uint]; outputIndex: [uint]; - isLastConv: bool; quantType: QuantType = QUANT_NONE; + quantParam: [QuantParamArray]; } - -enum FmkType: int { - TF, - CAFFE -} - -table NodeDef { - fmkType: FmkType; - opDef: OpDef; -} - - table SubGraphDef 
{ name: string; inputIndex: [uint]; outputIndex: [uint]; mempoolSize: uint; - nodes: [NodeDef]; + nodes: [OpDef]; allTensors: [TensorDef]; // weight + input + output } diff --git a/mindspore/ccsrc/predict/schema/op.fbs b/mindspore/ccsrc/predict/schema/op.fbs index d48f11b4d1..9286c2b2d3 100644 --- a/mindspore/ccsrc/predict/schema/op.fbs +++ b/mindspore/ccsrc/predict/schema/op.fbs @@ -22,12 +22,30 @@ enum ResizeMethod: byte { NEAREST_NEIGHBOR = 1 } -enum DataFormatType : byte { // todo combine with mslite.h::Format - UNKNOW = -1, +enum DataType : int { + DT_FLOAT = 0, + DT_FLOAT16 = 1, + DT_INT8 = 2, + DT_INT32 = 3, + DT_UINT8 = 4, + DT_INT16 = 5, + DT_UINT32 = 8, + DT_INT64 = 9, + DT_UINT16 = 10, + DT_UNDEFINED = 16 +} + +enum Format : int { NCHW = 0, - NHWC = 1, - HWC = 2, // for input image or resize - CHW = 3, // for input image or resize + NHWC, + HWKC, + HWCK, + KCHW, + CKHW, + KHWC, + CHWK, + NC4HW4 = 100, + NUM_OF_FORMAT } enum ActivationType : byte { @@ -42,26 +60,47 @@ enum ActivationType : byte { SOFTSIGN = 8, SOFTPLUS = 9, TANH = 10, - UNKNOW = 11 + SELU = 11, + HSWISH = 12, + HSIGMOID = 13, + THRESHOLDRELU = 14, + LINEAR = 15, + UNKNOW = 16 +} + +enum ReduceType : byte { + REDUCE_MAX = 0, + REDUCE_MEAN = 1, + REDUCE_ALL = 2, + REDUCE_ANY = 3, + REDUCE_LOG_SUM_EXP = 4, + REDUCE_PROD = 5, + REDUCE_SUM = 6, + UNKNOW = 7 } enum PoolMode : byte { MAX_POOLING = 0, MEAN_POOLING = 1, - GLOBAL_POOING = 2 } enum EltwiseMode : byte { PROD = 0, SUM = 1, - MAXIMUM = 2 + MAXIMUM = 2, + UNKNOW = 3 } enum PadMode : byte { - NOTSET=0, - SAME=1, - VALID=2, - CAFFE_CEIL_NEW=4 + NOTSET = 0, + SAME = 1, + VALID = 2, + CAFFE = 4 +} + +enum RoundMode : byte { + FLOOR = 0, + CEIL = 1 } enum PaddingMode : byte { @@ -77,7 +116,9 @@ table Pad { } table Maximum { - format: DataFormatType = 0; +} + +table Minimum { } table Concat { @@ -94,7 +135,7 @@ table Activation { } table Conv2D { - format: DataFormatType = 0; + format: Format = 0; group: int; channelIn: int; channelOut: int; 
@@ -114,15 +155,29 @@ table Conv2D { } table FusedBatchNorm { - epsilon: float; // eg. epsilon=0.001 + epsilon: float = 0.00001; // eg. epsilon=0.001 + momentum: float = 0.9; + spatial: int = 1; } table CaffeBatchNorm { epsilon: float; // eg. epsilon=0.001 } -table Squeeze { - axis: [int]; +table Shape { +} + +table Nchw2Nhwc { + +} + +table Nhwc2Nchw { + +} + +table FakeQuantWithMinMaxVars { + narrowRange: bool; + numBits: int; } table BiasAdd { @@ -130,8 +185,9 @@ table BiasAdd { } table Pooling { - format: DataFormatType = 0; + format: Format = 0; poolingMode: PoolMode; + global: bool = false; windowW: int; windowH: int; strideW: int; @@ -141,12 +197,11 @@ table Pooling { padDown: int; padLeft: int; padRight: int; - // todo replace with padValueMode in convolution pooling and so on - caffeMode: bool = false; + roundMode: RoundMode; } table DepthwiseConv2D { - format: DataFormatType = 0; + format: Format = 0; channelIn: int; channelMultiplier: int; kernelW: int; @@ -165,7 +220,7 @@ table DepthwiseConv2D { } table DeDepthwiseConv2D { - format: DataFormatType = 0; + format: Format = 0; channelIn: int; channelMultiplier: int; kernelW: int; @@ -185,7 +240,7 @@ table DeDepthwiseConv2D { table Resize { - format: DataFormatType = 0; + format: Format = 0; method: ResizeMethod; newHeight: long; newWidth: long; @@ -194,7 +249,7 @@ table Resize { } table DetectionPostProcess { - format: DataFormatType = 0; + format: Format = 0; inputSize: int; hScale: float; wScale: float; @@ -210,8 +265,8 @@ table DetectionPostProcess { } table FullConnection { - format: DataFormatType = 0; hasBias: bool; + axis: int; } // Mean(input_tensor, axis, keep_dims) @@ -221,7 +276,7 @@ table Mean { } table DeConv2D { - format: DataFormatType = 0; + format: Format = 0; group: int; channelIn: int; channelOut: int; @@ -241,34 +296,88 @@ table DeConv2D { } table Scale { - format: DataFormatType = 0; + format: Format = 0; } table Eltwise { - format: DataFormatType = 0; mode: EltwiseMode; - // todo 
repeat coeff (default 1) } table Add { - format: DataFormatType = 0; +} + +table Sub { +} + +table Mul { +} + +table RealDiv { +} + +table Rsqrt { +} + +table Equal { +} + +table Less { +} + +table Greater { +} + +table Min { } table Slice { - format: DataFormatType = 0; + format: Format = 0; begin: [int]; - end: [int]; - stride: [int]; + size: [int]; } -table Mul { +table Floor { +} + +table Abs { +} + +table Neg { } table Exp { } +table Cos { +} + +table Sin { +} + +table Sqrt { +} + +table Square { +} + +table Ceil { +} + +table Log { +} + +table Tan { +} + +table Atan { +} + +table Asin { +} + table Reshape { - format: DataFormatType = 0; + format: Format = 0; + shape: [long]; } table Power { @@ -280,13 +389,20 @@ table Power { table ArgMax { axis: int; outMaxValue: bool; - topK: int; + topK: int = 1; + keepDims: bool; + axisType: int; +} + +table ArgMin { + axis: int; + outMaxValue: bool; + topK: int = 1; keepDims: bool; axisType: int; } table NetOutput { - format: DataFormatType = 0; } table MatMul { @@ -298,6 +414,10 @@ table CaffePReLU { channelShared : bool = false; } +table LeakyReLU { + negativeSlope: float; +} + table StridedSlice { beginMask: int; endMask: int; @@ -317,6 +437,7 @@ table Stack { } table Range { + dType: DataType; start: int; limit: int; delta: int; @@ -335,13 +456,244 @@ table Cast { dstT: int; } -//table Split { -// numberSplit: int; -// sizeSplits: [int]; -// splitDim: int; -//} +table QuantDTypeCast { + srcT: DataType; + dstT: DataType; +} + +table Split { + numberSplit: int; + sizeSplits: [int]; + splitDim: int; +} table CaffeCrop { axis : long; offsets : [long]; } + +table Permute { + order: [long]; +} + +table Clip { + max: float; + min: float; +} + +table Constant { +} + + +table Elu { + alpha: float = 1.0; +} + +table Broadcast { +} + +table Lrn { + alpha: float = 0.0001; + beta: float = 0.75; + bias: float = 1.0; + size: int; +} + +enum ReduceMode : byte { + ReduceMean = 0, + ReduceMax = 1, + ReduceMin = 2, + ReduceProd = 3, + 
ReduceSum = 4, + ReduceSumSquare = 5 +} + +table Reduce { + axes: [int]; + keepDims: int; + mode: ReduceMode; +} + +table Prelu { + slope: [float]; +} + +table Transpose { + perm: [int]; + conjugate: bool = false; +} + +table Squeeze { + axis: [int]; +} + +table Unsqueeze { + axis: [int]; +} + +table Upsample { + mode: string; + scales: [float]; +} + +table Dropout { + ratio : float = 0.5; +} + +table LocalResponseNormalization { + depth_radius: int; + bias: float; + alpha: float; + beta: float; +} + +table ZerosLike { +} + +table TopK { + k : int; + sorted : bool = true; +} + +table SpaceToDepth { + blockSize : int; + format: Format = 0; +} + +table SpaceToBatch { + blockShape : [int]; + paddings : [int]; +} + +table SparseToDense { + validateIndices: bool; +} + +table ReverseSequence { + seqAxis: int; + batchAxis: int; +} + +table Rank { +} + + +table Gather { + axis: int; + batchDims: int; +} + +table GatherNd { + batchDims: int; +} + +table Fill { + dims: [int]; +} + +table DepthToSpace { + blockSize: int; + format: Format = 0; +} + + +table BatchToSpace { + blockShape: [int]; + crops: [int]; +} + +table AddN { + N: int; +} + + +table EmbeddingLookup { + ids: [int]; + maxNorm: float; +} + +table EmbeddingLookupSparse { + spIds: [int]; + spWeights: [float]; + //combiner: Combiner=0; + maxNortm: float; +} + +table FloorDiv { +} + +table FloorMod { +} + +table L2Norm { + axis: [int]; + epsilon: float; +} + +table LogicalAnd { +} + +table LogicalOr { +} + +table LogicalXor { +} + +table LogicalNot { +} + +table MatrixDiag { + k: int; + numRows: int; + numCols: int; + paddingValue: float; +} + +table Select { +} + +table TfReduce { + type: ReduceType = 7; +} + +table Reverse { + axis: [int]; +} + +table Round { +} + +table Scatter { +} + +table Unique { +} + +table Unstack { + num: int; + axis: int; +} + +table OnnxInt8Quantize { +} + +table OnnxInt8Dequantize { +} + +table FakeQuantWithMinMax { +} + +table FakeQuantWithMinMaxPerChannel { +} + +table BatchNormFold { 
+} + +table MulFold { +} + +table AddFold { +} + +table SquaredDifference { +} diff --git a/mindspore/ccsrc/pynative/base.h b/mindspore/ccsrc/pynative/base.h index fc143da3c1..60ae869227 100644 --- a/mindspore/ccsrc/pynative/base.h +++ b/mindspore/ccsrc/pynative/base.h @@ -45,7 +45,7 @@ enum PynativeStatusCode { PYNATIVE_UNKNOWN_STATE = 0XFF }; -enum RunOpArgsEnum { PY_PRIM = 0, PY_NAME, PY_INPUTS, PY_INPUT_MASK, PY_ARGS_NUM }; +enum RunOpArgsEnum { PY_PRIM = 0, PY_NAME, PY_INPUTS, PY_ARGS_NUM }; struct OpExecInfo { PrimitivePyPtr py_primitive; @@ -57,9 +57,9 @@ struct OpExecInfo { py::dict op_attrs; }; using OpExecInfoPtr = std::shared_ptr; -OpExecInfoPtr GenerateOpExecInfo(const py::args &args); +OpExecInfoPtr GenerateOpExecInfo(const py::args &args, py::list *const out_args); -const std::set ignore_infer_prim = {"partial", "make_ref"}; +const std::set ignore_infer_prim = {"make_ref"}; } // namespace pynative } // namespace mindspore diff --git a/mindspore/ccsrc/pynative/pynative_execute.cc b/mindspore/ccsrc/pynative/pynative_execute.cc index 6f0a4e5790..75653ff5d2 100644 --- a/mindspore/ccsrc/pynative/pynative_execute.cc +++ b/mindspore/ccsrc/pynative/pynative_execute.cc @@ -22,17 +22,30 @@ #include #include +#include "ir/param_value_py.h" #include "utils/any.h" #include "utils/utils.h" #include "utils/context/ms_context.h" #include "operator/ops.h" +#include "operator/composite/composite.h" #include "operator/composite/do_signature.h" #include "pipeline/parse/data_converter.h" +#include "pipeline/parse/parse_base.h" +#include "pipeline/parse/resolve.h" #include "pipeline/static_analysis/prim.h" #include "session/session_factory.h" #include "pre_activate/pass/const_input_to_attr_registry.h" #include "pre_activate/common/helper.h" +#include "pipeline/action.h" + #include "pynative/base.h" +#include "pybind_api/api_register.h" +#include "vm/transform.h" + +#include "optimizer/ad/grad.h" +#include "pipeline/resource.h" +#include "pipeline/pipeline.h" +#include 
"pipeline/pass.h" #ifdef ENABLE_GE #include "pynative/pynative_execute_ge.h" @@ -40,77 +53,139 @@ const char SINGLE_OP_GRAPH[] = "single_op_graph"; // primitive unable to infer value for constant input in PyNative mode -const std::set vm_operators = {"partial", "depend", "make_ref", "zeros_like_tensor"}; +const std::set vm_operators = {"make_ref", "HookBackward", "stop_gradient"}; namespace mindspore { namespace pynative { + static std::shared_ptr session = nullptr; +PynativeExecutorPtr PynativeExecutor::executor_ = nullptr; +std::mutex PynativeExecutor::instance_lock_; +ResourcePtr PynativeExecutor::resource_; + inline ValuePtr PyAttrValue(const py::object &obj) { - ValuePtr converted_ret = nullptr; - bool converted = parse::ConvertData(obj, &converted_ret); - if (!converted) { + ValuePtr converted_ret = parse::data_converter::PyDataToValue(obj); + if (!converted_ret) { MS_LOG(EXCEPTION) << "Attribute convert error with type:" << std::string(py::str(obj)); } return converted_ret; } -py::tuple ConvertInputs(const PrimitivePyPtr &prim, const py::tuple &py_args) { - auto signature = prim->signatures(); - std::vector dtypes; - (void)std::transform(signature.begin(), signature.end(), std::back_inserter(dtypes), - [](const Signature &sig) { return sig.dtype; }); - int empty_dtype_count = std::count(dtypes.begin(), dtypes.end(), SignatureEnumDType::kDTypeEmptyDefaultValue); - if (dtypes.size() == 0 || static_cast(dtypes.size()) == empty_dtype_count) { - return py_args; +std::string GetId(const py::object &obj) { + py::object to_process = obj; + std::string prefix = ""; + if (py::isinstance(to_process)) { + auto p_list = py::cast(to_process); + if (p_list.size() == 0) { + return "empty"; + } + prefix = "tuple:"; + std::string key = ""; + for (size_t i = 0; i < p_list.size(); ++i) { + key += std::string(py::str(GetId(p_list[i]))) + ":"; + } + return prefix + key; + } + if (py::isinstance(to_process)) { + return prefix + std::string(py::str(to_process)); } - std::map> 
type_indexs; + if (py::isinstance(to_process)) { + return prefix + std::string(py::str(to_process)); + } + if (py::isinstance(to_process)) { + auto tensor_ptr = py::cast(to_process); + return prefix + tensor_ptr->id(); + } + + py::object ret = parse::python_adapter::CallPyFn(parse::PYTHON_MOD_PARSE_MODULE, parse::PYTHON_MOD_GET_OBJ_ID, obj); + return py::cast(ret); +} + +py::object GetTupleObj(const py::object &obj) { + py::module mod = parse::python_adapter::GetPyModule(parse::PYTHON_MOD_PARSE_MODULE); + py::object obj_tuple = parse::python_adapter::CallPyModFn(mod, parse::PYTHON_MOD_GET_DEFAULT_INPUT, obj); + return obj_tuple; +} + +std::map> GetTypeIndex(const std::vector &dtypes) { + std::map> type_indexes; for (size_t i = 0; i < dtypes.size(); ++i) { - auto it = type_indexs.find(dtypes[i]); - if (it == type_indexs.end()) { - (void)type_indexs.insert(std::make_pair(dtypes[i], std::vector{i})); + auto it = type_indexes.find(dtypes[i]); + if (it == type_indexes.end()) { + (void)type_indexes.insert(std::make_pair(dtypes[i], std::vector{i})); } else { it->second.push_back(i); } } + return type_indexes; +} + +std::map GetDstType(const py::tuple &py_args, + const std::map> &type_indexes) { std::map dst_type; - for (auto it = type_indexs.begin(); it != type_indexs.end(); (void)++it) { + for (auto it = type_indexes.begin(); it != type_indexes.end(); (void)++it) { auto type = it->first; - auto indexs = it->second; - if (indexs.size() < 2) { + auto indexes = it->second; + if (indexes.size() < 2) { continue; } - size_t m_index = indexs[0]; - for (size_t i = 1; i < indexs.size(); ++i) { - if (py::isinstance(py_args[indexs[i]])) { - m_index = indexs[i]; + size_t m_index = indexes[0]; + for (size_t i = 1; i < indexes.size(); ++i) { + if (py::isinstance(py_args[indexes[i]])) { + m_index = indexes[i]; } } (void)dst_type.insert(std::make_pair(type, m_index)); } - py::tuple py_inputs(py_args.size()); + return dst_type; +} + +py::tuple ConvertInputs(const PrimitivePyPtr &prim, 
const py::list &args, py::tuple *const out_args, + py::list *out_args_list) { + auto &py_args = *out_args; + py::tuple input_mask(args.size()); + for (size_t i = 0; i < args.size(); ++i) { + if (py::hasattr(args[i], "__parameter__")) { + input_mask[i] = true; + } else { + input_mask[i] = false; + } + py_args[i] = GetTupleObj(args[i]); + } + auto signature = prim->signatures(); + std::vector dtypes; + (void)std::transform(signature.begin(), signature.end(), std::back_inserter(dtypes), + [](const Signature &sig) { return sig.dtype; }); + int empty_dtype_count = std::count(dtypes.begin(), dtypes.end(), SignatureEnumDType::kDTypeEmptyDefaultValue); + if (dtypes.size() == 0 || static_cast(dtypes.size()) == empty_dtype_count) { + return input_mask; + } + auto type_indexes = GetTypeIndex(dtypes); + auto dst_type = GetDstType(py_args, type_indexes); for (size_t i = 0; i < py_args.size(); ++i) { auto it = dst_type.find(dtypes[i]); if (it != dst_type.end() && it->second != i && (py::isinstance(py_args[i]) || py::isinstance(py_args[i]))) { auto tensor_ptr = py::cast(py_args[it->second]); if (py::isinstance(py_args[i])) { - py_inputs[i] = std::make_shared(py::cast(py_args[i]), tensor_ptr->Dtype()); + py_args[i] = std::make_shared(py::cast(py_args[i]), tensor_ptr->Dtype()); + (*out_args_list)[i] = py_args[i]; } else { - py_inputs[i] = std::make_shared(py::cast(py_args[i]), tensor_ptr->Dtype()); + py_args[i] = std::make_shared(py::cast(py_args[i]), tensor_ptr->Dtype()); + (*out_args_list)[i] = py_args[i]; } continue; } - py_inputs[i] = py_args[i]; } - return py_inputs; + return input_mask; } -void PynativeInfer(const PrimitivePyPtr &prim, const py::tuple &py_args, OpExecInfo *const op_exec_info) { +void PynativeInfer(const PrimitivePyPtr &prim, const py::list &py_args, OpExecInfo *const op_exec_info) { size_t size = py_args.size(); AbstractBasePtrList args_spec_list; for (size_t i = 0; i < size; i++) { ValuePtr input_value = PyAttrValue(py_args[i]); - if 
(py::isinstance(py_args[i])) { + if (!py::hasattr(prim->GetPyObj(), "const_value") && input_value->isa()) { args_spec_list.emplace_back(abstract::FromValueInside(input_value, true)); } else { args_spec_list.emplace_back(abstract::FromValueInside(input_value, false)); @@ -120,9 +195,9 @@ void PynativeInfer(const PrimitivePyPtr &prim, const py::tuple &py_args, OpExecI op_exec_info->abstract = infer_res; } -OpExecInfoPtr GenerateOpExecInfo(const py::args &args) { +OpExecInfoPtr GenerateOpExecInfo(const py::args &args, py::list *const out_args) { if (args.size() != PY_ARGS_NUM) { - MS_LOG(ERROR) << "Four args are needed by RunOp"; + MS_LOG(ERROR) << "Three args are needed by RunOp"; return nullptr; } auto op_exec_info = std::make_shared(); @@ -133,15 +208,18 @@ OpExecInfoPtr GenerateOpExecInfo(const py::args &args) { if (pyobj == nullptr) { MS_LOG(EXCEPTION) << "pyobj is empty"; } - py::tuple py_args = ConvertInputs(prim, args[PY_INPUTS]); + + py::list a = args[PY_INPUTS]; + size_t input_num = a.size(); + op_exec_info->op_inputs = py::tuple(input_num); + + op_exec_info->inputs_mask = ConvertInputs(prim, args[PY_INPUTS], &op_exec_info->op_inputs, out_args); // use python infer method if (ignore_infer_prim.find(op_exec_info->op_name) == ignore_infer_prim.end()) { - PynativeInfer(prim, py_args, op_exec_info.get()); + PynativeInfer(prim, op_exec_info->op_inputs, op_exec_info.get()); } op_exec_info->py_primitive = prim; op_exec_info->op_attrs = py::getattr(args[PY_PRIM], "attrs"); - op_exec_info->op_inputs = py_args; - op_exec_info->inputs_mask = args[PY_INPUT_MASK]; if (op_exec_info->op_inputs.size() != op_exec_info->inputs_mask.size()) { MS_LOG(ERROR) << "Op:" << op_exec_info->op_name << " inputs size not equal op_mask"; return nullptr; @@ -154,9 +232,13 @@ std::string GetSingleOpGraphInfo(const OpExecInfoPtr &op_exec_info, MS_EXCEPTION_IF_NULL(op_exec_info); std::string graph_info; // get input tensor info - for (const auto &input_tensor : input_tensors) { - 
MS_EXCEPTION_IF_NULL(input_tensor); - (void)graph_info.append(input_tensor->GetShapeAndDataTypeInfo() + "_"); + size_t input_num = op_exec_info->op_inputs.size(); + for (size_t index = 0; index < input_num; ++index) { + auto input = op_exec_info->op_inputs[index]; + if (py::isinstance(input)) { + auto tensor_ptr = py::cast(input); + (void)graph_info.append(tensor_ptr->GetShapeAndDataTypeInfo() + "_"); + } } // get prim and abstract info MS_EXCEPTION_IF_NULL(op_exec_info->abstract); @@ -171,6 +253,23 @@ py::object RunOpInVM(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *stat MS_EXCEPTION_IF_NULL(status); MS_EXCEPTION_IF_NULL(op_exec_info); MS_EXCEPTION_IF_NULL(op_exec_info->py_primitive); + if (op_exec_info->op_name == "HookBackward") { + auto op_inputs = op_exec_info->op_inputs; + py::tuple result(op_inputs.size()); + for (size_t i = 0; i < op_inputs.size(); i++) { + py::object input = op_inputs[i]; + if (py::hasattr(input, "__parameter__")) { + result[i] = py::getattr(input, "data"); + } else { + auto tensor = py::cast(op_inputs[i]); + auto new_tensor = std::make_shared(tensor->data()); + result[i] = new_tensor; + } + } + *status = PYNATIVE_SUCCESS; + MS_LOG(INFO) << "RunOpInVM end"; + return std::move(result); + } auto func = op_exec_info->py_primitive->GetComputeFunction(); if (py::isinstance(func)) { MS_LOG(ERROR) << "VM failed to get func"; @@ -237,6 +336,27 @@ void ConvertValueTupleToTensor(const py::object &input_object, std::vectorpush_back(tensor_ptr); } +void ConvertMultiPyObjectToTensor(const py::object &input_object, const PrimitivePtr &op_prim, + std::vector *input_tensors, int *tensor_mask) { + MS_EXCEPTION_IF_NULL(op_prim); + MS_EXCEPTION_IF_NULL(input_tensors); + MS_EXCEPTION_IF_NULL(tensor_mask); + + if (!py::isinstance(input_object)) { + MS_LOG(EXCEPTION) << "The input should be a tuple!"; + } + auto tuple_inputs = py::cast(input_object); + if (tuple_inputs.size() == 0) { + MS_LOG(EXCEPTION) << "The size of input list or tuple is 0!"; + } 
+ if (py::isinstance(tuple_inputs[0])) { + PlantTensorTupleToVector(tuple_inputs, op_prim, input_tensors); + } else { + ConvertValueTupleToTensor(input_object, input_tensors); + *tensor_mask = kValueNodeTensorMask; + } +} + void ConvertPyObjectToTensor(const py::object &input_object, const PrimitivePtr &op_prim, std::vector *input_tensors, int *tensor_mask) { MS_EXCEPTION_IF_NULL(op_prim); @@ -251,20 +371,20 @@ void ConvertPyObjectToTensor(const py::object &input_object, const PrimitivePtr } else if (py::isinstance(input_object)) { tensor_ptr = std::make_shared(py::cast(input_object), kInt32); *tensor_mask = kValueNodeTensorMask; - } else if (py::isinstance(input_object)) { - tensor_ptr = std::make_shared(py::cast(input_object), nullptr); } else if (py::isinstance(input_object)) { tensor_ptr = std::make_shared(py::cast(input_object), nullptr); - } else if (py::isinstance(input_object)) { + } else if (py::isinstance(input_object)) { + auto list_inputs = py::cast(input_object); + py::tuple tuple_inputs(list_inputs.size()); + for (size_t i = 0; i < tuple_inputs.size(); ++i) { + tuple_inputs[i] = list_inputs[i]; + } + ConvertMultiPyObjectToTensor(tuple_inputs, op_prim, input_tensors, tensor_mask); return; } else if (py::isinstance(input_object)) { - auto tuple_inputs = py::cast(input_object); - if (py::isinstance(tuple_inputs[0])) { - PlantTensorTupleToVector(tuple_inputs, op_prim, input_tensors); - } else { - ConvertValueTupleToTensor(input_object, input_tensors); - *tensor_mask = kValueNodeTensorMask; - } + ConvertMultiPyObjectToTensor(input_object, op_prim, input_tensors, tensor_mask); + return; + } else if (py::isinstance(input_object)) { return; } else { MS_LOG(EXCEPTION) << "Run op inputs type is invalid!"; @@ -288,7 +408,6 @@ void ConstructInputTensor(const OpExecInfoPtr &op_run_info, std::vector *te opt::ConstInputToAttrInfoRegister reg; bool reg_exist = opt::ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(op_run_info->op_name, ®); size_t input_num 
= op_run_info->op_inputs.size(); - MS_LOG(INFO) << "py input size: " << input_num; for (size_t index = 0; index < input_num; ++index) { // convert const input to attr if (reg_exist && @@ -386,22 +505,61 @@ py::object RunOpWithBackendPolicy(MsBackendPolicy backend_policy, const OpExecIn return result; } -py::tuple RunOp(const py::args &args) { - py::object result; - // returns a null py::tuple on error - py::tuple err_ret(0); - PynativeStatusCode status = PYNATIVE_UNKNOWN_STATE; +AnfNodePtr PynativeExecutor::MakeCNode(const OpExecInfoPtr &op_exec_info, const py::args &args, const py::tuple &out) { + if (!grad_flag_ || graph_info_map_.size() == 0) { + return nullptr; + } + std::vector inputs; + auto prim = op_exec_info->py_primitive; + inputs.push_back(NewValueNode(prim)); + py::tuple op_masks = op_exec_info->inputs_mask; + AbstractBasePtrList args_spec_list; + for (size_t i = 0; i < args.size(); i++) { + auto node = GetInput(args[i], op_masks[i]); + args_spec_list.push_back(node->abstract()); + inputs.push_back(node); + } - OpExecInfoPtr op_exec_info = GenerateOpExecInfo(args); - MS_EXCEPTION_IF_NULL(op_exec_info); - if (op_exec_info->abstract != nullptr) { - py::dict output = abstract::ConvertAbstractToPython(op_exec_info->abstract); - if (!output["value"].is_none()) { - py::tuple value_ret(1); - value_ret[0] = output["value"]; - return value_ret; + auto cnode = curr_g_->NewCNode(inputs); + MS_LOG(DEBUG) << "MakeCnode set node " << cnode->DebugString(4); + py::object out_real = out; + if (out.size() == 1) { + MS_LOG(DEBUG) << "MakeCnode out size is one."; + out_real = out[0]; + } + std::string obj_id = GetId(out_real); + if (py::isinstance(out_real)) { + auto value = py::cast(out_real); + if (value.size() > 1) { + for (int i = 0; i < static_cast(value.size()); i++) { + auto value_id = GetId(value[i]); + MS_LOG(DEBUG) << "MakeCnode set node id " << value_id; + set_obj_node_map(curr_g_, value_id, cnode, i); + } } } + MS_LOG(DEBUG) << "MakeCnode set node id " << 
obj_id; + set_obj_node_map(curr_g_, obj_id, cnode); + set_pyobj(curr_g_, obj_id); + return cnode; +} + +AnfNodePtr PynativeExecutor::GetObjNode(const py::object &obj) { + auto &out = graph_info_map_[curr_g_].obj_node_map[GetId(obj)]; + if (out.second.size() == 1 && out.second[0] == -1) { + return out.first; + } + auto node = out.first; + MS_LOG(DEBUG) << "output size " << out.second.size() << node->DebugString(); + for (auto &idx : out.second) { + std::vector tuple_get_item_inputs{NewValueNode(prim::kPrimTupleGetItem), node, NewValueNode(idx)}; + node = curr_g_->NewCNode(tuple_get_item_inputs); + } + MS_LOG(DEBUG) << "GetObjNode output" << node->DebugString(6); + return node; +} + +py::tuple RunOp(const OpExecInfoPtr &op_exec_info, const py::args &args) { MS_LOG(INFO) << "RunOp start, op name is: " << op_exec_info->op_name; mindspore::parse::python_adapter::set_python_env_flag(true); MsBackendPolicy backend_policy; @@ -422,16 +580,442 @@ py::tuple RunOp(const py::args &args) { if (vm_operators.find(op_exec_info->op_name) != vm_operators.end()) { backend_policy = kMsBackendVmOnly; } - result = RunOpWithBackendPolicy(backend_policy, op_exec_info, &status); + PynativeStatusCode status = PYNATIVE_UNKNOWN_STATE; + // returns a null py::tuple on error + py::tuple err_ret(0); + py::object result = RunOpWithBackendPolicy(backend_policy, op_exec_info, &status); if (status != PYNATIVE_SUCCESS) { MS_LOG(ERROR) << "Failed to run " << op_exec_info->op_name; return err_ret; } - MS_LOG(INFO) << "RunOp end"; + auto node = PynativeExecutor::GetInstance()->MakeCNode(op_exec_info, args, result); + if (node != nullptr) { + node->set_abstract(op_exec_info->abstract); + MS_LOG(DEBUG) << "RunOp MakeCnode,new node is: " << node->DebugString(); + } + MS_LOG(DEBUG) << "RunOp end"; return result; } +py::tuple RunOp(const py::args &args) { + MS_LOG(DEBUG) << "RunOp start" << args.size(); + py::list args_input = args[PY_INPUTS]; + + OpExecInfoPtr op_exec_info = GenerateOpExecInfo(args, 
&args_input); + MS_EXCEPTION_IF_NULL(op_exec_info); + + if (op_exec_info->abstract != nullptr) { + py::dict output = abstract::ConvertAbstractToPython(op_exec_info->abstract); + if (!output["value"].is_none()) { + py::tuple value_ret(1); + value_ret[0] = output["value"]; + return value_ret; + } + if (py::hasattr(op_exec_info->py_primitive->GetPyObj(), "const_value")) { + py::tuple value_ret(1); + value_ret[0] = ""; + return value_ret; + } + } + return RunOp(op_exec_info, args_input); +} + void ClearPyNativeSession() { session = nullptr; } + +PynativeExecutor::~PynativeExecutor() { ClearRes(); } + +PynativeExecutor::PynativeExecutor() { grad_flag_ = false; } + +void PynativeExecutor::NewGraph(const py::object &cell, const py::args &args) { + auto cell_id = GetId(cell); + if (cell_graph_map_.count(cell_id) != 0) { + MS_LOG(DEBUG) << "Newgraph already compiled"; + return; + } + + auto g = std::make_shared(); + + if (top_g_ == nullptr) { + top_g_ = curr_g_ = g; + df_builder_ = std::make_shared(); + MS_LOG(DEBUG) << "First new graph" << top_g_.get(); + Pushp(); + } else { + Pushp(); + curr_g_ = g; + } + if (graph_info_map_.count(g) == 0) { + graph_info_map_[g] = GraphInfo(); + } + for (size_t i = 0; i < args.size(); i++) { + auto new_param = g->add_parameter(); + std::string param_obj = GetId(args[i]); + graph_info_map_[g].param_map[param_obj] = new_param; + } +} + +AnfNodePtr PynativeExecutor::MakeValueNode(const py::object &obj, const std::string &obj_id) { + ValuePtr converted_ret = nullptr; + parse::ConvertData(obj, &converted_ret); + auto node = NewValueNode(converted_ret); + set_obj_node_map(curr_g_, obj_id, node); + return node; +} + +AnfNodePtr PynativeExecutor::GetInput(const py::object &obj, const py::object &op_mask) { + AnfNodePtr node = nullptr; + std::string obj_id = GetId(obj); + + if (op_mask != nullptr && py::cast(op_mask)) { + MS_LOG(DEBUG) << "Topgraph free parameter"; + // get the parameter name from parameter object + auto name_attr = 
mindspore::parse::python_adapter::GetPyObjAttr(obj, "name"); + if (py::isinstance(name_attr)) { + MS_LOG(EXCEPTION) << "Parameter object should have name attribute"; + } + std::string param_name = py::cast(name_attr); + if (graph_info_map_[df_builder_].param_map.count(obj_id) == 0) { + auto free_param = df_builder_->add_parameter(); + free_param->set_name(param_name); + auto free_param_new = std::make_shared(obj); + free_param->set_default_param(free_param_new); + free_param->debug_info()->set_name(param_name); + MS_LOG(DEBUG) << "Top graph set free parameter " << obj_id; + graph_info_map_[df_builder_].param_map[obj_id] = free_param; + return free_param; + } + return graph_info_map_[df_builder_].param_map[obj_id]; + } + + // if input is graph output + if (graph_info_map_[curr_g_].param_map.count(obj_id) != 0) { + // op(x, y) + node = graph_info_map_[curr_g_].param_map[obj_id]; + } else if (graph_info_map_[curr_g_].obj_node_map.count(obj_id) != 0) { + // out = op(op1(x, y)) + // out = op(cell1(x, y)) + // out = op(cell1(x, y)[0]) + node = GetObjNode(obj); + } else if (py::isinstance(obj)) { + // out = op((x, y)) + // out = cell((x, y)) + auto tuple = obj.cast(); + + // cell((1,2)): support not mix (scalar, tensor) + if (tuple.size() > 0 && !py::isinstance(tuple[0])) { + return MakeValueNode(obj, obj_id); + } + + std::vector args; + args.push_back(NewValueNode(prim::kPrimMakeTuple)); + + auto tuple_size = static_cast(tuple.size()); + for (int i = 0; i < tuple_size; i++) { + args.push_back(GetInput(tuple[i], py::object())); + } + auto cnode = curr_g_->NewCNode(args); + set_obj_node_map(curr_g_, GetId(obj), cnode); + node = cnode; + } else { + node = MakeValueNode(obj, obj_id); + } + + MS_LOG(DEBUG) << "Now getinput node " << node->ToString() << obj_id; + return node; +} + +// for output[0][1] need getitem multi +void PynativeExecutor::SetTupleOutput(const py::object &obj, const AnfNodePtr &cnode, std::vector idx) { + if (py::isinstance(obj)) { + auto tuple = 
obj.cast(); + for (int i = 0; i < static_cast(tuple.size()); i++) { + std::vector tmp = idx; + tmp.push_back(i); + set_obj_node_map(curr_g_, GetId(tuple[i]), cnode, tmp); + SetTupleOutput(tuple[i], cnode, tmp); + } + } +} + +void PynativeExecutor::Pushp() { graph_p_.push(curr_g_); } + +void PynativeExecutor::Popp() { + if (graph_p_.empty()) { + MS_LOG(EXCEPTION) << "Stack graph_p_ is empty"; + } + curr_g_ = graph_p_.top(); + graph_p_.pop(); +} + +void PynativeExecutor::EndGraph(const py::object &cell, const py::object &out, const py::args &args) { + auto cell_id = GetId(cell); + if (cell_graph_map_.count(cell_id) != 0) { + MS_LOG(DEBUG) << "Endgraph already compiled"; + return; + } + cell_graph_map_[cell_id] = curr_g_; + auto out_id = GetId(out); + if (!graph_info_map_[curr_g_].obj_node_map.count(out_id) && !graph_info_map_[curr_g_].param_map.count(out_id)) { + // cell construct return x, y + if (py::isinstance(out)) { + std::vector args; + args.push_back(NewValueNode(prim::kPrimMakeTuple)); + + auto tuple = out.cast(); + MS_LOG(DEBUG) << "End graph start tuple size" << tuple.size(); + auto tuple_size = static_cast(tuple.size()); + auto cnode = curr_g_->NewCNode(args); + for (int i = 0; i < tuple_size; i++) { + args.push_back(GetInput(tuple[i], py::object())); + set_obj_node_map(curr_g_, GetId(tuple[i]), cnode, i); + SetTupleOutput(tuple[i], cnode, std::vector{i}); + } + cnode->set_inputs(args); + set_obj_node_map(curr_g_, out_id, cnode); + } else { + MS_LOG(ERROR) << "Graph has no this out: " << out_id; + return; + } + } + EndGraphByOutId(out_id, cell, out, args); +} + +void PynativeExecutor::EndGraphByOutId(const std::string &out_id, const py::object &cell, const py::object &out, + const py::args &args) { + AnfNodePtr output_node; + if (graph_info_map_[curr_g_].param_map.count(out_id)) { + output_node = graph_info_map_[curr_g_].param_map[out_id]; + } else { + output_node = GetObjNode(out); + } + curr_g_->set_output(output_node); + std::vector inputs; + 
inputs.push_back(NewValueNode(curr_g_)); + MS_LOG(DEBUG) << "Current graph" << curr_g_->output()->DebugString(); + resource_->manager()->AddFuncGraph(curr_g_); + // custom bprop debug + if (py::hasattr(cell, parse::CUSTOM_BPROP_NAME)) { + MS_LOG(DEBUG) << "Use cell custom bprop function."; + FuncGraphPtr bprop_graph = parse::ConvertToBpropCut(cell); + if (bprop_graph != nullptr) { + (void)curr_g_->transforms().insert(std::make_pair(parse::CUSTOM_BPROP_NAME, FuncGraphTransform(bprop_graph))); + (void)bprop_graph->transforms().insert(std::make_pair("primal", FuncGraphTransform(curr_g_))); + } + } + auto newfg = ad::Grad(curr_g_, resource_, curr_g_ == top_g_); + if (curr_g_ != top_g_) { + Popp(); + for (size_t i = 0; i < args.size(); i++) { + auto input = GetInput(args[i], py::object()); + inputs.push_back(input); + } + auto out_cnode = curr_g_->NewCNode(inputs); + set_pyobj(curr_g_, GetId(cell)); + if (py::isinstance(out)) { + auto out_list = py::cast(out); + auto out_size = static_cast(out_list.size()); + for (int i = 0; i < out_size; i++) { + set_obj_node_map(curr_g_, GetId(out_list[i]), out_cnode, i); + SetTupleOutput(out_list[i], out_cnode, std::vector{i}); + } + } + set_obj_node_map(curr_g_, GetId(out), out_cnode); + } else { + parse::ResolveFuncGraph(newfg, resource_); + resource_->set_func_graph(newfg); + } +} + +std::vector PynativeExecutor::GetWeightsArgs(const py::object &weights) { + std::vector w_args; + if (py::hasattr(weights, "__parameter_tuple__")) { + auto tuple = weights.cast(); + MS_LOG(DEBUG) << "GradNet start weights tuple size" << tuple.size(); + w_args.push_back(NewValueNode(prim::kPrimMakeTuple)); + for (size_t it = 0; it < tuple.size(); ++it) { + auto param = tuple[it]; + auto param_id = GetId(param); + AnfNodePtr para_node = nullptr; + if (graph_info_map_[df_builder_].param_map.count(param_id)) { + para_node = graph_info_map_[df_builder_].param_map[param_id]; + + AnfNodePtr value = parse::GetMixedPrecisionCastHelp(df_builder_, para_node); + 
AnfNodePtr make_ref = NewValueNode(prim::kPrimMakeRef); + auto refkey = std::make_shared(para_node->cast()->name()); + AnfNodePtr ref_key_node = NewValueNode(refkey); + AnfNodePtr ref_node = df_builder_->NewCNode({make_ref, ref_key_node, value, para_node}); + + w_args.push_back(ref_node); + } + } + } else { + MS_LOG(EXCEPTION) << "training not paramter_tuple"; + } + return w_args; +} + +abstract::AbstractBasePtrList PynativeExecutor::GetArgsSpec(const py::args &args) { + abstract::AbstractBasePtrList args_spec; + std::size_t size = args.size(); + for (std::size_t i = 0; i < size; i++) { + ValuePtr converted = nullptr; + bool succ = parse::ConvertData(args[i], &converted); + if (!succ) { + MS_LOG(EXCEPTION) << "Args convert error"; + } + bool broaden = true; + auto abs = abstract::FromValue(converted, broaden); + args_spec.push_back(abs); + auto param_node = std::static_pointer_cast(df_builder_->parameters()[i]); + param_node->set_abstract(abs); + } + + for (const auto ¶m : df_builder_->parameters()) { + auto param_node = std::static_pointer_cast(param); + if (param_node->has_default()) { + auto param_value = std::dynamic_pointer_cast(param_node->default_param()); + AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true); + if (ptr == nullptr) { + MS_LOG(EXCEPTION) << "Args convert error"; + } + args_spec.push_back(ptr); + param_node->set_abstract(ptr); + } + } + + return args_spec; +} + +void PynativeExecutor::GradNet(const GradOperationPtr &grad, const py::object &cell, const py::object &weights, + const py::args &args) { + MS_LOG(INFO) << "GradNet start" << args.size(); + + std::size_t size = args.size(); + auto cell_id = GetId(cell); + if (graph_map_.count(cell_id) != 0) { + MS_LOG(DEBUG) << "GradNet already compiled"; + return; + } + MS_LOG(DEBUG) << "GradNet first compiled"; + std::vector new_params; + for (size_t i = 0; i < size; i++) { + ParameterPtr p = std::make_shared(df_builder_); + 
new_params.push_back(p); + } + MS_LOG(DEBUG) << "GradNet start weight size" << df_builder_->parameters().size(); + new_params.insert(new_params.end(), df_builder_->parameters().begin(), df_builder_->parameters().end()); + df_builder_->set_parameters(new_params); + resource_->manager()->SetParameters(df_builder_, new_params); + + std::vector w_args = GetWeightsArgs(weights); + MS_EXCEPTION_IF_NULL(resource_->func_graph()); + auto g = GradGraph(resource_->func_graph(), grad, w_args, size); + resource_->set_func_graph(g); + resource_->manager()->KeepRoots({g}); + + // get the parameters items and add the value to args_spec + abstract::AbstractBasePtrList args_spec = GetArgsSpec(args); + MS_LOG(DEBUG) << "Args_spec size" << args_spec.size(); + + resource_->set_args_spec(args_spec); + MS_LOG(DEBUG) << "Start opt"; + + // Create backend and session + resource_->results()[pipeline::kBackend] = compile::CreateBackend(); + + graph_map_[cell_id] = g; + PynativeOptimizeAction(resource_); + TaskEmitAction(resource_); + ExecuteAction(resource_); + resource_->Clean(); + ad::CleanRes(); + pipeline::ReclaimOptimizer(); +} + +void PynativeExecutor::Clear(const std::string &flag) { + if (flag == "resource") { + MS_LOG(INFO) << "Clear res"; + Clean(); + // Maybe exit in the pynative runing op, so need reset pynative flag. 
+ auto ms_context = MsContext::GetInstance(); + if (ms_context != nullptr) { + ms_context->set_enable_pynative_infer(false); + } + return; + } + MS_LOG(INFO) << "Clear"; + top_g_ = nullptr; + curr_g_ = nullptr; + graph_info_map_.clear(); + std::stack().swap(graph_p_); +} + +void PynativeExecutor::Clean() { + MS_LOG(INFO) << "Clean all res"; + Clear(); + grad_flag_ = false; + df_builder_ = nullptr; + ad::CleanRes(); + pipeline::ReclaimOptimizer(); +} + +void PynativeExecutor::ClearRes() { + Clean(); + resource_.reset(); +} + +py::object PynativeExecutor::Run(const py::tuple &args, const py::object &phase) { + VectorRef arg_list; + pipeline::ProcessVmArgInner(args, resource_, &arg_list); + if (resource_->results().find(pipeline::kOutput) == resource_->results().end() || + !resource_->results()[pipeline::kOutput].is()) { + MS_LOG(EXCEPTION) << "Can't find run graph func for "; + } + compile::VmEvalFuncPtr run = resource_->results()[pipeline::kOutput].cast(); + if (run == nullptr) { + MS_LOG(EXCEPTION) << "Can't find run graph func for "; + } + + std::string backend = MsContext::GetInstance()->backend_policy(); + + MS_LOG(DEBUG) << "Eval run" << backend; + BaseRef value = (*run)(arg_list); + MS_LOG(DEBUG) << "Run end" << value.ToString(); + return BaseRefToPyData(value); +} + +FuncGraphPtr PynativeExecutor::GradGraph(FuncGraphPtr g, const GradOperationPtr &grad_op, + const std::vector &weights, size_t arg_size) { + auto nparam = top_g_->parameters().size(); + std::ostringstream ss; + ss << "grad{" << nparam << "}"; + df_builder_->set_flag(FUNC_GRAPH_FLAG_CORE, true); + df_builder_->debug_info()->set_name(ss.str()); + + auto df = grad_op->GetGrad(NewValueNode(g), nullptr, top_g_->parameters(), weights); + std::vector inputs = {NewValueNode(df)}; + for (size_t i = 0; i < arg_size; ++i) { + inputs.push_back(df_builder_->parameters()[i]); + } + auto out = df_builder_->NewCNode(inputs); + df_builder_->set_output(out); + resource_->manager()->AddFuncGraph(df); + 
resource_->manager()->AddFuncGraph(df_builder_); + return df_builder_; +} + +REGISTER_PYBIND_DEFINE(PynativeExecutor_, ([](const py::module *m) { + (void)py::class_>(*m, "PynativeExecutor_") + .def_static("get_instance", &PynativeExecutor::GetInstance, "PynativeExecutor get_instance.") + .def("new_graph", &PynativeExecutor::NewGraph, "pynative new a graph.") + .def("end_graph", &PynativeExecutor::EndGraph, "pynative end a graph.") + .def("grad_net", &PynativeExecutor::GradNet, "pynative grad graph.") + .def("clear", &PynativeExecutor::Clear, "pynative clear status.") + .def("__call__", &PynativeExecutor::Run, py::arg("args"), py::arg("phase") = py::str(""), + "Executor run function.") + .def("set_grad_flag", &PynativeExecutor::set_grad_flag, py::arg("flag") = py::bool_(false), + "Executor set grad flag."); + })); } // namespace pynative } // namespace mindspore diff --git a/mindspore/ccsrc/pynative/pynative_execute.h b/mindspore/ccsrc/pynative/pynative_execute.h index 65be3b2ab2..310cf0cb1e 100644 --- a/mindspore/ccsrc/pynative/pynative_execute.h +++ b/mindspore/ccsrc/pynative/pynative_execute.h @@ -22,23 +22,103 @@ #include #include #include +#include +#include #include "pybind11/pybind11.h" #include "pynative/base.h" #include "utils/context/ms_context.h" +#include "ir/anf.h" +#include "pipeline/resource.h" +#include "operator/composite/composite.h" namespace mindspore { namespace pynative { namespace py = pybind11; +using ResourcePtr = std::shared_ptr; +using GradOperationPtr = std::shared_ptr; py::object RunOpInVM(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *status); py::tuple RunOp(const py::args &args); +py::tuple ConvertInputs(const PrimitivePyPtr &prim, const py::list &py_args, py::tuple *const out_args, + py::list *out_args_list); + void ClearPyNativeSession(); +struct GraphInfo { + std::unordered_map param_map; + std::unordered_map>> obj_node_map; + AnfNodePtr output; + std::vector objects; +}; + +class PynativeExecutor : public 
std::enable_shared_from_this { + public: + static std::shared_ptr GetInstance() { + std::lock_guard i_lock(instance_lock_); + if (executor_ == nullptr) { + executor_ = std::shared_ptr(new (std::nothrow) PynativeExecutor()); + resource_ = std::make_shared(); + } + return executor_; + } + void NewGraph(const py::object &cell, const py::args &args); + void EndGraph(const py::object &cell, const py::object &out, const py::args &args); + void EndGraphByOutId(const std::string &out_id, const py::object &cell, const py::object &out, const py::args &args); + std::vector GetWeightsArgs(const py::object &weights); + abstract::AbstractBasePtrList GetArgsSpec(const py::args &args); + void GradNet(const GradOperationPtr &grad, const py::object &cell, const py::object &weights, const py::args &args); + void Clear(const std::string &flag = ""); + void Clean(); + void ClearRes(); + bool grad_flag() { return grad_flag_; } + void set_grad_flag(bool flag) { grad_flag_ = flag; } + AnfNodePtr GetInput(const py::object &obj, const py::object &op_mask); + AnfNodePtr GetObjNode(const py::object &obj); + FuncGraphPtr curr_g() { return curr_g_; } + void set_pyobj(FuncGraphPtr g, const std::string obj) { graph_info_map_[g].objects.push_back(obj); } + void set_obj_node_map(FuncGraphPtr g, const std::string obj, AnfNodePtr node) { + graph_info_map_[g].obj_node_map[obj] = std::make_pair(node, std::vector{-1}); + } + void set_obj_node_map(FuncGraphPtr g, const std::string obj, AnfNodePtr node, int index) { + graph_info_map_[g].obj_node_map[obj] = std::make_pair(node, std::vector{index}); + } + void set_obj_node_map(FuncGraphPtr g, const std::string obj, AnfNodePtr node, std::vector index) { + graph_info_map_[g].obj_node_map[obj] = std::make_pair(node, index); + } + AnfNodePtr MakeCNode(const OpExecInfoPtr &op_exec_info, const py::args &args, const py::tuple &out); + py::object Run(const py::tuple &args, const py::object &phase); + + void Pushp(); + void Popp(); + FuncGraphPtr 
GradGraph(FuncGraphPtr g, const GradOperationPtr &grad_op, const std::vector &weights, + size_t arg_size); + void SetTupleOutput(const py::object &obj, const AnfNodePtr &cnode, std::vector idx); + AnfNodePtr MakeValueNode(const py::object &obj, const std::string &obj_id); + + ~PynativeExecutor(); + + private: + PynativeExecutor(); + static std::shared_ptr executor_; + static std::mutex instance_lock_; + static ResourcePtr resource_; + bool grad_flag_; + std::unordered_map graph_map_; + std::unordered_map cell_graph_map_; + std::unordered_map graph_info_map_; + std::stack graph_p_; + FuncGraphPtr top_g_; + FuncGraphPtr df_builder_; + FuncGraphPtr curr_g_; +}; + +using PynativeExecutorPtr = std::shared_ptr; + } // namespace pynative } // namespace mindspore diff --git a/mindspore/ccsrc/session/CMakeLists.txt b/mindspore/ccsrc/session/CMakeLists.txt index 2824af8a5d..782eb51183 100644 --- a/mindspore/ccsrc/session/CMakeLists.txt +++ b/mindspore/ccsrc/session/CMakeLists.txt @@ -23,6 +23,7 @@ if (ENABLE_D) file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "ascend_session.cc" "ascend_control_parser.cc" + "ascend_inference_session.cc" ) list(APPEND _SESSION_SRC_LIST ${_D_SRC_LIST}) endif () diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/session/anf_runtime_algorithm.cc index 6cc68457e5..5db7dbc324 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/session/anf_runtime_algorithm.cc @@ -178,12 +178,29 @@ bool AnfRuntimeAlgorithm::CheckPrimitiveType(const AnfNodePtr &node, const Primi return IsPrimitive(cnode->input(kAnfPrimitiveIndex), primitive_type); } +FuncGraphPtr AnfRuntimeAlgorithm::GetCNodeFuncGraphPtr(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto attr_input = cnode->input(kAnfPrimitiveIndex); + MS_EXCEPTION_IF_NULL(attr_input); + auto value_node = attr_input->cast(); + MS_EXCEPTION_IF_NULL(value_node); + auto 
value = value_node->value(); + MS_EXCEPTION_IF_NULL(value); + return value->cast(); +} + std::string AnfRuntimeAlgorithm::GetCNodeName(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); if (node->isa()) { auto primitive = AnfAlgo::GetCNodePrimitive(node); - MS_EXCEPTION_IF_NULL(primitive); - return primitive->name(); + if (primitive != nullptr) { + return primitive->name(); + } + auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(func_graph); + return func_graph->ToString(); } MS_LOG(EXCEPTION) << "Unknown anf node type " << node->DebugString(); } @@ -198,9 +215,16 @@ void AnfRuntimeAlgorithm::SetNodeAttr(const std::string &key, const ValuePtr &va if (!node->isa()) { MS_LOG(EXCEPTION) << "Only cnode has attr, but this anf is " << node->DebugString(); } + // single op cnode. auto primitive = AnfAlgo::GetCNodePrimitive(node); - MS_EXCEPTION_IF_NULL(primitive); - primitive->set_attr(key, value); + if (primitive != nullptr) { + primitive->set_attr(key, value); + return; + } + // graph kernel cnode. + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(fg); + fg->set_attr(key, value); } void AnfRuntimeAlgorithm::CopyNodeAttr(const std::string &key, const AnfNodePtr &from, const AnfNodePtr &to) { @@ -241,16 +265,33 @@ void AnfRuntimeAlgorithm::EraseNodeAttr(const std::string &key, const AnfNodePtr if (!node->isa()) { MS_LOG(EXCEPTION) << "Only cnode has attr, but this anf is " << node->DebugString(); } + // single op cnode. auto primitive = AnfAlgo::GetCNodePrimitive(node); - MS_EXCEPTION_IF_NULL(primitive); - primitive->EraseAttr(key); + if (primitive != nullptr) { + primitive->EraseAttr(key); + return; + } + // graph kernel cnode. 
+ auto fg = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(fg); + fg->erase_flag(key); } bool AnfRuntimeAlgorithm::HasNodeAttr(const std::string &key, const CNodePtr &node) { MS_EXCEPTION_IF_NULL(node); + if (!node->isa()) { + MS_LOG(WARNING) << "Only cnode has attr, but this anf is " << node->DebugString(); + return false; + } + // single op cnode. auto primitive = AnfAlgo::GetCNodePrimitive(node); - MS_EXCEPTION_IF_NULL(primitive); - return primitive->HasAttr(key); + if (primitive != nullptr) { + return primitive->HasAttr(key); + } + // graph kernel cnode. + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(fg); + return fg->has_flag(key); } size_t AnfRuntimeAlgorithm::GetInputTensorNum(const AnfNodePtr &node) { @@ -544,9 +585,10 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputDeviceDataType(const AnfNodePtr &an } // get output device addr of anf_node -const DeviceAddress *AnfRuntimeAlgorithm::GetOutputAddr(const AnfNodePtr &node, size_t output_idx) { +const DeviceAddress *AnfRuntimeAlgorithm::GetOutputAddr(const AnfNodePtr &node, size_t output_idx, + bool visit_nop_node) { MS_EXCEPTION_IF_NULL(node); - if (opt::IsNopNode(node)) { + if (opt::IsNopNode(node) && visit_nop_node) { auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); if (cnode->inputs().size() == 2) { @@ -565,9 +607,10 @@ const DeviceAddress *AnfRuntimeAlgorithm::GetOutputAddr(const AnfNodePtr &node, return addr; } -DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableOutputAddr(const AnfNodePtr &node, size_t output_idx) { +DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableOutputAddr(const AnfNodePtr &node, size_t output_idx, + bool visit_nop_node) { MS_EXCEPTION_IF_NULL(node); - if (opt::IsNopNode(node)) { + if (opt::IsNopNode(node) && visit_nop_node) { auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); if (cnode->inputs().size() == 2) { @@ -598,14 +641,16 @@ bool AnfRuntimeAlgorithm::OutputAddrExist(const AnfNodePtr &node, size_t output_ return 
kernel_info->OutputAddrExist(output_idx); } -const DeviceAddress *AnfRuntimeAlgorithm::GetPrevNodeOutputAddr(const AnfNodePtr &anf_node, size_t input_idx) { +const DeviceAddress *AnfRuntimeAlgorithm::GetPrevNodeOutputAddr(const AnfNodePtr &anf_node, size_t input_idx, + bool visit_nop_node) { KernelWithIndex kernel_with_index = AnfAlgo::GetPrevNodeOutput(anf_node, input_idx); - return AnfRuntimeAlgorithm::GetOutputAddr(kernel_with_index.first, kernel_with_index.second); + return AnfRuntimeAlgorithm::GetOutputAddr(kernel_with_index.first, kernel_with_index.second, visit_nop_node); } -DeviceAddressPtr AnfRuntimeAlgorithm::GetPrevNodeMutableOutputAddr(const AnfNodePtr &anf_node, size_t input_idx) { +DeviceAddressPtr AnfRuntimeAlgorithm::GetPrevNodeMutableOutputAddr(const AnfNodePtr &anf_node, size_t input_idx, + bool visit_nop_node) { KernelWithIndex kernel_with_index = AnfAlgo::GetPrevNodeOutput(anf_node, input_idx); - return AnfRuntimeAlgorithm::GetMutableOutputAddr(kernel_with_index.first, kernel_with_index.second); + return AnfRuntimeAlgorithm::GetMutableOutputAddr(kernel_with_index.first, kernel_with_index.second, visit_nop_node); } // set output device addr of anf_node @@ -778,6 +823,26 @@ bool AnfRuntimeAlgorithm::IsRealCNodeKernel(const AnfNodePtr &node) { return IsRealKernel(node); } +bool AnfRuntimeAlgorithm::IsGraphKernel(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + // graph kernel should be a real cnode kernel. + if (!IsRealCNodeKernel(node)) { + return false; + } + + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto input = cnode->input(kAnfPrimitiveIndex); + // graph kernel should has func_graph as first input. 
+ if (!IsValueNode(input)) { + return false; + } + + auto func_graph = GetValueNode(input); + MS_EXCEPTION_IF_NULL(func_graph); + return func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); +} + bool AnfRuntimeAlgorithm::IsParameterWeight(const ParameterPtr &node) { MS_EXCEPTION_IF_NULL(node); return node->has_default(); @@ -976,5 +1041,78 @@ bool AnfRuntimeAlgorithm::IsSwitchCall(const CNodePtr &call_node) { } MS_LOG(EXCEPTION) << "Unexpected input1 of call node,input1:" << input1->DebugString(); } + +bool AnfRuntimeAlgorithm::IsScalarInput(const CNodePtr &cnode, size_t index) { + auto shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, index); + if (shape.empty()) { + return true; + } + return shape.size() == kShape1dDims && shape[0] == 1; +} + +bool AnfRuntimeAlgorithm::IsScalarOutput(const CNodePtr &cnode, size_t index) { + auto shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, index); + if (shape.empty()) { + return true; + } + return shape.size() == kShape1dDims && shape[0] == 1; +} + +void AnfRuntimeAlgorithm::ReorderExecList(NotNull *> node_list) { + std::vector all_opt_list; + std::vector non_opt_list; + + for (const auto &node : *node_list) { + MS_EXCEPTION_IF_NULL(node); + if (kOptOperatorSet.find(AnfAlgo::GetCNodeName(node)) != kOptOperatorSet.end()) { + all_opt_list.emplace_back(node); + } else { + non_opt_list.emplace_back(node); + } + } + node_list->clear(); + std::copy(non_opt_list.begin(), non_opt_list.end(), std::back_inserter(*node_list)); + std::copy(all_opt_list.begin(), all_opt_list.end(), std::back_inserter(*node_list)); +} + +TypeId AnfRuntimeAlgorithm::GetCNodeOutputPrecision(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + auto prim = AnfAlgo::GetCNodePrimitive(node); + if (prim == nullptr) { + return kTypeUnknown; + } + + TypeId except_type = kTypeUnknown; + if (prim->GetAttr(kAttrOutputPrecision) != nullptr) { + auto output_type_str = GetValue(prim->GetAttr(kAttrOutputPrecision)); + if (output_type_str == "float16") { + 
except_type = kNumberTypeFloat16; + } else if (output_type_str == "float32") { + except_type = kNumberTypeFloat32; + } else { + MS_LOG(EXCEPTION) << "The fix precision must be float16 or float32, but got " << output_type_str; + } + } + + return except_type; +} + +TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx) { + if (!node->isa()) { + MS_LOG(EXCEPTION) << node->DebugString() << ", input node is not CNode."; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (input_idx + 1 >= cnode->inputs().size()) { + MS_LOG(EXCEPTION) << "Input index " << input_idx << " is larger than input number " << GetInputTensorNum(cnode); + } + auto input_node = cnode->input(input_idx + 1); + MS_EXCEPTION_IF_NULL(input_node); + auto kernel_with_index = VisitKernel(input_node, 0); + if (!kernel_with_index.first->isa()) { + return kTypeUnknown; + } + return GetCNodeOutputPrecision(kernel_with_index.first); +} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.h b/mindspore/ccsrc/session/anf_runtime_algorithm.h index 10ae5282e0..c46f0b5955 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.h +++ b/mindspore/ccsrc/session/anf_runtime_algorithm.h @@ -54,6 +54,8 @@ class AnfRuntimeAlgorithm { static PrimitivePtr GetCNodePrimitive(const AnfNodePtr &node); // check whether anf node is a node of 'primitive_type',such as make_tuple is a cnode of kPrimMakeTuple static bool CheckPrimitiveType(const AnfNodePtr &node, const PrimitivePtr &primitive_type); + // get cnode primitive + static FuncGraphPtr GetCNodeFuncGraphPtr(const AnfNodePtr &node); // get kernel_name of anf node static std::string GetCNodeName(const AnfNodePtr &node); // get detail info of anf node @@ -121,14 +123,16 @@ class AnfRuntimeAlgorithm { // get output select data type from prev node,input_index is the input index of current node related to prev node static TypeId GetPrevNodeOutputDeviceDataType(const 
AnfNodePtr &node, size_t input_idx); // get output device addr of anf_node - static const DeviceAddress *GetOutputAddr(const AnfNodePtr &node, size_t output_idx); + static const DeviceAddress *GetOutputAddr(const AnfNodePtr &node, size_t output_idx, bool visit_nop_node = true); // get mutable output device addr of anf_node - static DeviceAddressPtr GetMutableOutputAddr(const AnfNodePtr &node, size_t output_idx); + static DeviceAddressPtr GetMutableOutputAddr(const AnfNodePtr &node, size_t output_idx, bool visit_nop_node = true); // check whether output addr is exist or not static bool OutputAddrExist(const AnfNodePtr &node, size_t output_idx); // get address from prev node,input_index is the input index of current node related to prev node - static const DeviceAddress *GetPrevNodeOutputAddr(const AnfNodePtr &node, size_t input_idx); - static DeviceAddressPtr GetPrevNodeMutableOutputAddr(const AnfNodePtr &anf_node, size_t input_idx); + static const DeviceAddress *GetPrevNodeOutputAddr(const AnfNodePtr &node, size_t input_idx, + bool visit_nop_node = true); + static DeviceAddressPtr GetPrevNodeMutableOutputAddr(const AnfNodePtr &anf_node, size_t input_idx, + bool visit_nop_node = true); // set output device addr of anf_node static void SetOutputAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node); // set workspace device addr of anf_node @@ -159,6 +163,8 @@ class AnfRuntimeAlgorithm { static bool IsRealKernel(const AnfNodePtr &node); // checkout whether the anf node is a real kernel that is a cnode and can run on device static bool IsRealCNodeKernel(const AnfNodePtr &node); + // checkout whether the anf node is a graph kernel. 
+ static bool IsGraphKernel(const AnfNodePtr &node); // check parameter is weight or data static bool IsParameterWeight(const ParameterPtr &node); // set stream id of kernel,which will be set in stream assign and be used in stream generate @@ -185,6 +191,14 @@ class AnfRuntimeAlgorithm { static FuncGraphPtr GetValueNodeFuncGraph(const AnfNodePtr &node); static std::vector GetCallNodeKernelGraph(const CNodePtr &call_node); static bool IsSwitchCall(const CNodePtr &call_node); + static bool IsScalarInput(const CNodePtr &cnode, size_t index); + static bool IsScalarOutput(const CNodePtr &cnode, size_t index); + static void ReorderExecList(NotNull *> node_list); + static bool IsWhileTrueGraph(const KernelGraphPtr &child_graph); + // get fix output precision of cnode. + static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node); + // get fix output precision from prev node, input_idx is the input index of current node related to prev node. + static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx); }; } // namespace session using AnfAlgo = session::AnfRuntimeAlgorithm; diff --git a/mindspore/ccsrc/session/ascend_control_parser.cc b/mindspore/ccsrc/session/ascend_control_parser.cc index 2853caa732..868b968d9e 100644 --- a/mindspore/ccsrc/session/ascend_control_parser.cc +++ b/mindspore/ccsrc/session/ascend_control_parser.cc @@ -18,6 +18,7 @@ #include #include "session/ascend_control_parser.h" #include "session/anf_runtime_algorithm.h" +#include "utils/union_find_set.h" static constexpr size_t kCNodePrim = 0; static constexpr size_t kCNodeCallArg = 1; @@ -32,35 +33,126 @@ static constexpr size_t kCNodeSwitchLayerLength = 3; namespace mindspore { namespace session { +static void InitUnionFindSet(NotNull kg, const NotNull *> union_find_set, + const NotNull *> memo) { + if (memo->find(kg.get()) != memo->end()) { + return; + } + memo->insert(kg.get()); -void AscendControlParser::ChildGraphDataAssign(const std::map &graph_id_map) { - for (auto &iter 
: graph_id_map) { - auto &kg = iter.second; - MS_EXCEPTION_IF_NULL(kg); - auto real_inputs = kg->real_inputs(); - for (auto &it : real_inputs) { - auto ¶meter = it.first; - auto &args = it.second; - for (auto &arg : args) { - MS_EXCEPTION_IF_NULL(arg); - if (arg->isa()) { - MS_LOG(INFO) << "Parameter should be reused, no need insert assign, parameter: " << parameter->DebugString() - << ", arg:" << arg->DebugString(); - continue; - } - auto target_graph_iter = graph_id_map.find(AnfAlgo::GetGraphId(arg.get())); - if (target_graph_iter == graph_id_map.end()) { - MS_LOG(EXCEPTION) << "Graph id " << AnfAlgo::GetGraphId(arg.get()) << " not found."; - } - InsertAssignToGraph(NOT_NULL(target_graph_iter->second), NOT_NULL(arg), NOT_NULL(parameter)); + const std::vector>> &real_inputs = kg->real_inputs(); + for (auto &iter : real_inputs) { + auto ¶ = iter.first; + MS_EXCEPTION_IF_NULL(para); + if (para->isa()) { + union_find_set->Add(para); + } + for (auto &arg : iter.second) { + MS_EXCEPTION_IF_NULL(arg); + if (!arg->isa()) { + continue; } + union_find_set->Add(arg); } } + for (auto &child : kg->child_graph_order()) { + InitUnionFindSet(NOT_NULL(child), union_find_set, memo); + } +} + +static void UnionParentParameter(NotNull kg, const NotNull *> union_find_set, + const NotNull *> memo) { + if (memo->find(kg.get()) != memo->end()) { + return; + } + memo->insert(kg.get()); + + const std::vector>> &real_inputs = kg->real_inputs(); + for (auto &iter : real_inputs) { + auto ¶ = iter.first; + for (auto &arg : iter.second) { + MS_EXCEPTION_IF_NULL(arg); + if (!arg->isa()) { + continue; + } + union_find_set->Union(arg, para); + } + } + for (auto &child : kg->child_graph_order()) { + UnionParentParameter(NOT_NULL(child), union_find_set, memo); + } +} + +static UnionFindSet MakeUnionFindSet(NotNull root_kg) { + UnionFindSet result; + std::set memo; + InitUnionFindSet(root_kg, NOT_NULL(&result), NOT_NULL(&memo)); + memo.clear(); + UnionParentParameter(root_kg, NOT_NULL(&result), 
NOT_NULL(&memo)); + return result; +} + +static void RecursiveReplaceNode(NotNull kg, NotNull main_parameter, + const std::set ¶meter_reuse_set, + const NotNull *> memo) { + if (parameter_reuse_set.empty()) { + MS_LOG(EXCEPTION) << "parameter_reuse_set is empty."; + } + if (memo->find(kg.get()) != memo->end()) { + return; + } + memo->insert(kg.get()); + + for (auto ¶ : parameter_reuse_set) { + if (para == main_parameter.get()) { + continue; + } + MS_EXCEPTION_IF_NULL(para); + MS_LOG(INFO) << "Replace " << para->DebugString() << " of graph " << AnfAlgo::GetGraphId(para.get()) << " to " + << main_parameter->DebugString() << " of graph " << AnfAlgo::GetGraphId(main_parameter.get().get()); + kg->ReplaceNode(NOT_NULL(para), main_parameter); + } + + for (auto &child : kg->child_graph_order()) { + RecursiveReplaceNode(NOT_NULL(child), main_parameter, parameter_reuse_set, memo); + } +} + +static void ReuseParameter(NotNull root_kg, NotNull *> parameter_set) { + auto parameter_reuse_sets = parameter_set->GetSets(); + for (auto &[key, parameter_reuse_set] : parameter_reuse_sets) { + if (parameter_reuse_set.size() <= 1) { + continue; + } + + AnfNodePtr main_parameter = key; + std::set root_inputs_set; + const auto &root_inputs_vector = root_kg->inputs(); + root_inputs_set.insert(root_inputs_vector.begin(), root_inputs_vector.end()); + for (auto &node : parameter_reuse_set) { + if (root_inputs_set.find(node) != root_inputs_set.end()) { + main_parameter = node; + break; + } + } + + std::set memo; + RecursiveReplaceNode(root_kg, NOT_NULL(main_parameter), parameter_reuse_set, NOT_NULL(&memo)); + } +} + +CNodePtr GetNextRealKernel(const std::vector &list, size_t start) { + for (size_t i = start; i < list.size() - 1; ++i) { + if (!IsPrimitiveCNode(list[i], prim::kPrimPartial) && AnfAlgo::IsRealKernel(list[i])) { + return list[i]; + } + } + return nullptr; } void AscendControlParser::LinkGraph(NotNull kg) { std::set memo; - ProcessKernelGraph(kg, nullptr, nullptr, NOT_NULL(&memo)); 
+ (void)ProcessKernelGraph(kg, nullptr, nullptr, NOT_NULL(&memo)); std::map graph_id_map; for (auto &g : memo) { if (graph_id_map.find(g->graph_id()) != graph_id_map.end()) { @@ -69,16 +161,49 @@ void AscendControlParser::LinkGraph(NotNull kg) { } graph_id_map[g->graph_id()] = g; } + // Make UnionFindSet + UnionFindSet parameter_set = MakeUnionFindSet(kg); + // Reuse Parameter + ReuseParameter(kg, NOT_NULL(¶meter_set)); + // Insert Assign ChildGraphDataAssign(graph_id_map); } -CNodePtr AscendControlParser::GetNextRealKernel(const std::vector &list, size_t start) { - for (size_t i = start; i < list.size() - 1; ++i) { - if (!IsPrimitiveCNode(list[i], prim::kPrimPartial) && AnfAlgo::IsRealKernel(list[i])) { - return list[i]; +void AscendControlParser::ExecutorValidate(NotNull root_graph) { + std::set memo; + (void)RecurseGraph(root_graph, NOT_NULL(&memo)); +} + +void AscendControlParser::ChildGraphDataAssign(const std::map &graph_id_map) { + for (auto &iter : graph_id_map) { + auto &kg = iter.second; + MS_EXCEPTION_IF_NULL(kg); + std::set> memo; + const std::vector>> &real_inputs = kg->real_inputs(); + for (auto &it : real_inputs) { + auto ¶meter = it.first; + auto &args = it.second; + for (auto &arg : args) { + MS_EXCEPTION_IF_NULL(arg); + if (memo.find({parameter, arg}) != memo.end()) { + continue; + } else { + memo.emplace(parameter, arg); + } + if (arg->isa()) { + MS_EXCEPTION_IF_NULL(parameter); + MS_LOG(DEBUG) << "Parameter should be reused, no need insert assign, parameter: " << parameter->DebugString() + << ", arg:" << arg->DebugString(); + continue; + } + auto target_graph_iter = graph_id_map.find(AnfAlgo::GetGraphId(arg.get())); + if (target_graph_iter == graph_id_map.end()) { + MS_LOG(EXCEPTION) << "Graph id " << AnfAlgo::GetGraphId(arg.get()) << " not found."; + } + InsertMultipleAssignToGraph(NOT_NULL(target_graph_iter->second), NOT_NULL(arg), NOT_NULL(parameter)); + } } } - return nullptr; } NotNull AscendControlParser::ProcessKernelGraph(NotNull kg, 
const CNodePtr &last_node, @@ -99,25 +224,29 @@ NotNull AscendControlParser::ProcessKernelGraph(NotNullSetExecOrderByDefault(); const std::vector &nodes = kg->execution_order(); - if (nodes.empty()) { - MS_LOG(EXCEPTION) << "KernelGraph " << kg->ToString() << " has no cnodes!"; - } // 4. insert first_label - auto start_label = kg->NewCNode({std::make_shared(std::make_shared(kLabelSetOpName))}); - MS_LOG(INFO) << "Insert start label " << start_label->DebugString() << " to " << kg->ToString(); - kg->set_start_label(start_label); + CNodePtr start_label; + if (last_node != nullptr && last_label != nullptr) { + start_label = kg->NewCNode({std::make_shared(std::make_shared(kLabelSetOpName))}); + MS_LOG(INFO) << "Insert start label " << start_label->DebugString() << " to " << kg->ToString(); + kg->set_start_label(start_label); + } else { + // no goto node will jump to start label of root graph, so return a fake label + start_label = std::make_shared(std::vector(), FuncGraphPtr(nullptr)); + } + // 5. 
traverse for (size_t i = 0; i < nodes.size(); ++i) { auto &cnode = nodes[i]; if (cnode->size() < kCNodePrim + 1) { MS_LOG(EXCEPTION) << "Inputs of apply node is empty"; } - AnfNodePtr fn = cnode->input(kCNodePrim); + AnfNodePtr fn = cnode->input(kAnfPrimitiveIndex); if (!IsPrimitive(fn, prim::kPrimCall) || cnode->size() < kCNodeCallArg + 1) { MS_LOG(DEBUG) << "continue node " << cnode->DebugString(); continue; } - AnfNodePtr arg = cnode->input(kCNodeCallArg); + AnfNodePtr arg = cnode->input(kFirstDataInputIndex); if (IsValueNode(arg)) { RecurseCall(kg, NOT_NULL(cnode), GetNextRealKernel(nodes, i + 1), memo); } else if (!arg->isa()) { @@ -140,11 +269,10 @@ NotNull AscendControlParser::ProcessKernelGraph(NotNull kg, NotNull attch_node) { - std::vector inputs = {NewValueNode(std::make_shared("depend"))}; auto return_node = kg->get_return(); MS_EXCEPTION_IF_NULL(return_node); - inputs.push_back(return_node->input(1)); - inputs.push_back(attch_node.get()); + std::vector inputs = {NewValueNode(std::make_shared(prim::kPrimDepend->name())), + return_node->input(kFirstDataInputIndex), attch_node.get()}; auto depend_node = kg->NewCNode(inputs); return_node->set_input(1, depend_node); } @@ -161,17 +289,8 @@ void AscendControlParser::InsertControlDependToGraph(NotNull kg, void AscendControlParser::LinkParentGraph(NotNull kg, const CNodePtr &from_graph_call_node, const CNodePtr &last_label) { - auto origin_return = kg->get_return(); - const std::vector &origin_return_inputs = origin_return->inputs(); - // if entry graph, replace return with make_tuple - if (from_graph_call_node == nullptr || last_label == nullptr) { - MS_LOG(INFO) << kg->ToString() << " is entry graph."; - std::vector make_tuple_inputs = {std::make_shared(prim::kPrimMakeTuple)}; - make_tuple_inputs.insert(make_tuple_inputs.end(), origin_return_inputs.begin() + 1, origin_return_inputs.end()); - auto make_tuple = kg->NewCNode(make_tuple_inputs); - origin_return->set_inputs({origin_return->input(kCNodePrim), 
make_tuple}); - } else { - // else replace return with label_goto + // if not entry graph, replace return with label_goto + if (from_graph_call_node != nullptr && last_label != nullptr) { auto label_goto = kg->NewCNode({std::make_shared(std::make_shared(kLabelGotoOpName)), last_label}); MS_LOG(INFO) << "Insert end goto " << label_goto->DebugString() << " to " << kg->ToString(); @@ -181,10 +300,13 @@ void AscendControlParser::LinkParentGraph(NotNull kg, const CNod void AscendControlParser::RecurseCall(NotNull kg, NotNull cur_node, const CNodePtr &next_node, const NotNull *> memo) { - MS_LOG(INFO) << "process call func " << cur_node->DebugString(); + MS_LOG(INFO) << "Process call func " << cur_node->DebugString(); // 1 get kernel graph const std::vector &origin_inputs = cur_node->inputs(); + if (kCNodeCallArg >= origin_inputs.size()) { + MS_LOG(EXCEPTION) << "Index out of range,size:" << origin_inputs.size(); + } std::vector new_inputs = {std::make_shared(std::make_shared(kLabelGotoOpName))}; if (!IsValueNode(origin_inputs[kCNodeCallArg])) { MS_LOG(WARNING) << "Node " << cur_node->DebugString(10) << " index " << kCNodeCallArg << " is not a ValueNode"; @@ -208,12 +330,12 @@ void AscendControlParser::RecurseCall(NotNull kg, NotNullset_inputs(new_inputs); cur_node->set_abstract(nullptr); - MS_LOG(INFO) << "success process call func " << cur_node->DebugString(); + MS_LOG(INFO) << "Succeed processing call func " << cur_node->DebugString(); } void AscendControlParser::RecurseSwitch(NotNull kg, NotNull cur_node, const CNodePtr &next_node, const NotNull *> memo) { - MS_LOG(INFO) << "process switch node " << cur_node->DebugString(); + MS_LOG(INFO) << "Process switch node " << cur_node->DebugString(); if (cur_node->size() < kCNodeSwitchLength) { MS_LOG(EXCEPTION) << "Inputs of apply node must more than " << kCNodeSwitchLength; @@ -245,13 +367,13 @@ void AscendControlParser::RecurseSwitch(NotNull kg, NotNullset_inputs(new_switch_inputs); cur_node->set_abstract(nullptr); - 
MS_LOG(INFO) << "success process switch func " << cur_node->DebugString(); + MS_LOG(INFO) << "Succeed processing switch func " << cur_node->DebugString(); } void AscendControlParser::RecurseSwitchLayer(NotNull kg, NotNull cur_node, const CNodePtr &next_node, const NotNull *> memo) { - MS_LOG(INFO) << "process switch node " << cur_node->DebugString(); + MS_LOG(INFO) << "Process switch node " << cur_node->DebugString(); if (cur_node->size() < kCNodeSwitchLayerLength) { MS_LOG(EXCEPTION) << "Inputs of apply node must more than " << kCNodeSwitchLayerLength; @@ -272,6 +394,9 @@ void AscendControlParser::RecurseSwitchLayer(NotNull kg, NotNull } // 3 recurse sub graph const std::vector &origin_switch_inputs = cur_node->inputs(); + if (kCNodeSwitchCond >= origin_switch_inputs.size()) { + MS_LOG(EXCEPTION) << "Index out of range:" << origin_switch_inputs.size() << "."; + } std::vector new_switch_inputs = { std::make_shared(std::make_shared(kLabelSwitchOpName)), origin_switch_inputs[kCNodeSwitchCond]}; @@ -286,7 +411,7 @@ void AscendControlParser::RecurseSwitchLayer(NotNull kg, NotNull new_switch_inputs.insert(new_switch_inputs.end(), branch_partial.begin(), branch_partial.end()); cur_node->set_inputs(new_switch_inputs); cur_node->set_abstract(nullptr); - MS_LOG(INFO) << "success process switch layer " << cur_node->DebugString(); + MS_LOG(INFO) << "Succeed processing switch layer " << cur_node->DebugString(); } std::tuple AscendControlParser::ParsePartial(NotNull node) { @@ -295,15 +420,33 @@ std::tuple AscendControlParser::ParsePartial(NotNull(node.get()); + MS_EXCEPTION_IF_NULL(partial_cnode); if (partial_cnode->size() < kCNodePartialLength) { MS_LOG(EXCEPTION) << "Inputs of partial node must more than " << kCNodePartialLength; } - auto partial_inputs = partial_cnode->inputs(); - auto branch_kg = GetValueNode(partial_inputs[kCNodePartialFunc]); + const auto &partial_inputs = partial_cnode->inputs(); + if (kCNodePartialFunc >= partial_inputs.size()) { + MS_LOG(EXCEPTION) << 
"Index out of range:" << partial_inputs.size() << "."; + } + auto branch_kg = GetValueNode(partial_inputs[kCNodePartialFunc]); return {partial_cnode, branch_kg}; } +void AscendControlParser::InsertMultipleAssignToGraph(NotNull kg, NotNull from, + NotNull to) { + std::vector from_outputs = AnfAlgo::GetAllOutput(from, {prim::kPrimTupleGetItem}); + std::vector to_outputs = AnfAlgo::GetAllOutput(to, {prim::kPrimTupleGetItem}); + MS_LOG(INFO) << "Insert multi-assign from [" << from->DebugString() << "] to [" << to->DebugString() << "]"; + if (from_outputs.size() != to_outputs.size()) { + MS_LOG(EXCEPTION) << "From outputs size[" << from_outputs.size() << "] is not equal to to outputs size[" + << to_outputs.size() << "]"; + } + for (size_t i = 0; i < from_outputs.size(); i++) { + InsertAssignToGraph(kg, NOT_NULL(from_outputs[i]), NOT_NULL(to_outputs[i])); + } +} + void AscendControlParser::InsertAssignToGraph(NotNull kg, NotNull from, NotNull to) { if (AnfAlgo::OutputAddrExist(from, 0) && AnfAlgo::OutputAddrExist(to, 0) && @@ -316,7 +459,7 @@ void AscendControlParser::InsertAssignToGraph(NotNull kg, NotNul MS_LOG(INFO) << "Insert assign to graph " << kg->ToString() << " from " << from->DebugString() << " to " << to->DebugString(); // config inputs of assign node - std::vector inputs = {NewValueNode(std::make_shared("Assign")), to, from}; + std::vector inputs = {NewValueNode(std::make_shared(prim::kPrimAssign->name())), to, from}; // generate a new cnode auto assign_node = kg->NewCNode(inputs); MS_EXCEPTION_IF_NULL(assign_node); @@ -325,49 +468,24 @@ void AscendControlParser::InsertAssignToGraph(NotNull kg, NotNul InsertDependToGraph(kg, NOT_NULL(assign_node)); } -void AscendControlParser::LinkArgsToParam(NotNull to_graph, NotNull target_graph, - NotNull arg, NotNull param) { - if (IsPrimitiveCNode(arg, prim::kPrimMakeTuple) && IsPrimitiveCNode(param, prim::kPrimMakeTuple)) { - MS_LOG(INFO) << "Arg " << arg->DebugString() << " Param " << param->DebugString() << " is a 
tuple"; - CNodePtr cnode_arg = arg.get()->cast(); - CNodePtr cnode_param = param.get()->cast(); - MS_EXCEPTION_IF_NULL(cnode_arg); - MS_EXCEPTION_IF_NULL(cnode_param); - if (cnode_arg->size() != cnode_param->size()) { - MS_LOG(EXCEPTION) << "Arg " << arg->DebugString() << " size " << cnode_arg->size() << " but Param " - << param->DebugString() << " size " << cnode_param->size(); - } - - for (size_t i = 1; i < cnode_param->size(); ++i) { - LinkArgsToParam(to_graph, target_graph, NOT_NULL(cnode_arg->input(i)), NOT_NULL(cnode_param->input(i))); - } - } else if (arg->isa()) { - InsertAssignToGraph(target_graph, arg, param); - } else { - MS_LOG(EXCEPTION) << "Arg " << arg->DebugString() << " Param " << param->DebugString() << " unknown type."; - } -} - -void AscendControlParser::ExecutorValidate(NotNull root_graph) { - std::set memo; - (void)RecurseGraph(root_graph, NOT_NULL(&memo)); -} - std::vector AscendControlParser::RecurseGraph(NotNull graph, const NotNull *> memo) { - MS_LOG(INFO) << "graph:" << graph->graph_id() << " start"; - auto print_vector = [&](std::vector vec) -> void { - MS_LOG(INFO) << "graph:" << graph->graph_id() << "execution order"; - for (size_t i = 0; i < vec.size(); i++) { - MS_LOG(INFO) << "[" << i << "][" << vec[i]->DebugString() << "]"; - } - }; + MS_LOG(INFO) << "Graph:" << graph->graph_id() << " start"; if (memo->find(graph) != memo->end()) { return {}; } memo->insert(graph.get()); graph->SetExecOrderByDefault(); - const std::vector &cnodes = graph->execution_order(); + std::vector cnodes = graph->execution_order(); + + auto end_label_goto = graph->get_end_goto(); + if (cnodes.rbegin() != cnodes.rend() && *cnodes.rbegin() == end_label_goto) { + cnodes.pop_back(); + } + AnfAlgo::ReorderExecList(NOT_NULL(&cnodes)); + if (end_label_goto != nullptr) { + cnodes.push_back(end_label_goto); + } std::vector execution_order; uint32_t child_order_index = 0; @@ -377,45 +495,34 @@ std::vector AscendControlParser::RecurseGraph(NotNull if (node == 
graph->get_end_goto()) { continue; } - if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimLabelGoto)) { - if (!CheckLabelIndex(child_order_index, 0, node, graph)) { - MS_LOG(EXCEPTION) << "Check label index fail"; - } - auto child_graph = graph->child_graph_order()[child_order_index++]; - if (child_graph == graph->parent_graph()) { - continue; - } - auto child_execution_order = RecurseGraph(NOT_NULL(child_graph), memo); - execution_order.insert(execution_order.end(), child_execution_order.begin(), child_execution_order.end()); - } else if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimLabelSwitch)) { - std::vector label_switch_list = GetLabelSwitchList(node); + if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimLabelSwitch)) { + std::vector label_switch_list = AnfAlgo::GetNodeAttr>(node, kAttrLabelSwitchList); for (auto iter = label_switch_list.rbegin(); iter != label_switch_list.rend(); ++iter) { if (!CheckLabelIndex(child_order_index, *iter, node, graph)) { MS_LOG(EXCEPTION) << "Check label index fail"; } - auto child_graph = graph->child_graph_order()[child_order_index++]; - if (child_graph == graph->parent_graph()) { - continue; + if (child_order_index >= graph->child_graph_order().size()) { + MS_LOG(EXCEPTION) << "Index out of range:" << graph->child_graph_order().size(); } + auto child_graph = graph->child_graph_order()[child_order_index++]; auto child_execution_order = RecurseGraph(NOT_NULL(child_graph), memo); execution_order.insert(execution_order.end(), child_execution_order.begin(), child_execution_order.end()); } + } else if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimLabelGoto)) { + uint32_t label_index = AnfAlgo::GetNodeAttr(node, kAttrLabelIndex); + if (!CheckLabelIndex(child_order_index, label_index, node, graph)) { + MS_LOG(EXCEPTION) << "Check label index fail"; + } + auto child_graph = graph->child_graph_order()[child_order_index++]; + auto child_execution_order = RecurseGraph(NOT_NULL(child_graph), memo); + 
execution_order.insert(execution_order.end(), child_execution_order.begin(), child_execution_order.end()); } } graph->set_execution_order(execution_order); - print_vector(graph->execution_order()); + graph->PrintGraphExecuteOrder(); return execution_order; } -std::vector AscendControlParser::GetLabelSwitchList(const CNodePtr &node) { - if (!AnfAlgo::HasNodeAttr(kAttrLabelSwitchList, node)) { - MS_LOG(EXCEPTION) << "LabelSwitchKernel has no attr label_switch_list"; - } - auto primitive = AnfAlgo::GetCNodePrimitive(node); - MS_EXCEPTION_IF_NULL(primitive); - return GetValue>(primitive->GetAttr(kAttrLabelSwitchList)); -} - bool AscendControlParser::CheckLabelIndex(uint32_t order_index, uint32_t label_index, const CNodePtr &cur_label, NotNull graph) { const std::vector> &child_graph_order = graph->child_graph_order(); @@ -424,37 +531,23 @@ bool AscendControlParser::CheckLabelIndex(uint32_t order_index, uint32_t label_i MS_LOG(EXCEPTION) << "Child graph order is wrong, graph " << graph->ToString() << " child graph size " << child_graph_order.size() << " goto index " << order_index; } - - if (AnfAlgo::CheckPrimitiveType(cur_label, prim::kPrimLabelGoto)) { - // check label_goto and start_label in child graph - if (!AnfAlgo::HasNodeAttr(kAttrLabelIndex, cur_label)) { - MS_LOG(EXCEPTION) << "LabelSetKernel has no attr label_index"; - } - auto primitive = AnfAlgo::GetCNodePrimitive(cur_label); - MS_EXCEPTION_IF_NULL(primitive); - uint32_t label_goto_index = GetValue(primitive->GetAttr(kAttrLabelIndex)); - label_index = label_goto_index; - } - // get start_label_set_index of child graph auto child_graph = child_graph_order[order_index]; MS_EXCEPTION_IF_NULL(child_graph); + + // get start_label_set_index of child graph auto start_label_set = child_graph->get_start_label(); - if (!AnfAlgo::HasNodeAttr(kAttrLabelIndex, start_label_set)) { - MS_LOG(EXCEPTION) << "LabelSetKernel has no attr label_index"; - } - auto start_primitive = AnfAlgo::GetCNodePrimitive(start_label_set); - 
MS_EXCEPTION_IF_NULL(start_primitive); - uint32_t start_label_set_index = GetValue(start_primitive->GetAttr(kAttrLabelIndex)); + uint32_t start_label_set_index = AnfAlgo::GetNodeAttr(start_label_set, kAttrLabelIndex); if (label_index != start_label_set_index) { MS_LOG(WARNING) << cur_label->DebugString() << " index " << label_index << " but " << start_label_set->DebugString() << " index " << start_label_set_index << " current child graph order : " << order_index; return false; + } else { + return true; } - return true; } void AscendControlParser::UpdateChildGraphOrder(NotNull kg) { - MS_LOG(INFO) << "graph id:" << kg->graph_id(); + MS_LOG(INFO) << "Graph id:" << kg->graph_id(); kg->SetExecOrderByDefault(); auto call_nodes = kg->FindNodeByPrimitive(std::make_shared(prim::kPrimCall->name())); std::vector child_graph_order; @@ -474,6 +567,5 @@ void AscendControlParser::UpdateChildGraphOrder(NotNull kg) { } kg->set_child_graph_order(child_graph_order); } - } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/ascend_control_parser.h b/mindspore/ccsrc/session/ascend_control_parser.h index bb1aee76af..73d68449b3 100644 --- a/mindspore/ccsrc/session/ascend_control_parser.h +++ b/mindspore/ccsrc/session/ascend_control_parser.h @@ -26,7 +26,6 @@ namespace mindspore { namespace session { - class AscendControlParser { public: static void ChildGraphDataAssign(const std::map &graph_id_map); @@ -53,15 +52,10 @@ class AscendControlParser { const CNodePtr &last_label); static std::tuple ParsePartial(NotNull node); - static void LinkArgsToParam(NotNull to_graph, NotNull target_graph, - NotNull arg, NotNull param); - + static void InsertMultipleAssignToGraph(NotNull kg, NotNull from, NotNull to); static void InsertAssignToGraph(NotNull kg, NotNull from, NotNull to); - static CNodePtr GetNextRealKernel(const std::vector &list, size_t start); - // root graph order - static std::vector GetLabelSwitchList(const CNodePtr &node); static bool 
CheckLabelIndex(uint32_t order_index, uint32_t label_index, const CNodePtr &cnode, NotNull graph); static std::vector RecurseGraph(NotNull graph, diff --git a/mindspore/ccsrc/session/ascend_inference_session.cc b/mindspore/ccsrc/session/ascend_inference_session.cc new file mode 100644 index 0000000000..ff53874502 --- /dev/null +++ b/mindspore/ccsrc/session/ascend_inference_session.cc @@ -0,0 +1,90 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "session/ascend_inference_session.h" +#include "operator/ops.h" +#include "ir/tensor.h" +#include "ir/anf.h" +#include "ir/param_value_py.h" +#include "device/kernel_runtime.h" +#include "session/anf_runtime_algorithm.h" +#include "common/utils.h" +#include "common/trans.h" +#include "kernel/tbe/tbe_python_funcs.h" +#include "utils/config_manager.h" +#include "utils/base_ref_extends.h" + +namespace mindspore { +namespace session { +void AscendInferenceSession::LoadInputData(const std::shared_ptr &kernel_graph, + const std::vector &inputs_const) const { + MS_EXCEPTION_IF_NULL(kernel_graph); + std::vector inputs(inputs_const); + auto input_nodes = kernel_graph->inputs(); + + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + size_t no_weight_input = 0; + for (size_t i = 0; i < input_nodes.size(); ++i) { + tensor::TensorPtr tensor = nullptr; + if (!input_nodes[i]->isa()) { + MS_LOG(ERROR) << "Kernel graph inputs have anfnode which is not Parameter"; + continue; + } + auto pk_node = input_nodes[i]->cast(); + MS_EXCEPTION_IF_NULL(pk_node); + if (AnfAlgo::IsParameterWeight(pk_node)) { + auto param_value = std::dynamic_pointer_cast(pk_node->default_param()); + MS_EXCEPTION_IF_NULL(param_value); + auto py_param = param_value->value(); + MS_EXCEPTION_IF_NULL(py_param); + py::array py_array = py_param.cast(); + tensor = std::make_shared(py_array); + } else { + tensor = inputs[no_weight_input++]; + } + MS_EXCEPTION_IF_NULL(tensor); + if (AnfAlgo::OutputAddrExist(pk_node, 0)) { + auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); + bool need_sync = false; + if (ms_context->enable_pynative_infer()) { + if (tensor->device_address().get() == nullptr || tensor->device_address() != device_address) { + need_sync = true; + } + } else { + if (tensor->is_dirty()) { + need_sync = true; + } else if (tensor->device_address() != device_address) { + (void)tensor->data_sync(); + need_sync = true; + } + } + if (need_sync) { + if 
(ms_context->execution_mode() == kPynativeMode || AnfAlgo::IsParameterWeight(pk_node)) { + tensor->set_device_address(device_address); + } + MS_EXCEPTION_IF_NULL(device_address); + if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), + LongToSize(tensor->data().nbytes()), tensor->data_type(), + tensor->data_c(false))) { + MS_LOG(EXCEPTION) << "SyncHostToDevice failed."; + } + } + } + tensor->set_dirty(false); + } +} +} // namespace session +} // namespace mindspore diff --git a/mindspore/ccsrc/session/ascend_inference_session.h b/mindspore/ccsrc/session/ascend_inference_session.h new file mode 100644 index 0000000000..53be881f93 --- /dev/null +++ b/mindspore/ccsrc/session/ascend_inference_session.h @@ -0,0 +1,45 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_SESSION_ASCEND_INFERENCE_SESSION_H +#define MINDSPORE_CCSRC_SESSION_ASCEND_INFERENCE_SESSION_H +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "session/ascend_session.h" +#include "session/kernel_graph.h" +#include "kernel/kernel.h" +#include "session/session_factory.h" +#include "session/ascend_control_parser.h" + +namespace mindspore { +namespace session { +class AscendInferenceSession : public AscendSession { + public: + AscendInferenceSession() = default; + ~AscendInferenceSession() = default; + void LoadInputData(const std::shared_ptr &kernel_graph, + const std::vector &inputs_const) const; +}; +MS_REG_SESSION(kDavinciInferenceDevice, AscendInferenceSession); +} // namespace session +} // namespace mindspore +#endif // MINDSPORE_CCSRC_SESSION_ASCEND_INFERENCE_SESSION_H diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc index f1b15b27ab..bae10ed943 100644 --- a/mindspore/ccsrc/session/ascend_session.cc +++ b/mindspore/ccsrc/session/ascend_session.cc @@ -29,6 +29,7 @@ #include "device/ascend/ascend_kernel_runtime.h" #include "device/ascend/ascend_device_address.h" #include "pre_activate/ascend/ascend_backend_optimization.h" +#include "pre_activate/common/common_backend_optimization.h" #include "device/kernel_adjust.h" #include "device/ascend/ascend_stream_assign.h" #include "device/ascend/ascend_label_assign.h" @@ -37,6 +38,7 @@ #include "ir/scalar.h" #include "debug/anf_ir_dump.h" #include "debug/anf_ir_utils.h" +#include "debug/draw.h" #include "common/utils.h" #include "pre_activate/common/helper.h" #include "device/kernel_runtime_manager.h" @@ -48,7 +50,7 @@ namespace mindspore { namespace session { const size_t kInvalidIndex = SIZE_MAX; namespace { -void DumpGraphExeOrder(const std::vector &execution_order) { +void DumpGraphExeOrder(const std::vector &execution_order, const std::string &tag = "") { MS_LOG(INFO) << "Dump 
execution_order size " << execution_order.size(); MS_LOG(INFO) << "[index][stream_label][graph_id][node string]"; int i = 0; @@ -60,6 +62,24 @@ void DumpGraphExeOrder(const std::vector &execution_order) { << "[" << cnode->DebugString() << "]"; i++; } + + std::stringstream buf; + buf << "================== execution order ==================\n"; + if (!tag.empty()) { + buf << tag << "\n"; + } + buf << "execution_order size: " << execution_order.size() << "\n"; + i = 0; + for (auto &cnode : execution_order) { + MS_EXCEPTION_IF_NULL(cnode); + buf << i << ":\n"; + buf << "\t" << cnode->DebugString() << "\n"; + buf << "\t" << AnfAlgo::GetStreamDistinctionLabel(cnode.get()) << "\n"; + buf << "\t" << AnfAlgo::GetGraphId(cnode.get()) << "\n"; + i++; + } + buf << "================== execution order ==================\n"; + // std::cout << buf.str() << std::endl; } void DumpGraphInputArgs(const VectorRef &args) { @@ -104,6 +124,7 @@ std::vector GetRealArgs(const KernelGraphPtr graph, const VectorRef &ar if (abstract->isa() && !AnfAlgo::CheckPrimitiveType(anf_node, prim::kPrimTupleGetItem)) { auto tuple_abstract = abstract->cast(); + MS_EXCEPTION_IF_NULL(tuple_abstract); real_args_size += tuple_abstract->size(); continue; } @@ -131,34 +152,6 @@ std::vector GetRealArgs(const KernelGraphPtr graph, const VectorRef &ar return real_args; } -void ClearRunOpMemoryResource(const KernelGraphPtr &kernel_graph) { - MS_EXCEPTION_IF_NULL(kernel_graph); - // clear input parameter memory resource - for (const auto &input_node : kernel_graph->inputs()) { - MS_EXCEPTION_IF_NULL(input_node); - AnfAlgo::SetOutputAddr(nullptr, 0, input_node.get()); - } - // clear input value node memory resource - for (const auto &value_node : kernel_graph->graph_value_nodes()) { - MS_EXCEPTION_IF_NULL(value_node); - AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get()); - } - for (const auto &cnode : kernel_graph->execution_order()) { - MS_EXCEPTION_IF_NULL(cnode); - // clear output memory resource - for (size_t 
index = 0; index < AnfAlgo::GetOutputTensorNum(cnode); ++index) { - AnfAlgo::SetOutputAddr(nullptr, index, cnode.get()); - } - // clear workspace memory resource - auto kernel_mod = AnfAlgo::GetKernelMod(cnode); - MS_EXCEPTION_IF_NULL(kernel_mod); - auto workspace_lists = kernel_mod->GetWorkspaceSizeList(); - for (size_t index = 0; index < workspace_lists.size(); ++index) { - AnfAlgo::SetWorkspaceAddr(nullptr, index, cnode.get()); - } - } -} - std::vector GetCNodes(const std::vector &anf_nodes) { std::vector cnodes = {}; size_t i = 0; @@ -206,39 +199,32 @@ static std::vector> GetChildList(const std::vector ¶meters, const std::vector &args, + KernelGraph *child_graph) { + MS_EXCEPTION_IF_NULL(child_graph); + MS_LOG(INFO) << "Start bind parameter of child graph:" << child_graph->graph_id(); + if (args.empty()) { + return; + } + if (parameters.size() != args.size()) { + MS_LOG(EXCEPTION) << "Graph:" << child_graph->graph_id() << " parameters size:" << parameters.size() + << " and args size:" << args.size() << " not equal!"; + } + child_graph->SetExecOrderByDefault(); + for (size_t i = 0; i < parameters.size(); i++) { + if (args[i] == parameters[i]) { + child_graph->SetRealInput(parameters[i], args[i]); + MS_LOG(INFO) << "Parameter and arg are same."; + continue; + } + child_graph->SetRealInput(parameters[i], args[i]); + } +} + // if a call has kernel input, it's a child graph split from ME, so these kernel input should be set into real input of // graph.For example, call input = (prim,graph,kernel1,kernel2),then real_input = [kernel1,kernel2] static void UpdateRealInput(NotNull graph) { auto call_nodes = graph->FindNodeByPrimitive(prim::kPrimCall); - auto bind_call_arg_with_parameter = [&](const std::vector ¶meters, - const std::vector &args, KernelGraph *child_graph) -> void { - MS_EXCEPTION_IF_NULL(child_graph); - MS_LOG(INFO) << "start bind parameter of child graph:" << child_graph->graph_id(); - if (args.empty()) { - return; - } - if (parameters.size() != 
args.size()) { - MS_LOG(EXCEPTION) << "graph:" << child_graph->graph_id() << " parameters size:" << parameters.size() - << " and args size:" << args.size() << " not equal!"; - } - child_graph->SetExecOrderByDefault(); - for (size_t i = 0; i < parameters.size(); i++) { - if (args[i] == parameters[i]) { - child_graph->SetRealInput(parameters[i], args[i]); - MS_LOG(INFO) << "Parameter and arg are same"; - continue; - } - // if arg is a parameter ,then reuse this parameter - if (args[i]->isa()) { - MS_LOG(INFO) << "Parameter:" << parameters[i]->DebugString() << " of graph:" << child_graph->graph_id() - << " reuse parameter:" << args[i]->DebugString() - << " of graph:" << AnfAlgo::GetGraphId(args[i].get()); - child_graph->ReplaceNode(parameters[i], args[i]); - continue; - } - child_graph->SetRealInput(parameters[i], args[i]); - } - }; for (auto &call_node : call_nodes) { MS_EXCEPTION_IF_NULL(call_node); auto child_graphs = AnfAlgo::GetCallNodeKernelGraph(call_node); @@ -247,7 +233,7 @@ static void UpdateRealInput(NotNull graph) { std::vector real_args = std::vector(call_node->inputs().begin() + 2, call_node->inputs().end()); std::vector child_inputs = child_graphs[0]->inputs(); - bind_call_arg_with_parameter(child_inputs, real_args, child_graphs[0].get()); + BindCallArgsWithParameter(child_inputs, real_args, child_graphs[0].get()); call_node->set_inputs(std::vector(call_node->inputs().begin(), call_node->inputs().begin() + 2)); } else if (child_graphs.size() == 2) { auto get_partial_args = [&](size_t input_index) -> std::vector { @@ -264,8 +250,8 @@ static void UpdateRealInput(NotNull graph) { std::vector(partial_cnode->inputs().begin(), partial_cnode->inputs().begin() + 2)); return ret; }; - bind_call_arg_with_parameter(child_graphs[0]->inputs(), get_partial_args(2), child_graphs[0].get()); - bind_call_arg_with_parameter(child_graphs[1]->inputs(), get_partial_args(3), child_graphs[1].get()); + BindCallArgsWithParameter(child_graphs[0]->inputs(), get_partial_args(2), 
child_graphs[0].get()); + BindCallArgsWithParameter(child_graphs[1]->inputs(), get_partial_args(3), child_graphs[1].get()); } } } @@ -273,7 +259,7 @@ static void UpdateRealInput(NotNull graph) { static void RecurseToUpdateCallRealInput(NotNull graph, const NotNull *> memo) { memo->insert(graph.get()); - MS_LOG(INFO) << "start graph id:" << graph->graph_id(); + MS_LOG(INFO) << "Start graph id:" << graph->graph_id(); for (auto &child_graph : graph->child_graph_order()) { if (memo->find(child_graph) != memo->end()) { MS_LOG(INFO) << "Child graph:" << child_graph->graph_id() @@ -298,37 +284,57 @@ GraphId AscendSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrL GraphId AscendSession::CompileGraph(NotNull func_graph) { MS_LOG(INFO) << "start"; - auto graph = ConstructKernelGraph(func_graph); + std::vector all_graphs; + auto root_graph = ConstructKernelGraph(func_graph, &all_graphs); + BackendOptimization(all_graphs); // split switch - SplitGraphs(NOT_NULL(graph)); + SplitGraphs(NOT_NULL(root_graph)); // insert goto labels and label_sets - LinkChildGraphs(NOT_NULL(graph)); + LinkChildGraphs(NOT_NULL(root_graph)); // resource initialize InitRuntimeResource(); // assign label - AssignLabel(NOT_NULL(graph)); - // recurse compile child graph + AssignLabel(NOT_NULL(root_graph)); + // recurse compile child root_graph std::set memo; - RecurseCompileGraph(NOT_NULL(graph), NOT_NULL(&memo)); - // root graph valiate,include genearte execute order and so on - RootGraphExecutorValidate(NOT_NULL(graph)); + RecurseCompileGraph(NOT_NULL(root_graph), NOT_NULL(&memo)); + // root root_graph valiate,include genearte execute order and so on + RootGraphExecutorValidate(NOT_NULL(root_graph)); // adjust kernel - AdjustKernel(graph); + AdjustKernel(root_graph); // assign stream - AssignStream(graph); + AssignStream(root_graph); + // insert profiling point + device::KernelAdjust::GetInstance().Profiling(NOT_NULL(root_graph.get())); // build kernel - BuildKernel(graph); + 
BuildKernel(root_graph); // alloc mem - MemoryAlloc(graph.get()); + MemoryAlloc(root_graph.get()); // task generate - GenerateTaskInfo(graph); + GenerateTaskInfo(root_graph); // load task into device - LoadTask(graph); - // return the graph id to backend - auto graph_id = graph->graph_id(); + LoadTask(root_graph); + // return the root_graph id to backend + auto graph_id = root_graph->graph_id(); return graph_id; } +void AscendSession::SetFinalGraphSummaryFlag(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto graph_order = GetGraphOrder(kernel_graph->graph_id()); + for (auto graph_id : graph_order) { + auto child_graph = GetGraph(graph_id); + if (child_graph == nullptr) { + continue; + } + if (child_graph->summary_node_exist()) { + kernel_graph->set_summary_node_exist(true); + return; + } + } + kernel_graph->set_summary_node_exist(false); +} + void AscendSession::BuildGraph(GraphId graph_id) { MS_LOG(INFO) << "start"; auto graph = GetGraph(graph_id); @@ -344,6 +350,7 @@ void AscendSession::BuildGraph(GraphId graph_id) { InsertAllAssigns(); // insert switch and active to child graph MergeSwitchCompile(); + SetFinalGraphSummaryFlag(graph); // OptChildGraphs auto graph_order = GetGraphOrder(final_graph_id_); auto &graph_type = GetGraphOrderType(final_graph_id_); @@ -355,6 +362,7 @@ void AscendSession::BuildGraph(GraphId graph_id) { auto child_graph = GetGraph(graph_order[i]); CompileChildGraph(child_graph); } + GetSummaryNodes(graph.get()); // merge child graph MergeGraphExecOrder(); } else { @@ -394,8 +402,28 @@ void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) { MS_EXCEPTION_IF_NULL(child_graph); MS_LOG(INFO) << "CompileChildGraph " << child_graph->ToString(); opt::AscendBackendIRFusionOptimization(child_graph); + opt::AscendBackendFuseBasicOpt(child_graph, true); + opt::AscendBackendGraphKernelOpt(child_graph, true); + child_graph->SetExecOrderByDefault(); + auto context_ptr = MsContext::GetInstance(); + 
MS_EXCEPTION_IF_NULL(context_ptr); + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = + save_graphs_path + "/" + "select_kernel_before" + "_graph_" + std::to_string(child_graph->graph_id()) + ".ir"; + DumpIR(file_path, child_graph); + } // select kernel build info SelectKernel(*child_graph); + if (save_graphs) { + std::string file_path = + save_graphs_path + "/" + "select_kernel_after" + "_graph_" + std::to_string(child_graph->graph_id()) + ".ir"; + DumpIR(file_path, child_graph); + } // convert kernel Graph to model predictmodel::StepConvertGraph(child_graph); // optimize graph @@ -411,7 +439,6 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vectorexecutable()) { @@ -512,7 +539,7 @@ py::tuple AscendSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &gr } py::object tuple_obj = utils::cast(output_tensors).object_; py::tuple tuple_tensors = py::cast(tuple_obj); - ClearRunOpMemoryResource(graph); + RunOpMemoryClear(graph.get()); MS_LOG(INFO) << "Run op " << op_run_info.op_name << " finish!"; return tuple_tensors; } @@ -531,13 +558,17 @@ void AscendSession::SelectKernel(const KernelGraph &kernel_graph) const { } MS_LOG(INFO) << "Select ApplyKernel: " << cnode->DebugString(); } - if (raise_precision_count > 0) { - MS_LOG(WARNING) << "There has " << raise_precision_count - << " node/nodes used raise precision to selected the kernel!"; - } - if (reduce_precision_count > 0) { - MS_LOG(WARNING) << "There has " << reduce_precision_count - << " node/nodes used reduce precision to selected the kernel!"; + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + if (ms_context->execution_mode() == kGraphMode) { + if (raise_precision_count > 0) { + MS_LOG(WARNING) << "There has " << raise_precision_count + << " node/nodes used raise precision to selected 
the kernel!"; + } + if (reduce_precision_count > 0) { + MS_LOG(WARNING) << "There has " << reduce_precision_count + << " node/nodes used reduce precision to selected the kernel!"; + } } MS_LOG(INFO) << "Finish!"; } @@ -553,8 +584,12 @@ void AscendSession::InitRuntimeResource() { } void AscendSession::HardwareOptimize(const std::shared_ptr &kernel_graph) const { + device::ascend::KernelPreBuild(kernel_graph.get()); MS_LOG(INFO) << "HardwareOptimize start!"; opt::AscendBackendOptimization(kernel_graph); + opt::AscendGraphKernelCommonProcess(kernel_graph); + opt::AscendBackendFuseBasicOpt(kernel_graph, false); + opt::AscendBackendAddAtomicClean(kernel_graph); MS_EXCEPTION_IF_NULL(kernel_graph); kernel_graph->SetExecOrderByDefault(); MS_LOG(INFO) << "HardwareOptimize Finish!"; @@ -562,7 +597,6 @@ void AscendSession::HardwareOptimize(const std::shared_ptr &kernel_ void AscendSession::AdjustKernel(const std::shared_ptr &kernel_graph) const { MS_LOG(INFO) << "Start!"; - device::KernelAdjust::GetInstance().Reorder(kernel_graph); opt::HideNopNode(kernel_graph.get()); // Insert CLearZero op // prepare for next step from json get atomic info @@ -595,7 +629,7 @@ void AscendSession::RunOpAdjustKernel(const std::shared_ptr &kernel void AscendSession::AssignStream(const std::shared_ptr &kernel_graph) const { MS_LOG(INFO) << "Start!"; - device::ascend::AscendStreamAssign::GetInstance().AssignStreamNew(kernel_graph); + device::ascend::AscendStreamAssign::GetInstance().AssignStream(kernel_graph); MS_LOG(INFO) << "Finish!"; } @@ -642,6 +676,13 @@ void AscendSession::RunOpMemoryAlloc(const std::vector &input MS_LOG(INFO) << "Finish!"; } +void AscendSession::RunOpMemoryClear(KernelGraph *kernel_graph) const { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); + MS_EXCEPTION_IF_NULL(runtime_instance); + runtime_instance->RunOpClearMemory(kernel_graph); +} + void 
AscendSession::GenerateTaskInfo(const std::shared_ptr &kernel_graph) const { MS_LOG(INFO) << "Start!"; (void)device::KernelAdjust::GetInstance().StepLoadCtrlInputs(kernel_graph); @@ -698,14 +739,15 @@ void AscendSession::ExportChildGraphs(const GraphId graph_id) { save_graphs_path = "."; } if (graph_id == final_graph_id_) { - auto &graph_order = GetGraphOrder(final_graph_id_); - auto &graph_type = GetGraphOrderType(final_graph_id_); + const auto &graph_order = GetGraphOrder(final_graph_id_); + const auto &graph_type = GetGraphOrderType(final_graph_id_); for (size_t i = 0; i < graph_order.size(); i++) { if (graph_type[i] == BRANCH_END || graph_type[i] == BRANCH_START) { continue; } - auto child_graph = GetGraph(graph_order[i]); + const auto child_graph = GetGraph(graph_order[i]); MS_LOG(DEBUG) << "Start export child graph " << graph_order[i]; + MS_EXCEPTION_IF_NULL(child_graph); std::string file_path = save_graphs_path + "/graph_build_" + std::to_string(child_graph->graph_id()) + ".ir"; DumpIR(file_path, child_graph, true); DumpIRProto(child_graph, "vm_build_" + std::to_string(child_graph->graph_id())); @@ -755,29 +797,47 @@ GraphId AscendSession::SetFinalGraphInput(const std::vector &args) { return final_graph_id_; } -void AscendSession::GetSummaryNodes(const KernelGraph *graph, - std::unordered_map> *summary) { - MS_LOG(DEBUG) << "Update summary Start"; +void AscendSession::RecurseGetSummaryNodes(KernelGraph *graph, + std::map> *summary) { MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(summary); - summary->clear(); // if final graph have no child graph auto graph_order_iter = graph_execute_orders_.find(graph->graph_id()); if (graph_order_iter == graph_execute_orders_.end()) { - SessionBasic::GetSummaryNodes(graph, summary); + SessionBasic::GetSummaryNodes(graph); + auto summary_nodes = graph->summary_nodes(); + (*summary).insert(summary_nodes.begin(), summary_nodes.end()); return; } // for every child graph, find summary nodes auto graph_order = 
GetGraphOrder(graph->graph_id()); for (size_t i = 0; i < graph_order.size(); i++) { auto child_graph = GetGraph(graph_order[i]); - SessionBasic::GetSummaryNodes(child_graph.get(), summary); + if (child_graph == nullptr) { + continue; + } + SessionBasic::GetSummaryNodes(child_graph.get()); + auto child_graph_summary = child_graph->summary_nodes(); + (*summary).insert(child_graph_summary.begin(), child_graph_summary.end()); + RecurseGetSummaryNodes(child_graph.get(), summary); } - MS_LOG(DEBUG) << "Update summary end size: " << (*summary).size(); + graph->set_summary_nodes(*summary); +} + +void AscendSession::GetSummaryNodes(KernelGraph *graph) { + MS_LOG(DEBUG) << "Update summary Start"; + MS_EXCEPTION_IF_NULL(graph); + auto summary_nodes = graph->summary_nodes(); + std::map> summary; + summary.insert(summary_nodes.begin(), summary_nodes.end()); + RecurseGetSummaryNodes(graph, &summary); + graph->set_summary_nodes(summary); + MS_LOG(DEBUG) << "Update summary end size: " << summary.size(); } AnfNodePtr AscendSession::CreateFakeOutput(GraphId fake_graph_id, const AnfNodePtr &true_output) { auto fake_graph = GetGraph(fake_graph_id); + MS_EXCEPTION_IF_NULL(fake_graph); auto output_item_with_index = AnfAlgo::VisitKernelWithReturnType(true_output, 0); auto create_parameter = [&](const AbstractBasePtr &abstract) -> AnfNodePtr { auto parameter = fake_graph->NewParameter(); @@ -798,7 +858,7 @@ AnfNodePtr AscendSession::CreateFakeOutput(GraphId fake_graph_id, const AnfNodeP if (abstract->isa()) { auto tuple_abstract = abstract->cast(); MS_EXCEPTION_IF_NULL(tuple_abstract); - MS_LOG(INFO) << "tuple_size [" << tuple_abstract->size() << "]"; + MS_LOG(INFO) << "Tuple size [" << tuple_abstract->size() << "]"; return create_parameter((*tuple_abstract)[output_idx]); } return create_parameter(cnode->abstract()); @@ -990,6 +1050,7 @@ void AscendSession::SwitchCompile(GraphId cond_graph_id, GraphId true_graph_id, if (false_graph_id != kInvalidGraphId) { // false graph and condition in 
graph same stream auto condition_graph = GetGraph(cond_graph_id); + MS_EXCEPTION_IF_NULL(condition_graph); SetStreamDistinctionLabel(GetGraph(false_graph_id), condition_graph->stream_distinction_label(), true); // if false graph is a condition graph and has been switch compiled before,it's false should be updated again auto cond_it = switches_.find(false_graph_id); @@ -997,6 +1058,9 @@ void AscendSession::SwitchCompile(GraphId cond_graph_id, GraphId true_graph_id, cond_graph_id = cond_it->first; false_graph_id = cond_it->second.second; condition_graph = GetGraph(cond_graph_id); + if (condition_graph == nullptr) { + continue; + } SetStreamDistinctionLabel(GetGraph(false_graph_id), condition_graph->stream_distinction_label(), true); cond_it = switches_.find(false_graph_id); } @@ -1133,7 +1197,7 @@ void AscendSession::SetChildGraphParameter(const AnfNodePtr &front_anf, GraphId MS_EXCEPTION_IF_NULL(backend_arg); MS_LOG(INFO) << "Reuse node [" << backend_arg->DebugString() << "], old node[" << backend_parameter->DebugString() << "] will be replaced."; - to_graph->ReplaceNode(backend_parameter, backend_arg); + to_graph->ReplaceNode(NOT_NULL(backend_parameter), NOT_NULL(backend_arg)); return; } MS_LOG(INFO) << "Assign of node" << backend_arg->DebugString() << " of graph " << from_graph_id << " to node" @@ -1429,22 +1493,44 @@ void AscendSession::SyncInitialTenosrToDevice() { } } -std::vector AscendSession::ConstructSplitedGraph(const KernelGraphPtr &new_kernel_graph, - const std::vector &list) { - MS_EXCEPTION_IF_NULL(new_kernel_graph); - MS_LOG(INFO) << "start contruct splited kernel graph:" << new_kernel_graph->graph_id(); +static void ConstructSplitedGraphOutput(const KernelGraphPtr &new_kernel_graph, const std::vector &list) { // count the output of every anf node std::set has_output_nodes; for (auto &anf_node : list) { + MS_EXCEPTION_IF_NULL(anf_node); for (auto &input : anf_node->inputs()) { (void)has_output_nodes.insert(input); } } + + auto make_tuple_primitve = 
NewValueNode(std::make_shared(prim::kPrimMakeTuple->name())); + std::vector make_tuple_inputs = {make_tuple_primitve}; + int output_idx = 0; + MS_EXCEPTION_IF_NULL(new_kernel_graph); + for (auto &anf_node : list) { + if (AnfAlgo::CheckPrimitiveType(anf_node, prim::kPrimReturn)) { + new_kernel_graph->set_return(anf_node); + } + if (has_output_nodes.find(anf_node) == has_output_nodes.end()) { + MS_LOG(INFO) << "Output[" << output_idx++ << "]:" << anf_node->DebugString(); + make_tuple_inputs.push_back(anf_node); + } + } + if (new_kernel_graph->get_return() == nullptr) { + new_kernel_graph->set_output(new_kernel_graph->NewCNode(make_tuple_inputs)); + } +} + +std::vector AscendSession::ConstructSplitedGraph(const KernelGraphPtr &new_kernel_graph, + const std::vector &list) { + MS_EXCEPTION_IF_NULL(new_kernel_graph); + MS_LOG(INFO) << "start contruct splited kernel graph:" << new_kernel_graph->graph_id(); MS_LOG(INFO) << "Construct input of kernel graph:" << new_kernel_graph->graph_id(); std::vector call_node_inputs; std::vector new_graph_inputs; // create new parameter from cnode for (auto &anf_node : list) { + MS_EXCEPTION_IF_NULL(anf_node); auto cnode = anf_node->cast(); for (size_t input_idx = 1; input_idx < cnode->inputs().size(); input_idx++) { auto input = cnode->inputs()[input_idx]; @@ -1479,26 +1565,21 @@ std::vector AscendSession::ConstructSplitedGraph(const KernelGraphPt MS_EXCEPTION_IF_NULL(graph_inputs); graph_inputs->clear(); std::copy(new_graph_inputs.begin(), new_graph_inputs.end(), std::back_inserter(*graph_inputs)); + MS_LOG(INFO) << "Construct output of kernel graph:" << new_kernel_graph->graph_id(); - auto make_tuple_primitve = NewValueNode(std::make_shared(prim::kPrimMakeTuple->name())); - std::vector make_tuple_inputs = {make_tuple_primitve}; - int output_idx = 0; - for (auto &anf_node : list) { - if (AnfAlgo::CheckPrimitiveType(anf_node, prim::kPrimReturn)) { - new_kernel_graph->set_return(anf_node); - } - if (has_output_nodes.find(anf_node) == 
has_output_nodes.end()) { - MS_LOG(INFO) << "output[" << output_idx++ << "]:" << anf_node->DebugString(); - make_tuple_inputs.push_back(anf_node); - } - } - if (new_kernel_graph->get_return() == nullptr) { - new_kernel_graph->set_output(new_kernel_graph->NewCNode(make_tuple_inputs)); - } + ConstructSplitedGraphOutput(new_kernel_graph, list); MS_LOG(INFO) << "end"; return call_node_inputs; } +void AscendSession::BackendOptimization(const std::vector &all_graphs) { + MS_LOG(INFO) << "Start BackendCommonOptimization"; + for (auto &graph : all_graphs) { + opt::BackendCommonOptimization(graph); + } + MS_LOG(INFO) << "End."; +} + void AscendSession::SplitGraphs(NotNull root_graph) { std::set memo; // if root graph output is a call node ,the root graph is condition graph of 'if' sentence @@ -1512,43 +1593,50 @@ void AscendSession::SplitGraphs(NotNull root_graph) { RecurseSplitGraph(root_graph, NOT_NULL(&memo)); } memo.clear(); + // add maketuple to the end of the last child graph to suit old process + auto output_graph = root_graph->child_graph_order().empty() ? 
root_graph : root_graph->child_graph_order().back(); + auto make_tuple = output_graph->NewCNode( + {NewValueNode(std::make_shared(prim::kPrimMakeTuple->name())), output_graph->output()}); + output_graph->set_output(make_tuple); // replace the real input if the real input is a call RecurseToUpdateCallRealInput(root_graph, NOT_NULL(&memo)); } +AnfNodePtr AscendSession::BindNewCallToNewGraph(NotNull graph, + const std::vector &child_graph_list) { + // if child graph list only has a call ,then return the exist call + if (child_graph_list.size() == 1 && AnfAlgo::CheckPrimitiveType(child_graph_list[0], prim::kPrimCall)) { + return child_graph_list[0]; + } + // create new child graph + auto child_graph = NewKernelGraph(); + MS_EXCEPTION_IF_NULL(child_graph); + // create new value node to bind child graph + auto graph_value_node = graph->NewValueNode(NewValueNode(child_graph)); + std::vector new_call_input = {NewValueNode(std::make_shared(prim::kPrimCall->name())), + graph_value_node}; + // set the graph id of all node of child graph + for (auto &child_graph_node : child_graph_list) { + AnfAlgo::SetGraphId(child_graph->graph_id(), child_graph_node.get()); + } + auto call_node_args = ConstructSplitedGraph(child_graph, child_graph_list); + std::copy(call_node_args.begin(), call_node_args.end(), std::back_inserter(new_call_input)); + auto new_call = graph->NewCNode(new_call_input); + AnfAlgo::SetNodeAttr("graph_id", MakeValue(graph->graph_id()), new_call); + return new_call; +} + void AscendSession::SplitGraph(NotNull graph, const std::set &cut_prims) { - MS_LOG(INFO) << "start,graph_id:" << graph->graph_id(); + MS_LOG(INFO) << "Start,graph_id:" << graph->graph_id(); auto apply_list = GetCNodes(TopoSort(graph->get_return())); // update the root graph child graph order AscendControlParser::UpdateChildGraphOrder(graph); // get child list from current graph std::vector> child_graph_lists = GetChildList(apply_list, cut_prims); - auto bind_new_call_to_new_graph = [&](std::vector 
child_graph_list) -> AnfNodePtr { - // if child graph list only has a call ,then return the exist call - if (child_graph_list.size() == 1 && AnfAlgo::CheckPrimitiveType(child_graph_list[0], prim::kPrimCall)) { - return child_graph_list[0]; - } - // create new child graph - auto child_graph = NewKernelGraph(); - MS_EXCEPTION_IF_NULL(child_graph); - // create new value node to bind child graph - auto graph_value_node = graph->NewValueNode(NewValueNode(child_graph)); - std::vector new_call_input = {NewValueNode(std::make_shared(prim::kPrimCall->name())), - graph_value_node}; - // set the graph id of all node of child graph - for (auto &child_graph_node : child_graph_list) { - AnfAlgo::SetGraphId(child_graph->graph_id(), child_graph_node.get()); - } - auto call_node_args = ConstructSplitedGraph(child_graph, child_graph_list); - std::copy(call_node_args.begin(), call_node_args.end(), std::back_inserter(new_call_input)); - auto new_call = graph->NewCNode(new_call_input); - AnfAlgo::SetNodeAttr("graph id", MakeValue(graph->graph_id()), new_call); - return new_call; - }; if (child_graph_lists.size() > 1) { std::list depend_input = {}; for (size_t call_index = 0; call_index < child_graph_lists.size(); call_index++) { - auto call_node = bind_new_call_to_new_graph(child_graph_lists[call_index]); + auto call_node = BindNewCallToNewGraph(graph, child_graph_lists[call_index]); MS_EXCEPTION_IF_NULL(call_node); // if call node is the last call of true graph,no need create child graph after that auto child_graphs = AnfAlgo::GetCallNodeKernelGraph(call_node->cast()); @@ -1605,6 +1693,5 @@ void AscendSession::RecurseCompileGraph(NotNull graph, const Not RecurseCompileGraph(NOT_NULL(child_graph), memo); } } - } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/ascend_session.h b/mindspore/ccsrc/session/ascend_session.h index 529304714c..7857330115 100755 --- a/mindspore/ccsrc/session/ascend_session.h +++ b/mindspore/ccsrc/session/ascend_session.h @@ 
-67,8 +67,8 @@ class AscendSession : public SessionBasic { void SetActive(GraphId, GraphId) override; // compile child graph when session have multiple child graphs void CompileChildGraph(const KernelGraphPtr &child_graph); - void GetSummaryNodes(const KernelGraph *graph, - std::unordered_map> *summary) override; + void RecurseGetSummaryNodes(KernelGraph *graph, std::map> *summary); + void GetSummaryNodes(KernelGraph *graph); private: void InitRuntimeResource(); @@ -81,6 +81,7 @@ class AscendSession : public SessionBasic { void BuildKernel(const std::shared_ptr &kernel_graph) const; void MemoryAlloc(KernelGraph *kernel_graph) const; void RunOpMemoryAlloc(const std::vector &input_tensors, KernelGraph *kernel_graph) const; + void RunOpMemoryClear(KernelGraph *kernel_graph) const; void GenerateTaskInfo(const std::shared_ptr &kernel_graph) const; void LoadTask(const std::shared_ptr &kernel_graph) const; void ExecTask(const std::shared_ptr &kernel_graph) const; @@ -101,12 +102,14 @@ class AscendSession : public SessionBasic { void SplitGraph(NotNull graph, const std::set &cut_prims); // split graphs with recurse from root graph void SplitGraphs(NotNull root_graph); + void BackendOptimization(const std::vector &all_graphs); void LinkChildGraphs(NotNull graph); void RootGraphExecutorValidate(NotNull graph); std::vector ConstructSplitedGraph(const KernelGraphPtr &new_kernel_graph, const std::vector &list); void RecurseCompileGraph(NotNull graph, const NotNull *> memo); void RecurseSplitGraph(NotNull graph, const NotNull *> memo); + AnfNodePtr BindNewCallToNewGraph(NotNull graph, const std::vector &child_graph_list); // merge execution order list of child graphs void MergeGraphExecOrder(); @@ -148,6 +151,7 @@ class AscendSession : public SessionBasic { AnfNodePtr CreateFakeOutput(GraphId final_graph_id, const AnfNodePtr &true_output); // sync intial tensors' data to device void SyncInitialTenosrToDevice(); + void SetFinalGraphSummaryFlag(const std::shared_ptr 
&kernel_graph); // member variables // key is final_graph_id,value is child graph execute order of final graph diff --git a/mindspore/ccsrc/session/cpu_session.cc b/mindspore/ccsrc/session/cpu_session.cc index 32e3d8b6cc..e70e551022 100644 --- a/mindspore/ccsrc/session/cpu_session.cc +++ b/mindspore/ccsrc/session/cpu_session.cc @@ -28,6 +28,23 @@ namespace mindspore { namespace session { +ParameterPtr CPUSession::CreateNewParameterFromParameter(const AnfNodePtr &anf, bool valid_input, KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(anf); + if (!anf->isa()) { + MS_LOG(EXCEPTION) << "anf[" << anf->DebugString() << "] is not a parameter"; + } + auto valid_inputs = graph->MutableValidInputs(); + MS_EXCEPTION_IF_NULL(valid_inputs); + auto graph_inputs = graph->MutableInputs(); + MS_EXCEPTION_IF_NULL(graph_inputs); + TraceManager::DebugTrace(std::make_shared(anf->debug_info())); + ParameterPtr new_parameter = graph->NewParameter(anf->cast()); + TraceManager::EndTrace(); + graph_inputs->push_back(new_parameter); + valid_inputs->push_back(valid_input); + return new_parameter; +} + GraphId CPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) { auto graph_id = graph_sum_; auto graph = ConstructKernelGraph(lst, outputs); @@ -46,16 +63,35 @@ void CPUSession::RunGraph(const GraphId &graph_id, const std::vector need_sync_outputs; + runtime_.BindInputOutput(kernel_graph.get(), inputs, outputs, &need_sync_outputs); MS_LOG(INFO) << "Run graph start"; predictmodel::StepConvertWeight(inputs); auto execution_order = kernel_graph->execution_order(); Reorder(&execution_order); + + bool enable_summary = summary_callback_ != nullptr; kernel_graph->set_execution_order(execution_order); + NamedSummaryOutputs summary_outputs; + if (enable_summary) { + GetSummaryNodes(kernel_graph.get()); + summary_outputs = kernel_graph->summary_nodes(); + runtime_.IncreaseSummaryRefCount(summary_outputs); + } + bool ret = runtime_.Run(kernel_graph.get()); if (!ret) { 
MS_LOG(EXCEPTION) << "Run graph failed"; } + for (auto output : need_sync_outputs) { + (void)output->data_sync(); + } + + if (enable_summary) { + Summary(kernel_graph.get()); + runtime_.DecreaseSummaryRefCount(summary_outputs); + } + MS_LOG(INFO) << "Run graph end"; } diff --git a/mindspore/ccsrc/session/cpu_session.h b/mindspore/ccsrc/session/cpu_session.h index c53b0d2d8c..36b987e840 100644 --- a/mindspore/ccsrc/session/cpu_session.h +++ b/mindspore/ccsrc/session/cpu_session.h @@ -35,6 +35,9 @@ class CPUSession : public SessionBasic { GraphId CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) override; void RunGraph(const GraphId &graph_id, const std::vector &inputs, VectorRef *outputs) override; + protected: + ParameterPtr CreateNewParameterFromParameter(const AnfNodePtr &anf, bool valid_input, KernelGraph *graph) override; + private: void SetKernelInfo(const KernelGraph *kernel_graph); void BuildKernel(const KernelGraph *kernel_graph); diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/session/gpu_session.cc index b843514793..e67a922567 100644 --- a/mindspore/ccsrc/session/gpu_session.cc +++ b/mindspore/ccsrc/session/gpu_session.cc @@ -22,6 +22,7 @@ #include "pre_activate/common/pass_manager.h" #include "pre_activate/common/helper.h" #include "pre_activate/pass/communication_op_fusion.h" +#include "pre_activate/pass/getitem_tuple.h" #include "device/kernel_runtime_manager.h" #include "predict/predict.h" #include "common/utils.h" @@ -51,9 +52,11 @@ void GPUSession::StartKernelRT() const { } void GPUSession::Optimize(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); auto optimizer = std::make_shared(); auto pm = std::make_shared(); pm->AddPass(std::make_shared()); + pm->AddPass(std::make_shared()); optimizer->AddPassManager(pm); (void)optimizer->Optimize(kernel_graph); kernel_graph->SetExecOrderByDefault(); @@ -72,7 +75,6 @@ void GPUSession::AllocateMemory(KernelGraph *kernel_graph) const { 
MS_EXCEPTION_IF_NULL(kernel_graph); auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); MS_EXCEPTION_IF_NULL(runtime_instance); - // opt::RemoveNopNode(kernel_graph); runtime_instance->AssignMemory(kernel_graph); } @@ -81,10 +83,16 @@ void GPUSession::RunOpAllocateMemory(const std::vector &input MS_EXCEPTION_IF_NULL(kernel_graph); auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); MS_EXCEPTION_IF_NULL(runtime_instance); - // opt::RemoveNopNode(kernel_graph); runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph); } +void GPUSession::RunOpClearMemory(KernelGraph *kernel_graph) const { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); + MS_EXCEPTION_IF_NULL(runtime_instance); + runtime_instance->RunOpClearMemory(kernel_graph); +} + void GPUSession::LoadInputData(const std::shared_ptr &kernel_graph, const std::vector &inputs_const) const { std::vector inputs(inputs_const); @@ -101,17 +109,19 @@ void GPUSession::LoadInputData(const std::shared_ptr &kernel_graph, if (input_node->isa() && AnfAlgo::OutputAddrExist(input_node, 0)) { auto pk_node = input_node->cast(); auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); + auto tensor_address = tensor->device_address(); bool need_sync = false; if (ms_context->enable_pynative_infer()) { - if (tensor->device_address().get() == nullptr || tensor->device_address() != device_address) { + if (tensor_address.get() == nullptr || tensor_address != device_address) { need_sync = true; } - } else { - if (tensor->is_dirty()) { + } else if (tensor->is_dirty()) { + need_sync = true; + } else if (tensor_address != device_address) { + if (tensor_address->DeviceType() == device_address->DeviceType()) { + AnfAlgo::SetOutputAddr(tensor_address, 0, pk_node.get()); + } else { need_sync = true; - } else 
if (tensor->device_address() != device_address) { - AnfAlgo::SetOutputAddr(tensor->device_address(), 0, pk_node.get()); - need_sync = false; } } if (need_sync) { @@ -140,6 +150,7 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList // Construct graph, if successfully, graph_sum_ + 1 auto graph_id = graph_sum_; auto graph = ConstructKernelGraph(lst, outputs); + MS_EXCEPTION_IF_NULL(graph); // Select kernel build info SelectKernel(graph); // Convert kernel Graph to model @@ -150,14 +161,18 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList Optimize(graph); // Assign CUDA streams AssignStream(graph); - // Remove NoOp from execution graph - // opt::HideNopNode(graph.get()); + // Hide NoOp from execution graph + opt::HideNopNode(graph.get()); // Build kernel if node is cnode BuildKernel(graph); // Set graph execution order before memory alloc, ensure that memory alloc is according to the reorder graph auto execution_order = graph->execution_order(); Reorder(&execution_order); graph->set_execution_order(execution_order); + // Get summary nodes. + GetSummaryNodes(graph.get()); + // Remove NoOp from execution graph + opt::RemoveNopNode(graph.get()); // Alloc memory, including static memory and dynamic memory AllocateMemory(graph.get()); MS_EXCEPTION_IF_NULL(context_); @@ -194,11 +209,17 @@ void GPUSession::RunGraph(const GraphId &graph_id, const std::vector &input_tensors, const std::vector &tensors_mask) { + // Check if the graph cache exists. 
+ if (run_op_graphs_.find(graph_info) != run_op_graphs_.end()) { + return; + } // Prepare the graph auto kernel_graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask); MS_EXCEPTION_IF_NULL(kernel_graph); SelectKernel(kernel_graph); StartKernelRT(); + // Hide NoOp from execution graph + opt::HideNopNode(kernel_graph.get()); BuildKernel(kernel_graph); run_op_graphs_[graph_info] = kernel_graph; } @@ -207,6 +228,8 @@ py::tuple GPUSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph const std::vector &input_tensors) { auto kernel_graph = run_op_graphs_[graph_info]; MS_EXCEPTION_IF_NULL(kernel_graph); + // Remove NoOp from execution graph + opt::RemoveNopNode(kernel_graph.get()); RunOpAllocateMemory(input_tensors, kernel_graph.get()); // Execute the computation LoadInputData(kernel_graph, input_tensors); @@ -222,7 +245,7 @@ py::tuple GPUSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph } py::object tuple_obj = utils::cast(output_tensors).object_; py::tuple tuple_tensors = py::cast(tuple_obj); - run_op_graphs_.clear(); + RunOpClearMemory(kernel_graph.get()); return tuple_tensors; } } // namespace gpu diff --git a/mindspore/ccsrc/session/gpu_session.h b/mindspore/ccsrc/session/gpu_session.h index b396e4a9ba..0dfb815abe 100644 --- a/mindspore/ccsrc/session/gpu_session.h +++ b/mindspore/ccsrc/session/gpu_session.h @@ -59,6 +59,8 @@ class GPUSession : public SessionBasic { void RunOpAllocateMemory(const std::vector &input_tensors, KernelGraph *kernel_graph) const; + void RunOpClearMemory(KernelGraph *kernel_graph) const; + void LoadInputData(const std::shared_ptr &kernel_graph, const std::vector &inputs_const) const override; diff --git a/mindspore/ccsrc/session/kernel_graph.cc b/mindspore/ccsrc/session/kernel_graph.cc index c1992b7cc0..7e9bb62aab 100644 --- a/mindspore/ccsrc/session/kernel_graph.cc +++ b/mindspore/ccsrc/session/kernel_graph.cc @@ -24,6 +24,7 @@ #include "device/kernel_info.h" #include 
"kernel/kernel_build_info.h" #include "device/kernel_runtime_manager.h" +#include "kernel/common_utils.h" namespace mindspore { namespace session { @@ -43,12 +44,28 @@ void PushNoVisitedNode(const AnfNodePtr &node, std::queue *que, std::vector GetCallRealOutputs(const AnfNodePtr &call_node) { auto item_with_index = AnfAlgo::VisitKernelWithReturnType(call_node, 0); - MS_EXCEPTION_IF_NULL(item_with_index.first); - if (!AnfAlgo::CheckPrimitiveType(item_with_index.first, prim::kPrimCall)) { - return {item_with_index.first}; + AnfNodePtr node = item_with_index.first; + MS_EXCEPTION_IF_NULL(node); + if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimMakeTuple)) { + auto outputs = AnfAlgo::GetAllOutput(node); + std::set memo; + std::vector new_output; + for (auto &output : outputs) { + if (memo.find(output) != memo.end()) { + continue; + } + memo.insert(output); + new_output.push_back(output); + } + if (new_output.size() == 1 && AnfAlgo::CheckPrimitiveType(new_output[0], prim::kPrimCall)) { + node = new_output[0]; + } + } + if (!AnfAlgo::CheckPrimitiveType(node, prim::kPrimCall)) { + return {node}; } std::vector real_inputs; - auto child_graphs = AnfAlgo::GetCallNodeKernelGraph(item_with_index.first->cast()); + auto child_graphs = AnfAlgo::GetCallNodeKernelGraph(node->cast()); for (const auto &child_graph : child_graphs) { if (child_graph->get_output_null()) { continue; @@ -59,6 +76,31 @@ std::vector GetCallRealOutputs(const AnfNodePtr &call_node) { } return real_inputs; } + +AnfNodePtr MakeValueNode(const AnfNodePtr &node) { + auto value_node = node->cast(); + if (value_node == nullptr) { + return nullptr; + } + + ValueNodePtr new_value_node = std::make_shared(value_node->value()); + new_value_node->set_abstract(value_node->abstract()); + // create kernel_info fo new value node + auto kernel_info = std::make_shared(); + new_value_node->set_kernel_info(kernel_info); + // create kernel_build_info for new value node + auto kernel_build_info_builder = std::make_shared(); + // 
set the format of value_node to DEFAULT_FORMAT + kernel_build_info_builder->SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); + // set value node initial device data type = infer data type + std::vector types; + for (size_t index = 0; index < AnfAlgo::GetOutputTensorNum(value_node); ++index) { + types.push_back(kTypeUnknown); + } + kernel_build_info_builder->SetOutputsDeviceType(types); + AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), new_value_node.get()); + return new_value_node; +} } // namespace std::vector KernelGraph::outputs() const { auto graph_output = output(); @@ -215,7 +257,8 @@ CNodePtr KernelGraph::NewCNode(const std::vector &inputs) { auto cnode = FuncGraph::NewCNode(inputs); MS_EXCEPTION_IF_NULL(cnode); cnode->set_abstract(std::make_shared()); - // create kernel_info from new parameter + CreateKernelInfoFromNewParameter(cnode); + auto kernel_info = std::make_shared(); std::vector feature_map_input_indexs; // if the node only has the primitive(such as getNext) or the node's input has a feature map input @@ -241,6 +284,41 @@ CNodePtr KernelGraph::NewCNode(const std::vector &inputs) { return cnode; } +void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) { + if (!AnfAlgo::IsGraphKernel(cnode)) { + return; + } + auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(cnode); + MS_EXCEPTION_IF_NULL(func_graph); + + std::vector node_list; + std::vector input_list; + std::vector output_list; + kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); + for (auto &anf_node : node_list) { + MS_EXCEPTION_IF_NULL(anf_node); + auto kernel_info = std::make_shared(); + anf_node->set_kernel_info(kernel_info); + auto anf_cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(anf_cnode); + for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_cnode); ++i) { + auto input_node = anf_cnode->input(i + 1); + MS_EXCEPTION_IF_NULL(input_node); + if (IsValueNode(input_node)) { + auto new_input_node = 
MakeValueNode(input_node); + if (new_input_node != nullptr) { + anf_cnode->set_input(i + 1, new_input_node); + } + } + } + } + for (auto &anf_node : input_list) { + MS_EXCEPTION_IF_NULL(anf_node); + auto kernel_info = std::make_shared(); + anf_node->set_kernel_info(kernel_info); + } +} + CNodePtr KernelGraph::NewCNode(const CNodePtr &cnode) { MS_EXCEPTION_IF_NULL(cnode); auto new_cnode = std::make_shared(*cnode); @@ -336,21 +414,7 @@ std::vector KernelGraph::SplitTupleValueNodeToNodeList(const ValueNo ValueNodePtr KernelGraph::NewValueNode(const ValueNodePtr &value_node) { MS_EXCEPTION_IF_NULL(value_node); - ValueNodePtr new_value_node = std::make_shared(value_node->value()); - new_value_node->set_abstract(value_node->abstract()); - // create kernel_info fo new value node - auto kernel_info = std::make_shared(); - kernel_info->SetFeatureMapFlag(false); - new_value_node->set_kernel_info(kernel_info); - // create kernel_build_info for new value node - auto kernel_build_info_builder = std::make_shared(); - // set the format of value_node to DEFAULT_FORMAT - auto output_tensor_num = AnfAlgo::GetOutputTensorNum(value_node); - kernel_build_info_builder->SetOutputsFormat(std::vector(output_tensor_num, kOpFormat_DEFAULT)); - // set value node initial device data type = infer data type - std::vector types = std::vector(output_tensor_num, kTypeUnknown); - kernel_build_info_builder->SetOutputsDeviceType(types); - AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), new_value_node.get()); + auto new_value_node = MakeValueNode(value_node)->cast(); AnfAlgo::SetGraphId(graph_id_, new_value_node.get()); return new_value_node; } @@ -377,8 +441,8 @@ void KernelGraph::FrontBackendlMapUpdate(const AnfNodePtr &old_backend_anf, cons MS_EXCEPTION_IF_NULL(old_backend_anf); MS_EXCEPTION_IF_NULL(new_backend_anf); if (old_backend_anf == new_backend_anf) { - MS_LOG(INFO) << "old:" << old_backend_anf->DebugString() << ",new:" << new_backend_anf->DebugString(); - 
MS_LOG(EXCEPTION) << "old can't be same with new"; + MS_LOG(DEBUG) << "old same with new:" << old_backend_anf->DebugString(); + return; } if (backend_front_anf_map_.find(old_backend_anf) == backend_front_anf_map_.end()) { MS_LOG(DEBUG) << "old_backend_anf " << old_backend_anf->DebugString() << " is not exist in the map"; @@ -482,7 +546,13 @@ void KernelGraph::UpdateControlDependRelations(const std::vector &de depend_nodes = GetOutputNodes(depend_node); } for (auto &first_node : prior_nodes) { + if (AnfAlgo::CheckPrimitiveType(first_node, prim::kPrimControlDepend)) { + continue; + } for (auto &second_node : depend_nodes) { + if (AnfAlgo::CheckPrimitiveType(second_node, prim::kPrimControlDepend)) { + continue; + } MS_EXCEPTION_IF_NULL(first_node); MS_EXCEPTION_IF_NULL(second_node); MS_LOG(INFO) << "Add first node:" << first_node->DebugString() << ",second node:" << second_node->DebugString(); @@ -581,9 +651,7 @@ bool KernelGraph::RemoveValueNodeFromGraph(const ValueNodePtr &value_node) { return false; } -void KernelGraph::ReplaceNode(const AnfNodePtr &old_anf_node, AnfNodePtr new_anf_node) { - MS_EXCEPTION_IF_NULL(old_anf_node); - MS_EXCEPTION_IF_NULL(new_anf_node); +void KernelGraph::ReplaceNode(NotNull old_anf_node, NotNull new_anf_node) { MS_EXCEPTION_IF_NULL(inputs_); auto it = node_output_edges_.find(old_anf_node); if (it != node_output_edges_.end()) { @@ -598,16 +666,16 @@ void KernelGraph::ReplaceNode(const AnfNodePtr &old_anf_node, AnfNodePtr new_anf continue; } for (size_t i = 1; i < output_node_inputs.size(); i++) { - if (output_node_inputs[i] == old_anf_node) { + if (output_node_inputs[i] == old_anf_node.get()) { output_cnode->set_input(i, new_anf_node); } } // update graph inputs for (size_t i = 0; i < inputs_->size(); i++) { - if ((*inputs_)[i] == old_anf_node) { + if ((*inputs_)[i] == old_anf_node.get()) { MS_LOG(INFO) << "Replace input of graph:" << graph_id_ << ", old graph input: " << old_anf_node->DebugString() << ",new graph input:" << 
new_anf_node->DebugString(); - (*inputs_)[i] = new_anf_node; + (*inputs_)[i] = new_anf_node.get(); break; } } @@ -615,22 +683,29 @@ void KernelGraph::ReplaceNode(const AnfNodePtr &old_anf_node, AnfNodePtr new_anf // update front to backend map FrontBackendlMapUpdate(old_anf_node, new_anf_node); // update output depend relations - node_output_edges_[new_anf_node] = it->second; + node_output_edges_[new_anf_node.get()] = it->second; (void)node_output_edges_.erase(old_anf_node); } // update graph inputs in child graph - auto it_real_inputs = real_inputs_.find(old_anf_node); + auto it_real_inputs = std::find_if(real_inputs_.begin(), real_inputs_.end(), + [&old_anf_node](const std::pair> &n) -> bool { + return n.first == old_anf_node.get(); + }); if (it_real_inputs != real_inputs_.end()) { + // erase old parameter in map + auto old_args = it_real_inputs->second; + real_inputs_.erase(it_real_inputs); // insert new parameter to map - auto iter = real_inputs_.find(new_anf_node); + auto iter = std::find_if(real_inputs_.begin(), real_inputs_.end(), + [&new_anf_node](const std::pair> &n) -> bool { + return n.first == new_anf_node.get(); + }); if (iter != real_inputs_.end()) { MS_LOG(WARNING) << new_anf_node->DebugString() << " already exist in real inputs, will be rewrited."; - iter->second = it_real_inputs->second; + iter->second = old_args; } else { - real_inputs_[new_anf_node] = it_real_inputs->second; + real_inputs_.emplace_back(new_anf_node, old_args); } - // erase old parameter in map - real_inputs_.erase(old_anf_node); } } @@ -672,73 +747,69 @@ void KernelGraph::SetRealInput(const AnfNodePtr ¶meter, const AnfNodePtr &ar MS_LOG(INFO) << "parameter: " << parameter->DebugString() << ", real input : " << arg->DebugString(); MS_EXCEPTION_IF_NULL(parameter); MS_EXCEPTION_IF_NULL(arg); - if (real_inputs_.find(parameter) == real_inputs_.end()) { - real_inputs_[parameter] = std::set(); - } - auto &args = real_inputs_[parameter]; - (void)args.insert(arg); -} - -std::set 
KernelGraph::GetRealInput(const AnfNodePtr ¶meter) { - MS_EXCEPTION_IF_NULL(parameter); - auto iter = real_inputs_.find(parameter); + auto iter = std::find_if( + real_inputs_.begin(), real_inputs_.end(), + [¶meter](const std::pair> &n) -> bool { return n.first == parameter; }); if (iter != real_inputs_.end()) { - return iter->second; + auto &args = iter->second; + args.push_back(arg); + } else { + real_inputs_.emplace_back(parameter, std::vector(1, arg)); } - MS_LOG(EXCEPTION) << parameter->DebugString() << " not found."; } void KernelGraph::UpdateCallRealInput() { MS_LOG(INFO) << "Update graph id: " << graph_id_; - std::map> real_inputs_map; - std::vector> replace_list; + std::vector>> real_inputs_map; for (auto &it : real_inputs_) { auto parameter = it.first; MS_EXCEPTION_IF_NULL(parameter); auto real_inputs = it.second; std::vector new_real_inputs; - std::set erase_real_inputs; for (auto &real_input : real_inputs) { // if real input is a call node ,find the child graph output act as the new real input auto item_with_index = AnfAlgo::VisitKernelWithReturnType(real_input, 0); MS_EXCEPTION_IF_NULL(item_with_index.first); - if (AnfAlgo::CheckPrimitiveType(item_with_index.first, prim::kPrimCall)) { - (void)erase_real_inputs.insert(item_with_index.first); - new_real_inputs = GetCallRealOutputs(item_with_index.first); - continue; - } - } - for (auto &erase_node : erase_real_inputs) { - MS_LOG(INFO) << "paramter: " << parameter->DebugString() << " erase real input:" << erase_node->DebugString(); - (void)real_inputs.erase(erase_node); - } - for (auto &new_real_input : new_real_inputs) { - MS_LOG(INFO) << "paramter: " << parameter->DebugString() - << " insert real input:" << new_real_input->DebugString(); - (void)real_inputs.insert(new_real_input); - if (new_real_input->isa()) { - replace_list.emplace_back(parameter, new_real_input); - parameter = new_real_input; - } + auto tmp_real_input = GetCallRealOutputs(item_with_index.first); + std::copy(tmp_real_input.begin(), 
tmp_real_input.end(), std::back_inserter(new_real_inputs)); } - real_inputs_map[parameter] = real_inputs; - } - for (auto [parameter, arg] : replace_list) { - ReplaceNode(parameter, arg); + real_inputs_map.emplace_back(parameter, new_real_inputs); } real_inputs_ = real_inputs_map; } -std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); } +void KernelGraph::PrintGraphExecuteOrder() const { + MS_LOG(INFO) << "graph:" << graph_id_ << "execution order"; + for (size_t i = 0; i < execution_order_.size(); i++) { + CNodePtr cur_cnode_ptr = execution_order_[i]; + MS_EXCEPTION_IF_NULL(cur_cnode_ptr); + std::string event_str; + std::string label_str; + if (AnfAlgo::HasNodeAttr(kAttrEventId, cur_cnode_ptr)) { + event_str = ", event_id[" + std::to_string(AnfAlgo::GetNodeAttr(cur_cnode_ptr, kAttrEventId)) + "]"; + } -KernelGraph::~KernelGraph() { - auto context = MsContext::GetInstance(); - if (!context) { - return; - } - if (context->execution_mode() == kGraphMode) { - device::KernelRuntimeManager::Instance().ClearGraphResource(graph_id_); + if (AnfAlgo::HasNodeAttr(kAttrLabelIndex, cur_cnode_ptr)) { + label_str = ", label_id[" + std::to_string(AnfAlgo::GetNodeAttr(cur_cnode_ptr, kAttrLabelIndex)) + "]"; + } + + if (AnfAlgo::HasNodeAttr(kAttrLabelSwitchList, cur_cnode_ptr)) { + auto label_list = AnfAlgo::GetNodeAttr>(cur_cnode_ptr, kAttrLabelSwitchList); + label_str = ", label_id["; + for (size_t j = 0; j < label_list.size(); ++j) { + label_str += std::to_string(label_list[j]) + (j + 1 < label_list.size() ? 
", " : "]"); + } + } + + MS_LOG(INFO) << "index[" << i << "], node name[" << cur_cnode_ptr->fullname_with_scope() << "], logic id[" + << AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()) << "], stream id[" + << AnfAlgo::GetStreamId(cur_cnode_ptr) << "], node info[" << cur_cnode_ptr->DebugString() << "]" + << event_str << label_str; } } + +std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); } + +KernelGraph::~KernelGraph() { device::KernelRuntimeManager::Instance().ClearGraphResource(graph_id_); } } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/kernel_graph.h b/mindspore/ccsrc/session/kernel_graph.h index 98a007d1a1..3009ab0ce9 100644 --- a/mindspore/ccsrc/session/kernel_graph.h +++ b/mindspore/ccsrc/session/kernel_graph.h @@ -40,6 +40,7 @@ class KernelGraph : public FuncGraph { inputs_ = std::make_shared>(); execution_order_ = {}; executable_ = true; + summary_node_exist_ = false; stream_distinction_label_ = kInvalidDistincLabel; } ~KernelGraph() override; @@ -50,6 +51,7 @@ class KernelGraph : public FuncGraph { std::vector *MutableInputs() const { return inputs_.get(); } std::vector outputs() const; CNodePtr NewCNode(const std::vector &inputs) override; + void CreateKernelInfoFromNewParameter(const CNodePtr &cnode); CNodePtr NewCNode(const CNodePtr &cnode); ParameterPtr NewParameter(const ParameterPtr ¶meter = nullptr); ValueNodePtr NewValueNode(const ValueNodePtr &value_node = nullptr); @@ -90,11 +92,15 @@ class KernelGraph : public FuncGraph { bool executable() const { return executable_; } // set executable of graph void set_executable(bool executable) { executable_ = executable; } + // set summary_node of graph + void set_summary_node_exist(bool summary_node_exist) { summary_node_exist_ = summary_node_exist; } + // check whether exist summary node in graph + bool summary_node_exist() const { return summary_node_exist_; } // set invalid inputs for control 
sink std::vector *MutableValidInputs() { return &valid_inputs_; } std::vector valid_inputs() const { return valid_inputs_; } // replace node in graph - void ReplaceNode(const AnfNodePtr &old_anf_node, AnfNodePtr new_anf_node); + void ReplaceNode(NotNull old_anf_node, NotNull new_anf_node); // set stream label of graph void set_stream_distinction_label(uint32_t stream_label) { stream_distinction_label_ = stream_label; } // get stream label of graph @@ -122,8 +128,7 @@ class KernelGraph : public FuncGraph { // find anf node in graph std::vector FindNodeByPrimitive(const PrimitivePtr &primitive) const; // get real inputs - const std::map> &real_inputs() const { return real_inputs_; } - std::set GetRealInput(const AnfNodePtr ¶meter); + const std::vector>> &real_inputs() const { return real_inputs_; } void SetRealInput(const AnfNodePtr ¶meter, const AnfNodePtr &arg); // used to dump ir std::string ToString() const override; @@ -136,6 +141,9 @@ class KernelGraph : public FuncGraph { CNodePtr get_end_goto() { return end_goto_; } bool get_output_null() { return null_output_; } void set_output_null(bool is_output_null) { null_output_ = is_output_null; } + void PrintGraphExecuteOrder() const; + const std::map> &summary_nodes() const { return summary_nodes_; } + void set_summary_nodes(const std::map> &nodes) { summary_nodes_ = nodes; } private: // remove value node form graph @@ -169,8 +177,11 @@ class KernelGraph : public FuncGraph { // record map between ref final output anf with index and ref origin input with index std::map ref_out_in_map_; std::unordered_map>> node_output_edges_; + std::map> summary_nodes_; // graph needn't execute bool executable_; + // exist summary node in graph + bool summary_node_exist_; // valid inputs std::vector valid_inputs_; @@ -186,7 +197,7 @@ class KernelGraph : public FuncGraph { // parameter graph std::shared_ptr parent_graph_; // record real parameters,inputs_ is the formal parameters - std::map> real_inputs_; + std::vector>> real_inputs_; 
CNodePtr start_label_; CNodePtr end_goto_; diff --git a/mindspore/ccsrc/session/session.cc b/mindspore/ccsrc/session/session.cc new file mode 100644 index 0000000000..90e02b37ff --- /dev/null +++ b/mindspore/ccsrc/session/session.cc @@ -0,0 +1,174 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "include/inference.h" +#include "session/session.h" +#include "utils/load_onnx/anf_converter.h" +#include "session/session_basic.h" +#include "session/session_factory.h" +#include "utils/base_ref_utils.h" +#include "kernel/oplib/oplib.h" +#ifdef ENABLE_D +#include "utils/context/ms_context.h" +#include "session/ascend_session.h" +#else +#include "session/cpu_session.h" +#endif + +namespace py = pybind11; +namespace mindspore::inference { +std::shared_ptr LoadModel(const char *model_buf, size_t size, const std::string &device) { + inference::Session::RegAllOp(); + auto anf_graph = lite::AnfConverter::RunAnfConverter(model_buf, size); + return anf_graph; +} + +void ExitInference() { + auto ms_context = MsContext::GetInstance(); + if (ms_context == nullptr) { + MS_LOG(ERROR) << "Get Context failed!"; + return; + } + if (!ms_context->CloseTsd()) { + MS_LOG(ERROR) << "Inference CloseTsd failed!"; + return; + } +} + +std::shared_ptr MSSession::CreateSession(const std::string &device, uint32_t device_id) { + auto session = std::make_shared(); + auto ret = session->Init(device, 
device_id); + if (ret != 0) { + return nullptr; + } + return session; +} + +void Session::RegAllOp() { + static std::mutex init_mutex; + static bool Initialized = false; + + std::lock_guard lock(init_mutex); + if (Initialized) { + return; + } + Initialized = true; + MsContext::GetInstance()->set_execution_mode(kGraphMode); + Py_Initialize(); + auto c_expression = PyImport_ImportModule("mindspore._c_expression"); + if (c_expression == nullptr) { + MS_LOG(EXCEPTION) << "Failed to import mindspore._c_expression module."; + return; + } + PyObject *c_expression_dict = PyModule_GetDict(c_expression); + + PyObject *op_info_loader_class = PyDict_GetItemString(c_expression_dict, "OpInfoLoaderPy"); + if (op_info_loader_class == nullptr) { + MS_LOG(EXCEPTION) << "Failed to get op_info_loader_class from mindspore._c_expression."; + return; + } + PyObject *op_info_loader = PyInstanceMethod_New(op_info_loader_class); + if (op_info_loader == nullptr) { + MS_LOG(EXCEPTION) << "Failed to create op_info_loader instance."; + return; + } + PyObject *op_info_loader_ins = PyObject_CallObject(op_info_loader, nullptr); + if (op_info_loader_ins == nullptr) { + MS_LOG(EXCEPTION) << "Failed to call op_info_loader instance."; + return; + } + auto all_ops_info_vector_addr_ul = PyObject_CallMethod(op_info_loader_ins, "get_all_ops_info", nullptr); + if (all_ops_info_vector_addr_ul == nullptr) { + MS_LOG(EXCEPTION) << "Failed to call get_all_ops_addr."; + return; + } + auto all_ops_info_vector_addr = PyLong_AsVoidPtr(all_ops_info_vector_addr_ul); + auto all_ops_info = static_cast *>(all_ops_info_vector_addr); + for (auto op_info : *all_ops_info) { + kernel::OpLib::RegOpInfo(std::shared_ptr(op_info)); + } + all_ops_info->clear(); + delete all_ops_info; + Py_DECREF(op_info_loader); + Py_DECREF(op_info_loader_class); + Py_DECREF(c_expression_dict); + Py_DECREF(c_expression); + return; +} + +uint32_t Session::CompileGraph(std::shared_ptr funcGraphPtr) { + MS_ASSERT(session_impl_ != nullptr); + auto 
graph_id = session_impl_->CompileGraph(NOT_NULL(funcGraphPtr)); + py::gil_scoped_release gil_release; + return graph_id; +} + +MultiTensor Session::RunGraph(uint32_t graph_id, const std::vector> &inputs) { + std::vector inTensors; + inTensors.resize(inputs.size()); + bool has_error = false; + std::transform(inputs.begin(), inputs.end(), inTensors.begin(), + [&has_error](const std::shared_ptr &tensor_ptr) -> tensor::TensorPtr { + if (tensor_ptr == nullptr) { + MS_LOG(WARNING) << "input MSTensor is nullptr, return nullptr"; + has_error = true; + return nullptr; + } + auto tensor = static_cast(tensor_ptr.get()); + if (tensor == nullptr) { + MS_LOG(ERROR) << "Can not cast input MSTensor to tensor"; + has_error = true; + return nullptr; + } + return tensor->tensor(); + }); + if (has_error) { + MS_LOG(ERROR) << "Init Tensor failed, returning empty result"; + std::vector> multiTensor; + return multiTensor; + } + VectorRef outputs; + session_impl_->RunGraph(graph_id, inTensors, &outputs); + + return TransformVectorRefToMultiTensor(outputs); +} + +int Session::Init(const std::string &device, uint32_t device_id) { + RegAllOp(); + auto ms_context = MsContext::GetInstance(); + ms_context->set_execution_mode(kGraphMode); + ms_context->set_device_target(kAscendDevice); + session_impl_ = session::SessionFactory::Get().Create(device); + if (session_impl_ == nullptr) { + MS_LOG(ERROR) << "Session create failed!, please make sure target device:" << device << " is available."; + return -1; + } + session_impl_->Init(device_id); + if (ms_context == nullptr) { + MS_LOG(ERROR) << "Get Context failed!"; + return -1; + } + if (!ms_context->OpenTsd()) { + MS_LOG(ERROR) << "Session init OpenTsd failed!"; + return -1; + } + return 0; +} + +Session::Session() = default; +} // namespace mindspore::inference diff --git a/mindspore/ccsrc/session/session.h b/mindspore/ccsrc/session/session.h new file mode 100644 index 0000000000..b608163067 --- /dev/null +++ b/mindspore/ccsrc/session/session.h @@ 
-0,0 +1,50 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_SESSION_SESSION_H +#define MINDSPORE_CCSRC_SESSION_SESSION_H + +#include +#include +#include +#include +#include +#include + +#include "session/session_basic.h" +#include "ir/anf.h" +#include "include/inference.h" + +namespace mindspore { +namespace inference { +class Session : public MSSession { + public: + Session(); + + uint32_t CompileGraph(std::shared_ptr funcGraphPtr) override; + + MultiTensor RunGraph(uint32_t graph_id, const std::vector> &inputs) override; + + int Init(const std::string &device, uint32_t device_id); + + static void RegAllOp(); + + private: + std::shared_ptr session_impl_ = nullptr; + std::vector graph_id_; +}; +} // namespace inference +} // namespace mindspore +#endif // MINDSPORE_CCSRC_SESSION_SESSION_BASIC_H diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/session/session_basic.cc index d47cea188c..ff6fa8ff94 100644 --- a/mindspore/ccsrc/session/session_basic.cc +++ b/mindspore/ccsrc/session/session_basic.cc @@ -21,6 +21,7 @@ #include "pipeline/parse/data_converter.h" #include "ir/manager.h" #include "ir/param_value_py.h" +#include "kernel/common_utils.h" #include "operator/ops.h" #include "common/trans.h" #include "utils/context/ms_context.h" @@ -33,6 +34,7 @@ #include "common/utils.h" #include "ir/dtype.h" #include "ir/anf.h" +#include "ir/func_graph_cloner.h" 
namespace mindspore { namespace session { @@ -50,6 +52,7 @@ PyObject *GetParamDefaultInputTensor(const AnfNodePtr &node) { return nullptr; } auto param_value = std::dynamic_pointer_cast(parameter->default_param()); + MS_EXCEPTION_IF_NULL(param_value); auto py_param = param_value->value(); return py_param.ptr(); } @@ -67,7 +70,7 @@ BaseRef CreateOneTensor(const AnfNodePtr &node, size_t output_index, const Kerne } if (node->isa()) { for (size_t input_idx = 0; input_idx < graph.inputs().size(); input_idx++) { - if (input_idx > input_tensors.size()) { + if (input_idx >= input_tensors.size()) { MS_LOG(EXCEPTION) << "input idx:" << input_idx << "out of range:" << input_tensors.size(); } if (graph.inputs()[input_idx] == node) { @@ -147,6 +150,8 @@ BaseRef CreatTupleForOutput(const AnfNodePtr &anf, const KernelGraph &graph, } ValueNodePtr CreateNewValueNode(const AnfNodePtr &anf, KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(anf); + MS_EXCEPTION_IF_NULL(graph); auto value_node = anf->cast(); MS_EXCEPTION_IF_NULL(value_node); auto value = value_node->value(); @@ -227,6 +232,7 @@ ValueNodePtr ConstructRunOpValueNode(const std::shared_ptr &graph, MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(input_tensor); auto value_node = std::make_shared(input_tensor); + MS_EXCEPTION_IF_NULL(value_node); // construct abstract of value node auto type_of_tensor = input_tensor->Dtype(); auto shape_of_tensor = input_tensor->shape(); @@ -240,6 +246,7 @@ ValueNodePtr ConstructRunOpValueNode(const std::shared_ptr &graph, ParameterPtr ConstructRunOpParameter(const std::shared_ptr &graph, const tensor::TensorPtr &input_tensor, int tensor_mask) { + MS_EXCEPTION_IF_NULL(graph); auto param = graph->NewParameter(); MS_EXCEPTION_IF_NULL(param); if (tensor_mask == kParameterWeightTensorMask) { @@ -291,6 +298,20 @@ void DumpGraphOutput(const Any &any, size_t recurse_level = 0) { (void)tab_str.append(any.ToString()); MS_LOG(INFO) << tab_str; } + +bool ExistSummaryNode(const KernelGraph *graph) { + 
MS_EXCEPTION_IF_NULL(graph); + auto ret = graph->get_return(); + MS_EXCEPTION_IF_NULL(ret); + auto all_nodes = DeepLinkedGraphSearch(ret); + for (auto &n : all_nodes) { + if (IsPrimitiveCNode(n, prim::kPrimScalarSummary) || IsPrimitiveCNode(n, prim::kPrimTensorSummary) || + IsPrimitiveCNode(n, prim::kPrimImageSummary) || IsPrimitiveCNode(n, prim::kPrimHistogramSummary)) { + return true; + } + } + return false; +} } // namespace GraphId SessionBasic::graph_sum_ = 0; @@ -300,7 +321,7 @@ ParameterPtr SessionBasic::CreateNewParameterFromParameter(const AnfNodePtr &anf if (!anf->isa()) { MS_LOG(EXCEPTION) << "anf[" << anf->DebugString() << "] is not a parameter"; } - + MS_EXCEPTION_IF_NULL(graph); auto m_tensor = GetParamDefaultInputTensor(anf); auto valid_inputs = graph->MutableValidInputs(); MS_EXCEPTION_IF_NULL(valid_inputs); @@ -311,8 +332,9 @@ ParameterPtr SessionBasic::CreateNewParameterFromParameter(const AnfNodePtr &anf if (python_paras_ == nullptr) { python_paras_ = std::make_shared>(); } - if (python_paras_->find(m_tensor) != python_paras_->end() && GetGraphIdByNode(anf) == kInvalidGraphId) { - new_parameter = (*python_paras_)[m_tensor]; + auto iter = python_paras_->find(m_tensor); + if (iter != python_paras_->end()) { + new_parameter = iter->second; } else { TraceManager::DebugTrace(std::make_shared(anf->debug_info())); new_parameter = graph->NewParameter(anf->cast()); @@ -328,6 +350,7 @@ ParameterPtr SessionBasic::CreateNewParameterFromParameter(const AnfNodePtr &anf AnfNodePtr SessionBasic::CreateNewParameterFromCNode(const AnfNodePtr &anf, bool valid_input, KernelGraph *graph) { MS_EXCEPTION_IF_NULL(anf); + MS_EXCEPTION_IF_NULL(graph); MS_LOG(INFO) << "Create a new parameter from cnode[" << anf->DebugString() << "]"; auto parameters = CreateParameterFromTuple(anf, valid_input, graph); if (parameters.empty()) { @@ -353,10 +376,17 @@ CNodePtr SessionBasic::CreateNewCNode(const CNodePtr &cnode, bool valid_input, K MS_EXCEPTION_IF_NULL(other_graph_cnode); 
*from_other_graph = false; // get primitive of old node + std::vector cnode_inputs; auto prim = AnfAlgo::GetCNodePrimitive(cnode); - MS_EXCEPTION_IF_NULL(prim); - // push attr to inputs[0] of new cnode - std::vector cnode_inputs = {std::make_shared(std::make_shared(*prim))}; + if (prim != nullptr) { + // push attr to inputs[0] of new cnode + cnode_inputs.push_back(std::make_shared(std::make_shared(*prim))); + } else { + auto fg = AnfAlgo::GetCNodeFuncGraphPtr(cnode); + MS_EXCEPTION_IF_NULL(fg); + auto new_fg = BasicClone(fg); + cnode_inputs.push_back(std::make_shared(new_fg)); + } // if has multiple depends,only select first depend as parameter for (size_t input_idx = 1; input_idx < cnode->inputs().size(); input_idx++) { auto anf = cnode->inputs()[input_idx]; @@ -446,6 +476,8 @@ CNodePtr SessionBasic::CreateNewCNode(const CNodePtr &cnode, KernelGraph *graph) if (graph->GetBackendAnfByFrontAnf(anf) != nullptr) { cnode_inputs.emplace_back(graph->GetBackendAnfByFrontAnf(anf)); continue; + } else if (IsValueNode(anf)) { + continue; } MS_LOG(EXCEPTION) << "Unexpected input[" << anf->DebugString() << "]"; } @@ -457,6 +489,7 @@ CNodePtr SessionBasic::CreateNewCNode(const CNodePtr &cnode, KernelGraph *graph) ValueNodePtr SessionBasic::CreateValueNodeKernelGraph(const AnfNodePtr &anf, KernelGraph *graph) { MS_EXCEPTION_IF_NULL(anf); + MS_EXCEPTION_IF_NULL(graph); auto value_node = anf->cast(); MS_EXCEPTION_IF_NULL(value_node); auto sub_func_graph = AnfAlgo::GetValueNodeFuncGraph(anf); @@ -484,16 +517,27 @@ ValueNodePtr SessionBasic::CreateValueNodeKernelGraph(const AnfNodePtr &anf, Ker ParameterPtr SessionBasic::CreateNewParameter(const AnfNodePtr &anf, KernelGraph *graph) { MS_EXCEPTION_IF_NULL(anf); + MS_EXCEPTION_IF_NULL(graph); if (!anf->isa()) { MS_LOG(EXCEPTION) << "anf[" << anf->DebugString() << "] is not a parameter"; } - auto graph_inputs = graph->MutableInputs(); - MS_EXCEPTION_IF_NULL(graph_inputs); - TraceManager::DebugTrace(std::make_shared(anf->debug_info())); 
- auto new_parameter = graph->NewParameter(anf->cast()); - TraceManager::EndTrace(); - graph_inputs->push_back(new_parameter); - graph->FrontBackendlMapAdd(anf, new_parameter); + + auto m_tensor = GetParamDefaultInputTensor(anf); + ParameterPtr new_parameter = nullptr; + if (python_paras_ == nullptr) { + python_paras_ = std::make_shared>(); + } + auto iter = python_paras_->find(m_tensor); + if (iter != python_paras_->end()) { + new_parameter = iter->second; + } else { + TraceManager::DebugTrace(std::make_shared(anf->debug_info())); + new_parameter = graph->NewParameter(anf->cast()); + if (m_tensor != nullptr) { + (*python_paras_)[m_tensor] = new_parameter; + } + TraceManager::EndTrace(); + } return new_parameter; } @@ -501,6 +545,7 @@ ParameterPtr SessionBasic::CreateNewParameter(const AnfNodePtr &anf, KernelGraph KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) { std::unordered_map other_graph_cnode; auto graph = NewKernelGraph(); + MS_EXCEPTION_IF_NULL(graph); MS_LOG(INFO) << "Create graph: " << graph->graph_id(); size_t from_other_graph_depend_num = 0; for (const auto &node : lst) { @@ -537,14 +582,20 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con graph->set_manager(manager); } graph->SetExecOrderByDefault(); + if (ExistSummaryNode(graph.get())) { + graph->set_summary_node_exist(true); + } opt::BackendCommonOptimization(graph); return graph; } -std::shared_ptr SessionBasic::ConstructKernelGraph(const FuncGraphPtr &func_graph) { +std::shared_ptr SessionBasic::ConstructKernelGraph(const FuncGraphPtr &func_graph, + std::vector *all_out_graph) { MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(all_out_graph); auto node_list = TopoSort(func_graph->get_return()); auto graph = NewKernelGraph(); + MS_EXCEPTION_IF_NULL(graph); front_backend_graph_map_[func_graph] = graph; MS_LOG(INFO) << "Create graph: " << graph->graph_id(); @@ -553,7 +604,11 @@ std::shared_ptr 
SessionBasic::ConstructKernelGraph(const FuncGraphP MS_EXCEPTION_IF_NULL(node); MS_LOG(DEBUG) << "Start create new cnode, node = " << node->DebugString(); if (node->isa()) { - (void)CreateNewParameter(node, graph.get()); + auto graph_inputs = graph->MutableInputs(); + MS_EXCEPTION_IF_NULL(graph_inputs); + auto new_parameter = CreateNewParameter(node, graph.get()); + graph_inputs->push_back(new_parameter); + graph->FrontBackendlMapAdd(node, new_parameter); continue; } else if (node->isa()) { if (!IsValueNode(node)) { @@ -563,10 +618,9 @@ std::shared_ptr SessionBasic::ConstructKernelGraph(const FuncGraphP // if input is a ValueNode FuncGraphPtr child_graph = AnfAlgo::GetValueNodeFuncGraph(node); if (front_backend_graph_map_.find(child_graph) != front_backend_graph_map_.end()) { - MS_LOG(INFO) << "FuncGraph: " << child_graph->ToString() << " has been transformed to KernelGraph."; is_trace_back = true; } else { - (void)ConstructKernelGraph(child_graph); + (void)ConstructKernelGraph(child_graph, all_out_graph); } (void)CreateValueNodeKernelGraph(node, graph.get()); } @@ -578,6 +632,7 @@ std::shared_ptr SessionBasic::ConstructKernelGraph(const FuncGraphP auto new_cnode = CreateNewCNode(cnode, graph.get()); MS_EXCEPTION_IF_NULL(new_cnode); new_cnode->set_abstract(cnode->abstract()); + new_cnode->set_fullname_with_scope(cnode->fullname_with_scope()); new_cnode->set_scope(cnode->scope()); graph->FrontBackendlMapAdd(node, new_cnode); if (AnfAlgo::CheckPrimitiveType(new_cnode, prim::kPrimReturn)) { @@ -587,29 +642,33 @@ std::shared_ptr SessionBasic::ConstructKernelGraph(const FuncGraphP } // if a graph jump back unconditionally, return op of this graph will never be executed, so output is null. 
graph->set_output_null(is_trace_back); + AddParameterToGraphInputs(func_graph->parameters(), graph.get()); + graph->SetExecOrderByDefault(); + if (ExistSummaryNode(graph.get())) { + graph->set_summary_node_exist(true); + } + all_out_graph->push_back(graph); + return graph; +} + +void SessionBasic::AddParameterToGraphInputs(const std::vector ¶meters, KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); auto graph_inputs = graph->MutableInputs(); MS_EXCEPTION_IF_NULL(graph_inputs); graph_inputs->clear(); - for (auto ¶meter : func_graph->parameters()) { + for (auto ¶meter : parameters) { MS_EXCEPTION_IF_NULL(parameter); auto backend_parameter = graph->GetBackendAnfByFrontAnf(parameter); if (backend_parameter == nullptr) { // for example "def f(x,y,z) {return x + y}", parameter z in unused - CreateNewParameterFromParameter(parameter, false, graph.get()); + auto new_parameter = CreateNewParameter(parameter, graph); + graph_inputs->push_back(new_parameter); MS_LOG(INFO) << "Can't find parameter:" << parameter->DebugString(); continue; } MS_LOG(INFO) << "graph[" << graph->graph_id() << "],parameter:" << parameter->DebugString(); graph_inputs->push_back(backend_parameter); } - MS_EXCEPTION_IF_NULL(context_); - FuncGraphManagerPtr manager = context_->manager(); - if (manager) { - manager->AddFuncGraph(graph); - graph->set_manager(manager); - } - graph->SetExecOrderByDefault(); - return graph; } // run graph steps @@ -650,7 +709,9 @@ void SessionBasic::LoadInputData(const std::shared_ptr &kernel_grap } } if (need_sync) { - tensor->set_device_address(device_address); + if (ms_context->execution_mode() == kPynativeMode || AnfAlgo::IsParameterWeight(pk_node)) { + tensor->set_device_address(device_address); + } MS_EXCEPTION_IF_NULL(device_address); if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), LongToSize(tensor->data().nbytes()), tensor->data_type(), @@ -674,8 +735,8 @@ void SessionBasic::UpdateOutputs(const std::shared_ptr &kernel_grap } auto 
anf_outputs = kernel_graph->outputs(); for (auto &item : anf_outputs) { - MS_LOG(INFO) << "update output[" << item->DebugString() << "]"; MS_EXCEPTION_IF_NULL(item); + MS_LOG(INFO) << "update output[" << item->DebugString() << "]"; if (AnfAlgo::IsTupleOutput(item) && AnfAlgo::IsRealKernel(item)) { outputs->emplace_back(CreatTupleForOutput(item, *kernel_graph, input_tensors)); continue; @@ -689,29 +750,15 @@ void SessionBasic::RegisterSummaryCallBackFunc(const CallBackFunc &callback) { summary_callback_ = callback; } -void SessionBasic::Reorder(std::vector *node_list) { - MS_EXCEPTION_IF_NULL(node_list); - std::vector all_opt_list; - std::vector non_opt_list; +void SessionBasic::Reorder(std::vector *node_list) { AnfAlgo::ReorderExecList(NOT_NULL(node_list)); } - for (const auto &node : *node_list) { - MS_EXCEPTION_IF_NULL(node); - if (kOptOperatorSet.find(AnfAlgo::GetCNodeName(node)) != kOptOperatorSet.end()) { - all_opt_list.emplace_back(node); - } else { - non_opt_list.emplace_back(node); - } - } - node_list->clear(); - (void)std::copy(non_opt_list.begin(), non_opt_list.end(), std::back_inserter(*node_list)); - (void)std::copy(all_opt_list.begin(), all_opt_list.end(), std::back_inserter(*node_list)); -} - -void SessionBasic::GetSummaryNodes(const KernelGraph *graph, - std::unordered_map> *summary) { +void SessionBasic::GetSummaryNodes(KernelGraph *graph) { MS_LOG(DEBUG) << "Update summary Start"; MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(summary); + if (!graph->summary_node_exist()) { + return; + } + auto summary = graph->summary_nodes(); auto apply_list = TopoSort(graph->get_return()); for (auto &n : apply_list) { MS_EXCEPTION_IF_NULL(n); @@ -724,14 +771,16 @@ void SessionBasic::GetSummaryNodes(const KernelGraph *graph, } auto node = cnode->input(kSummaryGetItem); MS_EXCEPTION_IF_NULL(node); - auto item_with_index = AnfAlgo::VisitKernelWithReturnType(node, 0); + auto item_with_index = AnfAlgo::VisitKernelWithReturnType(node, 0, true); + 
MS_EXCEPTION_IF_NULL(item_with_index.first); if (!AnfAlgo::IsRealKernel(item_with_index.first)) { MS_LOG(EXCEPTION) << "Unexpected node:" << item_with_index.first->DebugString(); } - (*summary)[n->fullname_with_scope()] = item_with_index; + summary[n->fullname_with_scope()] = item_with_index; } } - MS_LOG(DEBUG) << "Update summary end size: " << (*summary).size(); + graph->set_summary_nodes(summary); + MS_LOG(DEBUG) << "Update summary end size: " << summary.size(); } void SessionBasic::Summary(KernelGraph *graph) { @@ -739,12 +788,12 @@ void SessionBasic::Summary(KernelGraph *graph) { return; } MS_EXCEPTION_IF_NULL(graph); - std::unordered_map> summary_outputs; - GetSummaryNodes(graph, &summary_outputs); - // do not exist summary node - if (summary_outputs.empty()) { + bool exist_summary = graph->summary_node_exist(); + if (!exist_summary) { return; } + GetSummaryNodes(graph); + auto summary_outputs = graph->summary_nodes(); std::map params_list; // fetch outputs apply kernel in session & run callback functions for (auto &output_item : summary_outputs) { @@ -775,6 +824,7 @@ CNodePtr SessionBasic::ConstructOutput(const AnfNodePtrList &outputs, const std: MS_EXCEPTION_IF_NULL(graph); std::vector output_args; for (const auto &output : outputs) { + MS_EXCEPTION_IF_NULL(output); MS_LOG(INFO) << "output:" << output->DebugString(); } auto FindEqu = [graph, outputs](const AnfNodePtr &out) -> AnfNodePtr { @@ -846,7 +896,9 @@ std::shared_ptr SessionBasic::ConstructSingleOpGraph(const OpRunInf } auto parameter = ConstructRunOpParameter(graph, input_tensors[i], tensors_mask[i]); inputs.push_back(parameter); - graph->MutableInputs()->push_back(parameter); + auto mutable_inputs = graph->MutableInputs(); + MS_EXCEPTION_IF_NULL(mutable_inputs); + mutable_inputs->push_back(parameter); } // set execution order auto cnode = graph->NewCNode(inputs); diff --git a/mindspore/ccsrc/session/session_basic.h b/mindspore/ccsrc/session/session_basic.h index b2e8c8894f..27171b7589 100755 --- 
a/mindspore/ccsrc/session/session_basic.h +++ b/mindspore/ccsrc/session/session_basic.h @@ -48,11 +48,7 @@ using OpRunInfoPtr = std::shared_ptr; class SessionBasic { public: - SessionBasic() : device_id_(0) { - graphs_ = {}; - run_op_graphs_ = {}; - summary_callback_ = nullptr; - } + SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0) {} virtual void Init(uint32_t device_id) { device_id_ = device_id; } @@ -75,7 +71,8 @@ class SessionBasic { virtual void RegisterSummaryCallBackFunc(const CallBackFunc &callback); std::shared_ptr ConstructKernelGraph(const AnfNodePtrList &lst, const AnfNodePtrList &outputs); - std::shared_ptr ConstructKernelGraph(const FuncGraphPtr &func_graph); + std::shared_ptr ConstructKernelGraph(const FuncGraphPtr &func_graph, + std::vector *all_out_graph); CNodePtr CreateNewCNode(const CNodePtr &cnode, bool valid_input, KernelGraph *graph, bool *from_other_graph, std::unordered_map *other_graph_cnode); @@ -93,8 +90,7 @@ class SessionBasic { virtual GraphId GetGraphIdByNode(const AnfNodePtr &) const { return kInvalidGraphId; } virtual GraphId GetFinalRunGraph() const { return kInvalidGraphId; } virtual void SetActive(GraphId, GraphId) {} - virtual void GetSummaryNodes(const KernelGraph *graph, - std::unordered_map> *summary); + virtual void GetSummaryNodes(KernelGraph *graph); protected: virtual void LoadInputData(const std::shared_ptr &kernel_graph, @@ -114,10 +110,11 @@ class SessionBasic { BaseRef TransformBaseRefListToTuple(const BaseRef &base_ref); // create a new kernel graph and update the graph sum KernelGraphPtr NewKernelGraph(); - ParameterPtr CreateNewParameterFromParameter(const AnfNodePtr &anf, bool valid_input, KernelGraph *graph); + virtual ParameterPtr CreateNewParameterFromParameter(const AnfNodePtr &anf, bool valid_input, KernelGraph *graph); ValueNodePtr CreateValueNodeKernelGraph(const AnfNodePtr &anf, KernelGraph *graph); ParameterPtr CreateNewParameter(const AnfNodePtr &anf, KernelGraph *graph); 
AnfNodePtr CreateNewParameterFromCNode(const AnfNodePtr &anf, bool valid_input, KernelGraph *graph); + void AddParameterToGraphInputs(const std::vector ¶meters, KernelGraph *graph); std::unordered_map> graphs_; std::unordered_map> run_op_graphs_; @@ -129,6 +126,7 @@ class SessionBasic { }; using SessionPtr = std::shared_ptr; +using NamedSummaryOutputs = std::map>; } // namespace session } // namespace mindspore #endif // MINDSPORE_CCSRC_SESSION_SESSION_BASIC_H diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc index 1450572e4b..a5726b078a 100644 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -103,6 +103,7 @@ const char kNameReLU6[] = "ReLU6"; const char kNameReLU6Grad[] = "ReLU6Grad"; const char kNameElu[] = "Elu"; const char kNameEluGrad[] = "EluGrad"; +const char kNameTensorScatterUpdate[] = "TensorScatterUpdate"; const char kNameScatterUpdate[] = "ScatterUpdate"; const char kNameScatterNdUpdate[] = "ScatterNdUpdate"; const char kNameScatterMax[] = "ScatterMax"; @@ -182,6 +183,7 @@ const char kNameBinaryCrossEntropy[] = "BinaryCrossEntropy"; const char kNameBinaryCrossEntropyGrad[] = "BinaryCrossEntropyGrad"; const char kNameSparseApplyAdagrad[] = "SparseApplyAdagrad"; const char kNameSparseApplyFtrlD[] = "SparseApplyFtrlD"; +const char kNameApplyProximalAdagrad[] = "ApplyProximalAdagrad"; const char kNameAcosh[] = "Acosh"; const char kNameAcoshGrad[] = "AcoshGrad"; const char kNameFloorMod[] = "FloorMod"; @@ -203,6 +205,8 @@ const char kNameL2Loss[] = "L2Loss"; const char kNameCTCLoss[] = "CTCLoss"; const char kNameRange[] = "Range"; const char kNameSquareSumAll[] = "SquareSumAll"; +const char kNameAscendQuant[] = "AscendQuant"; +const char kNameAscendDequant[] = "AscendDequant"; // -----------------OpAdapter initialization-------------- std::unordered_map &DfGraphConvertor::get_adpt_map() { @@ -211,7 +215,7 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma {string(kNameIOU), 
ADPT_DESC(Iou)}, {string(kNameGreaterEqual), ADPT_DESC(GreaterEqual)}, {string(kNameSlice), ADPT_DESC(SliceD)}, - {string(kNameApplyMomentum), ADPT_DESC(ApplyMomentum)}, + {string(kNameApplyMomentum), ADPT_DESC(ApplyMomentumD)}, {string(kNameMaxPool), ADPT_DESC(MaxPool)}, {string(kNameAvgPool), ADPT_DESC(AvgPool)}, {string(kNameMaxPoolWithArgmax), ADPT_DESC(MaxPoolWithArgmax)}, @@ -260,6 +264,7 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma {string(kNameResizeBilinear), ADPT_DESC(ResizeBilinearV2D)}, {string(kNameZerosLike), ADPT_DESC(ZerosLike)}, {string(kNameOnesLike), ADPT_DESC(OnesLike)}, + {string(kNameTensorScatterUpdate), ADPT_DESC(TensorScatterUpdate)}, {string(kNameScatterUpdate), ADPT_DESC(ScatterUpdate)}, {string(kNameScatterNdUpdate), ADPT_DESC(ScatterNdUpdate)}, {string(kNameScatterMax), ADPT_DESC(ScatterMax)}, @@ -386,6 +391,7 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma {string(kNameBinaryCrossEntropyGrad), ADPT_DESC(BinaryCrossEntropyGrad)}, {string(kNameSparseApplyAdagrad), ADPT_DESC(SparseApplyAdagradD)}, {string(kNameSparseApplyFtrlD), ADPT_DESC(SparseApplyFtrlD)}, + {string(kNameApplyProximalAdagrad), ADPT_DESC(ApplyProximalAdagradD)}, {string(kNameAcosh), ADPT_DESC(Acosh)}, {string(kNameAcoshGrad), ADPT_DESC(AcoshGrad)}, {string(kNameFloorMod), ADPT_DESC(FloorMod)}, @@ -393,7 +399,7 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma {string(kNameDepthToSpace), ADPT_DESC(DepthToSpace)}, {string(kNameSign), ADPT_DESC(Sign)}, {string(kNameRound), ADPT_DESC(Round)}, - {string(kNameApplyFtrl), ADPT_DESC(ApplyFtrl)}, + {string(kNameApplyFtrl), ADPT_DESC(ApplyFtrlD)}, {string(kNameDiag), ADPT_DESC(Diag)}, {string(kNameDiagPart), ADPT_DESC(DiagPart)}, {string(kNameSpaceToBatch), ADPT_DESC(SpaceToBatchD)}, @@ -404,10 +410,12 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma {string(kNameL2Loss), ADPT_DESC(L2Loss)}, {string(kNameCTCLoss), ADPT_DESC(CTCLoss)}, {string(kNameRange), ADPT_DESC(RangeD)}, - {string(kNameSquareSumAll), 
ADPT_DESC(SquareSumAll)}}; + {string(kNameSquareSumAll), ADPT_DESC(SquareSumAll)}, + {string(kNameAscendQuant), ADPT_DESC(AscendQuant)}, + {string(kNameAscendDequant), ADPT_DESC(AscendDequant)}}; #ifdef ENABLE_GE adpt_map[string(kNamePrint)] = ADPT_DESC(Print); - adpt_map[string(kNameApplyAdam)] = ADPT_DESC(ApplyAdam); + adpt_map[string(kNameApplyAdam)] = ADPT_DESC(ApplyAdamD); #endif return adpt_map; } @@ -957,8 +965,8 @@ void DfGraphConvertor::TraceOutput(const AnfNodePtr node) { for (unsigned int i = 1; i < c->inputs().size(); i++) { TraceOutput(c->input(i)); } - } else if (name == "depend") { - if (c->inputs().size() < 3) { // "depend" primitive have 3 inputs + } else if (name == "Depend") { + if (c->inputs().size() < 3) { // "Depend" primitive have 3 inputs MS_LOG(EXCEPTION) << "length of inputs is " << c->inputs().size() << ", which is less than 3"; } TraceOutput(c->input(1)); @@ -1181,7 +1189,7 @@ void DfGraphConvertor::SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node auto &inputs = node->inputs(); for (size_t i = 1; i < inputs.size(); i++) { auto pred = inputs[i]; - while (pred->isa() && GetCNodeFuncName(pred->cast()) == "depend") { + while (pred->isa() && GetCNodeFuncName(pred->cast()) == "Depend") { pred = pred->cast()->input(1); } // skip the None input @@ -1360,7 +1368,7 @@ AnfNodePtr DfGraphConvertor::TraceTupleGetItem(const CNodePtr &node, unsigned in AnfNodePtr DfGraphConvertor::TraceDepend(const CNodePtr &node) { auto cnode = node->cast(); - if (cnode->inputs().size() < 3) { // "depend" primitive have 3 inputs + if (cnode->inputs().size() < 3) { // "Depend" primitive have 3 inputs MS_LOG(EXCEPTION) << "length of inputs of depend is less than 3"; } return cnode->inputs()[1]; @@ -1481,7 +1489,7 @@ AnfNodePtr DfGraphConvertor::GetRealOpNode(AnfNodePtr node) { // depend apply inputs: depend,output,depended_node if (IsPrimitiveCNode(node, prim::kPrimDepend)) { auto depend_inputs = node->cast()->inputs(); - if (depend_inputs.size() != 3) { // 
"depend" primitive have 3 inputs + if (depend_inputs.size() != 3) { // "Depend" primitive have 3 inputs MS_LOG(ERROR) << "depend input items not correct"; error_ = FAILED; return node; @@ -1698,7 +1706,7 @@ void DfGraphConvertor::ConvertControlDependNode(const CNodePtr node) { bool DfGraphConvertor::CheckCNode(const std::string &name, const CNodePtr node) { // ignore apply node of return - if (name == "return" || name == "depend") { + if (name == "return" || name == "Depend") { return false; } diff --git a/mindspore/ccsrc/transform/convert.h b/mindspore/ccsrc/transform/convert.h index 8a63f00c6c..2f6c9bb0ad 100644 --- a/mindspore/ccsrc/transform/convert.h +++ b/mindspore/ccsrc/transform/convert.h @@ -102,22 +102,15 @@ class DfGraphConvertor { explicit DfGraphConvertor(const AnfGraphPtr &anf_graph) : anf_graph_(anf_graph), df_graph_(std::make_shared(anf_graph_->ToString())) { #if (!defined ENABLE_GE) || (defined ENABLE_INFER) - auto it_training = anf_graph->flags().find("training"); - if (it_training != anf_graph->flags().end()) { - training_ = it_training->second; - } else { - training_ = false; - } + training_ = anf_graph->has_flag("training"); #else training_ = ENABLE_TRAIN; #endif - auto it_distribute = anf_graph->flags().find("broadcast_flag"); - if (it_distribute != anf_graph->flags().end()) { + distribute_ = anf_graph->has_flag("broadcast_flag"); + if (anf_graph->has_flag("broadcast_flag")) { ConfigManager::GetInstance().set_parallel_strategy(ParallelStrategy::DISTRIBUTION); - distribute_ = it_distribute->second; } else { ConfigManager::GetInstance().set_parallel_strategy(ParallelStrategy::ONE_DEVICE); - distribute_ = false; } MS_LOG(INFO) << "Create DfGraphConvertor with training: " << training_ << ", distribute: " << distribute_; diff --git a/mindspore/ccsrc/transform/op_declare.cc b/mindspore/ccsrc/transform/op_declare.cc index ee59d56003..7e5e69beb6 100644 --- a/mindspore/ccsrc/transform/op_declare.cc +++ b/mindspore/ccsrc/transform/op_declare.cc @@ 
-127,11 +127,12 @@ INPUT_MAP(Constant) = EMPTY_INPUT_MAP; ATTR_MAP(Constant) = {{"value", ATTR_DESC(value, AnyTraits())}}; OUTPUT_MAP(Constant) = {{0, OUTPUT_DESC(y)}}; -// ApplyMomentum -INPUT_MAP(ApplyMomentum) = { +// ApplyMomentumD +INPUT_MAP(ApplyMomentumD) = { {1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(lr)}, {4, INPUT_DESC(grad)}, {5, INPUT_DESC(momentum)}}; -ATTR_MAP(ApplyMomentum) = {{"use_nesterov", ATTR_DESC(use_nesterov, AnyTraits())}}; -OUTPUT_MAP(ApplyMomentum) = {{0, OUTPUT_DESC(var)}}; +ATTR_MAP(ApplyMomentumD) = {{"use_nesterov", ATTR_DESC(use_nesterov, AnyTraits())}, + {"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyMomentumD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(accum)}}; // ScalarSummary INPUT_MAP(Summary) = {{2, INPUT_DESC(x)}}; @@ -472,6 +473,15 @@ ATTR_MAP(ApplyAdam) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits()) {"use_nesterov", ATTR_DESC(use_nesterov, AnyTraits())}}; OUTPUT_MAP(ApplyAdam) = {{0, OUTPUT_DESC(var)}}; +// ApplyAdamD +INPUT_MAP(ApplyAdamD) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(m)}, {3, INPUT_DESC(v)}, + {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(beta2_power)}, {6, INPUT_DESC(lr)}, + {7, INPUT_DESC(beta1)}, {8, INPUT_DESC(beta2)}, {9, INPUT_DESC(epsilon)}, + {10, INPUT_DESC(grad)}}; +ATTR_MAP(ApplyAdamD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}, + {"use_nesterov", ATTR_DESC(use_nesterov, AnyTraits())}}; +OUTPUT_MAP(ApplyAdamD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(m)}, {2, OUTPUT_DESC(v)}}; + // Relu6 INPUT_MAP(Relu6) = {{1, INPUT_DESC(x)}}; ATTR_MAP(Relu6) = EMPTY_ATTR_MAP; @@ -515,6 +525,11 @@ INPUT_MAP(Unpack) = {{1, INPUT_DESC(x)}}; ATTR_MAP(Unpack) = {{"axis", ATTR_DESC(axis, AnyTraits())}, {"num", ATTR_DESC(num, AnyTraits())}}; DYN_OUTPUT_MAP(Unpack) = {{0, DYN_OUTPUT_DESC(y)}}; +// TensorScatterUpdate +INPUT_MAP(TensorScatterUpdate) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(indices)}, {3, INPUT_DESC(updates)}}; +ATTR_MAP(TensorScatterUpdate) = 
EMPTY_ATTR_MAP; +OUTPUT_MAP(TensorScatterUpdate) = {{0, OUTPUT_DESC(y)}}; + // ScatterUpdate INPUT_MAP(ScatterUpdate) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(indices)}, {3, INPUT_DESC(updates)}}; ATTR_MAP(ScatterUpdate) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; @@ -1155,6 +1170,12 @@ ATTR_MAP(SparseApplyAdagradD) = {{"lr", ATTR_DESC(lr, AnyTraits())}, {"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; OUTPUT_MAP(SparseApplyAdagradD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(accum)}}; +// ApplyProximalAdagradD +INPUT_MAP(ApplyProximalAdagradD) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(lr)}, + {4, INPUT_DESC(l1)}, {5, INPUT_DESC(l2)}, {6, INPUT_DESC(grad)}}; +ATTR_MAP(ApplyProximalAdagradD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyProximalAdagradD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(accum)}}; + // SparseApplyFtrlD INPUT_MAP(SparseApplyFtrlD) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, @@ -1188,12 +1209,12 @@ INPUT_MAP(Round) = {{1, INPUT_DESC(x)}}; ATTR_MAP(Round) = EMPTY_ATTR_MAP; OUTPUT_MAP(Round) = {{0, OUTPUT_DESC(y)}}; -// ApplyFtrl -INPUT_MAP(ApplyFtrl) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(linear)}, - {4, INPUT_DESC(grad)}, {5, INPUT_DESC(lr)}, {6, INPUT_DESC(l1)}, - {7, INPUT_DESC(l2)}, {8, INPUT_DESC(lr_power)}}; -ATTR_MAP(ApplyFtrl) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; -OUTPUT_MAP(ApplyFtrl) = {{0, OUTPUT_DESC(var)}}; +// ApplyFtrlD +INPUT_MAP(ApplyFtrlD) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(linear)}, + {4, INPUT_DESC(grad)}, {5, INPUT_DESC(lr)}, {6, INPUT_DESC(l1)}, + {7, INPUT_DESC(l2)}, {8, INPUT_DESC(lr_power)}}; +ATTR_MAP(ApplyFtrlD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyFtrlD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(accum)}, {2, OUTPUT_DESC(linear)}}; // Diag INPUT_MAP(Diag) = {{1, INPUT_DESC(x)}}; @@ -1256,6 +1277,19 @@ ATTR_MAP(CTCLoss) = { 
{"ignore_longer_outputs_than_inputs", ATTR_DESC(ignore_longer_outputs_than_inputs, AnyTraits())}}; OUTPUT_MAP(CTCLoss) = {{0, OUTPUT_DESC(loss)}, {1, OUTPUT_DESC(gradient)}}; +// AscendQuant +INPUT_MAP(AscendQuant) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(AscendQuant) = {{"scale", ATTR_DESC(scale, AnyTraits())}, + {"offset", ATTR_DESC(offset, AnyTraits())}, + {"sqrt_mode", ATTR_DESC(sqrt_mode, AnyTraits())}, + {"round_mode", ATTR_DESC(round_mode, AnyTraits())}}; +OUTPUT_MAP(AscendQuant) = {{0, OUTPUT_DESC(y)}}; + +// AscendDequant +INPUT_MAP(AscendDequant) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(deq_scale)}}; +ATTR_MAP(AscendDequant) = {{"sqrt_mode", ATTR_DESC(sqrt_mode, AnyTraits())}, + {"relu_flag", ATTR_DESC(relu_flag, AnyTraits())}}; +OUTPUT_MAP(AscendDequant) = {{0, OUTPUT_DESC(y)}}; #ifdef ENABLE_GE // Print INPUT_MAP(Print) = EMPTY_INPUT_MAP; diff --git a/mindspore/ccsrc/transform/op_declare.h b/mindspore/ccsrc/transform/op_declare.h index 3d1b6e7a7f..f64dc7b671 100755 --- a/mindspore/ccsrc/transform/op_declare.h +++ b/mindspore/ccsrc/transform/op_declare.h @@ -120,6 +120,8 @@ DECLARE_OP_ADAPTER(ResizeNearestNeighborV2Grad) DECLARE_OP_USE_OUTPUT(ResizeNearestNeighborV2Grad) DECLARE_OP_ADAPTER(ApplyAdam) DECLARE_OP_USE_OUTPUT(ApplyAdam) +DECLARE_OP_ADAPTER(ApplyAdamD) +DECLARE_OP_USE_OUTPUT(ApplyAdamD) DECLARE_OP_ADAPTER(Relu6) DECLARE_OP_USE_OUTPUT(Relu6) DECLARE_OP_ADAPTER(Relu6Grad) @@ -132,6 +134,8 @@ DECLARE_OP_ADAPTER(ZerosLike) DECLARE_OP_USE_OUTPUT(ZerosLike) DECLARE_OP_ADAPTER(OnesLike) DECLARE_OP_USE_OUTPUT(OnesLike) +DECLARE_OP_ADAPTER(TensorScatterUpdate) +DECLARE_OP_USE_OUTPUT(TensorScatterUpdate) DECLARE_OP_ADAPTER(ScatterUpdate) DECLARE_OP_USE_OUTPUT(ScatterUpdate) DECLARE_OP_ADAPTER(ScatterNdUpdate) @@ -319,8 +323,8 @@ DECLARE_OP_ADAPTER(Assign) DECLARE_OP_USE_OUTPUT(Assign) DECLARE_OP_ADAPTER(Constant) DECLARE_OP_USE_OUTPUT(Constant) -DECLARE_OP_ADAPTER(ApplyMomentum) -DECLARE_OP_USE_OUTPUT(ApplyMomentum) +DECLARE_OP_ADAPTER(ApplyMomentumD) 
+DECLARE_OP_USE_OUTPUT(ApplyMomentumD) // ** Summary Operations ** DECLARE_OP_ADAPTER(Summary) @@ -442,6 +446,8 @@ DECLARE_OP_ADAPTER(BinaryCrossEntropyGrad) DECLARE_OP_USE_OUTPUT(BinaryCrossEntropyGrad) DECLARE_OP_ADAPTER(SparseApplyAdagradD) DECLARE_OP_USE_OUTPUT(SparseApplyAdagradD) +DECLARE_OP_ADAPTER(ApplyProximalAdagradD) +DECLARE_OP_USE_OUTPUT(ApplyProximalAdagradD) DECLARE_OP_ADAPTER(SpaceToDepth) DECLARE_OP_USE_OUTPUT(SpaceToDepth) DECLARE_OP_ADAPTER(DepthToSpace) @@ -452,8 +458,8 @@ DECLARE_OP_ADAPTER(LarsV2Update) DECLARE_OP_USE_OUTPUT(LarsV2Update) DECLARE_OP_ADAPTER(Round) DECLARE_OP_USE_OUTPUT(Round) -DECLARE_OP_ADAPTER(ApplyFtrl) -DECLARE_OP_USE_OUTPUT(ApplyFtrl) +DECLARE_OP_ADAPTER(ApplyFtrlD) +DECLARE_OP_USE_OUTPUT(ApplyFtrlD) DECLARE_OP_ADAPTER(SparseApplyFtrlD) DECLARE_OP_USE_OUTPUT(SparseApplyFtrlD) DECLARE_OP_ADAPTER(Diag) @@ -475,6 +481,10 @@ DECLARE_OP_ADAPTER(L2Loss) DECLARE_OP_USE_OUTPUT(L2Loss) DECLARE_OP_ADAPTER(CTCLoss) DECLARE_OP_USE_OUTPUT(CTCLoss) +DECLARE_OP_ADAPTER(AscendQuant) +DECLARE_OP_USE_OUTPUT(AscendQuant) +DECLARE_OP_ADAPTER(AscendDequant) +DECLARE_OP_USE_OUTPUT(AscendDequant) #ifdef ENABLE_GE DECLARE_OP_ADAPTER(Print) DECLARE_OP_USE_DYN_INPUT(Print) diff --git a/mindspore/ccsrc/utils/CMakeLists.txt b/mindspore/ccsrc/utils/CMakeLists.txt index 71d68729b9..72f698a97e 100644 --- a/mindspore/ccsrc/utils/CMakeLists.txt +++ b/mindspore/ccsrc/utils/CMakeLists.txt @@ -5,5 +5,11 @@ if (NOT ENABLE_GE) list(REMOVE_ITEM _UTILS_SRC_LIST ${_UTILS_GE_SRC_FILES}) endif () +file(GLOB_RECURSE _UTILS_LITE_SRC_FILES + ./load_onnx/anf_converter.cc + ./load_onnx/anf_model_parser.cc + ) +list(REMOVE_ITEM _UTILS_SRC_LIST ${_UTILS_LITE_SRC_FILES}) + set_property(SOURCE ${_UTILS_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_UTILS) add_library(_mindspore_utils_obj OBJECT ${_UTILS_SRC_LIST}) diff --git a/mindspore/ccsrc/utils/base_ref_utils.cc b/mindspore/ccsrc/utils/base_ref_utils.cc new file mode 100644 index 
0000000000..87089c6266 --- /dev/null +++ b/mindspore/ccsrc/utils/base_ref_utils.cc @@ -0,0 +1,55 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "utils/base_ref_utils.h" +#include "include/ms_tensor.h" +#include "ir/tensor.h" + +namespace mindspore { +void IterateFindTensor(std::vector> *msTensors, const VectorRef &ref_list) { + for (size_t i = 0; i < ref_list.size(); ++i) { + if (utils::isa(ref_list[i])) { + auto tensor_ptr = utils::cast>(ref_list[i]); + MS_EXCEPTION_IF_NULL(tensor_ptr); + auto tensor = new inference::Tensor(tensor_ptr); + msTensors->emplace_back(std::shared_ptr(tensor)); + } else if (utils::isa(ref_list[i])) { + auto ref_iter = utils::cast(ref_list[i]); + IterateFindTensor(msTensors, ref_iter); + } else { + MS_LOG(EXCEPTION) << "The output is not a tensor"; + } + } +} + +std::vector> TransformVectorRefToMultiTensor(const VectorRef &base_ref) { + std::vector> msTensors; + if (utils::isa(base_ref)) { + auto ref_list = utils::cast(base_ref); + IterateFindTensor(&msTensors, ref_list); + } else if (utils::isa(base_ref)) { + auto tensor_ptr = utils::cast>(base_ref); + MS_EXCEPTION_IF_NULL(tensor_ptr); + auto tensor = new inference::Tensor(tensor_ptr); + msTensors.emplace_back(std::shared_ptr(tensor)); + } else { + MS_LOG(EXCEPTION) << "The output is not a base ref list or a tensor!"; + } + return msTensors; +} +} // namespace mindspore diff --git 
a/mindspore/ccsrc/utils/base_ref_utils.h b/mindspore/ccsrc/utils/base_ref_utils.h new file mode 100644 index 0000000000..2503eab738 --- /dev/null +++ b/mindspore/ccsrc/utils/base_ref_utils.h @@ -0,0 +1,27 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "utils/base_ref.h" +#include "include/ms_tensor.h" + +#ifndef MINDSPORE_CCSRC_UTILS_BASE_REF_UTILS_H +#define MINDSPORE_CCSRC_UTILS_BASE_REF_UTILS_H +namespace mindspore { +std::vector> TransformVectorRefToMultiTensor(const VectorRef &base_ref); +} // namespace mindspore +#endif // MINDSPORE_CCSRC_UTILS_BASE_REF_UTILS_H diff --git a/mindspore/ccsrc/utils/callbacks.cc b/mindspore/ccsrc/utils/callbacks.cc index ad9751c332..427cc5e568 100644 --- a/mindspore/ccsrc/utils/callbacks.cc +++ b/mindspore/ccsrc/utils/callbacks.cc @@ -26,9 +26,9 @@ namespace mindspore { namespace callbacks { -const char PYTHON_MOD_CALLBACK_MODULE[] = "mindspore.train.callback"; -const char PYTHON_FUN_PROCESS_CHECKPOINT[] = "_checkpoint_cb_for_save_op"; -const char PYTHON_FUN_PROCESS_SUMMARY[] = "_summary_cb_for_save_op"; +const char PYTHON_MOD_CALLBACK_MODULE[] = "mindspore.train.callback._callback"; +const char PYTHON_FUN_PROCESS_CHECKPOINT[] = "checkpoint_cb_for_save_op"; +const char PYTHON_FUN_PROCESS_SUMMARY[] = "summary_cb_for_save_op"; const char kSummary[] = "Summary"; const char kCheckPoint[] = "Save"; const int ONE_SHAPE = 1; diff --git 
a/mindspore/ccsrc/utils/callbacks_ge.cc b/mindspore/ccsrc/utils/callbacks_ge.cc index 151b78d010..3174ec4b15 100644 --- a/mindspore/ccsrc/utils/callbacks_ge.cc +++ b/mindspore/ccsrc/utils/callbacks_ge.cc @@ -25,9 +25,9 @@ namespace mindspore { namespace callbacks { -const char PYTHON_MOD_CALLBACK_MODULE[] = "mindspore.train.callback"; -const char PYTHON_FUN_PROCESS_CHECKPOINT[] = "_checkpoint_cb_for_save_op"; -const char PYTHON_FUN_PROCESS_SUMMARY[] = "_summary_cb_for_save_op"; +const char PYTHON_MOD_CALLBACK_MODULE[] = "mindspore.train.callback._callback"; +const char PYTHON_FUN_PROCESS_CHECKPOINT[] = "checkpoint_cb_for_save_op"; +const char PYTHON_FUN_PROCESS_SUMMARY[] = "summary_cb_for_save_op"; const char kSummary[] = "Summary"; const char kCheckPoint[] = "Save"; const int ONE_SHAPE = 1; diff --git a/mindspore/ccsrc/utils/context/ms_context.cc b/mindspore/ccsrc/utils/context/ms_context.cc index 0aacf2d2a1..35e053dd53 100644 --- a/mindspore/ccsrc/utils/context/ms_context.cc +++ b/mindspore/ccsrc/utils/context/ms_context.cc @@ -74,6 +74,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) { precompile_only_ = false; auto_mixed_precision_flag_ = false; enable_pynative_infer_ = false; + enable_pynative_hook_ = false; enable_dynamic_mem_pool_ = true; graph_memory_max_size_ = "0"; variable_memory_max_size_ = "0"; @@ -81,6 +82,9 @@ MsContext::MsContext(const std::string &policy, const std::string &target) { profiling_mode_ = false; profiling_options_ = "training_trace"; check_bprop_flag_ = false; + max_device_memory_ = kDefaultMaxDeviceMemory; + print_file_path_ = ""; + enable_graph_kernel_ = false; } std::shared_ptr MsContext::GetInstance() { diff --git a/mindspore/ccsrc/utils/context/ms_context.h b/mindspore/ccsrc/utils/context/ms_context.h index 9a91f391c9..9afe1fa5aa 100644 --- a/mindspore/ccsrc/utils/context/ms_context.h +++ b/mindspore/ccsrc/utils/context/ms_context.h @@ -41,9 +41,12 @@ const int kPynativeMode = 1; const char 
kCPUDevice[] = "CPU"; const char kGPUDevice[] = "GPU"; const char kAscendDevice[] = "Ascend"; +const char kDavinciInferenceDevice[] = "AscendInference"; const char kDavinciDevice[] = "Davinci"; const char KNpuLog[] = "_npu_log"; const std::set kTargetSet = {kCPUDevice, kGPUDevice, kAscendDevice, kDavinciDevice}; +// The default max available device memory is 1024GB. +const float kDefaultMaxDeviceMemory = 1024; class MsContext { public: @@ -62,6 +65,9 @@ class MsContext { bool enable_pynative_infer() const { return enable_pynative_infer_; } void set_enable_pynative_infer(bool enable_pynative_infer) { enable_pynative_infer_ = enable_pynative_infer; } + bool enable_pynative_hook() const { return enable_pynative_hook_; } + void set_enable_pynative_hook(bool enable_pynative_hook) { enable_pynative_hook_ = enable_pynative_hook; } + bool enable_task_sink() const { return enable_task_sink_; } void set_precompile_only(bool precompile_only) { precompile_only_ = precompile_only; } @@ -92,7 +98,7 @@ class MsContext { bool ir_fusion_flag() const { return ir_fusion_flag_; } bool loop_sink_flag() const { return enable_loop_sink_; } - + void set_loop_sink_flag(bool enable_loop_sink) { enable_loop_sink_ = enable_loop_sink; } void set_enable_mem_reuse(bool enable_mem_reuse) { enable_mem_reuse_ = enable_mem_reuse; } bool enable_mem_reuse() const { return enable_mem_reuse_; } @@ -135,6 +141,10 @@ class MsContext { variable_memory_max_size_ = variable_memory_max_size; } + const std::string &variable_memory_max_size() const { return variable_memory_max_size_; } + + const std::string &graph_memory_max_size() const { return graph_memory_max_size_; } + void set_enable_profiling(bool flag) { profiling_mode_ = flag; } bool enable_profiling() const { return profiling_mode_; } @@ -142,6 +152,14 @@ class MsContext { std::string profiling_options() const { return profiling_options_; } bool check_bprop_flag() const { return check_bprop_flag_; } void set_check_bprop_flag(bool check_bprop_flag) { 
check_bprop_flag_ = check_bprop_flag; } + void set_print_file_path(const std::string &file) { print_file_path_ = file; } + const std::string &print_file_path() const { return print_file_path_; } + + float max_device_memory() const { return max_device_memory_; } + void set_max_device_memory(float max_device_memory) { max_device_memory_ = max_device_memory; } + + void set_enable_graph_kernel(bool enable_graph_kernel) { enable_graph_kernel_ = enable_graph_kernel; } + bool enable_graph_kernel() const { return enable_graph_kernel_; } private: MsContext(const std::string &backend_policy, const std::string &target); @@ -156,6 +174,7 @@ class MsContext { uint32_t device_id_; int execution_mode_; bool enable_pynative_infer_; + bool enable_pynative_hook_; bool save_graphs_flag_; std::string save_graphs_path_; uint32_t tsd_ref_; @@ -182,6 +201,9 @@ class MsContext { bool profiling_mode_; std::string profiling_options_; bool check_bprop_flag_; + float max_device_memory_; + std::string print_file_path_; + bool enable_graph_kernel_; }; } // namespace mindspore diff --git a/mindspore/ccsrc/utils/convert_utils.cc b/mindspore/ccsrc/utils/convert_utils.cc index 45c292d545..6e28e38ed1 100644 --- a/mindspore/ccsrc/utils/convert_utils.cc +++ b/mindspore/ccsrc/utils/convert_utils.cc @@ -30,6 +30,7 @@ #include "pipeline/parse/parse_base.h" #include "ir/value.h" #include "ir/tensor.h" +#include "ir/param_value_py.h" #include "utils/base_ref_extends.h" namespace mindspore { @@ -105,7 +106,7 @@ py::object ValuePtrToPyData(const ValuePtr &value) { } ret = rets; } else if (value->isa()) { - ret = parse::python_adapter::CallPyFn(parse::PYTHON_MOD_PARSE_MODULE, parse::PYTHON_PARSE_CLASS_ELLIPSIS); + ret = py::ellipsis(); } else if (value->isa()) { auto slice = value->cast(); auto start = ValuePtrToPyData(slice->start()); @@ -426,7 +427,17 @@ bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &output, const py::tuple MS_EXCEPTION(UnknownError) << "Index " << index << " equal or larger than 
args size " << args.size() << " add Parameter count " << func_graph->hyper_param_count() << "."; } - *ret_val = args[index]; + if (index < args.size()) { + *ret_val = args[index]; + } else { + auto param = dyn_cast(params[index]); + MS_EXCEPTION_IF_NULL(param); + if (!param->has_default()) { + MS_LOG(EXCEPTION) << "Can not determine value of Parameter " << index << " (" << param->name() << ")"; + } + auto param_value = std::dynamic_pointer_cast(param->default_param()); + *ret_val = param_value->value().attr("data"); + } return true; } return false; diff --git a/mindspore/ccsrc/utils/graph_utils.h b/mindspore/ccsrc/utils/graph_utils.h index 0b49615523..93edda3e34 100644 --- a/mindspore/ccsrc/utils/graph_utils.h +++ b/mindspore/ccsrc/utils/graph_utils.h @@ -39,6 +39,7 @@ namespace mindspore { enum IncludeType { FOLLOW, NOFOLLOW, EXCLUDE }; using IncludeFunc = std::function; +using FilterFunc = std::function; using SuccFunc = std::function(AnfNodePtr)>; using SearchFunc = std::function(const AnfNodePtr &, const IncludeFunc &)>; @@ -58,6 +59,13 @@ std::vector DeepScopedGraphSearch(const AnfNodePtr &root, const Incl std::vector DeepUsedGraphSearch(const AnfNodePtr &root, const IncludeFunc &include = AlwaysInclude); std::vector DeepLinkedGraphSearch(const AnfNodePtr &root, const IncludeFunc &include = AlwaysInclude); +std::vector DeepScopedGraphSearchWithFilter(const AnfNodePtr &root, const IncludeFunc &include, + const FilterFunc &filter); + +class FuncGraphManager; +using FuncGraphManagerPtr = std::shared_ptr; +std::vector DeepUsersSearch(const AnfNodePtr &root, const IncludeFunc &include, + const FuncGraphManagerPtr &mng); std::vector TopoSort(const AnfNodePtr &root, const SuccFunc &succ = SuccIncoming, const IncludeFunc &include = AlwaysInclude); diff --git a/mindspore/ccsrc/utils/graph_utils_extends.cc b/mindspore/ccsrc/utils/graph_utils_extends.cc index 7c3991b638..0740c24236 100644 --- a/mindspore/ccsrc/utils/graph_utils_extends.cc +++ 
b/mindspore/ccsrc/utils/graph_utils_extends.cc @@ -26,6 +26,7 @@ #include #include "ir/visitor.h" +#include "ir/manager.h" #include "ir/func_graph.h" #include "debug/label.h" #include "utils/log_adapter.h" @@ -37,7 +38,8 @@ namespace mindspore { namespace { class DeepFirstSearcher : public AnfVisitor { public: - explicit DeepFirstSearcher(const IncludeFunc &include) : include_(include) {} + explicit DeepFirstSearcher(const IncludeFunc &include, const FilterFunc &filter = nullptr) + : include_(include), filter_(filter) {} ~DeepFirstSearcher() override = default; std::vector Search(const AnfNodePtr &root) { @@ -61,8 +63,9 @@ class DeepFirstSearcher : public AnfVisitor { if (incl == EXCLUDE) { return; } - - res_.push_back(node); + if (filter_ == nullptr || !filter_(node)) { + res_.push_back(node); + } if (incl == FOLLOW) { AnfVisitor::Visit(node); } @@ -71,6 +74,7 @@ class DeepFirstSearcher : public AnfVisitor { private: size_t seen_{0}; IncludeFunc include_; + FilterFunc filter_; std::vector res_{}; }; @@ -158,12 +162,36 @@ class DeepLinkedGraphSearcher : public DeepFirstSearcher { void Visit(const ValueNodePtr &) override {} }; + +class DeepUsersSearcher : public DeepFirstSearcher { + public: + explicit DeepUsersSearcher(const IncludeFunc &include, const FuncGraphManagerPtr &mng) + : DeepFirstSearcher(include), mng_(mng) {} + ~DeepUsersSearcher() override = default; + + void Visit(const CNodePtr &cnode) override { + auto &users = mng_->node_users()[cnode]; + for (auto iter = users.begin(); iter != users.end(); ++iter) { + DeepFirstSearcher::Visit(iter->first); + } + } + void Visit(const ValueNodePtr &) override {} + + private: + FuncGraphManagerPtr mng_; +}; } // namespace +// include for if expand the node the search, filter for if put the node to results. 
std::vector DeepScopedGraphSearch(const AnfNodePtr &root, const IncludeFunc &include) { return DeepScopedGraphSearcher(include).Search(root); } +std::vector DeepScopedGraphSearchWithFilter(const AnfNodePtr &root, const IncludeFunc &include, + const FilterFunc &filter) { + return DeepFirstSearcher(include, filter).Search(root); +} + std::vector DeepUsedGraphSearch(const AnfNodePtr &root, const IncludeFunc &include) { return DeepUsedGraphSearcher(include).Search(root); } @@ -171,4 +199,9 @@ std::vector DeepUsedGraphSearch(const AnfNodePtr &root, const Includ std::vector DeepLinkedGraphSearch(const AnfNodePtr &root, const IncludeFunc &include) { return DeepLinkedGraphSearcher(include).Search(root); } + +std::vector DeepUsersSearch(const AnfNodePtr &root, const IncludeFunc &include, + const FuncGraphManagerPtr &mng) { + return DeepUsersSearcher(include, mng).Search(root); +} } // namespace mindspore diff --git a/mindspore/ccsrc/utils/lineage.proto b/mindspore/ccsrc/utils/lineage.proto new file mode 100644 index 0000000000..dec6f9a3f6 --- /dev/null +++ b/mindspore/ccsrc/utils/lineage.proto @@ -0,0 +1,129 @@ +// Copyright 2020 Huawei Technologies Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mindspore.irpb; +option cc_enable_arenas = true; + + +// Event Protocol buffer, Top define +message LineageEvent { + // Timestamp + required double wall_time = 1; + + // The step of train. 
+ optional int64 step = 2; + + oneof what { + // An event file was started, with the specified version. + // Now version is "MindSpore.Event:1" + string version = 3; + + // Train lineage + TrainLineage train_lineage = 6; + + // Evaluation lineage + EvaluationLineage evaluation_lineage = 7; + + // Dataset graph + DatasetGraph dataset_graph = 9; + + // User defined info + UserDefinedInfo user_defined_info = 10; + } +} + +// User defined info +message UserDefinedInfo{ + // repeated user defined info + repeated UserDefinedInfo user_info = 1; + + // key/value which contains both scalar and dict + map map_dict = 2; + map map_int32 = 3; + map map_str = 4; + map map_double = 5; +} + +// TrainLineage records infos of a train. +message TrainLineage{ + message HyperParameters{ + optional string optimizer = 1; + optional float learning_rate = 2; + optional string loss_function = 3; + optional int32 epoch = 4; + optional string parallel_mode = 5; + optional int32 device_num = 6; + optional int32 batch_size = 8; + } + + message TrainDataset{ + optional string train_dataset_path = 1; + optional int32 train_dataset_size = 2; + } + + message Algorithm{ + optional string network = 1; + optional float loss = 2; + } + + message Model{ + optional string path = 3; + optional int64 size = 4; + } + + optional HyperParameters hyper_parameters = 1; + optional TrainDataset train_dataset = 2; + optional Algorithm algorithm = 3; + optional Model model = 4; +} + +//EvalLineage records infos of evaluation. 
+message EvaluationLineage{ + message ValidDataset{ + optional string valid_dataset_path = 1; + optional int32 valid_dataset_size = 2; + } + + optional string metric = 2; + optional ValidDataset valid_dataset = 3; +} + + +// DatasetGraph +message DatasetGraph { + repeated DatasetGraph children = 1; + optional OperationParameter parameter = 2; + repeated Operation operations = 3; + optional Operation sampler = 4; +} + +message Operation { + optional OperationParameter operationParam = 1; + repeated int32 size = 2; + repeated float weights = 3; +} + +message OperationParameter{ + map mapStr = 1; + map mapStrList = 2; + map mapBool = 3; + map mapInt = 4; + map mapDouble = 5; +} + +message StrList { + repeated string strValue = 1; +} diff --git a/mindspore/ccsrc/utils/load_onnx/anf_converter.cc b/mindspore/ccsrc/utils/load_onnx/anf_converter.cc new file mode 100644 index 0000000000..ad87d6ae8f --- /dev/null +++ b/mindspore/ccsrc/utils/load_onnx/anf_converter.cc @@ -0,0 +1,115 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils/load_onnx/anf_model_parser.h" +#include "utils/load_onnx/anf_converter.h" +#include "google/protobuf/io/zero_copy_stream_impl.h" +#include "proto/onnx.pb.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace lite { + +const char WHITESPACE[] = "\t\n\v\f\r "; +const int FLAG_PREFIX_LEN = 2; + +void AnfConverter::Trim(std::string *input) { + if (input == nullptr) { + return; + } + if (input->empty()) { + return; + } + input->erase(0, input->find_first_not_of(WHITESPACE)); + input->erase(input->find_last_not_of(WHITESPACE) + 1); +} + +int AnfConverter::ValidateFileStr(const std::string &modelFile, std::string fileType) { + if (modelFile.size() > fileType.size()) { + if (modelFile.substr(modelFile.size() - fileType.size()) == fileType) { + return 0; + } else { + return 1; + } + } else { + return 1; + } +} + +bool AnfConverter::ReadOnnxFromBinary(const std::string &modelFile, google::protobuf::Message *onnx_model) { + std::unique_ptr onnx_file(new (std::nothrow) char[PATH_MAX]{0}); + int fd = open(onnx_file.get(), O_RDONLY); + google::protobuf::io::FileInputStream input(fd); + google::protobuf::io::CodedInputStream code_input(&input); + code_input.SetTotalBytesLimit(INT_MAX, 536870912); + bool ret = onnx_model->ParseFromCodedStream(&code_input); + if (!ret) { + MS_LOG(ERROR) << "load onnx file failed"; + return false; + } + (void)close(fd); + MS_LOG(INFO) << "enter ReadProtoFromBinary success!" 
<< std::endl; + return true; +} + +std::shared_ptr AnfConverter::RunAnfConverter(const std::string &file_path) { + std::string modelFile; + + std::string tmp = file_path; + Trim(&tmp); + const std::string flagItem(tmp); + + size_t pos = flagItem.find_first_of("="); + if (pos == std::string::npos) { + MS_LOG(ERROR) << "Trans data not support input format!"; + } else { + modelFile = flagItem.substr(pos + 1); + std::cout << "input protobuf file path is: " << flagItem.substr(pos + 1) << std::endl; + } + + if (ValidateFileStr(modelFile, ".pb") != 0) { + MS_LOG(EXCEPTION) << "INPUT ILLEGAL: modelFile must be *.pb"; + } + + onnx::ModelProto model_; + ReadOnnxFromBinary(modelFile, &model_); + MSANFModelParser model_parser; + FuncGraphPtr dstgraph_ptr = model_parser.Parse(model_); + return dstgraph_ptr; +} + +std::shared_ptr AnfConverter::RunAnfConverter(const char *buf, const size_t buf_size) { + Py_Initialize(); + MS_EXCEPTION_IF_NULL(buf); + std::string str((const char *)buf, buf_size); + onnx::ModelProto model_; + if (!model_.ParseFromString(str)) { + MS_LOG(EXCEPTION) << "Parse model from buffer fail!"; + } + MSANFModelParser model_parser; + FuncGraphPtr dstgraph_ptr = model_parser.Parse(model_); + return dstgraph_ptr; +} +} // namespace lite +} // namespace mindspore diff --git a/mindspore/ccsrc/utils/load_onnx/anf_converter.h b/mindspore/ccsrc/utils/load_onnx/anf_converter.h new file mode 100644 index 0000000000..4f5fe3971f --- /dev/null +++ b/mindspore/ccsrc/utils/load_onnx/anf_converter.h @@ -0,0 +1,39 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_UTILS_LOAD_ONNX_ANF_CONVERTER_H +#define MINDSPORE_CCSRC_UTILS_LOAD_ONNX_ANF_CONVERTER_H +#include +#include +#include "google/protobuf/io/zero_copy_stream_impl.h" +#include "proto/onnx.pb.h" +#include "ir/func_graph.h" + +namespace mindspore { +namespace lite { +class AnfConverter { + public: + static std::shared_ptr RunAnfConverter(const std::string &file_path); + static std::shared_ptr RunAnfConverter(const char *buf, const size_t buf_size); + + private: + static void Trim(std::string *input); + static int ValidateFileStr(const std::string &modelFile, std::string fileType); + static bool ReadOnnxFromBinary(const std::string &modelFile, google::protobuf::Message *onnx_model); +}; +} // namespace lite +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc new file mode 100644 index 0000000000..e44eb23001 --- /dev/null +++ b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc @@ -0,0 +1,571 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/load_onnx/anf_model_parser.h" +#include +#include +#include +#include +#include +#include "google/protobuf/io/zero_copy_stream_impl.h" +#include "ir/tensor.h" +#include "ir/param_value_py.h" +#include "operator/ops.h" +#include "pipeline/static_analysis/abstract_value.h" +#include "proto/onnx.pb.h" +#include "utils/log_adapter.h" + +using std::string; + +namespace mindspore { +namespace lite { +static constexpr char kConstantValueNode[] = "Constant"; +static constexpr char kCNodeShapeAttr[] = "shape"; +static constexpr char kCNodeShape1Attr[] = "shape1"; +static constexpr char kCNodeShape2Attr[] = "shape2"; +enum ParseForm : int { + FORM_PARSE_TYPE = 0, + FORM_PARSE_SCALAR = 1, + FORM_PARSE_TENSOR = 2, +}; + +static std::map kParseTypeSwitchMap{ + {"type", FORM_PARSE_TYPE}, {"scalar", FORM_PARSE_SCALAR}, {"tensor", FORM_PARSE_TENSOR}}; + +static std::unordered_map kDefaultValueSwitchMap{ + {onnx::TensorProto_DataType_BOOL, kNumberTypeBool}, {onnx::TensorProto_DataType_INT8, kNumberTypeInt8}, + {onnx::TensorProto_DataType_INT16, kNumberTypeInt16}, {onnx::TensorProto_DataType_INT32, kNumberTypeInt32}, + {onnx::TensorProto_DataType_INT64, kNumberTypeInt64}, {onnx::TensorProto_DataType_UINT8, kNumberTypeUInt8}, + {onnx::TensorProto_DataType_UINT16, kNumberTypeUInt16}, {onnx::TensorProto_DataType_UINT32, kNumberTypeUInt32}, + {onnx::TensorProto_DataType_UINT64, kNumberTypeUInt64}, {onnx::TensorProto_DataType_FLOAT16, kNumberTypeFloat16}, + {onnx::TensorProto_DataType_FLOAT, kNumberTypeFloat32}, {onnx::TensorProto_DataType_DOUBLE, kNumberTypeFloat64}, + {onnx::TensorProto_DataType_STRING, kObjectTypeString}, +}; + +#define PARSE_ONNXATTR_IN_SCALAR_FORM(type, valuetype) \ + void ParseAttrInScalar_##type##_##valuetype(const PrimitivePtr &prim, const std::string &attr_name, \ + const onnx::TensorProto &attr_tensor) { \ + 
MS_EXCEPTION_IF_NULL(prim); \ + std::vector attr_value_vec; \ + for (int i = 0; i < attr_tensor.type##_data_size(); ++i) { \ + auto value = static_cast(attr_tensor.type##_data(i)); \ + attr_value_vec.push_back(MakeValue(value)); \ + } \ + if (attr_value_vec.size() == 1) { \ + prim->AddAttr(attr_name, attr_value_vec[0]); \ + } else { \ + prim->AddAttr(attr_name, std::make_shared(attr_value_vec)); \ + } \ + } + +PARSE_ONNXATTR_IN_SCALAR_FORM(double, double) +PARSE_ONNXATTR_IN_SCALAR_FORM(float, float) +PARSE_ONNXATTR_IN_SCALAR_FORM(string, string) +PARSE_ONNXATTR_IN_SCALAR_FORM(int32, int32) +PARSE_ONNXATTR_IN_SCALAR_FORM(int32, bool) +PARSE_ONNXATTR_IN_SCALAR_FORM(int64, int64) +PARSE_ONNXATTR_IN_SCALAR_FORM(uint64, uint64) + +bool MSANFModelParser::BuildParameterForFuncGraph(const ParameterPtr &node, const onnx::ValueInfoProto &value_proto) { + MS_EXCEPTION_IF_NULL(node); + if (!value_proto.has_type() || !value_proto.has_name()) { + MS_LOG(ERROR) << "onnx ValueInfoProto has no type or name! "; + return false; + } + node->set_name(value_proto.name()); + const auto &type_proto = value_proto.type(); + if (!type_proto.has_tensor_type()) { + MS_LOG(ERROR) << "onnx TypeProto has no tesor_type! "; + return false; + } + const onnx::TypeProto_Tensor &tensor_typeproto = type_proto.tensor_type(); + if (!tensor_typeproto.has_elem_type() || !tensor_typeproto.has_shape()) { + MS_LOG(ERROR) << "onnx TypeProto_Tensor has no elem_type or shape! 
"; + return false; + } + const onnx::TensorShapeProto &tensor_shape = tensor_typeproto.shape(); + std::vector shape; + for (int i = 0; i < tensor_shape.dim_size(); ++i) { + shape.push_back(tensor_shape.dim(i).dim_value()); + } + + if (kDefaultValueSwitchMap.find(tensor_typeproto.elem_type()) == kDefaultValueSwitchMap.end()) { + MS_LOG(ERROR) << "onnx TypeProto_Tensor elem_type is not support yet!"; + return false; + } + + tensor::TensorPtr tensor_info = + std::make_shared(kDefaultValueSwitchMap[tensor_typeproto.elem_type()], shape); + MS_EXCEPTION_IF_NULL(tensor_info); + auto tensor_abstract = tensor_info->ToAbstract(); + MS_EXCEPTION_IF_NULL(tensor_abstract); + node->set_abstract(tensor_abstract); + + if (default_para_map_.find(value_proto.name()) != default_para_map_.end()) { + const onnx::TensorProto initialize_proto = default_para_map_[value_proto.name()]; + std::string initial_data = initialize_proto.raw_data(); + auto *tensor_data_buf = reinterpret_cast(tensor_info->data_c(true)); + MS_EXCEPTION_IF_NULL(tensor_data_buf); + memcpy_s(tensor_data_buf, tensor_info->data().nbytes(), initial_data.data(), initial_data.size()); + + py::array array_data = tensor_info->data(); + ParamValuePyPtr para_value_ptr = std::make_shared(); + MS_EXCEPTION_IF_NULL(para_value_ptr); + para_value_ptr->set_value(array_data); + node->set_default_param(para_value_ptr); + } + anfnode_build_map_[value_proto.name()] = node; + return true; +} + +bool MSANFModelParser::ImportParametersForGraph(const FuncGraphPtr &outputFuncGraph, + const onnx::GraphProto &importProto) { + MS_EXCEPTION_IF_NULL(outputFuncGraph); + MS_LOG(INFO) << "Parameters had default paramerer size is: " << importProto.initializer_size(); + + for (int i = 0; i < importProto.initializer_size(); ++i) { + const onnx::TensorProto &initializer_proto = importProto.initializer(i); + if (!initializer_proto.has_name()) { + MS_LOG(ERROR) << "initializer vector of onnx GraphProto has no name at index: " << i; + return false; + } + 
default_para_map_[initializer_proto.name()] = initializer_proto; + } + + MS_LOG(INFO) << "all parameters size: " << importProto.input_size(); + for (int i = 0; i < importProto.input_size(); ++i) { + const onnx::ValueInfoProto &input_proto = importProto.input(i); + if (!BuildParameterForFuncGraph(outputFuncGraph->add_parameter(), input_proto)) { + MS_LOG(ERROR) << "Build parameter for funcgraph fail at index: " << i; + return false; + } + } + return true; +} + +bool MSANFModelParser::ObtainCNodeAttrInTypeForm(const PrimitivePtr &prim, const std::string &attr_name, + const onnx::TensorProto &attr_tensor) { + MS_EXCEPTION_IF_NULL(prim); + const int attr_tensor_type = attr_tensor.data_type(); + if (kDefaultValueSwitchMap.find(attr_tensor_type) == kDefaultValueSwitchMap.end()) { + MS_LOG(ERROR) << "Obtain attr in type-form has not support input type:" << attr_tensor_type; + return false; + } + prim->AddAttr(attr_name, TypeIdToType(kDefaultValueSwitchMap[attr_tensor_type])); + return true; +} + +bool MSANFModelParser::ObtainCNodeAttrInScalarForm(const PrimitivePtr &prim, const std::string &attr_name, + const onnx::TensorProto &attr_tensor) { + MS_EXCEPTION_IF_NULL(prim); + const int attr_tensor_type = attr_tensor.data_type(); + switch (attr_tensor_type) { + case onnx::TensorProto_DataType_STRING: { + ParseAttrInScalar_string_string(prim, attr_name, attr_tensor); + break; + } + case onnx::TensorProto_DataType_INT32: { + ParseAttrInScalar_int32_int32(prim, attr_name, attr_tensor); + break; + } + case onnx::TensorProto_DataType_INT64: { + ParseAttrInScalar_int64_int64(prim, attr_name, attr_tensor); + break; + } + case onnx::TensorProto_DataType_UINT64: { + ParseAttrInScalar_uint64_uint64(prim, attr_name, attr_tensor); + break; + } + case onnx::TensorProto_DataType_FLOAT: { + ParseAttrInScalar_float_float(prim, attr_name, attr_tensor); + break; + } + case onnx::TensorProto_DataType_DOUBLE: { + ParseAttrInScalar_double_double(prim, attr_name, attr_tensor); + break; + } + case 
onnx::TensorProto_DataType_BOOL: { + ParseAttrInScalar_int32_bool(prim, attr_name, attr_tensor); + auto value = prim->GetAttr(attr_name); + break; + } + default: + MS_LOG(ERROR) << "Obtain attr in scalar-form has not support input type: " << attr_tensor_type; + return false; + } + return true; +} + +bool MSANFModelParser::ObtainCNodeAttrInTensorForm(const PrimitivePtr &prim, const std::string &attr_name, + const onnx::TensorProto &attr_tensor) { + MS_EXCEPTION_IF_NULL(prim); + MS_LOG(ERROR) << "parse attr type don't support attr type is tensor"; + return false; +} + +bool MSANFModelParser::GetAttrValueForCNode(const PrimitivePtr &prim, const onnx::AttributeProto &attr_proto) { + MS_EXCEPTION_IF_NULL(prim); + const std::string &attr_name = attr_proto.name(); + if (!attr_proto.has_ref_attr_name()) { + MS_LOG(ERROR) << "CNode parse attr type has no ref_attr_name"; + return false; + } + const std::string &ref_attr_name = attr_proto.ref_attr_name(); + const onnx::TensorProto &attr_tensor = attr_proto.t(); + switch (kParseTypeSwitchMap[ref_attr_name]) { + case FORM_PARSE_TYPE: { + return ObtainCNodeAttrInTypeForm(prim, attr_name, attr_tensor); + } + case FORM_PARSE_SCALAR: { + return ObtainCNodeAttrInScalarForm(prim, attr_name, attr_tensor); + } + case FORM_PARSE_TENSOR: { + return ObtainCNodeAttrInTensorForm(prim, attr_name, attr_tensor); + } + default: + MS_LOG(ERROR) << "parse attr type don't support input of ref_attr_name"; + return false; + } +} +bool MSANFModelParser::ObtainValueNodeInTensorForm(const std::string &value_node_name, + const onnx::TensorProto &attr_tensor) { + const int attr_tensor_type = attr_tensor.data_type(); + std::vector shape; + for (int i = 0; i < attr_tensor.dims_size(); ++i) { + shape.push_back(attr_tensor.dims(i)); + } + tensor::TensorPtr tensor_info = std::make_shared(kDefaultValueSwitchMap[attr_tensor_type], shape); + const std::string &tensor_buf = attr_tensor.raw_data(); + auto *tensor_data_buf = 
reinterpret_cast(tensor_info->data_c(true)); + memcpy_s(tensor_data_buf, tensor_info->data().nbytes(), tensor_buf.data(), tensor_buf.size()); + auto new_value_node = NewValueNode(MakeValue(tensor_info)); + MS_EXCEPTION_IF_NULL(new_value_node); + auto tensor_abstract = tensor_info->ToAbstract(); + MS_EXCEPTION_IF_NULL(tensor_abstract); + new_value_node->set_abstract(tensor_abstract); + anfnode_build_map_[value_node_name] = new_value_node; + return true; +} + +bool MSANFModelParser::ObtainValueNodeInScalarForm(const std::string &value_node_name, + const onnx::TensorProto &attr_tensor) { + const int attr_tensor_type = attr_tensor.data_type(); + ValuePtr value_ptr = nullptr; + switch (attr_tensor_type) { + case onnx::TensorProto_DataType_INT32: { + std::vector add_data; + for (int i = 0; i < attr_tensor.int32_data_size(); ++i) { + add_data.push_back(attr_tensor.int32_data(i)); + } + if (add_data.size() == 1) { + value_ptr = MakeValue(add_data[0]); + } else if (!add_data.empty()) { + value_ptr = MakeValue>(add_data); + } + break; + } + case onnx::TensorProto_DataType_FLOAT: { + std::vector add_data; + for (int i = 0; i < attr_tensor.float_data_size(); ++i) { + add_data.push_back(attr_tensor.float_data(i)); + } + + if (add_data.size() == 1) { + value_ptr = MakeValue(add_data[0]); + } else if (!add_data.empty()) { + value_ptr = MakeValue>(add_data); + } + break; + } + case onnx::TensorProto_DataType_UNDEFINED: { + std::vector elems; + value_ptr = std::make_shared(elems); + break; + } + default: + MS_LOG(ERROR) << "Obtain attr in scalar-form has not support input type: " << attr_tensor_type; + return false; + } + auto new_value_node = NewValueNode(value_ptr); + MS_EXCEPTION_IF_NULL(new_value_node); + new_value_node->set_abstract(value_ptr->ToAbstract()); + anfnode_build_map_[value_node_name] = new_value_node; + + return true; +} + +bool MSANFModelParser::ObtainValueNodeInTypeForm(const std::string &value_node_name, + const onnx::TensorProto &attr_tensor) { + const int 
attr_tensor_type = attr_tensor.data_type(); + if (kDefaultValueSwitchMap.find(attr_tensor_type) == kDefaultValueSwitchMap.end()) { + MS_LOG(ERROR) << "Obtain ValueNode attr in type-form has not support input type: " << attr_tensor_type; + return false; + } + auto new_value_node = NewValueNode(TypeIdToType(kDefaultValueSwitchMap[attr_tensor_type])); + abstract::AbstractTypePtr abs_type = std::make_shared(std::make_shared()); + new_value_node->set_abstract(abs_type); + anfnode_build_map_[value_node_name] = new_value_node; + return true; +} + +bool MSANFModelParser::GetAttrValueForValueNode(const std::string &ref_attr_name, const std::string &value_node_name, + const onnx::TensorProto &attr_tensor) { + switch (kParseTypeSwitchMap[ref_attr_name]) { + case FORM_PARSE_SCALAR: { + return ObtainValueNodeInScalarForm(value_node_name, attr_tensor); + } + case FORM_PARSE_TENSOR: { + return ObtainValueNodeInTensorForm(value_node_name, attr_tensor); + } + case FORM_PARSE_TYPE: { + return ObtainValueNodeInTypeForm(value_node_name, attr_tensor); + } + default: + MS_LOG(ERROR) << "parse ValueNode value don't support input of ref_attr_name"; + return false; + } + return true; +} + +bool MSANFModelParser::BuildValueNodeForFuncGraph(const onnx::NodeProto &node_proto) { + const std::string &value_node_name = node_proto.output(0); + const onnx::AttributeProto &attr_proto = node_proto.attribute(0); + if (!attr_proto.has_ref_attr_name()) { + MS_LOG(ERROR) << "parse ValueNode don't have ref_attr_name"; + return false; + } + const std::string &ref_attr_name = attr_proto.ref_attr_name(); + const onnx::TensorProto &attr_tensor = attr_proto.t(); + + return GetAttrValueForValueNode(ref_attr_name, value_node_name, attr_tensor); +} + +AbstractBasePtr MSANFModelParser::GetAbstractForCNode(const onnx::AttributeProto &attr_proto) { + std::vector shape_vec; + const onnx::TensorProto &attr_tensor = attr_proto.t(); + for (int i = 0; i < attr_tensor.dims_size(); ++i) { + 
shape_vec.push_back(attr_tensor.dims(i)); + } + tensor::TensorPtr tensor_info = + std::make_shared(kDefaultValueSwitchMap[attr_tensor.data_type()], shape_vec); + MS_EXCEPTION_IF_NULL(tensor_info); + auto abstract = tensor_info->ToAbstract(); + MS_EXCEPTION_IF_NULL(abstract); + return abstract; +} + +CNodePtr MSANFModelParser::BuildCNodeForFuncGraph(const FuncGraphPtr &outputFuncGraph, + const onnx::NodeProto &node_proto) { + MS_EXCEPTION_IF_NULL(outputFuncGraph); + if (!node_proto.has_op_type()) { + MS_LOG(ERROR) << "Get CNode op_type failed!"; + return nullptr; + } + const std::string &node_name = node_proto.output(0); + const std::string &fullname_with_scope = node_proto.domain(); + const std::string &node_type = node_proto.op_type(); + PrimitivePtr prim = std::make_shared(node_type); + MS_EXCEPTION_IF_NULL(prim); + prim->set_instance_name(node_type); + + AbstractBasePtr abstract = nullptr; + AbstractBasePtr abstract_first = nullptr; + AbstractBasePtr abstract_second = nullptr; + for (int i = 0; i < node_proto.attribute_size(); ++i) { + const onnx::AttributeProto &attr_proto = node_proto.attribute(i); + if (attr_proto.name() == kCNodeShapeAttr) { + abstract = GetAbstractForCNode(attr_proto); + continue; + } + if (attr_proto.name() == kCNodeShape1Attr) { + abstract_first = GetAbstractForCNode(attr_proto); + continue; + } + if (attr_proto.name() == kCNodeShape2Attr) { + abstract_second = GetAbstractForCNode(attr_proto); + continue; + } + if (!GetAttrValueForCNode(prim, attr_proto)) { + MS_LOG(ERROR) << "Get CNode attr failed!"; + return nullptr; + } + } + + std::vector inputs; + inputs.clear(); + inputs.push_back(NewValueNode(prim)); + for (int i = 0; i < node_proto.input_size(); ++i) { + const std::string &input_name = node_proto.input(i); + if (anfnode_build_map_.find(input_name) == anfnode_build_map_.end()) { + MS_LOG(ERROR) << node_name << " input " << i << input_name << "can't find in nodes have parsed"; + return nullptr; + } + 
inputs.push_back(anfnode_build_map_[input_name]); + } + CNodePtr cnode_ptr = outputFuncGraph->NewCNode(inputs); + MS_EXCEPTION_IF_NULL(cnode_ptr); + if (node_type == "LayerNorm") { + AbstractBasePtrList elem; + elem.push_back(abstract); + elem.push_back(abstract_first); + elem.push_back(abstract_second); + cnode_ptr->set_abstract(std::make_shared(elem)); + } else if (node_type == "ArgMaxWithValue") { + AbstractBasePtrList elem; + elem.push_back(abstract); + elem.push_back(abstract_first); + cnode_ptr->set_abstract(std::make_shared(elem)); + } else if (nullptr == abstract) { + AbstractBasePtrList elem; + for (size_t index = 1; index < cnode_ptr->inputs().size(); ++index) { + elem.push_back(cnode_ptr->input(index)->abstract()); + } + cnode_ptr->set_abstract(std::make_shared(elem)); + } else { + cnode_ptr->set_abstract(abstract); + } + cnode_ptr->set_fullname_with_scope(fullname_with_scope); + anfnode_build_map_[node_name] = cnode_ptr; + return cnode_ptr; +} + +bool MSANFModelParser::BuildReturnForFuncGraph(const FuncGraphPtr &outputFuncGraph, const onnx::GraphProto &importProto, + const CNodePtr &cnode_ptr) { + MS_EXCEPTION_IF_NULL(outputFuncGraph); + MS_EXCEPTION_IF_NULL(cnode_ptr); + std::vector inputs; + if (importProto.output_size() > 1) { + inputs.clear(); + inputs.push_back(NewValueNode(prim::kPrimMakeTuple)); + AbstractBasePtrList elem; + for (int out_size = 0; out_size < importProto.output_size(); ++out_size) { + const onnx::ValueInfoProto &output_node = importProto.output(out_size); + const std::string &out_tuple = output_node.name(); + inputs.push_back(anfnode_build_map_[out_tuple]); + elem.push_back(anfnode_build_map_[out_tuple]->abstract()); + } + auto maketuple_ptr = outputFuncGraph->NewCNode(inputs); + maketuple_ptr->set_abstract(std::make_shared(elem)); + inputs.clear(); + inputs.push_back(NewValueNode(prim::kPrimReturn)); + inputs.push_back(maketuple_ptr); + auto return_node = outputFuncGraph->NewCNode(inputs); + MS_EXCEPTION_IF_NULL(return_node); + 
outputFuncGraph->set_return(return_node); + MS_LOG(INFO) << "Construct funcgraph finined, all success."; + } else { + const onnx::ValueInfoProto &output_node = importProto.output(0); + const onnx::TypeProto &output_typeproto = output_node.type(); + int output_type = output_typeproto.tensor_type().elem_type(); + std::vector output_shape; + for (int i = 0; i < output_typeproto.tensor_type().shape().dim_size(); ++i) { + output_shape.push_back(output_typeproto.tensor_type().shape().dim(i).dim_value()); + } + tensor::TensorPtr tensor_return = + std::make_shared(kDefaultValueSwitchMap[output_type], output_shape); + inputs.clear(); + inputs.push_back(NewValueNode(prim::kPrimReturn)); + inputs.push_back(cnode_ptr); + auto return_node = outputFuncGraph->NewCNode(inputs); + MS_EXCEPTION_IF_NULL(return_node); + return_node->set_abstract(tensor_return->ToAbstract()); + outputFuncGraph->set_return(return_node); + MS_LOG(INFO) << "Construct funcgraph finined, all success!"; + } + return true; +} + +bool MSANFModelParser::ImportNodesForGraph(const FuncGraphPtr &outputFuncGraph, const onnx::GraphProto &importProto) { + MS_EXCEPTION_IF_NULL(outputFuncGraph); + MS_LOG(INFO) << "The CNdoe size : " << importProto.node_size(); + CNodePtr cnode_ptr = nullptr; + for (int i = 0; i < importProto.node_size(); ++i) { + const onnx::NodeProto &node_proto = importProto.node(i); + const std::string &node_type = node_proto.op_type(); + if (node_type == kConstantValueNode) { + if (!BuildValueNodeForFuncGraph(node_proto)) { + MS_LOG(ERROR) << "Build ValueNode for funcgraph fail at index: : " << i; + return false; + } + continue; + } + cnode_ptr = BuildCNodeForFuncGraph(outputFuncGraph, node_proto); + if (cnode_ptr == nullptr) { + MS_LOG(ERROR) << "Build CNode for funcgraph fail at index: : " << i; + return false; + } + } + + BuildReturnForFuncGraph(outputFuncGraph, importProto, cnode_ptr); + return true; +} + +bool MSANFModelParser::BuildFuncGraph(const FuncGraphPtr &outputFuncGraph, const 
onnx::GraphProto &importProto) { + MS_EXCEPTION_IF_NULL(outputFuncGraph); + GraphDebugInfoPtr debug_info_ptr = outputFuncGraph->debug_info(); + MS_EXCEPTION_IF_NULL(debug_info_ptr); + if (importProto.has_name()) { + debug_info_ptr->set_name(importProto.name()); + } else { + MS_LOG(ERROR) << "FuncGraph under converting has not name!"; + } + + if (!ImportParametersForGraph(outputFuncGraph, importProto)) { + return false; + } + return ImportNodesForGraph(outputFuncGraph, importProto); +} + +bool MSANFModelParser::MSANFParseModelConfigureInfo(const onnx::ModelProto &model_proto) { + if (!model_proto.has_producer_name()) { + MS_LOG(ERROR) << "Parse model producer name from pb file failed!"; + return false; + } + producer_name_ = model_proto.producer_name(); + MS_LOG(INFO) << "producer_name :" << producer_name_; + + if (!model_proto.has_model_version()) { + MS_LOG(ERROR) << "Parse model producer version from pb file failed!"; + return false; + } + model_version_ = model_proto.model_version(); + MS_LOG(INFO) << "producer_version : " << model_version_; + + if (!model_proto.has_ir_version()) { + MS_LOG(ERROR) << "Parse model version from pb file failed!"; + return false; + } + ir_version_ = model_proto.ir_version(); + MS_LOG(INFO) << "ir_version :" << ir_version_; + return true; +} + +FuncGraphPtr MSANFModelParser::Parse(const onnx::ModelProto &model_proto) { + FuncGraphPtr dstGraph = std::make_shared(); + MS_EXCEPTION_IF_NULL(dstGraph); + if (!MSANFParseModelConfigureInfo(model_proto)) { + MS_LOG(ERROR) << "Parse configuration info for pb file failed!"; + } + const onnx::GraphProto &graphBuild = model_proto.graph(); + if (!BuildFuncGraph(dstGraph, graphBuild)) { + MS_LOG(ERROR) << "Build funcgraph failed!"; + return nullptr; + } + MS_LOG(INFO) << "Parse pb to build FuncGraph Success!"; + return dstGraph; +} +} // namespace lite +} // namespace mindspore diff --git a/mindspore/ccsrc/utils/load_onnx/anf_model_parser.h b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.h new 
file mode 100644 index 0000000000..11b9cd101f --- /dev/null +++ b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.h @@ -0,0 +1,78 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_UTILS_LOAD_ONNX_ANF_MODEL_PARSER_H +#define MINDSPORE_CCSRC_UTILS_LOAD_ONNX_ANF_MODEL_PARSER_H + +#include +#include +#include +#include "google/protobuf/io/zero_copy_stream_impl.h" +#include "ir/func_graph.h" +#include "proto/onnx.pb.h" + +namespace mindspore { +namespace lite { +using int32 = int32_t; +using int64 = int64_t; +using uint64 = uint64_t; +using float16 = Eigen::half; +class MSANFModelParser { + public: + MSANFModelParser() = default; + ~MSANFModelParser() = default; + + FuncGraphPtr Parse(const onnx::ModelProto &model_proto); + bool MSANFParseModelConfigureInfo(const onnx::ModelProto &model_proto); + + std::string GetProducerName() { return producer_name_; } + int GetProducerVersion() { return model_version_; } + int GetIrVersion() { return ir_version_; } + + private: + bool BuildFuncGraph(const FuncGraphPtr &outputFuncGraph, const onnx::GraphProto &importProto); + bool ImportParametersForGraph(const FuncGraphPtr &outputFuncGraph, const onnx::GraphProto &importProto); + bool ImportNodesForGraph(const FuncGraphPtr &outputFuncGraph, const onnx::GraphProto &importProto); + bool BuildParameterForFuncGraph(const ParameterPtr &node, const onnx::ValueInfoProto &value_proto); + CNodePtr 
BuildCNodeForFuncGraph(const FuncGraphPtr &outputFuncGraph, const onnx::NodeProto &node_proto); + bool BuildReturnForFuncGraph(const FuncGraphPtr &outputFuncGraph, const onnx::GraphProto &importProto, + const CNodePtr &cnode_ptr); + bool GetAttrValueForCNode(const PrimitivePtr &prim, const onnx::AttributeProto &attr_proto); + bool ObtainCNodeAttrInTypeForm(const PrimitivePtr &prim, const std::string &attr_name, + const onnx::TensorProto &attr_tensor); + bool ObtainCNodeAttrInScalarForm(const PrimitivePtr &prim, const std::string &attr_name, + const onnx::TensorProto &attr_tensor); + bool ObtainCNodeAttrInTensorForm(const PrimitivePtr &prim, const std::string &attr_name, + const onnx::TensorProto &attr_tensor); + bool BuildValueNodeForFuncGraph(const onnx::NodeProto &node_proto); + bool ObtainValueNodeInTensorForm(const string &value_node_name, const onnx::TensorProto &attr_tensor); + + bool ObtainValueNodeInScalarForm(const string &value_node_name, const onnx::TensorProto &attr_tensor); + bool GetAttrValueForValueNode(const string &ref_attr_name, const std::string &value_node_name, + const onnx::TensorProto &attr_tensor); + bool ObtainValueNodeInTypeForm(const string &value_node_name, const onnx::TensorProto &attr_tensor); + AbstractBasePtr GetAbstractForCNode(const onnx::AttributeProto &attr_proto); + + std::string producer_name_; + int model_version_; + int ir_version_; + std::unordered_map anfnode_build_map_; + std::map default_para_map_; +}; +} // namespace lite +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_UTILS_LOAD_ONNX_ANF_MODEL_PARSER_H diff --git a/mindspore/ccsrc/utils/log_adapter.cc b/mindspore/ccsrc/utils/log_adapter.cc index 10f49c7036..d16fbead9b 100644 --- a/mindspore/ccsrc/utils/log_adapter.cc +++ b/mindspore/ccsrc/utils/log_adapter.cc @@ -289,7 +289,7 @@ class LogConfigLexer { return '\0'; } - LogConfigToken GetNext(std::string *ptr) { + LogConfigToken GetNext(std::string *const ptr) { #ifdef DEBUG std::string text; auto tok = 
GetNextInner(&text); diff --git a/mindspore/ccsrc/minnie/tensor_minnie.cc b/mindspore/ccsrc/utils/mpi/mpi_config.cc similarity index 60% rename from mindspore/ccsrc/minnie/tensor_minnie.cc rename to mindspore/ccsrc/utils/mpi/mpi_config.cc index 329bf228e6..e8d81cf843 100644 --- a/mindspore/ccsrc/minnie/tensor_minnie.cc +++ b/mindspore/ccsrc/utils/mpi/mpi_config.cc @@ -14,21 +14,16 @@ * limitations under the License. */ -#include "minnie/tensor_minnie.h" +#include "utils/mpi/mpi_config.h" namespace mindspore { -namespace tensor { -TensorMinnie &TensorMinnie::operator=(const TensorMinnie &tensor) { - if (&tensor == this) { - return *this; - } - this->tensor_addr_ = tensor.tensor_addr(); - this->tensor_size_ = tensor.tensor_size(); - return *this; -} +std::shared_ptr MpiConfig::instance_ = nullptr; -bool TensorMinnie::operator==(const TensorMinnie &tensor) { - return tensor_addr_ == tensor.tensor_addr() && tensor_size_ == tensor.tensor_size(); +std::shared_ptr MpiConfig::GetInstance() { + if (instance_ == nullptr) { + MS_LOG(DEBUG) << "Create new mpi config instance."; + instance_.reset(new (std::nothrow) MpiConfig()); + } + return instance_; } -} // namespace tensor } // namespace mindspore diff --git a/mindspore/ccsrc/utils/mpi/mpi_config.h b/mindspore/ccsrc/utils/mpi/mpi_config.h new file mode 100644 index 0000000000..044e767762 --- /dev/null +++ b/mindspore/ccsrc/utils/mpi/mpi_config.h @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_UTILS_MPI_MS_CONTEXT_H_ +#define MINDSPORE_CCSRC_UTILS_MPI_MS_CONTEXT_H_ +#include +#include "utils/log_adapter.h" + +namespace mindspore { +class MpiConfig { + public: + ~MpiConfig() = default; + MpiConfig(const MpiConfig &) = delete; + MpiConfig &operator=(const MpiConfig &) = delete; + + static std::shared_ptr GetInstance(); + + void set_enable_mpi(bool flag) { enable_mpi_ = flag; } + bool enable_mpi() const { return enable_mpi_; } + + private: + MpiConfig() : enable_mpi_(false) {} + + static std::shared_ptr instance_; + bool enable_mpi_; +}; +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_UTILS_MPI_MS_CONTEXT_H_ diff --git a/mindspore/ccsrc/utils/primitive_utils.cc b/mindspore/ccsrc/utils/primitive_utils.cc index cfbfdebac7..97fa954e12 100644 --- a/mindspore/ccsrc/utils/primitive_utils.cc +++ b/mindspore/ccsrc/utils/primitive_utils.cc @@ -29,9 +29,6 @@ py::function GetBpropFunctionByObj(py::object obj) { py::function GetBpropFunction(std::string name) { auto fn = GetBpropFunctionByObj(py::str(name)); - if (fn.is_none()) { - MS_LOG(WARNING) << "Can't find bprop function for " << name; - } return fn; } @@ -41,7 +38,7 @@ py::function GetComputeFunction(std::string name) { if (!py::hasattr(mod, common::SafeCStr(name))) { PyErr_SetString(PyExc_NotImplementedError, common::SafeCStr(name)); // If raise AttributeError, user can't understand. This case need raise NotImplementedError. 
- throw py::error_already_set(); + throw(py::error_already_set()); } py::object fn = mod.attr(common::SafeCStr(name)); return fn; diff --git a/mindspore/ccsrc/utils/print.proto b/mindspore/ccsrc/utils/print.proto new file mode 100644 index 0000000000..a82791bccf --- /dev/null +++ b/mindspore/ccsrc/utils/print.proto @@ -0,0 +1,39 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto2"; + +package mindspore.prntpb; + +message TensorProto { + // The shape of the tensor. + repeated int64 dims = 1; + // The type of the tensor. + required string tensor_type = 2; + // The data of the tensor. + required bytes tensor_content = 3; +} + + +message Print { + message Value { + oneof value { + string desc = 1; + TensorProto tensor = 2; + } + } + repeated Value value = 1; +} diff --git a/mindspore/ccsrc/utils/summary.proto b/mindspore/ccsrc/utils/summary.proto index 6ea6ce08b8..f4a2ce957b 100644 --- a/mindspore/ccsrc/utils/summary.proto +++ b/mindspore/ccsrc/utils/summary.proto @@ -32,7 +32,7 @@ message Event { oneof what { // An event file was started, with the specified version. - // Now version is "Mindspore.Event:1" + // Now version is "MindSpore.Event:1" string version = 3; // GraphDef. 
diff --git a/mindspore/ccsrc/utils/tensorprint_utils.cc b/mindspore/ccsrc/utils/tensorprint_utils.cc index f4715b22a8..0d464e88a8 100644 --- a/mindspore/ccsrc/utils/tensorprint_utils.cc +++ b/mindspore/ccsrc/utils/tensorprint_utils.cc @@ -47,6 +47,18 @@ static std::map type_size_map = { {"int64_t", sizeof(int64_t)}, {"uint64_t", sizeof(uint64_t)}, {"float16", sizeof(float) / 2}, {"float", sizeof(float)}, {"double", sizeof(double)}, {"bool", sizeof(bool)}}; +std::string GetParseType(const std::string &tensorType_) { + static const std::map print_parse_map = { + {"int8_t", "Int8"}, {"uint8_t", "Uint8"}, {"int16_t", "Int16"}, {"uint16_t", "Uint16"}, + {"int32_t", "Int32"}, {"uint32_t", "Uint32"}, {"int64_t", "Int64"}, {"uint64_t", "Uint64"}, + {"float16", "Float16"}, {"float", "Float32"}, {"double", "Float64"}, {"bool", "Bool"}}; + auto type_iter = print_parse_map.find(tensorType_); + if (type_iter == print_parse_map.end()) { + MS_LOG(EXCEPTION) << "type of tensor need to print is not support " << tensorType_; + } + return type_iter->second; +} + bool ParseTensorShape(const std::string &input_shape_str, std::vector *const tensor_shape, size_t *dims) { if (tensor_shape == nullptr) { return false; @@ -141,7 +153,7 @@ void convertDataItem2Scalar(const char *str_data_ptr, const string &tensor_type, } else { MS_LOG(EXCEPTION) << "Cannot print scalar because of unsupport data type: " << tensor_type << "."; } -} // namespace mindspore +} bool judgeLengthValid(const size_t str_len, const string &tensor_type) { auto type_iter = type_size_map.find(tensor_type); @@ -200,14 +212,84 @@ bool ConvertDataItem2Tensor(const std::vector &items) { return ret_end_sequence; } -void TensorPrint::operator()() { - while (true) { - std::vector bundle; - if (tdt::TdtHostPopData("_npu_log", bundle) != 0) { +bool SaveDataItem2File(const std::vector &items, const std::string &print_file_path, prntpb::Print print, + std::fstream *output) { + bool ret_end_sequence = false; + for (auto &item : items) 
{ + if (item.dataType_ == tdt::TDT_END_OF_SEQUENCE) { + ret_end_sequence = true; break; } - if (ConvertDataItem2Tensor(bundle)) { - break; + prntpb::Print_Value *value = print.add_value(); + std::shared_ptr str_data_ptr = std::static_pointer_cast(item.dataPtr_); + MS_EXCEPTION_IF_NULL(str_data_ptr); + if (item.tensorShape_ == kShapeScalar || item.tensorShape_ == kShapeNone) { + if (!judgeLengthValid(str_data_ptr->size(), item.tensorType_)) { + MS_LOG(EXCEPTION) << "Print op receive data length is invalid."; + } + } + + std::vector tensor_shape; + size_t totaldims = 1; + if (!ParseTensorShape(item.tensorShape_, &tensor_shape, &totaldims)) { + MS_LOG(EXCEPTION) << "Tensor print can not parse tensor shape, receive info" << item.tensorShape_; + } + + if (item.tensorType_ == "string") { + std::string data(reinterpret_cast(str_data_ptr->c_str()), item.dataLen_); + value->set_desc(data); + } else { + auto parse_type = GetParseType(item.tensorType_); + prntpb::TensorProto *tensor = value->mutable_tensor(); + if (!(item.tensorShape_ == kShapeScalar) && !(item.tensorShape_ == kShapeNone)) { + for (const auto &dim : tensor_shape) { + tensor->add_dims(static_cast<::google::protobuf::int64>(dim)); + } + } + tensor->set_tensor_type(parse_type); + std::string data(reinterpret_cast(str_data_ptr->c_str()), item.dataLen_); + tensor->set_tensor_content(data); + } + + if (!print.SerializeToOstream(output)) { + MS_LOG(EXCEPTION) << "Save print file:" << print_file_path << " fail."; + } + print.Clear(); + } + return ret_end_sequence; +} + +void TensorPrint::operator()() { + prntpb::Print print; + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + std::string print_file_path = ms_context->print_file_path(); + if (print_file_path == "") { + while (true) { + std::vector bundle; + if (tdt::TdtHostPopData("_npu_log", bundle) != 0) { + break; + } + if (ConvertDataItem2Tensor(bundle)) { + break; + } + } + } else { + std::fstream output(print_file_path, 
std::ios::out | std::ios::trunc | std::ios::binary); + while (true) { + std::vector bundle; + if (tdt::TdtHostPopData("_npu_log", bundle) != 0) { + break; + } + if (SaveDataItem2File(bundle, print_file_path, print, &output)) { + break; + } + } + output.close(); + std::string path_string = print_file_path; + if (chmod(common::SafeCStr(path_string), S_IRUSR) == -1) { + MS_LOG(ERROR) << "Modify file:" << print_file_path << " to r fail."; + return; } } } diff --git a/mindspore/ccsrc/utils/tensorprint_utils.h b/mindspore/ccsrc/utils/tensorprint_utils.h index c8442e6291..4a40862ea3 100644 --- a/mindspore/ccsrc/utils/tensorprint_utils.h +++ b/mindspore/ccsrc/utils/tensorprint_utils.h @@ -23,6 +23,8 @@ #include "tdt/tsd_client.h" #include "tdt/tdt_host_interface.h" #include "tdt/data_common.h" +#include "proto/print.pb.h" +#include "utils/context/ms_context.h" #endif namespace mindspore { class TensorPrint { diff --git a/mindspore/ccsrc/utils/union_find_set.h b/mindspore/ccsrc/utils/union_find_set.h new file mode 100644 index 0000000000..81529c8bcf --- /dev/null +++ b/mindspore/ccsrc/utils/union_find_set.h @@ -0,0 +1,86 @@ +/** + * This is the C++ adaptation and derivative work of Myia (https://github.com/mila-iqia/myia/). + * + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_UTILS_UNION_FIND_SET_H_ +#define MINDSPORE_CCSRC_UTILS_UNION_FIND_SET_H_ + +#include +#include + +namespace mindspore { +template +class UnionFindSet { + public: + UnionFindSet() : union_find_set_() {} + ~UnionFindSet() = default; + void Add(const T &elem) { + if (union_find_set_.find(elem) != union_find_set_.end()) { + return; + } + + union_find_set_[elem] = elem; + } + + T Find(const T &key) { + T key_parent = key; + auto iter = union_find_set_.find(key_parent); + if (iter == union_find_set_.end()) { + MS_LOG(EXCEPTION) << "union_find_set_ cannot find key " << key_parent; + } + while (key_parent != iter->second) { + key_parent = iter->second; + iter = union_find_set_.find(key_parent); + if (iter == union_find_set_.end()) { + MS_LOG(EXCEPTION) << "union_find_set_ cannot find key " << key_parent; + } + } + + T tmp = key; + T tmp_parent; + while (tmp != key_parent) { + iter = union_find_set_.find(tmp); + if (iter == union_find_set_.end()) { + MS_LOG(EXCEPTION) << "union_find_set_ cannot find key " << tmp; + } + tmp_parent = iter->second; + union_find_set_[tmp] = key_parent; + tmp = tmp_parent; + } + return key_parent; + } + + void Union(const T &left, const T &right) { union_find_set_[Find(left)] = Find(right); } + + std::map> GetSets() { + std::map> result; + for (auto &iter : union_find_set_) { + (void)Find(iter.first); + } + for (auto &iter : union_find_set_) { + T parent = Find(iter.first); + result[parent].insert(iter.first); + } + return result; + } + + private: + std::map union_find_set_; +}; +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_UTILS_UNION_FIND_SET_H_ diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h index a63810ffaa..972d8df319 100644 --- a/mindspore/ccsrc/utils/utils.h +++ b/mindspore/ccsrc/utils/utils.h @@ -25,6 +25,7 @@ #include #include "utils/log_adapter.h" +#include "ir/dtype/type.h" namespace mindspore { // op name. 
Op which not exists in operator/ops.h, so define it's name here @@ -55,6 +56,7 @@ constexpr auto kExtractImagePatchesOpName = "ExtractImagePatches"; constexpr auto kBNTrainingReduceOpName = "BNTrainingReduce"; constexpr auto kBNTrainingUpdateOpName = "BNTrainingUpdate"; constexpr auto kBNTrainingUpdateV2OpName = "BNTrainingUpdateV2"; +constexpr auto kBNTrainingUpdateV3OpName = "BNTrainingUpdateV3"; constexpr auto kSimpleMeanGradOpName = "SimpleMeanGrad"; constexpr auto kMeanGradOpName = "MeanGrad"; constexpr auto kSliceOpName = "Slice"; @@ -64,11 +66,13 @@ constexpr auto kScatterNdOpName = "ScatterNd"; constexpr auto kStridedSliceAssignOpName = "StridedSliceAssign"; constexpr auto kStridedSliceOpName = "StridedSlice"; constexpr auto kStridedSliceGradOpName = "StridedSliceGrad"; +constexpr auto kSparseGatherV2 = "SparseGatherV2"; constexpr auto kUnsortedSegmentProdOpName = "UnsortedSegmentProd"; constexpr auto kUnsortedSegmentMinOpName = "UnsortedSegmentMin"; constexpr auto kFlattenGradOpName = "FlattenGrad"; constexpr auto kExpandDimsOpName = "ExpandDims"; constexpr auto kSplitOpName = "Split"; +constexpr auto kSplitVOpName = "SplitV"; constexpr auto kSparseApplyAdagradOpName = "SparseApplyAdagrad"; constexpr auto kMomentumOpName = "Momentum"; constexpr auto kApplyMomentumOpName = "ApplyMomentum"; @@ -131,6 +135,8 @@ constexpr auto kResizeNearestNeighborV2OpName = "ResizeNearestNeighborV2"; constexpr auto kResizeNearestNeighborV2GradOpName = "ResizeNearestNeighborV2Grad"; constexpr auto kApplyRMSPropOpname = "ApplyRMSProp"; constexpr auto kCumsumOpName = "Cumsum"; +constexpr auto kInplaceAddOpName = "InplaceAdd"; +constexpr auto kInplaceSubOpName = "InplaceSub"; constexpr auto kResizeBilinearV2OpName = "kResizeBilinearV2"; constexpr auto kReduceProdOpName = "ReduceProd"; constexpr auto kCumprodOpName = "Cumprod"; @@ -153,6 +159,8 @@ constexpr auto kLarsV2UpdateOpName = "LarsV2Update"; constexpr auto kSquareSumAllOpName = "SquareSumAll"; constexpr auto 
kNMSWithMaskOpName = "NMSWithMask"; constexpr auto kSoftmaxGradExtOpName = "SoftmaxGradExt"; +constexpr auto kStridedReadOpName = "StridedRead"; +constexpr auto kStridedWriteOpName = "StridedWrite"; // attr key name constexpr auto kAttrInputNames = "input_names"; @@ -172,9 +180,9 @@ constexpr auto kAttrKeepDims = "keep_dims"; constexpr auto kAttrShapeGamma = "shape_gamma"; constexpr auto kAttrPerm = "perm"; constexpr auto kAttrTransposeFirst = "transpose_first"; -constexpr auto kAttrAutomicAddMemSize = "automic_add_mem_size"; -constexpr auto kAttrAutomicOutputIndexs = "atomic_output_clean_indexs"; -constexpr auto kAttrAutomicWorkspaceSize = "atomic_workspace_clean_size"; +constexpr auto kAttrAtomicAddMemSize = "automic_add_mem_size"; +constexpr auto kAttrAtomicOutputIndexs = "atomic_output_clean_indexs"; +constexpr auto kAttrAtomicWorkspaceIndexs = "atomic_workspace_clean_indexs"; constexpr auto kAttrSwitchCondition = "switch_condition"; constexpr auto kAttrDataType = "data_type"; constexpr auto kAttrActiveTarget = "active_target"; @@ -184,6 +192,9 @@ constexpr auto kAttrEventId = "event_id"; constexpr auto kAttrDynInput = "dynamic"; constexpr auto kAttrDynInputSizes = "dyn_input_sizes"; constexpr auto kAttrSrcFormat = "src_format"; +constexpr auto kAttrMultiples = "multiples"; +constexpr auto kAttrFixPrecision = "fix_precision"; +constexpr auto kAttrOutputPrecision = "output_precision"; constexpr auto kAttrOutputUsedNum = "output_used_num"; constexpr auto kAttrHasBias = "has_bias"; constexpr auto kAttrN = "n"; @@ -197,6 +208,18 @@ constexpr auto kAttrLabelIndex = "label_index"; constexpr auto kAttrLabelSwitchList = "label_switch_list"; constexpr auto kAttrNewAxisMask = "new_axis_mask"; constexpr auto kAttrShrinkAxisMask = "shrink_axis_mask"; +constexpr auto kAttrDatadumpOriginalNames = "_datadump_original_names"; +constexpr auto kAttrStreamId = "stream_id"; +constexpr auto kAttrRecordEvent = "record_event"; +constexpr auto kAttrWaitEvent = "wait_event"; +constexpr 
auto kAttrRecordEventStream = "record_event_stream"; +constexpr auto kAttrWaitEventStream = "wait_event_stream"; +constexpr auto kAttrIndex = "index"; +constexpr auto kAttrSplitDim = "split_dim"; +constexpr auto kAttrNumSplit = "num_split"; +constexpr auto kAttrOutputNum = "output_num"; +constexpr auto kAttrSizeSplits = "size_splits"; +constexpr auto kAttrOutputDefault = "output_default"; // attr value constexpr auto kValueTargetSwitch = "target_switch"; @@ -204,7 +227,9 @@ constexpr auto kValueTargetOther = "target_other"; // some size const size_t kShape4dDims = 4; +const size_t kShape2dDims = 2; const size_t kShape5dDims = 5; +const size_t kShape1dDims = 1; const size_t kCubeSize = 16; const size_t kMemAlignSize = 512; const int kParameterDataTensorMask = 0; @@ -213,6 +238,7 @@ const int kValueNodeTensorMask = 2; // define special index in special node constexpr auto kAnfPrimitiveIndex = 0; +constexpr auto kFirstDataInputIndex = 1; constexpr auto kAnfPartialFuncGraphIndex = 1; constexpr auto kRealInputNodeIndexInTupleGetItem = 1; constexpr auto kInputNodeOutputIndexInTupleGetItem = 2; @@ -251,17 +277,19 @@ const std::set kOptOperatorSet = { kApplyRMSPropOpName, }; -const std::set kNeedTransFormatSet = {kOpFormat_FRAC_Z, kOpFormat_NC1KHKWHWC0, kOpFormat_NC1HWC0, +const std::set kHWSpecialFormatSet = {kOpFormat_FRAC_Z, kOpFormat_NC1KHKWHWC0, kOpFormat_NC1HWC0, kOpFormat_FRAC_NZ, kOpFormat_C1HWNCoC0, kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04}; +const std::set kFloatDataTypeSet = {kNumberTypeFloat16, kNumberTypeFloat32}; + static inline void ChangeFileMode(const std::string &file_name, mode_t mode) { - if (access(file_name.c_str(), F_OK) != 0) { - MS_LOG(DEBUG) << "File `" << file_name << "` does not exist."; - return; - } - if (chmod(file_name.c_str(), mode) != 0) { - MS_LOG(WARNING) << "Change file `" << file_name << "` to mode " << std::oct << mode << " fail."; + try { + if (chmod(file_name.c_str(), mode) != 0) { + MS_LOG(WARNING) << "Change file `" << 
file_name << "` to mode " << std::oct << mode << " fail."; + } + } catch (std::exception &e) { + MS_LOG(DEBUG) << "File `" << file_name << "` change mode failed! May be not exist."; } } } // namespace mindspore diff --git a/mindspore/ccsrc/vm/backend.cc b/mindspore/ccsrc/vm/backend.cc index 0fac84d901..3fde263c9d 100644 --- a/mindspore/ccsrc/vm/backend.cc +++ b/mindspore/ccsrc/vm/backend.cc @@ -39,14 +39,14 @@ LinConvertResult MsBackend::GetMultiGraphRun(const FuncGraphPtr &g) { multi_result_.inputs = g->parameters(); final_output_ = NewValueNode("fake_output"); multi_result_.outputs = {final_output_}; - GraphId final_g = sess_->GetFinalRunGraph(); + GraphId final_g = target_sess_->GetFinalRunGraph(); multi_result_.run = std::make_shared( - [final_g, this](const VectorRef &args) -> VectorRef { return MsRunGraph(final_g, args); }); + [final_g, this](const VectorRef &args) -> VectorRef { return MsRunGraph(final_g, args, ""); }); return multi_result_; } -LinConvertResult MsBackend::MsConvert(const AnfNodePtrList &lst) { +LinConvertResult MsBackend::MsConvert(const AnfNodePtrList &lst, const std::string &target) { MS_LOG(DEBUG) << "MsConvert"; MS_EXCEPTION_IF_NULL(MsContext::GetInstance()); auto cached = g_ConvertCache.find(lst); @@ -64,17 +64,25 @@ LinConvertResult MsBackend::MsConvert(const AnfNodePtrList &lst) { result.inputs = inputs; result.outputs = outputs; result.graph_id = kInvalidGraphId; - auto graph_id = sess_->CompileGraph(lst, outputs); - if (MsContext::GetInstance()->execution_mode() == kPynativeMode) { - sess_->BuildGraph(graph_id); + GraphId graph_id = kInvalidGraphId; + if (target != target_device_ && !target.empty()) { + CreateOtherSession(target); + graph_id = other_sess_->CompileGraph(lst, outputs); + } else { + graph_id = target_sess_->CompileGraph(lst, outputs); } + if (MsContext::GetInstance()->precompile_only()) { MS_LOG(INFO) << "PrecompileOnly, stop run graph"; return result; } - + if (target != target_device_ && !target.empty()) { + 
other_sess_->BuildGraph(graph_id); + } else if (!is_multi_graph_sink_) { + target_sess_->BuildGraph(graph_id); + } result.run = std::make_shared( - [graph_id, this](const VectorRef &args) -> VectorRef { return MsRunGraph(graph_id, args); }); + [graph_id, target, this](const VectorRef &args) -> VectorRef { return MsRunGraph(graph_id, args, target); }); MS_EXCEPTION_IF_NULL(result.run); result.simu_run = std::make_shared( @@ -92,7 +100,7 @@ void MsBackend::SetSwitchActive(const BaseRef &c, bool cond) { GraphId cond_g = kInvalidGraphId; if (utils::isa(c)) { - cond_g = sess_->GetGraphIdByNode(utils::cast(c)); + cond_g = target_sess_->GetGraphIdByNode(utils::cast(c)); } else { MS_LOG(EXCEPTION) << "cond not a anf node:" << c.ToString(); } @@ -116,7 +124,7 @@ void MsBackend::SetSwitchActive(const BaseRef &c, bool cond) { MS_LOG(DEBUG) << "invoke set active:" << active_g; } MS_LOG(DEBUG) << "switch set active:" << active_g << ", " << cond_g; - sess_->SetActive(active_g, cond_g); + target_sess_->SetActive(active_g, cond_g); } void MsBackend::SetSwitchGraph() { @@ -135,12 +143,12 @@ void MsBackend::SetSwitchGraph() { } GraphId cond_g = kInvalidGraphId; if (utils::isa(curr_switch_)) { - cond_g = sess_->GetGraphIdByNode(utils::cast(curr_switch_)); + cond_g = target_sess_->GetGraphIdByNode(utils::cast(curr_switch_)); } else { MS_LOG(EXCEPTION) << "cond not a anf node:" << curr_switch_.ToString(); } MS_LOG(DEBUG) << "switch compile:" << cond_g << ", " << true_g << ", " << false_g; - sess_->SwitchCompile(cond_g, true_g, false_g, utils::cast(curr_switch_)); + target_sess_->SwitchCompile(cond_g, true_g, false_g, utils::cast(curr_switch_)); } is_switch_call_ = false; MS_LOG(DEBUG) << "end SetSwitchGraph:" << curr_cond << ", " << is_switch_call_; @@ -202,7 +210,7 @@ void MsBackend::RecallGraphInput(const FuncGraphPtr &func_graph, const VectorRef old_args[i] = args[it->second]; } } - sess_->SetChildGraphInput(graph, old_args); + target_sess_->SetChildGraphInput(graph, old_args); } 
graph_inputs_.erase(c); } @@ -211,7 +219,7 @@ void MsBackend::RecallGraphInput(const FuncGraphPtr &func_graph, const VectorRef VectorRef MsBackend::MsSimuRunGraph(const GraphId &g, const VectorRef &args) { MS_LOG(DEBUG) << "set graph input:" << g; // switch maybe twice - sess_->SetChildGraphInput(g, args); + target_sess_->SetChildGraphInput(g, args); if (is_switch_call_) { if (!curr_switch_.is_null()) { @@ -236,7 +244,7 @@ VectorRef MsBackend::MsSimuRunGraph(const GraphId &g, const VectorRef &args) { return VectorRef(outputs); } -VectorRef MsBackend::MsRunGraph(const GraphId &g, const VectorRef &args) { +VectorRef MsBackend::MsRunGraph(const GraphId &g, const VectorRef &args, const std::string &target) { MS_LOG(DEBUG) << "start ms graph run:" << args.size() << ", g:" << g; // Run graph std::vector inputs; @@ -271,7 +279,12 @@ VectorRef MsBackend::MsRunGraph(const GraphId &g, const VectorRef &args) { VectorRef outputs; // call ms rungraph (graphId, input ,output) - sess_->RunGraph(g, inputs, &outputs); + if (target != target_device_ && !target.empty()) { + other_sess_->RunGraph(g, inputs, &outputs); + } else { + target_sess_->RunGraph(g, inputs, &outputs); + } + MS_LOG(DEBUG) << "RunGraph finished:" << outputs.size(); return outputs; } @@ -300,17 +313,17 @@ void MsBackend::SimulateRun(FinalVMPtr rt, FuncGraphPtr root) { (void)std::transform(parameters.begin(), parameters.end(), std::back_inserter(args), [](const AnfNodePtr &v) { return v; }); MS_LOG(DEBUG) << "Simulate start"; - (void)sess_->SetFinalGraphInput(parameters); + (void)target_sess_->SetFinalGraphInput(parameters); BaseRef output = rt->Eval(VectorRef(args)); - sess_->SetFinalGraphOutput(output); + target_sess_->SetFinalGraphOutput(output); MS_LOG(DEBUG) << "Simulate Eval end"; } void MsBackend::Link(GraphId graph_id) { if (graph_id == kInvalidGraphId) { - graph_id = sess_->GetFinalRunGraph(); + graph_id = target_sess_->GetFinalRunGraph(); } - sess_->BuildGraph(graph_id); + 
target_sess_->BuildGraph(graph_id); } Backend::Backend(const std::string &name) : name_(name) { @@ -322,16 +335,30 @@ Backend::Backend(const std::string &name) : name_(name) { } MsBackend::MsBackend(const std::string &name, const std::string &target, uint32_t device_id) : Backend(name) { - convert_fn_ = std::bind(&MsBackend::MsConvert, this, std::placeholders::_1); - sess_ = session::SessionFactory::Get().Create(target); - if (sess_ == nullptr) { + convert_fn_ = std::bind(&MsBackend::MsConvert, this, std::placeholders::_1, std::placeholders::_2); + target_sess_ = session::SessionFactory::Get().Create(target); + if (target_sess_ == nullptr) { + MS_LOG(EXCEPTION) << "Session create failed!, please make sure target device:" << target << " is available."; + } + target_sess_->Init(device_id); + target_sess_->RegisterSummaryCallBackFunc(callbacks::SummarySaveCallback); + target_device_ = target; +} + +void MsBackend::CreateOtherSession(const std::string &target) { + if (other_sess_ != nullptr && other_device_ == target) { + return; + } + other_sess_ = session::SessionFactory::Get().Create(target); + if (other_sess_ == nullptr) { MS_LOG(EXCEPTION) << "Session create failed!, please make sure target device:" << target << " is available."; } - sess_->Init(device_id); - sess_->RegisterSummaryCallBackFunc(callbacks::SummarySaveCallback); + other_sess_->Init(0); + other_sess_->RegisterSummaryCallBackFunc(callbacks::SummarySaveCallback); + other_device_ = target; } -GraphId MsBackend::CompileGraph(NotNull fg) { return sess_->CompileGraph(fg); } +GraphId MsBackend::CompileGraph(NotNull fg) { return target_sess_->CompileGraph(fg); } VectorRef MsBackend::RunGraph(GraphId graph_id, const VectorRef &args) { return MsRunGraph(graph_id, args); } diff --git a/mindspore/ccsrc/vm/backend.h b/mindspore/ccsrc/vm/backend.h index 94b7a500e2..0e0b02c055 100644 --- a/mindspore/ccsrc/vm/backend.h +++ b/mindspore/ccsrc/vm/backend.h @@ -91,8 +91,8 @@ class MsBackend : public Backend { 
MsBackend(const std::string &name, const std::string &target, uint32_t device_id); ~MsBackend() override = default; - LinConvertResult MsConvert(const AnfNodePtrList &lst); - VectorRef MsRunGraph(const GraphId &g, const VectorRef &args); + LinConvertResult MsConvert(const AnfNodePtrList &lst, const std::string &target = ""); + VectorRef MsRunGraph(const GraphId &g, const VectorRef &args, const std::string &target = ""); VectorRef MsSimuRunGraph(const GraphId &g, const VectorRef &args); void SimulateRun(FinalVMPtr rt, FuncGraphPtr root) override; @@ -107,9 +107,13 @@ class MsBackend : public Backend { LinConvertResult GetMultiGraphRun(const FuncGraphPtr &g) override; GraphId CompileGraph(NotNull fg) override; VectorRef RunGraph(GraphId graph_id, const VectorRef &args); + void CreateOtherSession(const std::string &target); private: - session::SessionPtr sess_; + session::SessionPtr target_sess_; + session::SessionPtr other_sess_; + std::string target_device_; + std::string other_device_; std::unordered_map simu_cond_map_; std::unordered_map graph_id_map_; std::unordered_map>, BaseRefHash> graph_inputs_; diff --git a/mindspore/ccsrc/vm/segment_runner.cc b/mindspore/ccsrc/vm/segment_runner.cc index ae052770ff..9b2ee51b3f 100644 --- a/mindspore/ccsrc/vm/segment_runner.cc +++ b/mindspore/ccsrc/vm/segment_runner.cc @@ -92,6 +92,8 @@ std::tuple TransformSegmentToAnfGr } else if (eqv.find(a) == eqv.end()) { inputs.push_back(a); eqv[a] = fg->add_parameter(); + eqv[a]->set_abstract(a->abstract()); + eqv[a]->set_kernel_info(a->kernel_info_ptr()); } return eqv[a]; @@ -107,15 +109,20 @@ std::tuple TransformSegmentToAnfGr if (inps.empty()) { MS_LOG(EXCEPTION) << "Input is empty"; } - if (!IsValueNode(inps[0])) { + if (!IsValueNode(inps[0]) && + !(IsValueNode(inps[0]) && + inps[0]->cast()->value()->cast()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL))) { MS_LOG(EXCEPTION) << "Input[0] Must be a Primitive valuenode"; } + auto fn = inps[0]; std::vector args{fn}; 
(void)std::transform(std::begin(inps) + 1, std::end(inps), std::back_inserter(args), ref); eqv[n] = fg->NewCNode(args); + eqv[n]->set_abstract(n->abstract()); + eqv[n]->set_kernel_info(n->kernel_info_ptr()); } std::vector eqv_keys; @@ -123,15 +130,18 @@ std::tuple TransformSegmentToAnfGr [](const std::pair &elem) -> AnfNodePtr { return elem.first; }); auto outputs = GetOutput(lst, lst[0]->func_graph()->manager()->node_users(), eqv_keys); - std::vector output_args; - output_args.push_back(NewValueNode(prim::kPrimMakeTuple)); - (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_args), - [&eqv](const AnfNodePtr &o) -> AnfNodePtr { return eqv[o]; }); - - // Set output for AnfGraph - auto fg_output = fg->NewCNode(output_args); + AnfNodePtr fg_output; + if (outputs.size() > 1) { + std::vector output_args; + output_args.push_back(NewValueNode(prim::kPrimMakeTuple)); + (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_args), + [&eqv](const AnfNodePtr &o) -> AnfNodePtr { return eqv[o]; }); + // Set output for AnfGraph + fg_output = fg->NewCNode(output_args); + } else { + fg_output = eqv[outputs[0]]; + } fg->set_output(fg_output); - return std::make_tuple(fg, inputs, outputs); } @@ -148,7 +158,7 @@ std::tuple TransformSegmentToAnfGr // This implementation will convert the nodes into a subgraph // that will run using the MsVM. 
template -LinConvertResult Convert(const AnfNodePtrList &lst) { +LinConvertResult Convert(const AnfNodePtrList &lst, const std::string &) { auto cached = g_ConvertCache.find(lst); if (cached != g_ConvertCache.end()) { return cached->second; diff --git a/mindspore/ccsrc/vm/segment_runner.h b/mindspore/ccsrc/vm/segment_runner.h index 8ea87da50c..c4458d4148 100644 --- a/mindspore/ccsrc/vm/segment_runner.h +++ b/mindspore/ccsrc/vm/segment_runner.h @@ -43,7 +43,7 @@ struct LinConvertResult { uint32_t graph_id; }; -using LinkFuncType = std::function; +using LinkFuncType = std::function; using ConvertCache = std::unordered_map; extern LinkFuncType MsVmConvert; extern LinkFuncType GeVmConvert; diff --git a/mindspore/ccsrc/vm/transform.cc b/mindspore/ccsrc/vm/transform.cc index 636d36f931..c1fba78be8 100644 --- a/mindspore/ccsrc/vm/transform.cc +++ b/mindspore/ccsrc/vm/transform.cc @@ -20,6 +20,9 @@ #include #include +#include +#include +#include #include #include @@ -30,6 +33,7 @@ #include "utils/graph_utils.h" #include "utils/context/ms_context.h" #include "debug/trace.h" +#include "debug/anf_ir_dump.h" namespace mindspore { namespace compile { @@ -47,6 +51,200 @@ const std::vector &GetMsNonlinearOps() { return ms_nonlinear_ops; } +namespace { +std::string GetCNodeTarget(const AnfNodePtr &node) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + std::string default_target = context_ptr->device_target(); + if (!node->isa()) { + return default_target; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto attr_input = cnode->input(kAnfPrimitiveIndex); + if (attr_input == nullptr) { + return default_target; + } + auto value_node = attr_input->cast(); + if (value_node == nullptr) { + return default_target; + } + auto value = value_node->value(); + if (value == nullptr) { + return default_target; + } + if (!value->isa()) { + return default_target; + } + auto primitive = value->cast(); + auto att_target = 
primitive->GetAttr("primitive_target"); + if (att_target != nullptr) { + if (!att_target->isa()) { + MS_LOG(EXCEPTION) << "Only support string CPU|GPU|Ascend for primitive_target"; + } + auto target = GetValue(att_target); + if (kTargetSet.find(target) == kTargetSet.end()) { + MS_LOG(EXCEPTION) << "Only support string CPU|GPU|Ascend for primitive_target"; + } + return target; + } + return default_target; +} + +bool ContainMultiTarget(const std::vector &nodes) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + std::string last_target = context_ptr->device_target(); + for (auto &node : nodes) { + if (node->isa()) { + std::string cur_target = GetCNodeTarget(node); + if (last_target != cur_target) { + return true; + } + last_target = cur_target; + } + } + return false; +} + +void CalcNodeRefCount(const FuncGraphPtr &graph, std::map *nodes_ref) { + std::queue queue; + queue.push(graph->get_return()); + std::set visited; + while (!queue.empty()) { + auto &node = queue.front(); + queue.pop(); + MS_EXCEPTION_IF_NULL(node); + if (!node->isa()) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + for (auto &input : cnode->inputs()) { + auto iter = nodes_ref->find(input); + if (iter != nodes_ref->end()) { + iter->second++; + } else { + (void)nodes_ref->insert(std::pair(input, 1)); + } + if (visited.find(input) != visited.end()) { + continue; + } + visited.insert(input); + queue.push(input); + } + } +} + +bool IsGetItemNode(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + if (node->isa()) { + auto cnode = node->cast(); + auto &inputs = cnode->inputs(); + if (inputs.empty()) { + MS_LOG(EXCEPTION) << "Inputs of apply node is empty"; + } + if (!IsValueNode(inputs[0])) { + return true; + } + PrimitivePtr node_prim = GetValueNode(inputs[0]); + return node_prim->name() == prim::kPrimTupleGetItem->name(); + } + return false; +} + +std::vector ReorderGetItemNode(const std::vector &nodes) { + std::vector result; + 
std::map> insert_positions; + std::map node_positions; + for (auto &node : nodes) { + if (IsGetItemNode(node)) { + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto &inputs = cnode->inputs(); + if (inputs.size() < 2) { + MS_LOG(EXCEPTION) << "Invalid get item node"; + } + auto &parent = inputs[1]; + auto iter = node_positions.find(parent); + if (iter != node_positions.end()) { + size_t position = iter->second; + auto iter_nodes = insert_positions.find(position); + if (iter_nodes != insert_positions.end()) { + iter_nodes->second.push_back(node); + } else { + (void)insert_positions.insert( + std::pair>(position, std::vector{node})); + } + continue; + } + } + result.emplace_back(node); + node_positions[node] = result.size(); + } + + size_t insert_num = 0; + for (auto &item : insert_positions) { + size_t position = item.first + insert_num; + (void)result.insert(result.begin() + position, item.second.begin(), item.second.end()); + insert_num += item.second.size(); + } + return result; +} + +std::vector SplitSort(const FuncGraphPtr &graph, const std::string &default_target) { + std::vector result; + std::stack to_visit; + std::stack next_to_visit; + std::map nodes_ref; + CalcNodeRefCount(graph, &nodes_ref); + std::string handle_target = default_target; + std::string next_target = ""; + to_visit.push(graph->get_return()); + while (!to_visit.empty() || !next_to_visit.empty()) { + if (to_visit.empty()) { + to_visit.swap(next_to_visit); + handle_target = next_target; + } + auto &node = to_visit.top(); + MS_EXCEPTION_IF_NULL(node); + to_visit.pop(); + result.emplace_back(node); + if (!node->isa()) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto node_inputs = cnode->inputs(); + std::reverse(node_inputs.begin(), node_inputs.end()); + for (auto &input : node_inputs) { + auto iter = nodes_ref.find(input); + if (iter != nodes_ref.end()) { + iter->second--; + if (iter->second != 0) { + continue; + } + } + if (!input->isa()) { + 
to_visit.push(input); + continue; + } + std::string input_target = GetCNodeTarget(input); + if (input_target == handle_target) { + to_visit.push(input); + } else if (next_to_visit.empty() || input_target == next_target) { + next_to_visit.push(input); + next_target = input_target; + } else { + MS_LOG(EXCEPTION) << "only support two different target"; + } + } + } + std::reverse(result.begin(), result.end()); + return ReorderGetItemNode(result); +} +} // namespace + CompileGraph::CompileGraph(const BackendPtr &backend, const std::vector &cut_list) : backend_(backend), cut_list_(cut_list) { MS_EXCEPTION_IF_NULL(backend_); @@ -72,6 +270,14 @@ bool CompileGraph::IsCut(const AnfNodePtr &node) { } AnfNodePtr fn = inputs[0]; + MS_EXCEPTION_IF_NULL(fn); + if (IsValueNode(fn)) { + auto fg = GetValueNode(fn); + if (fg->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { + return false; + } + } + if (!IsValueNode(fn)) { return true; } @@ -80,6 +286,11 @@ bool CompileGraph::IsCut(const AnfNodePtr &node) { for (auto &prim : cut_list_) { MS_EXCEPTION_IF_NULL(prim); if (prim->name() == node_prim->name()) { + if (prim->name() == prim::kPrimBpropCut->name()) { + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + ms_context->set_enable_pynative_hook(true); + } return true; } } @@ -102,24 +313,33 @@ VectorRef CompileGraph::SplitNodes(const FuncGraphPtr &graph) { MS_EXCEPTION_IF_NULL(graph); VectorRef splits; VectorRef split; - std::vector nodes = TopoSort(graph->get_return()); - + auto nodes = TopoSort(graph->get_return()); + if (ContainMultiTarget(nodes)) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + std::string default_target = context_ptr->device_target(); + nodes = SplitSort(graph, default_target); + } + std::string last_target; MS_LOG(DEBUG) << "Split all nodes size:" << nodes.size(); for (auto &node : nodes) { MS_EXCEPTION_IF_NULL(node); if (IsCut(node)) { - MS_LOG(DEBUG) << "Cut node:" << node->DebugString(10) << ", 
size:" << split.size(); if (split.size() != 0) { splits.push_back(split); } splits.push_back(node); split.clear(); - } else if (!(node->isa() || node->isa())) { + } else if (node->isa()) { + std::string cur_target = GetCNodeTarget(node); + if (cur_target != last_target && !last_target.empty() && split.size() != 0) { + splits.push_back(split); + split.clear(); + } + last_target = cur_target; split.push_back(node); - MS_LOG(DEBUG) << "Insert node:" << node->DebugString(10) << ", size:" << split.size(); } } - MS_LOG(DEBUG) << "Split node size :" << splits.size(); return splits; } @@ -200,14 +420,14 @@ void CompileGraph::PushParameters(const FuncGraphPtr &graph) { } } -int CompileGraph::LinConvert(const FuncGraphPtr &graph, const AnfNodePtrList &node_list) { +int CompileGraph::LinConvert(const FuncGraphPtr &graph, const AnfNodePtrList &node_list, const std::string &target) { MS_LOG(DEBUG) << "LinConvert start"; LinConvertResult result; if (backend_->simu_flag()) { result = backend_->GetMultiGraphRun(graph); } else { - result = lin_convert_(node_list); + result = lin_convert_(node_list, target); } if (result.run == nullptr) { @@ -316,7 +536,12 @@ bool CompileGraph::SplitGraph(const FuncGraphPtr &graph) { auto vec_ref = utils::cast(split); (void)std::transform(vec_ref.begin(), vec_ref.end(), std::back_inserter(args), [](const BaseRef &v) { return utils::cast(v); }); - ret = LinConvert(graph, args); + if (args.size() > 0) { + std::string cur_target = GetCNodeTarget(args[0]); + ret = LinConvert(graph, args, cur_target); + } else { + ret = LinConvert(graph, args); + } MS_LOG(DEBUG) << "End a extern LinConvert"; if (ret == RET_FAILED) { return false; @@ -348,7 +573,6 @@ InstSet CompileGraph::GenMultiGraphsSinkInst(const FuncGraphPtr &graph) { InstSet CompileGraph::Run(const FuncGraphPtr &graph) { MS_EXCEPTION_IF_NULL(graph); - MS_LOG(DEBUG) << "Compile start graph: " << graph->ToString(); Reset(); PushParameters(graph); @@ -574,16 +798,11 @@ 
CompileGraphs::CompileGraphs(const BackendPtr &backend, const std::vectormanager(); - MS_EXCEPTION_IF_NULL(graph_manager); - FuncGraphSet graphs = graph_manager->func_graphs(); - for (auto &g : graphs) { - mapping_[g] = static_cast(insts_.size()); - if (transform_ != nullptr) { - InstSet insts = transform_->Run(g); - if (!insts.empty()) { - (void)insts_.insert(insts_.end(), insts.begin(), insts.end()); - } + mapping_[graph] = static_cast(insts_.size()); + if (transform_ != nullptr) { + InstSet insts = transform_->Run(graph); + if (!insts.empty()) { + (void)insts_.insert(insts_.end(), insts.begin(), insts.end()); } } MS_LOG(DEBUG) << "End"; @@ -628,8 +847,15 @@ FinalVMPtr CompileGraphs::CompileAndLink(const FuncGraphPtr &graph) { Reset(); MS_LOG(DEBUG) << "Begin parameter:" << graph->parameters().size(); - (void)WrapPrimitives(graph); - Compile(graph); + FuncGraphPtr prim_graph = WrapPrimitives(graph); + Compile(prim_graph); + MS_EXCEPTION_IF_NULL(prim_graph); + FuncGraphSet graphs = prim_graph->manager()->func_graphs(); + for (auto g : graphs) { + if (g != graph && g != nullptr && !(g->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL))) { + Compile(g); + } + } FinalVMPtr rt = Link(graph); Reset(); @@ -637,6 +863,20 @@ FinalVMPtr CompileGraphs::CompileAndLink(const FuncGraphPtr &graph) { return rt; } +bool CompileGraphs::ContainMixedTarget(const FuncGraphPtr &graph) { + MS_EXCEPTION_IF_NULL(graph); + auto graph_manager = graph->manager(); + MS_EXCEPTION_IF_NULL(graph_manager); + FuncGraphSet graphs = graph_manager->func_graphs(); + for (auto &g : graphs) { + auto nodes = TopoSort(g->get_return()); + if (ContainMultiTarget(nodes)) { + return true; + } + } + return false; +} + BackendPtr CreateBackend() { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); diff --git a/mindspore/ccsrc/vm/transform.h b/mindspore/ccsrc/vm/transform.h index 711c1777ab..f2d54198d6 100644 --- a/mindspore/ccsrc/vm/transform.h +++ b/mindspore/ccsrc/vm/transform.h @@ -32,7 
+32,7 @@ #include "vm/segment_runner.h" #include "vm/backend.h" -// mindspore namespace is the top level namespace of Mindsporeession project. +// mindspore namespace is the top level namespace of MindSpore project. // Other namespace should be a sub namespace of mindspore namespace in the ME project. namespace mindspore { extern const char kMsVm[]; @@ -80,7 +80,7 @@ class CompileGraph { private: void PushParameters(const FuncGraphPtr &func_graph); bool SplitGraph(const FuncGraphPtr &func_graph); - int LinConvert(const FuncGraphPtr &func_graph, const AnfNodePtrList &node_list); + int LinConvert(const FuncGraphPtr &func_graph, const AnfNodePtrList &node_list, const std::string &target = ""); int InterpretNode(const FuncGraphPtr &func_graph, const CNodePtr &node); int AddCall(const FuncGraphPtr &graph, const CNodePtr &node); void AddSinkSwitch(const CNodePtr &node); @@ -124,6 +124,7 @@ class CompileGraphs { void Compile(const FuncGraphPtr &func_graph); FinalVMPtr Link(const FuncGraphPtr &func_graph); FinalVMPtr CompileAndLink(const FuncGraphPtr &func_graph); + static bool ContainMixedTarget(const FuncGraphPtr &graph); private: InstSet insts_; diff --git a/mindspore/ccsrc/vm/vm.cc b/mindspore/ccsrc/vm/vm.cc index 7107212b6c..c73d41df6c 100644 --- a/mindspore/ccsrc/vm/vm.cc +++ b/mindspore/ccsrc/vm/vm.cc @@ -585,8 +585,8 @@ void FinalVM::InstPushPrim(const VectorRef &args) { return; } - VectorRef tuple; auto prim = utils::cast(args[0]); + VectorRef tuple; for (size_t i = 1; i < args.size(); ++i) { auto index = utils::cast(args[i]); tuple.push_back(Ref(index)); @@ -618,8 +618,9 @@ void FinalVM::SyncData(const py::object &arg) { BaseRef FinalVM::RunHook(const PrimitivePtr &prim, const VectorRef &args) { MS_LOG(DEBUG) << "input for operation:"; + auto prim_py = dyn_cast(prim); std::size_t args_size = args.size(); - py::tuple py_args = py::tuple(args_size); + auto py_args = py::tuple(args_size); size_t i = 0; for (auto &arg : args) { py_args[i] = BaseRefToPyData(arg); @@ 
-631,7 +632,7 @@ BaseRef FinalVM::RunHook(const PrimitivePtr &prim, const VectorRef &args) { bool is_bprop = prim->HasAttr("bprop"); if (is_bprop) { SyncData(py_args); - py::function fn_bprop = prim->hook(); + py::function fn_bprop = prim_py->hook(); obj = fn_bprop(*py_args); return obj; } @@ -643,11 +644,11 @@ BaseRef FinalVM::RunHook(const PrimitivePtr &prim, const VectorRef &args) { std::string cell_id = GetValue(prim->GetAttr("cell_id")); if (_hook_grad.find(cell_id) != _hook_grad.end()) { std::size_t hook_args_size = 3; - py::tuple hook_args = py::tuple(hook_args_size); + auto hook_args = py::tuple(hook_args_size); hook_args[0] = cell_id; hook_args[1] = py::make_tuple(_hook_grad[cell_id]); hook_args[2] = py::make_tuple(py_args[2]); - py::function fn_hook = prim->hook(); + py::function fn_hook = prim_py->hook(); obj = fn_hook(*hook_args); if (py::isinstance(obj)) { obj = py_args[2]; @@ -659,7 +660,7 @@ BaseRef FinalVM::RunHook(const PrimitivePtr &prim, const VectorRef &args) { } } else { // Hook operator for execute variable hook function - py::function fn_hook = prim->hook(); + py::function fn_hook = prim_py->hook(); obj = fn_hook(py::make_tuple(py_args[2])); if (py::isinstance(obj)) { obj = py_args[2]; diff --git a/mindspore/common/_register_for_tensor.py b/mindspore/common/_register_for_tensor.py index da183d9549..8ba2ff7cc4 100644 --- a/mindspore/common/_register_for_tensor.py +++ b/mindspore/common/_register_for_tensor.py @@ -16,6 +16,7 @@ """Registry the relation.""" from collections import UserDict +from .. 
import context class Registry(UserDict): @@ -27,9 +28,16 @@ class Registry(UserDict): def get(self, obj_str): """Get the value by str.""" - if isinstance(obj_str, str): + if not isinstance(obj_str, str): + raise TypeError("key for tensor registry must be string.") + if context.get_context("enable_ge"): + def wrap(*args): + new_args = list(args) + new_args.append(obj_str) + return self["vm_compare"](*new_args) + obj = wrap + else: obj = self[obj_str] return obj - tensor_operator_registry = Registry() diff --git a/mindspore/common/api.py b/mindspore/common/api.py index 1a726f527e..4fad3e455b 100644 --- a/mindspore/common/api.py +++ b/mindspore/common/api.py @@ -20,7 +20,7 @@ from collections import OrderedDict from functools import wraps from mindspore import context from mindspore import log as logger -from .._c_expression import generate_key, Executor_, Tensor, MetaTensor +from .._c_expression import generate_key, Executor_, Tensor, MetaTensor, PynativeExecutor_ from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_backend from .tensor import Tensor as MsTensor @@ -273,6 +273,34 @@ def _generate_pip_args(obj, *args, method="construct"): obj.__parse_method__ = parse_method return args_names, args_list +class _PynativeExecutor: + """ + An pynative executor used to compile/manage/run graph. + + Returns: + Graph, return the result of pipeline running. 
+ """ + + def __init__(self): + self._executor = PynativeExecutor_.get_instance() + + def new_graph(self, obj, *args): + self._executor.new_graph(obj, *args) + + def end_graph(self, obj, output, *args): + self._executor.end_graph(obj, output, *args) + + def grad(self, grad, obj, weights, *args): + self._executor.grad_net(grad, obj, weights, *args) + + def clear(self, flag=""): + self._executor.clear(flag) + + def set_grad_flag(self, flag): + self._executor.set_grad_flag(flag) + + def __call__(self, *args): + return self._executor(args, "") class _Executor: """ @@ -334,7 +362,7 @@ class _Executor: if not auto_parallel_mode: param.init_data() elif key not in obj.parameter_layout_dict: - logger.info("Layout dict does not contain the key %s.", key) + logger.debug("Layout dict does not contain the key %s.", key) param.init_data(set_sliced=True) else: layout = obj.parameter_layout_dict[key] @@ -372,7 +400,7 @@ class _Executor: key = generate_key(phase, dic) self.phase_prefix = str(key[1]) if phase == 'export': - phase = phase + '.' + str(obj.create_time) + phase = phase + '.' + self.phase_prefix + '.' + str(obj.create_time) else: phase = self.phase_prefix + phase + '.' + str(obj.create_time) enable_debug_runtime = context.get_context("enable_debug_runtime") @@ -495,10 +523,16 @@ class _Executor: file_format (str): MindSpore currently support 'GEIR' and 'ONNX' format for exported model """ from .._c_expression import export_graph - phase = 'export' + '.' + str(net.create_time) + phase = 'export' + '.' + self.phase_prefix + '.' 
+ str(net.create_time) export_graph(file_name, file_format, phase) + def fetch_info_for_quant_export(self, exec_id): + """Get graph proto from pipeline.""" + if self._executor.has_compiled(exec_id) is False: + return None + return self._executor.fetch_info_for_quant_export(exec_id) _executor = _Executor() +_pynative_exec = _PynativeExecutor() __all__ = ['ms_function'] diff --git a/mindspore/common/dtype.py b/mindspore/common/dtype.py index 02a27591d4..46b111d2f6 100644 --- a/mindspore/common/dtype.py +++ b/mindspore/common/dtype.py @@ -170,8 +170,8 @@ def get_py_obj_dtype(obj): Type of MindSpore type. """ # Tensor - if hasattr(obj, 'dtype') and callable(obj.dtype) and isinstance(obj.dtype(), typing.Type): - return tensor_type(obj.dtype()) + if hasattr(obj, 'dtype') and isinstance(obj.dtype, typing.Type): + return tensor_type(obj.dtype) if hasattr(obj, '__primitive_flag__') or hasattr(obj, 'construct'): return function if isinstance(obj, (typing.Type, type)): diff --git a/mindspore/common/initializer.py b/mindspore/common/initializer.py index 54c0a1debe..83586272ee 100644 --- a/mindspore/common/initializer.py +++ b/mindspore/common/initializer.py @@ -41,7 +41,6 @@ class Initializer: self._kwargs = kwargs self.shape = None self.dtype = None - self._seed = None def _initialize(self, *kwargs): raise NotImplementedError('Must be overridden!') @@ -49,15 +48,6 @@ class Initializer: def __call__(self, arr): return self._initialize(arr) - @property - def seed(self): - return self._seed - - @seed.setter - def seed(self, seed_): - """set the random seed.""" - self._seed = seed_ - @property def shape(self): return self._shape @@ -74,19 +64,30 @@ class Initializer: def dtype(self, dtype): self._dtype = dtype - def to_tensor(self): - """Get the tensor format data of this Initializer.""" + def to_tensor(self, slice_index=None, shape=None): + """ + Get the tensor format data of this Initializer. + + Args: + slice_index (int): Slice index of a parameter's slices. 
+ Used when initialize a slice of a parameter, it guarantee that + devices use the same slice can generate the same tensor. + shape (list[int]): Shape of the slice, used when initialize a slice of the parameter. + """ arr = None + if shape is None: + shape = self.shape + try: - arr = np.ndarray(self.shape) + arr = np.ndarray(shape) except ValueError: - msg = "Error shape={}".format(self.shape) + msg = "Error shape={}".format(shape) logger.error(msg) raise ValueError(msg) - if self._seed is not None: - np.random.seed(self.seed) + + if slice_index is not None: + np.random.seed(slice_index) self.__call__(arr) - self._seed = None return Tensor(arr, dtype=self.dtype) def _register(*aliases): @@ -331,11 +332,11 @@ def initializer(init, shape=None, dtype=mstype.float32): raise TypeError("Unsupported init type '{}'.".format(type(init))) if isinstance(init, Tensor): - init_shape = init.shape() + init_shape = init.shape shape = shape if isinstance(shape, (tuple, list)) else [shape] if shape is not None and init_shape != tuple(shape): raise ValueError("The shape of init should be same as variable shape, but got the shape of init {} and " - "the variable shape {}.".format(list(init.shape()), shape)) + "the variable shape {}.".format(list(init.shape), shape)) return init if isinstance(shape, list): diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py index 788c2d0307..773f6a99a6 100644 --- a/mindspore/common/parameter.py +++ b/mindspore/common/parameter.py @@ -15,13 +15,14 @@ """Parameter for cell.""" import numbers -from copy import copy, deepcopy +from copy import copy +from mindspore import context from . 
import dtype as mstype from .initializer import initializer, Initializer from .tensor import Tensor, MetaTensor from .._checkparam import _check_str_by_regular from ..parallel._utils import _set_clone_info, _CloneInfo -from ..parallel._tensor import _get_seed +from ..parallel._tensor import _get_slice_index __all__ = ['Parameter', 'ParameterTuple'] @@ -50,15 +51,19 @@ class Parameter: requires_grad (bool): True if the parameter requires gradient. Default: True. layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in paralle mode, broadcast and gradients communication would not be applied on parameters. Default: False. + sparse_grad (str): Set if the parameter's gradient is sparse. Default: empty. """ - def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False): + def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=""): self.set_parameter_data(default_input) self.name = name self.requires_grad = requires_grad self.layerwise_parallel = layerwise_parallel + self.sparse_grad = sparse_grad self._is_init = False self._sliced = False self.clone_info = _CloneInfo() + if context.get_context("mode") == context.PYNATIVE_MODE: + self.init_data() def __repr__(self): format_str = 'Parameter (name={name})' @@ -135,11 +140,13 @@ class Parameter: x.name = prefix + '.' 
+ x.name x.is_init = False if init != 'same': - shape = self.default_input.shape() - dtype = self.default_input.dtype() + shape = self.default_input.shape + dtype = self.default_input.dtype if isinstance(init, (str, Initializer, numbers.Number)): x.init_mode = initializer(init, shape=shape, dtype=dtype) x.default_input = MetaTensor(dtype, shape) + if context.get_context("mode") == context.PYNATIVE_MODE: + x.init_data() else: x.default_input = initializer(init, shape=shape, dtype=dtype) @@ -168,30 +175,37 @@ class Parameter: raise TypeError("`requires_grad` parameter must be bool type") self._requires_grad = value + @property + def sparse_grad(self): + """Return whether the parameter's gradient is sparse.""" + return self._sparse_grad + + @sparse_grad.setter + def sparse_grad(self, value=""): + if not isinstance(value, str): + raise TypeError("`sparse_grad` parameter must be str type") + self._sparse_grad = value + @property def data(self): return self.default_input def __add__(self, other): - res = deepcopy(self) - res.default_input = res.default_input + other - return res + return self.default_input + other def __sub__(self, other): - res = deepcopy(self) - res.default_input = res.default_input - other - return res + return self.default_input - other def __mul__(self, other): - res = deepcopy(self) - default_input = res.default_input * other - res.default_input = Tensor(default_input.asnumpy().copy()) - return res + return self.default_input * other def __truediv__(self, other): - res = deepcopy(self) - res.default_input = res.default_input / other - return res + return self.default_input / other + + def __setitem__(self, index, value): + default_input = self.default_input + default_input[index] = value + return self def set_parameter_data(self, data): """Set `default_input` of current `Parameter`.""" @@ -237,10 +251,11 @@ class Parameter: if len(layout) != 3: raise ValueError("The length of layout must be 3! layout is {}." 
.format(layout)) - self.init_mode.shape = layout[2] - self.init_mode.seed = int(_get_seed(layout[0], layout[1])) + slice_index = int(_get_slice_index(layout[0], layout[1])) + self.default_input = self.init_mode.to_tensor(slice_index, layout[2]) + else: + self.default_input = self.init_mode.to_tensor() - self.default_input = self.init_mode.to_tensor() self.init_mode = None if set_sliced: self.sliced = True diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py index 864447c04d..0a631b954f 100644 --- a/mindspore/common/tensor.py +++ b/mindspore/common/tensor.py @@ -44,13 +44,13 @@ class Tensor(Tensor_): >>> # init a tensor with input data >>> t1 = Tensor(np.zeros([1, 2, 3]), mindspore.float32) >>> assert isinstance(t1, Tensor) - >>> assert t1.shape() == (1, 2, 3) - >>> assert t1.dtype() == mindspore.float32 + >>> assert t1.shape == (1, 2, 3) + >>> assert t1.dtype == mindspore.float32 >>> >>> # init a tensor with a float scalar >>> t2 = Tensor(0.1) >>> assert isinstance(t2, Tensor) - >>> assert t2.dtype() == mindspore.float64 + >>> assert t2.dtype == mindspore.float64 """ def __init__(self, input_data, dtype=None): @@ -71,38 +71,42 @@ class Tensor(Tensor_): return str(self.__str__()) def __add__(self, other): - check_type('tensor input_data', other, (Tensor, float, int)) out = tensor_operator_registry.get('__add__')(self, other) return out def __eq__(self, other): - if not isinstance(other, Tensor): + if not isinstance(other, (int, float, Tensor)): return False - return Tensor(np.array(self.asnumpy() == other.asnumpy())) + # bool type is not supported for `Equal` operator in backend. 
+ if self.dtype == mstype.bool_ or (isinstance(other, Tensor) and other.dtype == mstype.bool_): + return Tensor(np.array(self.asnumpy() == other.asnumpy())) + return tensor_operator_registry.get('__eq__')(self, other) def __ne__(self, other): - if not isinstance(other, Tensor): + if not isinstance(other, (int, float, Tensor)): return True - return Tensor(np.array(self.asnumpy() != other.asnumpy())) + # bool type is not supported for `NotEqual` operator in backend. + if self.dtype == mstype.bool_ or (isinstance(other, Tensor) and other.dtype == mstype.bool_): + return Tensor(np.array(self.asnumpy() != other.asnumpy())) + return tensor_operator_registry.get('__ne__')(self, other) def __hash__(self): return hash(id(self)) def __mul__(self, other): - check_type('tensor input_data', other, (Tensor, float, int)) out = tensor_operator_registry.get('__mul__')(self, other) return out def __neg__(self): - return Tensor(-self.asnumpy()) + out = tensor_operator_registry.get('__neg__')(self) + return out def __iadd__(self, other): out = self.__add__(other) return out def __radd__(self, other): - check_type('tensor operation input', other, (Tensor, float, int)) - out = tensor_operator_registry.get('__add__')(other, self) + out = tensor_operator_registry.get('__add__')(self, other) return out def __imul__(self, other): @@ -110,23 +114,19 @@ class Tensor(Tensor_): return out def __rmul__(self, other): - check_type('tensor operation input', other, (Tensor, float, int)) - out = tensor_operator_registry.get('__mul__')(other, self) + out = tensor_operator_registry.get('__mul__')(self, other) return out def __truediv__(self, other): - check_type('tensor operation input', other, (Tensor, float, int)) - out = tensor_operator_registry.get('__div__')(self, other) + out = tensor_operator_registry.get('__truediv__')(self, other) return out def __rtruediv__(self, other): - check_type('tensor operation input', other, (Tensor, float, int)) - out = tensor_operator_registry.get('__div__')(other, 
self) + out = tensor_operator_registry.get('__truediv__')(other, self) return out def __sub__(self, other): - check_type('tensor operation input', other, (Tensor, float, int)) - out = self.__add__(-other) + out = tensor_operator_registry.get('__sub__')(self, other) return out def __isub__(self, other): @@ -134,12 +134,42 @@ class Tensor(Tensor_): return out def __rsub__(self, other): - check_type('tensor operation input', other, (Tensor, float, int)) - out = tensor_operator_registry.get('__add__')(other, Tensor(-self.asnumpy())) + out = tensor_operator_registry.get('__sub__')(other, self) + return out + + def __lt__(self, other): + out = tensor_operator_registry.get('__lt__')(self, other) + return out + + def __le__(self, other): + out = tensor_operator_registry.get('__le__')(self, other) return out + def __getitem__(self, index): + out = tensor_operator_registry.get('__getitem__')(self, index) + return out + + def __setitem__(self, index, value): + out = tensor_operator_registry.get('__setitem__')(self, index, value) + self.assign_value(out) + return self + + def __gt__(self, other): + out = tensor_operator_registry.get('__gt__')(self, other) + return out + + def __ge__(self, other): + out = tensor_operator_registry.get('__ge__')(self, other) + return out + + def __len__(self): + out = tensor_operator_registry.get('shape')(self) + if not out: + return 1 + return out[0] + def __str__(self): - if self.dtype() == mstype.type_none: + if self.dtype == mstype.type_none: return "Unknown Tensor type!" 
return str(self.asnumpy()) diff --git a/mindspore/context.py b/mindspore/context.py index 89fb56b843..ad601f8fab 100644 --- a/mindspore/context.py +++ b/mindspore/context.py @@ -25,6 +25,7 @@ from mindspore._c_expression import MSContext from mindspore._checkparam import args_type_check from mindspore.parallel._auto_parallel_context import _set_auto_parallel_context, _get_auto_parallel_context, \ _reset_auto_parallel_context +from mindspore.parallel.mpi._mpi_config import _set_mpi_config, _get_mpi_config __all__ = ['GRAPH_MODE', 'PYNATIVE_MODE', 'set_context', 'get_context', 'set_auto_parallel_context', 'get_auto_parallel_context', 'reset_auto_parallel_context'] @@ -55,7 +56,8 @@ def _make_directory(path): os.makedirs(path) real_path = path except PermissionError as e: - logger.error(f"No write permission on the directory `{path}, error = {e}") + logger.error( + f"No write permission on the directory `{path}, error = {e}") raise ValueError(f"No write permission on the directory `{path}`.") return real_path @@ -78,11 +80,13 @@ class _ThreadLocalInfo(threading.local): def reserve_class_name_in_scope(self, reserve_class_name_in_scope): """Sets whether to save the network class name in the scope.""" if not isinstance(reserve_class_name_in_scope, bool): - raise ValueError("Set reserve_class_name_in_scope value must be bool!") + raise ValueError( + "Set reserve_class_name_in_scope value must be bool!") self._reserve_class_name_in_scope = reserve_class_name_in_scope -_ContextRecord = namedtuple("_ContextRecord", ["is_pynative_mode", "switch_context_fn"]) +_ContextRecord = namedtuple( + "_ContextRecord", ["is_pynative_mode", "switch_context_fn"]) class _ContextSwitchInfo(threading.local): @@ -109,7 +113,8 @@ class _ContextSwitchInfo(threading.local): """ if isinstance(switch_context_fn, FunctionType): switch_context_fn() - self.context_stack.append(_ContextRecord(is_pynative, switch_context_fn)) + self.context_stack.append( + _ContextRecord(is_pynative, switch_context_fn)) 
def pop(self): self.context_stack.pop() @@ -193,7 +198,8 @@ class _Context: @save_graphs_path.setter def save_graphs_path(self, save_graphs_path): - self._context_handle.set_save_graphs_path(_make_directory(save_graphs_path)) + self._context_handle.set_save_graphs_path( + _make_directory(save_graphs_path)) @property def device_target(self): @@ -212,7 +218,8 @@ class _Context: @device_id.setter def device_id(self, device_id): if device_id < 0 or device_id > 4095: - raise ValueError("Device id must be in [0, 4095], but got {}".format(device_id)) + raise ValueError( + "Device id must be in [0, 4095], but got {}".format(device_id)) success = self._context_handle.set_device_id(device_id) if not success: raise RuntimeError("Device id set failed!!!") @@ -239,7 +246,8 @@ class _Context: @enable_auto_mixed_precision.setter def enable_auto_mixed_precision(self, enable_auto_mixed_precision): - self._context_handle.set_auto_mixed_precision_flag(enable_auto_mixed_precision) + self._context_handle.set_auto_mixed_precision_flag( + enable_auto_mixed_precision) @property def enable_reduce_precision(self): @@ -247,7 +255,8 @@ class _Context: @enable_reduce_precision.setter def enable_reduce_precision(self, enable_reduce_precision): - self._context_handle.set_enable_reduce_precision_flag(enable_reduce_precision) + self._context_handle.set_enable_reduce_precision_flag( + enable_reduce_precision) @property def enable_dump(self): @@ -279,12 +288,21 @@ class _Context: @profiling_options.setter def profiling_options(self, option): - options = ["training_trace", "task_trace", "task_trace:training_trace", "training_trace:task_trace", "op_trace"] + options = ["training_trace", "task_trace", + "task_trace:training_trace", "training_trace:task_trace", "op_trace"] if option not in options: raise ValueError("Profiling options must be in 'training_trace' 'task_trace' " "'task_trace:training_trace' 'training_trace:task_trace' or 'op_trace'.") self._context_handle.set_profiling_options(option) + 
@property + def enable_graph_kernel(self): + return self._context_handle.get_enable_graph_kernel() + + @enable_graph_kernel.setter + def enable_graph_kernel(self, graph_kernel_switch_): + self._context_handle.set_enable_graph_kernel(graph_kernel_switch_) + @property def reserve_class_name_in_scope(self): """Gets whether to save the network class name in the scope.""" @@ -302,13 +320,19 @@ class _Context: @variable_memory_max_size.setter def variable_memory_max_size(self, variable_memory_max_size): if not check_input_format(variable_memory_max_size): - raise ValueError("Context param variable_memory_max_size should be in correct format! Such as \"5GB\"") + raise ValueError( + "Context param variable_memory_max_size should be in correct format! Such as \"5GB\"") if int(variable_memory_max_size[:-2]) >= _DEVICE_APP_MEMORY_SIZE: - raise ValueError("Context param variable_memory_max_size should be less than 31GB.") - variable_memory_max_size_ = variable_memory_max_size[:-2] + " * 1024 * 1024 * 1024" - graph_memory_max_size = _DEVICE_APP_MEMORY_SIZE - int(variable_memory_max_size[:-2]) - graph_memory_max_size_ = str(graph_memory_max_size) + " * 1024 * 1024 * 1024" - self._context_handle.set_variable_memory_max_size(variable_memory_max_size_) + raise ValueError( + "Context param variable_memory_max_size should be less than 31GB.") + variable_memory_max_size_ = variable_memory_max_size[:- + 2] + " * 1024 * 1024 * 1024" + graph_memory_max_size = _DEVICE_APP_MEMORY_SIZE - \ + int(variable_memory_max_size[:-2]) + graph_memory_max_size_ = str( + graph_memory_max_size) + " * 1024 * 1024 * 1024" + self._context_handle.set_variable_memory_max_size( + variable_memory_max_size_) self._context_handle.set_graph_memory_max_size(graph_memory_max_size_) @property @@ -332,6 +356,28 @@ class _Context: def check_bprop(self, check_bprop_flag): self._context_handle.set_check_bprop_flag(check_bprop_flag) + @property + def max_device_memory(self): + return 
self._context_handle.get_max_device_memory() + + @max_device_memory.setter + def max_device_memory(self, max_device_memory): + if not check_input_format(max_device_memory): + raise ValueError("Context param max_device_memory should be in correct format! Such as \"3.5GB\"") + max_device_memory_value = float(max_device_memory[:-2]) + if max_device_memory_value == 0: + raise ValueError("Context param max_device_memory should be in correct format! Such as \"3.5GB\"") + self._context_handle.set_max_device_memory(max_device_memory_value) + + @property + def print_file_path(self): + return None + + @print_file_path.setter + def print_file_path(self, file): + self._context_handle.set_print_file_path(file) + + def check_input_format(x): import re pattern = r'[1-9][0-9]*(\.)?[0-9]*GB|0\.[0-9]*GB' @@ -367,7 +413,8 @@ def _context(): @args_type_check(device_num=int, global_rank=int, mirror_mean=bool, cast_before_mirror=bool, parallel_mode=str, - parameter_broadcast=bool, strategy_ckpt_load_file=str, strategy_ckpt_save_file=str) + auto_parallel_search_mode=str, parameter_broadcast=bool, strategy_ckpt_load_file=str, + strategy_ckpt_save_file=str, full_batch=bool) def set_auto_parallel_context(**kwargs): """ Set auto parallel context. @@ -399,11 +446,18 @@ def set_auto_parallel_context(**kwargs): setting parallel strategies. - auto_parallel: Achieving parallelism automatically. + auto_parallel_search_mode (str): There are two kinds of search modes, "recursive_programming" + and "dynamic_programming". Default: "dynamic_programming". + + - recursive_programming: Recursive programming search mode. + + - dynamic_programming: Dynamic programming search mode. parameter_broadcast (bool): Indicating whether to broadcast parameters before training. "stand_alone", "semi_auto_parallel" and "auto_parallel" do not support parameter broadcast. Default: False. strategy_ckpt_load_file (str): The path to load parallel strategy checkpoint. 
Default: '' strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: '' + full_batch (bool): Whether to load the whole batch on each device. Default: False. Raises: ValueError: If input key is not attribute in auto parallel context. @@ -453,13 +507,11 @@ def reset_auto_parallel_context(): _reset_auto_parallel_context() -@args_type_check(mode=int, precompile_only=bool, device_target=str, - device_id=int, enable_ir_fusion=bool, save_graphs=bool, - enable_task_sink=bool, save_graphs_path=str, enable_loop_sink=bool, - enable_mem_reuse=bool, save_ms_model=bool, save_ms_model_path=str, enable_gpu_summary=bool, - enable_auto_mixed_precision=bool, enable_dump=bool, save_dump_path=str, - enable_reduce_precision=bool, enable_dynamic_memory=bool, graph_memory_max_size=str, - variable_memory_max_size=str, enable_profiling=bool, profiling_options=str) +@args_type_check(mode=int, precompile_only=bool, device_target=str, device_id=int, save_graphs=bool, + save_graphs_path=str, save_ms_model=bool, save_ms_model_path=str, enable_dump=bool, + save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str, + enable_profiling=bool, profiling_options=str, enable_auto_mixed_precision=bool, + check_bprop=bool, max_device_memory=str, print_file_path=str) def set_context(**kwargs): """ Sets context for running environment. @@ -476,7 +528,6 @@ def set_context(**kwargs): Note: Attribute name is required for setting attributes. - If need to config graph max memory size and variable max memory size, one must make sure: Args: mode (int): Running in GRAPH_MODE(0) or PYNATIVE_MODE(1). Default: PYNATIVE_MODE. @@ -511,6 +562,8 @@ def set_context(**kwargs): separated by colons; single operator can choose op_trace, op_trace cannot be combined with training_trace and task_trace. Default: "training_trace". check_bprop (bool): Whether to check bprop. Default: False. 
+ max_device_memory (str): Sets the maximum memory available for device, currently only supported on GPU. + The format is "xxGB". Default: "1024GB". Raises: ValueError: If input key is not an attribute in context. @@ -530,6 +583,7 @@ def set_context(**kwargs): >>> device_target="Ascend",device_id=0, save_graphs=True, >>> save_graphs_path="/mindspore") >>> context.set_context(enable_profiling=True, profiling_options="training_trace") + >>> context.set_context(max_device_memory="3.5GB") """ for key, value in kwargs.items(): if not hasattr(_context(), key): @@ -551,5 +605,43 @@ def get_context(attr_key): ValueError: If input key is not an attribute in context. """ if not hasattr(_context(), attr_key): - raise ValueError("Get context keyword %s is not recognized!" % attr_key) + raise ValueError( + "Get context keyword %s is not recognized!" % attr_key) return getattr(_context(), attr_key) + +@args_type_check(enable_mpi=bool) +def set_mpi_config(**kwargs): + """ + Sets mpi config for running environment. + + mpi config should be configured before running your program. If there is no configuration, + mpi module will be disabled by default. + + Note: + Attribute name is required for setting attributes. + + Args: + enable_mpi (bool): Whether to enable mpi. Default: False. + + Raises: + ValueError: If input key is not an attribute in mpi config. + + Examples: + >>> context.set_mpi_config(enable_mpi=True) + """ + _set_mpi_config(**kwargs) + +def get_mpi_config(attr_key): + """ + Gets mpi config attribute value according to the input key. + + Args: + attr_key (str): The key of the attribute. + + Returns: + Object, The value of given attribute key. + + Raises: + ValueError: If input key is not an attribute in context.
+ """ + return _get_mpi_config(attr_key) diff --git a/mindspore/dataset/__init__.py b/mindspore/dataset/__init__.py index ceca188112..f0070b428d 100644 --- a/mindspore/dataset/__init__.py +++ b/mindspore/dataset/__init__.py @@ -19,16 +19,16 @@ can also create samplers with this module to sample data. """ from .core.configuration import config -from .engine.datasets import TFRecordDataset, ImageFolderDatasetV2, MnistDataset, MindDataset, \ - GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CelebADataset, TextFileDataset, \ - Schema, Shuffle, zip, RandomDataset +from .engine.datasets import TFRecordDataset, ImageFolderDatasetV2, MnistDataset, MindDataset, NumpySlicesDataset, \ + GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CocoDataset, CelebADataset,\ + TextFileDataset, CLUEDataset, Schema, Shuffle, zip, RandomDataset from .engine.samplers import DistributedSampler, PKSampler, RandomSampler, SequentialSampler, SubsetRandomSampler, \ - WeightedRandomSampler, SubsetSampler, Sampler + WeightedRandomSampler, Sampler from .engine.serializer_deserializer import serialize, deserialize, show from .engine.graphdata import GraphData __all__ = ["config", "ImageFolderDatasetV2", "MnistDataset", "MindDataset", "GeneratorDataset", "TFRecordDataset", - "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset", - "VOCDataset", "TextFileDataset", "Schema", "DistributedSampler", "PKSampler", "RandomSampler", - "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler", "zip", "GraphData"] + "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset", "NumpySlicesDataset", "VOCDataset", + "CocoDataset", "TextFileDataset", "CLUEDataset", "Schema", "DistributedSampler", "PKSampler", + "RandomSampler", "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler", "zip", "GraphData"] diff --git a/mindspore/dataset/core/configuration.py b/mindspore/dataset/core/configuration.py index 
38b25368b3..d3175cd181 100644 --- a/mindspore/dataset/core/configuration.py +++ b/mindspore/dataset/core/configuration.py @@ -125,6 +125,35 @@ class ConfigurationManager: """ return self.config.get_num_parallel_workers() + def set_monitor_sampling_interval(self, interval): + """ + Set the default interval(ms) of monitor sampling. + + Args: + interval: interval(ms) to be used to performance monitor sampling. + + Raises: + ValueError: If interval is invalid (<= 0 or > MAX_INT_32). + + Examples: + >>> import mindspore.dataset as ds + >>> con = ds.engine.ConfigurationManager() + >>> # sets the new interval value. + >>> con.set_monitor_sampling_interval(100) + """ + if interval <= 0 or interval > INT32_MAX: + raise ValueError("Interval given is not within the required range") + self.config.set_monitor_sampling_interval(interval) + + def get_monitor_sampling_interval(self): + """ + Get the default interval of performance monitor sampling. + + Returns: + Interval: interval(ms) of performance monitor sampling. + """ + return self.config.get_monitor_sampling_interval() + def __str__(self): """ String representation of the configurations. 
diff --git a/mindspore/dataset/engine/__init__.py b/mindspore/dataset/engine/__init__.py index 59dca2f681..674848f156 100644 --- a/mindspore/dataset/engine/__init__.py +++ b/mindspore/dataset/engine/__init__.py @@ -28,10 +28,9 @@ from .serializer_deserializer import serialize, deserialize, show, compare from .samplers import * from ..core.configuration import config, ConfigurationManager - __all__ = ["config", "ConfigurationManager", "zip", "ImageFolderDatasetV2", "MnistDataset", - "MindDataset", "GeneratorDataset", "TFRecordDataset", + "MindDataset", "GeneratorDataset", "TFRecordDataset", "CLUEDataset", "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset", - "VOCDataset", "TextFileDataset", "Schema", "DistributedSampler", "PKSampler", - "RandomSampler", "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler"] + "VOCDataset", "CocoDataset", "TextFileDataset", "Schema", "DistributedSampler", + "PKSampler", "RandomSampler", "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler"] diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index 04d6a6e11d..ca6f7ca33e 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -33,17 +33,18 @@ import copy import numpy as np from mindspore._c_dataengine import DataType, TFReaderOp, ImageFolderOp, CifarOp, MnistOp, ManifestOp, \ - MindRecordOp, TextFileOp, VOCOp, CBatchInfo + MindRecordOp, TextFileOp, ClueOp, VOCOp, CocoOp, CBatchInfo from mindspore._c_expression import typing from mindspore import log as logger from . 
import samplers from .iterators import DictIterator, TupleIterator from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \ - check_rename, \ + check_rename, check_numpyslicesdataset, \ check_take, check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \ - check_tfrecorddataset, check_vocdataset, check_celebadataset, check_minddataset, check_generatordataset, \ - check_sync_wait, check_zip_dataset, check_add_column, check_textfiledataset, check_concat, check_split + check_tfrecorddataset, check_vocdataset, check_cocodataset, check_celebadataset, check_minddataset, \ + check_generatordataset, check_sync_wait, check_zip_dataset, check_add_column, check_textfiledataset, check_concat, \ + check_split, check_bucket_batch_by_length, check_cluedataset from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist try: @@ -60,7 +61,7 @@ class Shuffle(str, Enum): @check_zip def zip(datasets): """ - Zips the datasets in the input tuple of datasets. + Zip the datasets in the input tuple of datasets. Args: datasets (tuple of class Dataset): A tuple of datasets to be zipped together. @@ -133,8 +134,8 @@ class Dataset: """ def __init__(self, num_parallel_workers=None): - self.input = [] - self.output = [] + self.children = [] + self.parent = [] self.num_parallel_workers = num_parallel_workers self._device_iter = 0 self._input_indexs = () @@ -151,7 +152,7 @@ class Dataset: def get_args(self): """ - Returns attributes (member variables) related to the current class. + Return attributes (member variables) related to the current class. Must include all arguments passed to the __init__() of the current class, excluding 'input_dataset'. 
@@ -164,11 +165,81 @@ class Dataset: args["num_parallel_workers"] = self.num_parallel_workers return args + @check_bucket_batch_by_length + def bucket_batch_by_length(self, column_names, bucket_boundaries, bucket_batch_sizes, + element_length_function=None, pad_info=None, + pad_to_bucket_boundary=False, drop_remainder=False): + """ + Bucket elements according to their lengths, and pad and batch the buckets when + they are full. + + A length function is called on each row in the dataset, the row is then + bucketed based on its length and bucket_boundaries. When a bucket reaches its + corresponding size specified in bucket_batch_sizes, the entire bucket will be + padded according to batch_info, and then batched. Each batch will be full, + except for maybe the last batch for each bucket. + + Args: + column_names (list of string): Columns passed to element_length_function. + bucket_boundaries (list of int): A list consisting of the upper boundaries + of the buckets. Must be strictly increasing. If there are n boundaries, + n+1 buckets are created: One bucket for [0, bucket_boundaries[0]), one + bucket for [bucket_boundaries[i], bucket_boundaries[i+1]) for each + 0>> import mindspore.dataset as ds + >>> # data is an instance of Dataset object. + >>> + >>> # creates a dataset where every 100 rows is combined into a batch + >>> # and drops the last incomplete batch if there is one. + >>> column_names = ["col1", "col2"] + >>> buket_boundaries = [5, 10] + >>> bucket_batch_sizes = [5, 1, 1] + >>> element_length_function = (lambda col1, col2: max(len(col1), len(col2))) + >>> + >>> # will pad col1 to shape [2, bucket_boundaries[i]] where i is the + >>> # index of the bucket that is currently being batched. + >>> # will pad col2 to a shape where each dimension is the longest in all + >>> # the elements currently being batched. 
+ >>> pad_info = {"col1", ([2, None], -1)} + >>> pad_to_bucket_boundary = True + >>> + >>> data = data.bucket_batch_by_length(column_names, bucket_boundaries, + >>> bucket_batch_sizes, + >>> element_length_function, pad_info, + >>> pad_to_bucket_boundary) + """ + return BucketBatchByLengthDataset(self, column_names, bucket_boundaries, bucket_batch_sizes, + element_length_function, pad_info, + pad_to_bucket_boundary, drop_remainder) + @check_batch def batch(self, batch_size, drop_remainder=False, num_parallel_workers=None, per_batch_map=None, input_columns=None, pad_info=None): """ - Combines batch_size number of consecutive rows into batches. + Combine batch_size number of consecutive rows into batches. For any child node, a batch is treated as a single row. For any column, all the elements within that column must have the same shape. @@ -269,7 +340,7 @@ class Dataset: def flat_map(self, func): """ - Maps `func` to each row in dataset and flatten the result. + Map `func` to each row in dataset and flatten the result. The specified `func` is a function that must take one 'Ndarray' as input and return a 'Dataset'. @@ -299,6 +370,7 @@ class Dataset: """ dataset = None if not hasattr(func, '__call__'): + logger.error("func must be a function.") raise TypeError("func must be a function.") for row_data in self: @@ -308,6 +380,7 @@ class Dataset: dataset += func(row_data) if not isinstance(dataset, Dataset): + logger.error("flat_map must return a Dataset object.") raise TypeError("flat_map must return a Dataset object.") return dataset @@ -315,7 +388,7 @@ class Dataset: def map(self, input_columns=None, operations=None, output_columns=None, columns_order=None, num_parallel_workers=None, python_multiprocessing=False): """ - Applies each operation in operations to this dataset. + Apply each operation in operations to this dataset. The order of operations is determined by the position of each operation in operations. 
operations[0] will be applied first, then operations[1], then operations[2], etc. @@ -499,7 +572,7 @@ class Dataset: @check_repeat def repeat(self, count=None): """ - Repeats this dataset count times. Repeat indefinitely if the count is None or -1. + Repeat this dataset count times. Repeat indefinitely if the count is None or -1. Note: The order of using repeat and batch reflects the number of batches. Recommend that @@ -591,13 +664,16 @@ class Dataset: dataset_size = self.get_dataset_size() if dataset_size is None or dataset_size <= 0: - raise RuntimeError("dataset size unknown, unable to split.") + raise RuntimeError("dataset_size is unknown, unable to split.") + + if not isinstance(sizes, list): + raise RuntimeError("sizes should be a list.") all_int = all(isinstance(item, int) for item in sizes) if all_int: sizes_sum = sum(sizes) if sizes_sum != dataset_size: - raise RuntimeError("sum of split sizes {} is not equal to dataset size {}." + raise RuntimeError("Sum of split sizes {} is not equal to dataset size {}." .format(sizes_sum, dataset_size)) return sizes @@ -605,7 +681,7 @@ class Dataset: for item in sizes: absolute_size = int(round(item * dataset_size)) if absolute_size == 0: - raise RuntimeError("split percentage {} is too small.".format(item)) + raise RuntimeError("Split percentage {} is too small.".format(item)) absolute_sizes.append(absolute_size) absolute_sizes_sum = sum(absolute_sizes) @@ -613,7 +689,7 @@ class Dataset: # if we still need more rows, give them to the first split. # if we have too many rows, remove the extras from the first split that has # enough rows. - size_difference = dataset_size - absolute_sizes_sum + size_difference = int(dataset_size - absolute_sizes_sum) if size_difference > 0: absolute_sizes[0] += size_difference else: @@ -623,7 +699,7 @@ class Dataset: break if sum(absolute_sizes) != dataset_size: - raise RuntimeError("sum of calculated split sizes {} is not equal to dataset size {}." 
+ raise RuntimeError("Sum of calculated split sizes {} is not equal to dataset size {}." .format(absolute_sizes_sum, dataset_size)) return absolute_sizes @@ -631,7 +707,7 @@ class Dataset: @check_split def split(self, sizes, randomize=True): """ - Splits the dataset into smaller, non-overlapping datasets. + Split the dataset into smaller, non-overlapping datasets. This is a general purpose split function which can be called from any operator in the pipeline. There is another, optimized split function, which will be called automatically if ds.split is @@ -647,10 +723,14 @@ class Dataset: Datasets of size round(f1*K), round(f2*K), …, round(fn*K) where K is the size of the original dataset. If after rounding: - -Any size equals 0, an error will occur. - -The sum of split sizes < K, the difference will be added to the first split. - -The sum of split sizes > K, the difference will be removed from the first large - enough split such that it will have atleast 1 row after removing the difference. + + - Any size equals 0, an error will occur. + + - The sum of split sizes < K, the difference will be added to the first split. + + - The sum of split sizes > K, the difference will be removed from the first large + enough split such that it will have atleast 1 row after removing the difference. + randomize (bool, optional): determines whether or not to split the data randomly (default=True). If true, the data will be randomly split. Otherwise, each split will be created with consecutive rows from the dataset. 
@@ -684,10 +764,10 @@ class Dataset: >>> train, test = data.split([0.9, 0.1]) """ if self.is_shuffled(): - logger.warning("dataset is shuffled before split.") + logger.warning("Dataset is shuffled before split.") if self.is_sharded(): - raise RuntimeError("dataset should not be sharded before split.") + raise RuntimeError("Dataset should not be sharded before split.") absolute_sizes = self._get_absolute_split_sizes(sizes) splits = [] @@ -713,7 +793,7 @@ class Dataset: @check_zip_dataset def zip(self, datasets): """ - Zips the datasets in the input tuple of datasets. Columns in the input datasets must not have the same name. + Zip the datasets in the input tuple of datasets. Columns in the input datasets must not have the same name. Args: datasets (tuple or class Dataset): A tuple of datasets or a single class Dataset @@ -770,7 +850,7 @@ class Dataset: @check_rename def rename(self, input_columns, output_columns): """ - Renames the columns in input datasets. + Rename the columns in input datasets. Args: input_columns (list[str]): list of names of the input columns. @@ -796,7 +876,7 @@ class Dataset: @check_project def project(self, columns): """ - Projects certain columns in input datasets. + Project certain columns in input datasets. The specified columns will be selected from the dataset and passed down the pipeline in the order specified. The other columns are discarded. @@ -819,6 +899,9 @@ class Dataset: return ProjectDataset(self, columns) + def build_vocab(self, vocab, columns, freq_range, top_k, special_tokens, special_first): + return BuildVocabDataset(self, vocab, columns, freq_range, top_k, special_tokens, special_first) + def apply(self, apply_func): """ Apply a function in this dataset. @@ -858,7 +941,7 @@ class Dataset: def device_que(self, prefetch_size=None): """ - Returns a transferredDataset that transfer data through device. + Return a transferredDataset that transfer data through device. 
Args: prefetch_size (int, optional): prefetch number of records ahead of the @@ -875,7 +958,7 @@ class Dataset: def to_device(self, num_batch=None): """ - Transfers data through CPU, GPU or Ascend devices. + Transfer data through CPU, GPU or Ascend devices. Args: num_batch (int, optional): limit the number of batch to be sent to device (default=None). @@ -910,29 +993,28 @@ class Dataset: raise TypeError("Please set device_type in context") if device_type not in ('Ascend', 'GPU', 'CPU'): - raise ValueError("only support CPU, Ascend, GPU") + raise ValueError("Only support CPU, Ascend, GPU") if num_batch is None or num_batch == 0: raise ValueError("num_batch is None or 0.") def get_distribution(output_dataset): dev_id = 0 - if isinstance(output_dataset, (MindDataset)): - return output_dataset.distribution, dev_id if isinstance(output_dataset, (Cifar10Dataset, Cifar100Dataset, GeneratorDataset, ImageFolderDatasetV2, - ManifestDataset, MnistDataset, VOCDataset, CelebADataset)): + ManifestDataset, MnistDataset, VOCDataset, CocoDataset, CelebADataset, + MindDataset)): sampler = output_dataset.sampler if isinstance(sampler, samplers.DistributedSampler): dev_id = sampler.shard_id return "", dev_id - if isinstance(output_dataset, TFRecordDataset): + if isinstance(output_dataset, (TFRecordDataset, TextFileDataset, CLUEDataset)): if output_dataset.shard_id is not None: dev_id = output_dataset.shard_id return "", dev_id - if not output_dataset.input: + if not output_dataset.children: raise RuntimeError("Unknown output_dataset: {}".format(type(output_dataset))) - input_dataset = output_dataset.input[0] + input_dataset = output_dataset.children[0] return get_distribution(input_dataset) distribution_path, device_id = get_distribution(self) @@ -1012,7 +1094,7 @@ class Dataset: def _get_pipeline_info(self): """ - Gets pipeline information. + Get pipeline information. 
""" device_iter = TupleIterator(self) self._output_shapes = device_iter.get_output_shapes() @@ -1053,8 +1135,8 @@ class Dataset: Return: Number, number of batches. """ - if self.input: - return self.input[0].get_dataset_size() + if self.children: + return self.children[0].get_dataset_size() return None def num_classes(self): @@ -1064,23 +1146,23 @@ class Dataset: Return: Number, number of classes. """ - if self.input: - return self.input[0].num_classes() + if self.children: + return self.children[0].num_classes() return None def get_sync_notifiers(self): - if self.input: - return self.input[0].get_sync_notifiers() + if self.children: + return self.children[0].get_sync_notifiers() return {} def disable_sync(self): - if self.input: - return self.input[0].disable_sync() + if self.children: + return self.children[0].disable_sync() return {} def is_sync(self): - if self.input: - return self.input[0].is_sync() + if self.children: + return self.children[0].is_sync() return False def sync_update(self, condition_name, num_batch=None, data=None): @@ -1114,8 +1196,8 @@ class Dataset: Return: Number, the number of data in a batch. """ - if self.input: - return self.input[0].get_batch_size() + if self.children: + return self.children[0].get_batch_size() return 1 def get_repeat_count(self): @@ -1125,8 +1207,8 @@ class Dataset: Return: Number, the count of repeat. """ - if self.input: - return self.input[0].get_repeat_count() + if self.children: + return self.children[0].get_repeat_count() return 1 def get_class_indexing(self): @@ -1136,22 +1218,22 @@ class Dataset: Return: Dict, A str-to-int mapping from label name to index. 
""" - if self.input: - return self.input[0].get_class_indexing() + if self.children: + return self.children[0].get_class_indexing() raise NotImplementedError("Dataset {} has not supported api get_class_indexing yet.".format(type(self))) def reset(self): """Reset the dataset for next epoch.""" def is_shuffled(self): - for input_dataset in self.input: + for input_dataset in self.children: if input_dataset.is_shuffled(): return True return False def is_sharded(self): - for input_dataset in self.input: + for input_dataset in self.children: if input_dataset.is_sharded(): return True @@ -1257,8 +1339,8 @@ class MappableDataset(SourceDataset): def _get_sampler_dataset_size(self): if self.sampler is not None: - if hasattr(self.sampler, 'get_dataset_size'): - return self.sampler.get_dataset_size() + if hasattr(self.sampler, 'get_num_samples'): + return self.sampler.get_num_samples() if hasattr(self.sampler, '__len__'): return len(self.sampler) @@ -1267,7 +1349,7 @@ class MappableDataset(SourceDataset): @check_split def split(self, sizes, randomize=True): """ - Splits the dataset into smaller, non-overlapping datasets. + Split the dataset into smaller, non-overlapping datasets. There is the optimized split function, which will be called automatically when the dataset that calls this function is a MappableDataset. @@ -1282,10 +1364,14 @@ class MappableDataset(SourceDataset): Datasets of size round(f1*K), round(f2*K), …, round(fn*K) where K is the size of the original dataset. If after rounding: - -Any size equals 0, an error will occur. - -The sum of split sizes < K, the difference will be added to the first split. - -The sum of split sizes > K, the difference will be removed from the first large - enough split such that it will have atleast 1 row after removing the difference. + + - Any size equals 0, an error will occur. + + - The sum of split sizes < K, the difference will be added to the first split. 
+ + - The sum of split sizes > K, the difference will be removed from the first large + enough split such that it will have atleast 1 row after removing the difference. + randomize (bool, optional): determines whether or not to split the data randomly (default=True). If true, the data will be randomly split. Otherwise, each split will be created with consecutive rows from the dataset. @@ -1330,10 +1416,10 @@ class MappableDataset(SourceDataset): >>> train.use_sampler(train_sampler) """ if self.is_shuffled(): - logger.warning("dataset is shuffled before split.") + logger.warning("Dataset is shuffled before split.") if self.is_sharded(): - raise RuntimeError("dataset should not be sharded before split.") + raise RuntimeError("Dataset should not be sharded before split.") absolute_sizes = self._get_absolute_split_sizes(sizes) splits = [] @@ -1347,7 +1433,7 @@ class MappableDataset(SourceDataset): random_sampler.reshuffle_each_epoch = False ds.add_sampler(random_sampler) - subset_sampler = samplers.SubsetSampler(current_split_start_index, size) + subset_sampler = samplers.SequentialSampler(current_split_start_index, size) ds.add_sampler(subset_sampler) # add sequential sampler, so that if user calls use_sampler, we will @@ -1369,6 +1455,48 @@ class DatasetOp(Dataset): # No need for __init__ since it is the same as the super's init +class BucketBatchByLengthDataset(DatasetOp): + """ + The result of applying BucketBatchByLength operator to the input dataset. 
+ """ + + def __init__(self, input_dataset, column_names, bucket_boundaries, bucket_batch_sizes, + element_length_function, pad_info, pad_to_bucket_boundary, drop_remainder): + super().__init__() + + self.column_names = column_names + self.bucket_boundaries = bucket_boundaries + self.bucket_batch_sizes = bucket_batch_sizes + self.element_length_function = element_length_function + self.pad_info = pad_info + self.pad_to_bucket_boundary = pad_to_bucket_boundary + self.drop_remainder = drop_remainder + + self.children.append(input_dataset) + input_dataset.parent.append(self) + self._input_indexs = input_dataset.input_indexs + + def get_args(self): + args = super().get_args() + args["length_dependent_columns"] = self.column_names + args["bucket_boundaries"] = self.bucket_boundaries + args["bucket_batch_sizes"] = self.bucket_batch_sizes + args["element_length_function"] = self.element_length_function + args["pad_info"] = self.pad_info + args["pad_to_bucket_boundary"] = self.pad_to_bucket_boundary + args["drop_remainder"] = self.drop_remainder + return args + + def get_dataset_size(self): + """ + Get the number of batches in an epoch. + + Return: + Number, number of batches. + """ + return None + + class BatchDataset(DatasetOp): """ The result of applying Batch operator to the input dataset. @@ -1407,8 +1535,8 @@ class BatchDataset(DatasetOp): self.per_batch_map = per_batch_map self.input_columns = input_columns self.pad_info = pad_info - self.input.append(input_dataset) - input_dataset.output.append(self) + self.children.append(input_dataset) + input_dataset.parent.append(self) self._input_indexs = input_dataset.input_indexs def get_args(self): @@ -1427,7 +1555,7 @@ class BatchDataset(DatasetOp): Return: Number, number of batches. 
""" - child_size = self.input[0].get_dataset_size() + child_size = self.children[0].get_dataset_size() if child_size is not None: if self.drop_remainder: return math.floor(child_size / self.batch_size) @@ -1456,7 +1584,7 @@ class BatchDataset(DatasetOp): if isinstance(dataset, RepeatDataset): return True flag = False - for input_dataset in dataset.input: + for input_dataset in dataset.children: flag = flag | BatchDataset._is_ancestor_of_repeat(input_dataset) return flag @@ -1467,13 +1595,14 @@ class BatchDataset(DatasetOp): Args: dataset (Dataset): dataset to be checked. - batchsize (int): batch size to notify. + batch_size (int): batch size to notify. """ if isinstance(dataset, SyncWaitDataset): dataset.update_sync_batch_size(batch_size) - for input_dataset in dataset.input: + for input_dataset in dataset.children: BatchDataset._update_batch_size_for_syncwait(input_dataset, batch_size) + class BatchInfo(CBatchInfo): """ The information object associates with the current batch of tensors. @@ -1497,17 +1626,19 @@ class BatchInfo(CBatchInfo): """ return + class BlockReleasePair: """ The blocking condition class used by SyncWaitDataset. Args: init_release_rows (int): Number of lines to allow through the pipeline. - callback (function): The callback funciton that will be called when release is called. + callback (function): The callback function that will be called when release is called. """ + def __init__(self, init_release_rows, callback=None): if isinstance(init_release_rows, int) and init_release_rows <= 0: - raise ValueError("release_rows need to be greater than 0.") + raise ValueError("release_rows need to be greater than 0.") self.row_count = -init_release_rows self.cv = threading.Condition() self.callback = callback @@ -1566,7 +1697,7 @@ class SyncWaitDataset(DatasetOp): input_dataset (Dataset): Input dataset to apply flow control. num_batch (int): the number of batches without blocking at the start of each epoch. 
condition_name (str): The condition name that is used to toggle sending next row. - callback (function): The callback funciton that will be invoked when sync_update is called. + callback (function): The callback function that will be invoked when sync_update is called. Raises: RuntimeError: If condition name already exists. @@ -1574,21 +1705,21 @@ class SyncWaitDataset(DatasetOp): def __init__(self, input_dataset, condition_name, num_batch, callback=None): super().__init__() - self.input.append(input_dataset) - input_dataset.output.append(self) + self.children.append(input_dataset) + input_dataset.parent.append(self) # set to the default value, waiting for the batch to update it self._condition_name = condition_name if isinstance(num_batch, int) and num_batch <= 0: raise ValueError("num_batch need to be greater than 0.") self._pair = BlockReleasePair(num_batch, callback) - if self._condition_name in self.input[0].get_sync_notifiers(): + if self._condition_name in self.children[0].get_sync_notifiers(): raise RuntimeError("Condition name is already in use") logger.warning("Please remember to add dataset.sync_update(condition=%s), otherwise will result in hanging", condition_name) def get_sync_notifiers(self): - return {**self.input[0].get_sync_notifiers(), **{self._condition_name: self._pair.release_func}} + return {**self.children[0].get_sync_notifiers(), **{self._condition_name: self._pair.release_func}} def is_sync(self): return True @@ -1621,7 +1752,7 @@ class SyncWaitDataset(DatasetOp): if isinstance(dataset, BatchDataset): return True flag = False - for input_dataset in dataset.input: + for input_dataset in dataset.children: flag = flag | SyncWaitDataset._is_ancestor_of_batch(input_dataset) return flag @@ -1641,9 +1772,9 @@ class ShuffleDataset(DatasetOp): def __init__(self, input_dataset, buffer_size): super().__init__() self.buffer_size = buffer_size - self.input.append(input_dataset) + self.children.append(input_dataset) self.reshuffle_each_epoch = None - 
input_dataset.output.append(self) + input_dataset.parent.append(self) self._input_indexs = input_dataset.input_indexs if self.is_sync(): raise RuntimeError("No shuffle after sync operators") @@ -1687,6 +1818,7 @@ class _PythonCallable: """ Internal python function wrapper for multiprocessing pyfunc. """ + def __init__(self, py_callable, idx, pool=None): # Original python callable from user. self.py_callable = py_callable @@ -1738,7 +1870,7 @@ class MapDataset(DatasetOp): def __init__(self, input_dataset, input_columns=None, operations=None, output_columns=None, columns_order=None, num_parallel_workers=None, python_multiprocessing=False): super().__init__(num_parallel_workers) - self.input.append(input_dataset) + self.children.append(input_dataset) if input_columns is not None and not isinstance(input_columns, list): input_columns = [input_columns] self.input_columns = input_columns @@ -1755,7 +1887,7 @@ class MapDataset(DatasetOp): and self.columns_order is None: raise ValueError("When (len(input_columns) != len(output_columns)), columns_order must be specified.") - input_dataset.output.append(self) + input_dataset.parent.append(self) self._input_indexs = input_dataset.input_indexs self.python_multiprocessing = python_multiprocessing self.process_pool = None @@ -1765,6 +1897,7 @@ class MapDataset(DatasetOp): args["input_columns"] = self.input_columns args["operations"] = self.operations args["output_columns"] = self.output_columns + args["columns_order"] = self.columns_order return args def get_dataset_size(self): @@ -1774,7 +1907,7 @@ class MapDataset(DatasetOp): Return: Number, number of batches. 
""" - return self.input[0].get_dataset_size() + return self.children[0].get_dataset_size() def __deepcopy__(self, memodict): if id(self) in memodict: @@ -1782,12 +1915,12 @@ class MapDataset(DatasetOp): cls = self.__class__ new_op = cls.__new__(cls) memodict[id(self)] = new_op - new_op.input = copy.deepcopy(self.input, memodict) + new_op.children = copy.deepcopy(self.children, memodict) new_op.input_columns = copy.deepcopy(self.input_columns, memodict) new_op.output_columns = copy.deepcopy(self.output_columns, memodict) new_op.columns_order = copy.deepcopy(self.columns_order, memodict) new_op.num_parallel_workers = copy.deepcopy(self.num_parallel_workers, memodict) - new_op.output = copy.deepcopy(self.output, memodict) + new_op.parent = copy.deepcopy(self.parent, memodict) new_op.input_indexs = copy.deepcopy(self._input_indexs, memodict) new_op.python_multiprocessing = copy.deepcopy(self.python_multiprocessing, memodict) new_op.operations = self.operations @@ -1848,8 +1981,8 @@ class FilterDataset(DatasetOp): def __init__(self, input_dataset, predicate, input_columns=None, num_parallel_workers=None): super().__init__(num_parallel_workers) self.predicate = lambda *args: bool(predicate(*args)) - self.input.append(input_dataset) - input_dataset.output.append(self) + self.children.append(input_dataset) + input_dataset.parent.append(self) if input_columns is not None and not isinstance(input_columns, list): input_columns = [input_columns] self.input_columns = input_columns @@ -1885,8 +2018,8 @@ class RepeatDataset(DatasetOp): self.count = -1 else: self.count = count - self.input.append(input_dataset) - input_dataset.output.append(self) + self.children.append(input_dataset) + input_dataset.parent.append(self) self._input_indexs = input_dataset.input_indexs def get_args(self): @@ -1901,7 +2034,7 @@ class RepeatDataset(DatasetOp): Return: Number, number of batches. 
""" - child_size = self.input[0].get_dataset_size() + child_size = self.children[0].get_dataset_size() if child_size is not None: return child_size return None @@ -1921,15 +2054,15 @@ class SkipDataset(DatasetOp): The result of applying Skip operator to the input Dataset. Args: - datasets (tuple): A tuple of datasets to be skipped. + input_dataset (tuple): A tuple of datasets to be skipped. count (int): Number of rows the dataset should be skipped. """ def __init__(self, input_dataset, count): super().__init__() self.count = count - self.input.append(input_dataset) - input_dataset.output.append(self) + self.children.append(input_dataset) + input_dataset.parent.append(self) self._input_indexs = input_dataset.input_indexs def get_args(self): @@ -1944,7 +2077,7 @@ class SkipDataset(DatasetOp): Return: Number, number of batches. """ - child_size = self.input[0].get_dataset_size() + child_size = self.children[0].get_dataset_size() output_size = 0 if self.count >= 0 and self.count < child_size: output_size = child_size - self.count @@ -1963,8 +2096,8 @@ class TakeDataset(DatasetOp): def __init__(self, input_dataset, count): super().__init__() self.count = count - self.input.append(input_dataset) - input_dataset.output.append(self) + self.children.append(input_dataset) + input_dataset.parent.append(self) self._input_indexs = input_dataset.input_indexs def get_args(self): @@ -1979,7 +2112,7 @@ class TakeDataset(DatasetOp): Return: Number, number of batches. """ - child_size = self.input[0].get_dataset_size() + child_size = self.children[0].get_dataset_size() if child_size < self.count: return child_size return self.count @@ -2003,8 +2136,8 @@ class ZipDataset(DatasetOp): raise TypeError("The parameter %s of zip has type error!" 
% (dataset)) self.datasets = datasets for data in datasets: - self.input.append(data) - data.output.append(self) + self.children.append(data) + data.parent.append(self) def get_dataset_size(self): """ @@ -2013,7 +2146,7 @@ class ZipDataset(DatasetOp): Return: Number, number of batches. """ - children_sizes = [c.get_dataset_size() for c in self.input] + children_sizes = [c.get_dataset_size() for c in self.children] if all(c is not None for c in children_sizes): return min(children_sizes) return None @@ -2028,7 +2161,7 @@ class ZipDataset(DatasetOp): return None def is_sync(self): - return any([c.is_sync() for c in self.input]) + return any([c.is_sync() for c in self.children]) def get_args(self): args = super().get_args() @@ -2053,8 +2186,8 @@ class ConcatDataset(DatasetOp): raise TypeError("The parameter %s of concat has type error!" % (dataset)) self.datasets = datasets for data in datasets: - self.input.append(data) - data.output.append(self) + self.children.append(data) + data.parent.append(self) def get_dataset_size(self): """ @@ -2063,8 +2196,8 @@ class ConcatDataset(DatasetOp): Return: Number, number of batches. """ - children_sizes = [c.get_dataset_size() for c in self.input] - dataset_size = np.sum(children_sizes) + children_sizes = [c.get_dataset_size() for c in self.children] + dataset_size = sum(children_sizes) return dataset_size @@ -2074,8 +2207,8 @@ class RenameDataset(DatasetOp): Args: input_dataset (Dataset): Input Dataset to be Renamed. - input_column_names (list[str]): list of names of the input columns. - output_column_names (list[str]): list of names of the output columns. + input_columns (list[str]): list of names of the input columns. + output_columns (list[str]): list of names of the output columns. 
""" def __init__(self, input_dataset, input_columns, output_columns): @@ -2086,8 +2219,8 @@ class RenameDataset(DatasetOp): output_columns = [output_columns] self.input_column_names = input_columns self.output_column_names = output_columns - self.input.append(input_dataset) - input_dataset.output.append(self) + self.children.append(input_dataset) + input_dataset.parent.append(self) self._input_indexs = input_dataset.input_indexs def get_args(self): @@ -2113,10 +2246,10 @@ class ProjectDataset(DatasetOp): if not isinstance(columns, list): columns = [columns] self.columns = columns - self.input.append(input_dataset) + self.children.append(input_dataset) self.prefetch_size = prefetch_size - input_dataset.output.append(self) + input_dataset.parent.append(self) self._input_indexs = input_dataset.input_indexs def get_args(self): @@ -2140,8 +2273,8 @@ class TransferDataset(DatasetOp): def __init__(self, input_dataset, queue_name, device_id, device_type, num_batch=None): super().__init__() - self.input.append(input_dataset) - input_dataset.output.append(self) + self.children.append(input_dataset) + input_dataset.parent.append(self) self.queue_name = queue_name self._input_indexs = input_dataset.input_indexs self._device_type = device_type @@ -2218,31 +2351,45 @@ def _select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id): num_shards (int): Number of shard for sharding. shard_id (int): Shard ID. """ + if input_sampler is not None: + # If the user provided a sampler, then it doesn't matter what the other args are because + # we are being asked specifically to use the given sampler. + # That means the following arguments: num_shards, shard_id, shuffle, num_samples should all + # be None. 
Consider this example: + # sampler = ds.DistributedSampler(num_shards=8, shard_id=3, shuffle=shuffle) + # data1 = ds.VOCDataset(voc_dir, decode=True, sampler=sampler, num_shards=4, shard_id=1) + # In this case, the user has given different sample-related arguments that contradict each other. + # To prevent this, only allow the user to manually specify the sampler if those arguments are all None + if (isinstance(input_sampler, (samplers.SequentialSampler, samplers.DistributedSampler, + samplers.RandomSampler, samplers.SubsetRandomSampler, + samplers.WeightedRandomSampler, samplers.Sampler)) and + (num_shards is not None or shard_id is not None or shuffle is not None or num_samples is not None)): + raise ValueError( + 'Conflicting arguments during sampler assignments. num_samples: {}, num_shards: {},' + ' shard_id: {}, shuffle: {})'.format(num_samples, num_shards, shard_id, shuffle)) + return input_sampler if shuffle is None: - if input_sampler is not None: - # If shuffle is not specified, user provided sampler, use user's sampler - return input_sampler if num_shards is not None: # If shuffle is not specified, sharding enabled, use distributed random sampler shuffle = True - return samplers.DistributedSampler(num_shards, shard_id, shuffle=shuffle) + return samplers.DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples) # If shuffle is not specified, sharding disabled, use random sampler if num_samples is not None: return samplers.RandomSampler(replacement=True, num_samples=num_samples) - return samplers.RandomSampler() + return samplers.RandomSampler(num_samples=num_samples) if shuffle is True: if num_shards is not None: # If shuffle enabled, sharding enabled, use distributed random sampler - return samplers.DistributedSampler(num_shards, shard_id, shuffle=shuffle) + return samplers.DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples) # If shuffle enabled, sharding disabled, use random sampler if num_samples 
is not None: return samplers.RandomSampler(replacement=True, num_samples=num_samples) - return samplers.RandomSampler() + return samplers.RandomSampler(num_samples=num_samples) if num_shards is not None: # If shuffle disabled, sharding enabled, use distributed sequential sampler - return samplers.DistributedSampler(num_shards, shard_id, shuffle=shuffle) + return samplers.DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples) # If shuffle disabled, sharding disabled, use sequential sampler - return samplers.SequentialSampler() + return samplers.SequentialSampler(num_samples=num_samples) class ImageFolderDatasetV2(MappableDataset): @@ -2362,11 +2509,7 @@ class ImageFolderDatasetV2(MappableDataset): Return: Number, number of batches. """ - if self.num_samples is None: - num_samples = 0 - else: - num_samples = self.num_samples - num_rows = ImageFolderOp.get_num_rows_and_classes(self.dataset_dir, num_samples)[0] + num_rows = ImageFolderOp.get_num_rows_and_classes(self.dataset_dir)[0] rows_per_shard = get_num_rows(num_rows, self.num_shards) rows_from_sampler = self._get_sampler_dataset_size() @@ -2382,11 +2525,7 @@ class ImageFolderDatasetV2(MappableDataset): Return: Number, number of classes. """ - if self.num_samples is None: - num_samples = 0 - else: - num_samples = self.num_samples - return ImageFolderOp.get_num_rows_and_classes(self.dataset_dir, num_samples)[1] + return ImageFolderOp.get_num_rows_and_classes(self.dataset_dir)[1] def is_shuffled(self): if self.shuffle_level is None: @@ -2495,12 +2634,7 @@ class MnistDataset(MappableDataset): Return: Number, number of batches. 
""" - if self.num_samples is None: - num_samples = 0 - else: - num_samples = self.num_samples - - num_rows = MnistOp.get_num_rows(self.dataset_dir, num_samples) + num_rows = MnistOp.get_num_rows(self.dataset_dir) rows_per_shard = get_num_rows(num_rows, self.num_shards) rows_from_sampler = self._get_sampler_dataset_size() @@ -2522,7 +2656,7 @@ class MnistDataset(MappableDataset): return self.sampler.is_sharded() -class MindDataset(SourceDataset): +class MindDataset(MappableDataset): """ A source dataset that reads from shard files and database. @@ -2539,7 +2673,13 @@ class MindDataset(SourceDataset): sampler (Sampler, optional): Object used to choose samples from the dataset (default=None, sampler is exclusive with shuffle and block_reader). Support list: SubsetRandomSampler, - PkSampler + PkSampler, RandomSampler, SequentialSampler, DistributedSampler. + padded_sample (dict, optional): Samples will be appended to dataset, which + keys are the same as column_list. + num_padded (int, optional): Number of padding samples.Dataset size + plus num_padded should be divisible by num_shards. + num_samples (int, optional): The number of samples to be included in the dataset + (default=None, all samples). Raises: ValueError: If num_shards is specified but shard_id is None. 
@@ -2550,7 +2690,8 @@ class MindDataset(SourceDataset): @check_minddataset def __init__(self, dataset_file, columns_list=None, num_parallel_workers=None, shuffle=None, num_shards=None, shard_id=None, - block_reader=False, sampler=None): + block_reader=False, sampler=None, padded_sample=None, + num_padded=None, num_samples=None): super().__init__(num_parallel_workers) if isinstance(dataset_file, list): self.load_dataset = False @@ -2558,53 +2699,57 @@ class MindDataset(SourceDataset): self.load_dataset = True self.dataset_file = dataset_file self.columns_list = columns_list - self.global_shuffle = shuffle - self.distribution = "" - self.sampler = sampler - - if num_shards is None or shard_id is None: - self.partitions = None - else: - self.partitions = [num_shards, shard_id] + self.shuffle_option = shuffle + self.num_shards = num_shards + self.shard_id = shard_id - if block_reader is True and self.partitions is not None: - raise ValueError("block reader not allowed true when use partitions") + if block_reader is True and num_shards is not None: + raise ValueError("block_reader not allowed true when use partitions") if block_reader is True and shuffle is True: - raise ValueError("block reader not allowed true when use shuffle") + raise ValueError("block_reader not allowed true when use shuffle") if block_reader is True: logger.warning("WARN: global shuffle is not used.") if sampler is not None: - if isinstance(sampler, samplers.SubsetRandomSampler) is False and \ - isinstance(sampler, samplers.PKSampler) is False: - raise ValueError("the sampler is not supported yet.") + if isinstance(sampler, (samplers.SubsetRandomSampler, samplers.PKSampler, + samplers.DistributedSampler, samplers.RandomSampler, + samplers.SequentialSampler)) is False: + raise ValueError("The sampler is not supported yet.") + + self.sampler = _select_sampler(num_samples, sampler, shuffle, num_shards, shard_id) + self.num_samples = num_samples # sampler exclusive if block_reader is True and sampler 
is not None: - raise ValueError("block reader not allowed true when use sampler") - - if shuffle is not None and sampler is not None: - raise ValueError("shuffle not allowed when use sampler") + raise ValueError("block_reader not allowed true when use sampler") - if block_reader is False and sampler is None: - self.global_shuffle = not bool(shuffle is False) + if num_padded is None: + num_padded = 0 - self.num_shards = num_shards - self.shard_id = shard_id self.block_reader = block_reader + self.padded_sample = padded_sample + self.num_padded = num_padded def get_args(self): args = super().get_args() + padded_sample = None + if self.padded_sample: + padded_sample = {} + for k, v in self.padded_sample.items(): + if isinstance(v, np.ndarray): + padded_sample[k] = v.tobytes() + else: + padded_sample[k] = v args["dataset_file"] = self.dataset_file args["load_dataset"] = self.load_dataset args["columns_list"] = self.columns_list - args["global_shuffle"] = self.global_shuffle - args["partitions"] = self.partitions + args["shuffle_option"] = self.shuffle_option + args["num_samples"] = self.num_samples args["block_reader"] = self.block_reader - args["num_shards"] = self.num_shards - args["shard_id"] = self.shard_id + args["num_padded"] = self.num_padded + args["padded_sample"] = padded_sample args["sampler"] = self.sampler return args @@ -2615,23 +2760,28 @@ class MindDataset(SourceDataset): Return: Number, number of batches. 
""" - if self.load_dataset: - dataset_file = [self.dataset_file] - else: - dataset_file = self.dataset_file - num_rows = MindRecordOp.get_num_rows(dataset_file, self.load_dataset, self.sampler) - if self.partitions is not None and self.partitions[0] > 0: - if num_rows % self.partitions[0] == 0: - num_rows = num_rows // self.partitions[0] + if self._dataset_size is None: + if self.load_dataset: + dataset_file = [self.dataset_file] else: - num_rows = num_rows // self.partitions[0] + 1 - return num_rows + dataset_file = self.dataset_file + num_rows = MindRecordOp.get_num_rows(dataset_file, self.load_dataset, self.sampler, self.num_padded) + return num_rows + return self._dataset_size + + # manually set dataset_size as a tempoary solution. + def set_dataset_size(self, value): + logger.warning("WARN_DEPRECATED: This method is deprecated. Please use get_dataset_size directly.") + if value >= 0: + self._dataset_size = value + else: + raise ValueError('Set dataset_size with negative value {}'.format(value)) def is_shuffled(self): - if self.global_shuffle is None: + if self.shuffle_option is None: return True - return self.global_shuffle or self.sampler.is_shuffled() + return self.shuffle_option or self.sampler.is_shuffled() def is_sharded(self): if self.num_shards is not None: @@ -2727,7 +2877,7 @@ def _py_sampler_fn_mp(sampler, num_samples, dataset, num_worker): def _fetch_py_sampler_indices(sampler, num_samples): """ - Indices fetcher for python sampler. + Indice fetcher for python sampler. """ if num_samples is not None: sampler_iter = iter(sampler) @@ -2827,6 +2977,7 @@ class _GeneratorWorker(multiprocessing.Process): """ Worker process for multiprocess Generator. """ + def __init__(self, dataset, eoe): self.idx_queue = multiprocessing.Queue(16) self.res_queue = multiprocessing.Queue(16) @@ -2892,7 +3043,7 @@ class GeneratorDataset(MappableDataset): provide either column_names or schema. 
column_types (list[mindspore.dtype], optional): List of column data types of the dataset (default=None). If provided, sanity check will be performed on generator output. - schema (Schema/String, optional): Path to the json schema file or schema object (default=None). Users are + schema (Schema/str, optional): Path to the json schema file or schema object (default=None). Users are required to provide either column_names or schema. If both are provided, schema will be used. num_samples (int, optional): The number of samples to be included in the dataset (default=None, all images). @@ -2948,11 +3099,8 @@ class GeneratorDataset(MappableDataset): if isinstance(self.sampler, (samplers.SequentialSampler, samplers.DistributedSampler, samplers.RandomSampler, samplers.SubsetRandomSampler, samplers.WeightedRandomSampler, samplers.Sampler)): - if num_samples is None: - num_samples = len(source) sampler_instance = self.sampler.create() sampler_instance.set_num_rows(len(source)) - sampler_instance.set_num_samples(num_samples) sampler_instance.initialize() if num_parallel_workers > 1: self.source = (lambda: _cpp_sampler_fn_mp(sampler_instance, source, num_parallel_workers)) @@ -3020,7 +3168,7 @@ class GeneratorDataset(MappableDataset): if value >= 0: self._dataset_size = value else: - raise ValueError('set dataset_size with negative value {}'.format(value)) + raise ValueError('Set dataset_size with negative value {}'.format(value)) def __deepcopy__(self, memodict): if id(self) in memodict: @@ -3028,8 +3176,8 @@ class GeneratorDataset(MappableDataset): cls = self.__class__ new_op = cls.__new__(cls) memodict[id(self)] = new_op - new_op.input = copy.deepcopy(self.input, memodict) - new_op.output = copy.deepcopy(self.output, memodict) + new_op.children = copy.deepcopy(self.children, memodict) + new_op.parent = copy.deepcopy(self.parent, memodict) new_op.num_parallel_workers = copy.deepcopy(self.num_parallel_workers, memodict) new_op.column_types = copy.deepcopy(self.column_types, 
memodict) new_op.column_names = copy.deepcopy(self.column_names, memodict) @@ -3139,6 +3287,7 @@ class TFRecordDataset(SourceDataset): args["num_samples"] = self.num_samples if self.shuffle_files is not None: args["shuffle_files"] = self.shuffle_files + args["shuffle_global"] = (self.shuffle_level == Shuffle.GLOBAL) args["shuffle"] = self.shuffle_level args["num_shards"] = self.num_shards args["shard_id"] = self.shard_id @@ -3169,7 +3318,7 @@ class TFRecordDataset(SourceDataset): if value >= 0: self._dataset_size = value else: - raise ValueError('set dataset_size with negative value {}'.format(value)) + raise ValueError('Set dataset_size with negative value {}'.format(value)) def is_shuffled(self): return self.shuffle_files @@ -3296,17 +3445,12 @@ class ManifestDataset(MappableDataset): Return: Number, number of batches. """ - if self.num_samples is None: - num_samples = 0 - else: - num_samples = self.num_samples - if self.class_indexing is None: class_indexing = dict() else: class_indexing = self.class_indexing - num_rows = ManifestOp.get_num_rows_and_classes(self.dataset_file, num_samples, class_indexing, self.usage)[0] + num_rows = ManifestOp.get_num_rows_and_classes(self.dataset_file, class_indexing, self.usage)[0] rows_per_shard = get_num_rows(num_rows, self.num_shards) rows_from_sampler = self._get_sampler_dataset_size() @@ -3322,17 +3466,12 @@ class ManifestDataset(MappableDataset): Return: Number, number of classes. 
""" - if self.num_samples is None: - num_samples = 0 - else: - num_samples = self.num_samples - if self.class_indexing is None: class_indexing = dict() else: class_indexing = self.class_indexing - return ManifestOp.get_num_rows_and_classes(self.dataset_file, num_samples, class_indexing, self.usage)[1] + return ManifestOp.get_num_rows_and_classes(self.dataset_file, class_indexing, self.usage)[1] def get_class_indexing(self): """ @@ -3341,17 +3480,12 @@ class ManifestDataset(MappableDataset): Return: Dict, A str-to-int mapping from label name to index. """ - if self.num_samples is None: - num_samples = 0 - else: - num_samples = self.num_samples - if self.class_indexing is None: class_indexing = dict() else: class_indexing = self.class_indexing - return ManifestOp.get_class_indexing(self.dataset_file, num_samples, class_indexing, self.usage) + return ManifestOp.get_class_indexing(self.dataset_file, class_indexing, self.usage) def is_shuffled(self): if self.shuffle_level is None: @@ -3465,12 +3599,8 @@ class Cifar10Dataset(MappableDataset): Return: Number, number of batches. """ - if self.num_samples is None: - num_samples = 0 - else: - num_samples = self.num_samples - num_rows = CifarOp.get_num_rows(self.dataset_dir, num_samples, True) + num_rows = CifarOp.get_num_rows(self.dataset_dir, True) rows_per_shard = get_num_rows(num_rows, self.num_shards) rows_from_sampler = self._get_sampler_dataset_size() @@ -3589,12 +3719,8 @@ class Cifar100Dataset(MappableDataset): Return: Number, number of batches. """ - if self.num_samples is None: - num_samples = 0 - else: - num_samples = self.num_samples - num_rows = CifarOp.get_num_rows(self.dataset_dir, num_samples, False) + num_rows = CifarOp.get_num_rows(self.dataset_dir, False) rows_per_shard = get_num_rows(num_rows, self.num_shards) rows_from_sampler = self._get_sampler_dataset_size() @@ -3623,7 +3749,7 @@ class RandomDataset(SourceDataset): Args: num_samples (int): number of samples to generate. 
schema (str or Schema, optional): Path to the json schema file or schema object (default=None). - If the schema is not provided, the meta data from the TFRecord file is considered the schema. + If the schema is not provided, the random dataset generates a random schema. columns_list (list[str], optional): List of columns to be read (default=None, read all columns) num_parallel_workers (int, optional): number of workers to read the data (default=None, number set in the config). @@ -3636,9 +3762,12 @@ class RandomDataset(SourceDataset): schema_obj = Schema(schema) # read the schema file and convert to schema object to validate it self.schema = schema self.columns_list = columns_list - self.num_samples = num_samples if schema_obj is not None and num_samples is None: self.num_samples = schema_obj.num_rows + elif num_samples is None: + self.num_samples = 0 + else: + self.num_samples = num_samples def get_args(self): args = super().get_args() @@ -3677,6 +3806,7 @@ class RandomDataset(SourceDataset): def is_sharded(self): return False + class Schema: """ Class to represent a schema of dataset. @@ -3859,10 +3989,14 @@ class VOCDataset(MappableDataset): """ A source dataset for reading and parsing VOC dataset. - The generated dataset has two columns ['image', 'target']. - The shape of both column is [image_size] if decode flag is False, or [H, W, C] + The generated dataset has two columns : + task='Detection' : ['image', 'annotation']. + task='Segmentation' : ['image', 'target'] + The shape of both column 'image' and 'target' is [image_size] if decode flag is False, or [H, W, C] otherwise. - The type of both tensor is uint8. + The type of both tensor 'image' and 'target' is uint8. + The type of tensor 'annotation' is uint32. + This dataset can take in a sampler. sampler and shuffle are mutually exclusive. Table below shows what input args are allowed and their expected behavior. 
@@ -4007,17 +4141,171 @@ class VOCDataset(MappableDataset): if self.task != "Detection": raise NotImplementedError() - if self.num_samples is None: - num_samples = 0 - else: - num_samples = self.num_samples - if self.class_indexing is None: class_indexing = dict() else: class_indexing = self.class_indexing - return VOCOp.get_class_indexing(self.dataset_dir, self.task, self.mode, class_indexing, num_samples) + return VOCOp.get_class_indexing(self.dataset_dir, self.task, self.mode, class_indexing) + + def is_shuffled(self): + if self.shuffle_level is None: + return True + + return self.shuffle_level or self.sampler.is_shuffled() + + def is_sharded(self): + if self.num_shards is not None: + return self.num_shards > 1 + + return self.sampler.is_sharded() + + +class CocoDataset(MappableDataset): + """ + A source dataset for reading and parsing COCO dataset. + + CocoDataset support four kinds of task: + 2017 Train/Val/Test Detection, Keypoints, Stuff, Panoptic. + + The generated dataset has multi-columns : + - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], + ['iscrowd', dtype=uint32]]. + - task='Stuff', column: [['image', dtype=uint8], ['segmentation',dtype=float32], ['iscrowd',dtype=uint32]]. + - task='Keypoint', column: [['image', dtype=uint8], ['keypoints', dtype=float32], + ['num_keypoints', dtype=uint32]]. + - task='Panoptic', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], + ['iscrowd', dtype=uint32], ['area', dtype=uint32]]. + + This dataset can take in a sampler. sampler and shuffle are mutually exclusive. CocoDataset doesn't support + PKSampler. Table below shows what input args are allowed and their expected behavior. + + .. 
list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter 'sampler' + - Parameter 'shuffle' + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + annotation_file (str): Path to the annotation json. + task (str): Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint' + (default='Detection') + num_samples (int, optional): The number of images to be included in the dataset + (default=None, all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected + order behavior shown in the table). + decode (bool, optional): Decode the images after reading (default=False). + sampler (Sampler, optional): Object used to choose samples from the dataset + (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset should be divided + into (default=None). + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument should be specified only when num_shards is also specified. + + Raises: + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + RuntimeError: If parse json file failed. + ValueError: If task is not in ['Detection', 'Stuff', 'Panoptic', 'Keypoint']. 
+ ValueError: If annotation_file is not exist. + ValueError: If dataset_dir is not exist. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Examples: + >>> import mindspore.dataset as ds + >>> dataset_dir = "/path/to/coco_dataset_directory/image_folder" + >>> annotation_file = "/path/to/coco_dataset_directory/annotation_folder/annotation.json" + >>> # 1) read COCO data for Detection task + >>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Detection') + >>> # 2) read COCO data for Stuff task + >>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Stuff') + >>> # 3) read COCO data for Panoptic task + >>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Panoptic') + >>> # 4) read COCO data for Keypoint task + >>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Keypoint') + >>> # in COCO dataset, each dictionary has keys "image" and "annotation" + """ + + @check_cocodataset + def __init__(self, dataset_dir, annotation_file, task="Detection", num_samples=None, num_parallel_workers=None, + shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None): + super().__init__(num_parallel_workers) + self.dataset_dir = dataset_dir + self.annotation_file = annotation_file + self.task = task + self.sampler = _select_sampler(num_samples, sampler, shuffle, num_shards, shard_id) + self.num_samples = num_samples + self.decode = decode + self.shuffle_level = shuffle + self.num_shards = num_shards + self.shard_id = shard_id + + def get_args(self): + args = super().get_args() + args["dataset_dir"] = self.dataset_dir + args["annotation_file"] = self.annotation_file + args["task"] = self.task + args["num_samples"] = self.num_samples + args["sampler"] = self.sampler + args["decode"] = self.decode + args["shuffle"] = self.shuffle_level + args["num_shards"] = self.num_shards + args["shard_id"] = self.shard_id + return args + + def 
get_dataset_size(self): + """ + Get the number of batches in an epoch. + + Return: + Number, number of batches. + """ + num_rows = CocoOp.get_num_rows(self.dataset_dir, self.annotation_file, self.task) + rows_per_shard = get_num_rows(num_rows, self.num_shards) + rows_from_sampler = self._get_sampler_dataset_size() + + if rows_from_sampler is None: + return rows_per_shard + + return min(rows_from_sampler, rows_per_shard) + + def get_class_indexing(self): + """ + Get the class index. + + Return: + Dict, A str-to-int mapping from label name to index. + """ + if self.task not in {"Detection", "Panoptic"}: + raise NotImplementedError("Only 'Detection' and 'Panoptic' support get_class_indexing.") + + class_index = CocoOp.get_class_indexing(self.dataset_dir, self.annotation_file, self.task) + return dict(class_index) def is_shuffled(self): if self.shuffle_level is None: @@ -4044,7 +4332,7 @@ class CelebADataset(MappableDataset): dataset_dir (str): Path to the root directory that contains the dataset. num_parallel_workers (int, optional): Number of workers to read the data (default=value set in the config). shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None). - dataset_type (string): one of 'all', 'train', 'valid' or 'test'. + dataset_type (str): one of 'all', 'train', 'valid' or 'test'. sampler (Sampler, optional): Object used to choose samples from the dataset (default=None). decode (bool, optional): decode the images after reading (default=False). 
extensions (list[str], optional): List of file extensions to be @@ -4099,7 +4387,9 @@ class CelebADataset(MappableDataset): try: with open(attr_file, 'r') as f: num_rows = int(f.readline()) - except Exception: + except FileNotFoundError: + raise RuntimeError("attr_file not found.") + except BaseException: raise RuntimeError("Get dataset size failed from attribution file.") rows_per_shard = get_num_rows(num_rows, self.num_shards) if self.num_samples is not None: @@ -4123,6 +4413,223 @@ class CelebADataset(MappableDataset): return self.sampler.is_sharded() +class CLUEDataset(SourceDataset): + """ + A source dataset that reads and parses CLUE datasets. + CLUE, the Chinese Language Understanding Evaluation Benchmark, a collection of datasets, baselines, pre-trained + models, corpus and leaderboard. Here we bring in classification task of CLUE, which are AFQMC, TNEWS, IFLYTEK, + CMNLI, WSC and CSL. + + Args: + dataset_files (str or list[str]): String or list of files to be read or glob strings to search for a pattern of + files. The list will be sorted in a lexicographical order. + task (str, optional): The kind of task, one of 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' and 'CSL'. + (default=AFQMC). + usage (str, optional): Need train, test or eval data (default="train"). + num_samples (int, optional): number of samples(rows) to read (default=None, reads the full dataset). + num_parallel_workers (int, optional): number of workers to read the data + (default=None, number set in the config). + shuffle (bool, Shuffle level, optional): perform reshuffling of the data every epoch (default=Shuffle.GLOBAL). + If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. 
+ + num_shards (int, optional): Number of shards that the dataset should be divided into (default=None). + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument should be specified only when num_shards is also specified. + + Examples: + >>> import mindspore.dataset as ds + >>> dataset_files = ["/path/to/1", "/path/to/2"] # contains 1 or multiple text files + >>> dataset = ds.CLUEDataset(dataset_files=dataset_files, task='AFQMC', usage='train') + + """ + + @check_cluedataset + def __init__(self, dataset_files, task='AFQMC', usage='train', num_samples=None, + num_parallel_workers=None, shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None): + super().__init__(num_parallel_workers) + self.dataset_files = self._find_files(dataset_files) + self.dataset_files.sort() + self.num_samples = num_samples + self.task_dict = { + 'AFQMC': { + 'train': { + 'sentence1': 'sentence1', + 'sentence2': 'sentence2', + 'label': 'label' + }, + 'test': { + 'id': 'id', + 'sentence1': 'sentence1', + 'sentence2': 'sentence2' + }, + 'eval': { + 'sentence1': 'sentence1', + 'sentence2': 'sentence2', + 'label': 'label' + } + }, + 'CMNLI': { + 'train': { + 'sentence1': 'sentence1', + 'sentence2': 'sentence2', + 'label': 'label' + }, + 'test': { + 'id': 'id', + 'sentence1': 'sentence1', + 'sentence2': 'sentence2' + }, + 'eval': { + 'sentence1': 'sentence1', + 'sentence2': 'sentence2', + 'label': 'label' + } + }, + 'CSL': { + 'train': { + 'id': 'id', + 'abst': 'abst', + 'keyword': 'keyword', + 'label': 'label' + }, + 'test': { + 'id': 'id', + 'abst': 'abst', + 'keyword': 'keyword' + }, + 'eval': { + 'id': 'id', + 'abst': 'abst', + 'keyword': 'keyword', + 'label': 'label' + } + }, + 'IFLYTEK': { + 'train': { + 'label': 'label', + 'label_des': 'label_des', + 'sentence': 'sentence' + }, + 'test': { + 'id': 'id', + 'sentence': 'sentence', + }, + 'eval': { + 'label': 'label', + 'label_des': 'label_des', + 'sentence': 'sentence' + } + }, + 'TNEWS': { + 'train': { + 
'label': 'label', + 'label_desc': 'label_desc', + 'sentence': 'sentence', + 'keywords': 'keywords' + }, + 'test': { + 'id': 'id', + 'sentence': 'sentence', + 'keywords': 'keywords' + }, + 'eval': { + 'label': 'label', + 'label_desc': 'label_desc', + 'sentence': 'sentence', + 'keywords': 'keywords' + } + }, + 'WSC': { + 'train': { + 'span1_index': 'target/span1_index', + 'span2_index': 'target/span2_index', + 'span1_text': 'target/span1_text', + 'span2_text': 'target/span2_text', + 'idx': 'idx', + 'label': 'label', + 'text': 'text' + }, + 'test': { + 'span1_index': 'target/span1_index', + 'span2_index': 'target/span2_index', + 'span1_text': 'target/span1_text', + 'span2_text': 'target/span2_text', + 'idx': 'idx', + 'text': 'text' + }, + 'eval': { + 'span1_index': 'target/span1_index', + 'span2_index': 'target/span2_index', + 'span1_text': 'target/span1_text', + 'span2_text': 'target/span2_text', + 'idx': 'idx', + 'label': 'label', + 'text': 'text' + } + } + } + self.cols_to_keyword = self.task_dict[task][usage] + + if not isinstance(shuffle, (bool, Shuffle)): + raise TypeError("shuffle should be of boolean or enum 'Shuffle'.") + if not isinstance(shuffle, Shuffle): + if shuffle: + self.shuffle_level = Shuffle.GLOBAL + self.shuffle_files = True + else: + self.shuffle_level = None + self.shuffle_files = False + else: + self.shuffle_level = shuffle + self.shuffle_files = True + + self.num_shards = num_shards + self.shard_id = shard_id + + def get_args(self): + args = super().get_args() + args["dataset_files"] = self.dataset_files + args["num_samples"] = self.num_samples + if self.shuffle_files is not None: + args["shuffle_files"] = self.shuffle_files + args["shuffle_global"] = (self.shuffle_level == Shuffle.GLOBAL) + args["shuffle"] = self.shuffle_level + args["num_shards"] = self.num_shards + args["shard_id"] = self.shard_id + args["cols_to_keyword"] = self.cols_to_keyword + return args + + def get_dataset_size(self): + """ + Get the number of batches in an epoch. 
+ + Return: + Number, number of batches. + """ + if self._dataset_size is None: + num_rows = ClueOp.get_num_rows(self.dataset_files) + num_rows = get_num_rows(num_rows, self.num_shards) + if self.num_samples is None: + return num_rows + return min(self.num_samples, num_rows) + return self._dataset_size + + def is_shuffled(self): + return self.shuffle_files + + def is_sharded(self): + if self.num_shards is not None: + return self.num_shards > 1 + + return False + + class TextFileDataset(SourceDataset): """ A source dataset that reads and parses datasets stored on disk in text format. @@ -4182,6 +4689,7 @@ class TextFileDataset(SourceDataset): args["num_samples"] = self.num_samples if self.shuffle_files is not None: args["shuffle_files"] = self.shuffle_files + args["shuffle_global"] = (self.shuffle_level == Shuffle.GLOBAL) args["shuffle"] = self.shuffle_level args["num_shards"] = self.num_shards args["shard_id"] = self.shard_id @@ -4197,9 +4705,11 @@ class TextFileDataset(SourceDataset): if self._dataset_size is None: num_rows = TextFileOp.get_num_rows(self.dataset_files) num_rows = get_num_rows(num_rows, self.num_shards) - if self.num_samples is None: - return num_rows - return min(self.num_samples, num_rows) + # If the user gave a num samples in the dataset, then the sampler will limit the rows returned + # to that amount. Account for that here in the row count + if self.num_samples is not None and self.num_samples > 0 and num_rows > self.num_samples: + num_rows = self.num_samples + return num_rows return self._dataset_size def is_shuffled(self): @@ -4210,3 +4720,208 @@ class TextFileDataset(SourceDataset): return self.num_shards > 1 return False + + +class _NumpySlicesDataset: + """ + Mainly for dealing with several kinds of format of python data, and return one row each time. 
+ """ + + def __init__(self, data, column_list=None): + self.column_list = None + # Convert dict data into tuple + if isinstance(data, dict): + data = self.process_dict(data) + + if isinstance(data, tuple): + self.data = () + data_len = len(data) + for i in range(data_len): + self.data = self.data + (np.array(data[i]),) + else: + self.data = (np.array(data),) + + # Init column_name + if column_list is not None: + self.column_list = column_list + elif self.column_list is None: + self.column_list = [] + column_num = len(self.data) + for i in range(column_num): + self.column_list.append("column_" + str(i)) + + def __getitem__(self, index): + data_row = [d[index, ...] for d in self.data] + data_res = tuple(data_row) + return data_res + + def __len__(self): + return len(self.data[0]) + + def process_dict(self, input_data): + """ + Convert the dict like data into tuple format, when input is a tuple of dict then compose it into a dict first. + """ + # Convert pandas like dict(has "values" column) into General dict + data_keys = list(input_data.keys()) + data_col = input_data[data_keys[0]] + if hasattr(data_col, "values"): + new_dict = {} + for key in data_keys: + item1 = input_data.pop(key) + new_dict[key] = item1.values + input_data = new_dict + + # Convert the data in dict into tuple + data = () + keys = list(input_data.keys()) + self.column_list = keys + for key in keys: + value = input_data[key] + data = data + (list(value),) + + return data + + +class NumpySlicesDataset(GeneratorDataset): + """ + Create a dataset with given data slices, mainly for loading python data into dataset. + + This dataset can take in a sampler. sampler and shuffle are mutually exclusive. Table + below shows what input args are allowed and their expected behavior. + + .. 
list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter 'sampler' + - Parameter 'shuffle' + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Args: + data (list, tuple or dict) Input of Given data, supported data type includes list, tuple, dict and other numpy + format. Input data will be sliced in first dimension and generate many rows, large data is not recommend to + load in this way as data is loading into memory. + column_names (list[str], optional): List of column names of the dataset (default=None). If column_names not + provided, when data is dict, column_names will be its key, otherwise it will be like column_1, column_2 ... + num_samples (int, optional): The number of samples to be included in the dataset (default=None, all images). + num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required. + (default=None, expected order behavior shown in the table). + sampler (Sampler/Iterable, optional): Object used to choose samples from the dataset. Random accessible input is + required (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset should be divided into (default=None). + This argument should be specified only when 'num_samples' is "None". Random accessible input is required. + shard_id (int, optional): The shard ID within num_shards (default=None). This argument should be specified only + when num_shards is also specified. Random accessible input is required. 
+ + Examples: + >>> import mindspore.dataset as ds + >>> # 1) Input data can be a list + >>> data = [1, 2, 3] + >>> dataset1 = ds.NumpySlicesDataset(data, column_names=["column_1"]) + >>> # 2) Input data can be a dict, and column_names will be its key + >>> data = {"a": [1, 2], "b": [3, 4]} + >>> dataset2 = ds.NumpySlicesDataset(data) + >>> # 3) Input data can be a tuple of lists (or numpy arrays), each tuple element refers to data in each column + >>> data = ([1, 2], [3, 4], [5, 6]) + >>> dataset3 = ds.NumpySlicesDataset(data, column_names=["column_1", "column_2", "column_3"]) + >>> # 4) Load data from csv file + >>> import pandas as pd + >>> df = pd.read_csv("file.csv") + >>> dataset4 = ds.NumpySlicesDataset(dict(df), shuffle=False) + """ + + @check_numpyslicesdataset + def __init__(self, data, column_names=None, num_samples=None, num_parallel_workers=1, shuffle=None, + sampler=None, num_shards=None, shard_id=None): + dataset = _NumpySlicesDataset(data, column_names) + super().__init__(dataset, column_names=dataset.column_list, num_samples=num_samples, + num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, + num_shards=num_shards, shard_id=shard_id) + + +class BuildVocabDataset(DatasetOp): + """ + Build a vocab from a dataset. This would collect all the unique words in a dataset and return a vocab + which contains top_k most frequent words (if top_k is specified) + This function is not meant to be called directly by user. To build vocab, please use the function + text.Vocab.from_dataset() + + Args: + vocab(Vocab): text.vocab object. + columns(str or list, optional): column names to get words from. It can be a list of column names (Default is + None, all columns are used, return error if any column isn't string). + freq_range(tuple, optional): A tuple of integers (min_frequency, max_frequency). Words within the frequency + range would be kept. 0 <= min_frequency <= max_frequency <= total_words. 
min_frequency/max_frequency + can be None, which corresponds to 0/total_words separately (default=None, all words are included). + top_k(int, optional): top_k > 0. Number of words to be built into vocab. top_k most frequent words are + taken. The top_k is taken after freq_range. If not enough top_k, all words will be taken (default=None, + all words are included). + special_tokens(list, optional): a list of strings, each one is a special token. for example + special_tokens=["",""] (default=None, no special tokens will be added). + special_first(bool, optional): whether special_tokens will be prepended/appended to vocab, If special_tokens + is specified and special_first is set to None, special_tokens will be prepended. (default=None). + prefetch_size (int, optional): prefetch number of records ahead of the user's request (default=None). + """ + + def __init__(self, input_dataset, vocab, columns, freq_range, top_k, special_tokens, special_first, + prefetch_size=None): + super().__init__() + self.columns = columns + self.children.append(input_dataset) + self.prefetch_size = prefetch_size + self.vocab = vocab + self.freq_range = freq_range + self.top_k = top_k + self.special_tokens = special_tokens + self.special_first = special_first + input_dataset.parent.append(self) + + def get_args(self): + args = super().get_args() + args["columns"] = self.columns + args["vocab"] = self.vocab + args["freq_range"] = self.freq_range + args["prefetch_size"] = self.prefetch_size + args["top_k"] = self.top_k + args["special_tokens"] = self.special_tokens + args["special_first"] = self.special_first + return args + + def __deepcopy__(self, memodict): + if id(self) in memodict: + return memodict[id(self)] + cls = self.__class__ + new_op = cls.__new__(cls) + memodict[id(self)] = new_op + new_op.children = copy.deepcopy(self.children, memodict) + new_op.columns = copy.deepcopy(self.columns, memodict) + new_op.num_parallel_workers = copy.deepcopy(self.num_parallel_workers, memodict) + 
new_op.prefetch_size = copy.deepcopy(self.prefetch_size, memodict) + new_op.parent = copy.deepcopy(self.parent, memodict) + new_op.freq_range = copy.deepcopy(self.freq_range, memodict) + new_op.top_k = copy.deepcopy(self.top_k, memodict) + new_op.vocab = self.vocab + new_op.special_tokens = copy.deepcopy(self.special_tokens) + new_op.special_first = copy.deepcopy(self.special_first) + + return new_op diff --git a/mindspore/dataset/engine/graphdata.py b/mindspore/dataset/engine/graphdata.py index 23f8dbda6a..472819784e 100644 --- a/mindspore/dataset/engine/graphdata.py +++ b/mindspore/dataset/engine/graphdata.py @@ -20,8 +20,9 @@ import numpy as np from mindspore._c_dataengine import Graph from mindspore._c_dataengine import Tensor -from .validators import check_gnn_graphdata, check_gnn_get_all_nodes, check_gnn_get_all_neighbors, \ - check_gnn_get_node_feature +from .validators import check_gnn_graphdata, check_gnn_get_all_nodes, check_gnn_get_all_edges, \ + check_gnn_get_nodes_from_edges, check_gnn_get_all_neighbors, check_gnn_get_sampled_neighbors, \ + check_gnn_get_neg_sampled_neighbors, check_gnn_get_node_feature, check_gnn_random_walk class GraphData: @@ -60,7 +61,44 @@ class GraphData: Raises: TypeError: If `node_type` is not integer. """ - return self._graph.get_nodes(node_type, -1).as_array() + return self._graph.get_all_nodes(node_type).as_array() + + @check_gnn_get_all_edges + def get_all_edges(self, edge_type): + """ + Get all edges in the graph. + + Args: + edge_type (int): Specify the type of edge. + + Returns: + numpy.ndarray: array of edges. + + Examples: + >>> import mindspore.dataset as ds + >>> data_graph = ds.GraphData('dataset_file', 2) + >>> nodes = data_graph.get_all_edges(0) + + Raises: + TypeError: If `edge_type` is not integer. + """ + return self._graph.get_all_edges(edge_type).as_array() + + @check_gnn_get_nodes_from_edges + def get_nodes_from_edges(self, edge_list): + """ + Get nodes from the edges. 
+ + Args: + edge_list (list or numpy.ndarray): The given list of edges. + + Returns: + numpy.ndarray: array of nodes. + + Raises: + TypeError: If `edge_list` is not list or ndarray. + """ + return self._graph.get_nodes_from_edges(edge_list).as_array() @check_gnn_get_all_neighbors def get_all_neighbors(self, node_list, neighbor_type): @@ -86,6 +124,60 @@ class GraphData: """ return self._graph.get_all_neighbors(node_list, neighbor_type).as_array() + @check_gnn_get_sampled_neighbors + def get_sampled_neighbors(self, node_list, neighbor_nums, neighbor_types): + """ + Get sampled neighbor information, maximum support 6-hop sampling. + + Args: + node_list (list or numpy.ndarray): The given list of nodes. + neighbor_nums (list or numpy.ndarray): Number of neighbors sampled per hop. + neighbor_types (list or numpy.ndarray): Neighbor type sampled per hop. + + Returns: + numpy.ndarray: array of nodes. + + Examples: + >>> import mindspore.dataset as ds + >>> data_graph = ds.GraphData('dataset_file', 2) + >>> nodes = data_graph.get_all_nodes(0) + >>> neighbors = data_graph.get_all_neighbors(nodes, [2, 2], [0, 0]) + + Raises: + TypeError: If `node_list` is not list or ndarray. + TypeError: If `neighbor_nums` is not list or ndarray. + TypeError: If `neighbor_types` is not list or ndarray. + """ + return self._graph.get_sampled_neighbors( + node_list, neighbor_nums, neighbor_types).as_array() + + @check_gnn_get_neg_sampled_neighbors + def get_neg_sampled_neighbors(self, node_list, neg_neighbor_num, neg_neighbor_type): + """ + Get `neg_neighbor_type` negative sampled neighbors of the nodes in `node_list`. + + Args: + node_list (list or numpy.ndarray): The given list of nodes. + neg_neighbor_num (int): Number of neighbors sampled. + neg_neighbor_type (int): Specify the type of negative neighbor. + + Returns: + numpy.ndarray: array of nodes. 
+ + Examples: + >>> import mindspore.dataset as ds + >>> data_graph = ds.GraphData('dataset_file', 2) + >>> nodes = data_graph.get_all_nodes(0) + >>> neg_neighbors = data_graph.get_neg_sampled_neighbors(nodes, 5, 0) + + Raises: + TypeError: If `node_list` is not list or ndarray. + TypeError: If `neg_neighbor_num` is not integer. + TypeError: If `neg_neighbor_type` is not integer. + """ + return self._graph.get_neg_sampled_neighbors( + node_list, neg_neighbor_num, neg_neighbor_type).as_array() + @check_gnn_get_node_feature def get_node_feature(self, node_list, feature_types): """ @@ -110,4 +202,51 @@ class GraphData: """ if isinstance(node_list, list): node_list = np.array(node_list, dtype=np.int32) - return [t.as_array() for t in self._graph.get_node_feature(Tensor(node_list), feature_types)] + return [ + t.as_array() for t in self._graph.get_node_feature( + Tensor(node_list), + feature_types)] + + def graph_info(self): + """ + Get the meta information of the graph, including the number of nodes, the type of nodes, + the feature information of nodes, the number of edges, the type of edges, and the feature information of edges. + + Returns: + Dict: Meta information of the graph. The key is node_type, edge_type, node_num, edge_num, + node_feature_type and edge_feature_type. + """ + return self._graph.graph_info() + + @check_gnn_random_walk + def random_walk( + self, + target_nodes, + meta_path, + step_home_param=1.0, + step_away_param=1.0, + default_node=-1): + """ + Random walk in nodes. + + Args: + target_nodes (list[int]): Start node list in random walk + meta_path (list[int]): node type for each walk step + step_home_param (float): return hyper parameter in node2vec algorithm + step_away_param (float): inout hyper parameter in node2vec algorithm + default_node (int): default node if no more neighbors found + + Returns: + numpy.ndarray: array of nodes. 
+ + Examples: + >>> import mindspore.dataset as ds + >>> data_graph = ds.GraphData('dataset_file', 2) + >>> nodes = data_graph.random_walk([1,2], [1,2,1,2,1]) + + Raises: + TypeError: If `target_nodes` is not list or ndarray. + TypeError: If `meta_path` is not list or ndarray. + """ + return self._graph.random_walk(target_nodes, meta_path, step_home_param, step_away_param, + default_node).as_array() diff --git a/mindspore/dataset/engine/iterators.py b/mindspore/dataset/engine/iterators.py index f58db32094..4946fb3252 100644 --- a/mindspore/dataset/engine/iterators.py +++ b/mindspore/dataset/engine/iterators.py @@ -38,43 +38,24 @@ def _cleanup(): def alter_tree(node): """Traversing the python Dataset tree/graph to perform some alteration to some specific nodes.""" - if not node.input: + if not node.children: return _alter_node(node) converted_children = [] - for input_op in node.input: + for input_op in node.children: converted_children.append(alter_tree(input_op)) - node.input = converted_children + node.children = converted_children return _alter_node(node) def _alter_node(node): - """Performing some alteration to a dataset node. A common alteration is to insert a node.""" - if isinstance(node, (de.TFRecordDataset, de.TextFileDataset)) and node.shuffle_level == de.Shuffle.GLOBAL: - # Remove the connection between the parent's node to the current node because we are inserting a node. - if node.output: - node.output.pop() - # Perform a fast scan for average rows per file - if isinstance(node, de.TFRecordDataset): - avg_rows_per_file = node.get_dataset_size(True) // len(node.dataset_files) - else: - avg_rows_per_file = node.get_dataset_size() // len(node.dataset_files) - - # Shuffle between 4 files with a minimum size of 10000 rows - new_shuffle = node.shuffle(max(avg_rows_per_file * 4, 10000)) - return new_shuffle - + """DEPRECATED""" + # Please check ccsrc/dataset/engine/opt for tree transformation. 
if isinstance(node, de.MapDataset): if node.python_multiprocessing: # Bootstrap can only be performed on a copy of the original dataset node. # Bootstrap on original dataset node will make all iterators share the same process pool node.iterator_bootstrap() - if node.columns_order is not None: - # Remove the connection between the parent's node to the current node because we are inserting a node. - if node.output: - node.output.pop() - - return node.project(node.columns_order) return node @@ -105,14 +86,14 @@ class Iterator: def __is_tree_node(self, node): """Check if a node is tree node.""" - if not node.input: - if len(node.output) > 1: + if not node.children: + if len(node.parent) > 1: return False - if len(node.output) > 1: + if len(node.parent) > 1: return False - for input_node in node.input: + for input_node in node.children: cls = self.__is_tree_node(input_node) if not cls: return False @@ -131,6 +112,8 @@ class Iterator: op_type = OpName.MINDRECORD elif isinstance(dataset, de.BatchDataset): op_type = OpName.BATCH + elif isinstance(dataset, de.BucketBatchByLengthDataset): + op_type = OpName.BUCKETBATCH elif isinstance(dataset, de.SyncWaitDataset): op_type = OpName.BARRIER elif isinstance(dataset, de.ZipDataset): @@ -165,6 +148,8 @@ class Iterator: op_type = OpName.MANIFEST elif isinstance(dataset, de.VOCDataset): op_type = OpName.VOC + elif isinstance(dataset, de.CocoDataset): + op_type = OpName.COCO elif isinstance(dataset, de.Cifar10Dataset): op_type = OpName.CIFAR10 elif isinstance(dataset, de.Cifar100Dataset): @@ -175,6 +160,10 @@ class Iterator: op_type = OpName.RANDOMDATA elif isinstance(dataset, de.TextFileDataset): op_type = OpName.TEXTFILE + elif isinstance(dataset, de.BuildVocabDataset): + op_type = OpName.BUILDVOCAB + elif isinstance(dataset, de.CLUEDataset): + op_type = OpName.CLUE else: raise ValueError("Unsupported DatasetOp") @@ -185,7 +174,7 @@ class Iterator: op_type = self.__get_dataset_type(node) c_node = 
self.depipeline.AddNodeToTree(op_type, node.get_args()) - for py_child in node.input: + for py_child in node.children: c_child = self.__convert_node_postorder(py_child) self.depipeline.AddChildToParentNode(c_child, c_node) @@ -195,7 +184,7 @@ class Iterator: """Recursively get batch node in the dataset tree.""" if isinstance(dataset, de.BatchDataset): return - for input_op in dataset.input: + for input_op in dataset.children: self.__batch_node(input_op, level + 1) @staticmethod @@ -205,11 +194,11 @@ class Iterator: ptr = hex(id(dataset)) for _ in range(level): logger.info("\t", end='') - if not dataset.input: + if not dataset.children: logger.info("-%s (%s)", name, ptr) else: logger.info("+%s (%s)", name, ptr) - for input_op in dataset.input: + for input_op in dataset.children: Iterator.__print_local(input_op, level + 1) def print(self): diff --git a/mindspore/dataset/engine/samplers.py b/mindspore/dataset/engine/samplers.py index 8951a1c4a0..b74874f9cf 100644 --- a/mindspore/dataset/engine/samplers.py +++ b/mindspore/dataset/engine/samplers.py @@ -22,7 +22,6 @@ User can also define custom sampler by extending from Sampler class. import numpy as np import mindspore._c_dataengine as cde - class Sampler: """ Base class for user defined sampler. @@ -44,10 +43,10 @@ class Sampler: >>> ds = ds.ImageFolderDatasetV2(path, sampler=ReverseSampler()) """ - def __init__(self): + def __init__(self, num_samples=None): self.dataset_size = 0 - self.num_samples = 0 self.child_sampler = None + self.num_samples = num_samples def __iter__(self): """ @@ -84,7 +83,8 @@ class Sampler: # Instance fetcher # Do not override this method! 
def create(self): - c_sampler = cde.PythonSampler(self) + num_samples = self.num_samples if self.num_samples is not None else 0 + c_sampler = cde.PythonSampler(num_samples, self) c_child_sampler = self.create_child() c_sampler.add_child(c_child_sampler) return c_sampler @@ -114,7 +114,9 @@ class Sampler: return self.child_sampler.is_sharded() - def get_dataset_size(self): + def get_num_samples(self): + if self.num_samples is None: + return None return self._get_indices().size @@ -124,8 +126,9 @@ class BuiltinSampler: User should not extend this class. """ - def __init__(self): + def __init__(self, num_samples=None): self.child_sampler = None + self.num_samples = num_samples def create(self): pass @@ -140,7 +143,12 @@ class BuiltinSampler: c_child_sampler = None if self.child_sampler is not None: c_child_sampler = self.child_sampler.create() + return c_child_sampler + def create_child_for_minddataset(self): + c_child_sampler = None + if self.child_sampler is not None: + c_child_sampler = self.child_sampler.create_for_minddataset() return c_child_sampler def is_shuffled(self): @@ -149,11 +157,61 @@ class BuiltinSampler: def is_sharded(self): raise NotImplementedError("Sampler must implement is_sharded.") - def get_dataset_size(self): + def get_num_samples(self): + """ + All samplers can contain a numeric num_samples value (or it could be set to None). + Child sampler can exist or be None. + if child sampler exists, then the child sampler count can be a numeric value or None. + Given these conditions, we need to output what the sampler count is for this sampler. + The following table shows the possible results from calling this function. + + .. 
list-table:: + :widths: 25 25 25 25 + :header-rows: 1 + + * - child sampler + - num_samples + - child_samples + - result + * - T + - x + - y + - min(x, y) + * - T + - x + - None + - x + * - T + - None + - y + - y + * - T + - None + - None + - None + * - None + - x + - n/a + - x + * - None + - None + - n/a + - None + + Returns: + int, The number of samples, or None + """ if self.child_sampler is not None: - return self.child_sampler.get_dataset_size() + child_samples = self.child_sampler.get_num_samples() + if self.num_samples is not None: + if child_samples is not None: + return min(self.num_samples, child_samples) + + return self.num_samples - return None + return child_samples + + return self.num_samples class DistributedSampler(BuiltinSampler): @@ -164,6 +222,7 @@ class DistributedSampler(BuiltinSampler): num_shards (int): Number of shards to divide the dataset into. shard_id (int): Shard ID of the current shard within num_shards. shuffle (bool, optional): If true, the indices are shuffled (default=True). + num_samples (int, optional): The number of samples to draw (default=None, all elements). Examples: >>> import mindspore.dataset as ds @@ -180,7 +239,7 @@ class DistributedSampler(BuiltinSampler): ValueError: If shuffle is not a boolean value. 
""" - def __init__(self, num_shards, shard_id, shuffle=True): + def __init__(self, num_shards, shard_id, shuffle=True, num_samples=None): if num_shards <= 0: raise ValueError("num_shards should be a positive integer value, but got num_shards={}".format(num_shards)) @@ -190,20 +249,32 @@ class DistributedSampler(BuiltinSampler): if not isinstance(shuffle, bool): raise ValueError("shuffle should be a boolean value, but got shuffle={}".format(shuffle)) + if num_samples is not None: + if num_samples <= 0: + raise ValueError("num_samples should be a positive integer " + "value, but got num_samples={}".format(num_samples)) + self.num_shards = num_shards self.shard_id = shard_id self.shuffle = shuffle self.seed = 0 - super().__init__() + super().__init__(num_samples) def create(self): + num_samples = self.num_samples if self.num_samples is not None else 0 # each time user calls create_dict_iterator() (to do repeat) sampler would get a different seed to shuffle self.seed += 1 - c_sampler = cde.DistributedSampler(self.num_shards, self.shard_id, self.shuffle, self.seed) + c_sampler = cde.DistributedSampler(num_samples, self.num_shards, self.shard_id, self.shuffle, self.seed) c_child_sampler = self.create_child() c_sampler.add_child(c_child_sampler) return c_sampler + def create_for_minddataset(self): + c_sampler = cde.MindrecordDistributedSampler(self.num_shards, self.shard_id, self.shuffle, self.seed) + c_child_sampler = self.create_child_for_minddataset() + c_sampler.add_child(c_child_sampler) + return c_sampler + def is_shuffled(self): if self.child_sampler is None: return self.shuffle @@ -226,6 +297,7 @@ class PKSampler(BuiltinSampler): num_class (int, optional): Number of classes to sample (default=None, all classes). shuffle (bool, optional): If true, the class IDs are shuffled (default=False). class_column (str, optional): Name of column to classify dataset(default='label'), for MindDataset. 
+ num_samples (int, optional): The number of samples to draw (default=None, all elements). Examples: >>> import mindspore.dataset as ds @@ -242,23 +314,29 @@ class PKSampler(BuiltinSampler): ValueError: If shuffle is not boolean. """ - def __init__(self, num_val, num_class=None, shuffle=False, class_column='label'): + def __init__(self, num_val, num_class=None, shuffle=False, class_column='label', num_samples=None): if num_val <= 0: raise ValueError("num_val should be a positive integer value, but got num_val={}".format(num_val)) if num_class is not None: - raise NotImplementedError + raise NotImplementedError("Not support specify num_class") if not isinstance(shuffle, bool): raise ValueError("shuffle should be a boolean value, but got shuffle={}".format(shuffle)) + if num_samples is not None: + if num_samples <= 0: + raise ValueError("num_samples should be a positive integer " + "value, but got num_samples={}".format(num_samples)) + self.num_val = num_val self.shuffle = shuffle - self.class_column = class_column # work for minddataset - super().__init__() + self.class_column = class_column # work for minddataset + super().__init__(num_samples) def create(self): - c_sampler = cde.PKSampler(self.num_val, self.shuffle) + num_samples = self.num_samples if self.num_samples is not None else 0 + c_sampler = cde.PKSampler(num_samples, self.num_val, self.shuffle) c_child_sampler = self.create_child() c_sampler.add_child(c_child_sampler) return c_sampler @@ -275,12 +353,14 @@ class PKSampler(BuiltinSampler): return self.child_sampler.is_sharded() - def _create_for_minddataset(self): + def create_for_minddataset(self): if not self.class_column or not isinstance(self.class_column, str): raise ValueError("class_column should be a not empty string value, \ but got class_column={}".format(class_column)) - return cde.MindrecordPkSampler(self.num_val, self.class_column, self.shuffle) - + c_sampler = cde.MindrecordPkSampler(self.num_val, self.class_column, self.shuffle) + 
c_child_sampler = self.create_child_for_minddataset() + c_sampler.add_child(c_child_sampler) + return c_sampler class RandomSampler(BuiltinSampler): """ @@ -315,59 +395,25 @@ class RandomSampler(BuiltinSampler): self.deterministic = False self.replacement = replacement - self.num_samples = num_samples self.reshuffle_each_epoch = True - super().__init__() + super().__init__(num_samples) def create(self): - c_sampler = None - if self.num_samples is None: - c_sampler = cde.RandomSampler(self.replacement, self.reshuffle_each_epoch) - else: - c_sampler = cde.RandomSampler(self.replacement, self.reshuffle_each_epoch, self.num_samples) - + num_samples = self.num_samples if self.num_samples is not None else 0 + c_sampler = cde.RandomSampler(num_samples, self.replacement, self.reshuffle_each_epoch) c_child_sampler = self.create_child() c_sampler.add_child(c_child_sampler) return c_sampler - def is_shuffled(self): - return True - - def is_sharded(self): - if self.child_sampler is None: - return False - - return self.child_sampler.is_sharded() - - def get_dataset_size(self): - return self.num_samples - - -class SequentialSampler(BuiltinSampler): - """ - Samples the dataset elements sequentially, same as not having a sampler. 
- - Examples: - >>> import mindspore.dataset as ds - >>> - >>> dataset_dir = "path/to/imagefolder_directory" - >>> - >>> # creates a SequentialSampler - >>> sampler = ds.SequentialSampler() - >>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler) - """ - - def create(self): - c_sampler = cde.SequentialSampler() - c_child_sampler = self.create_child() + def create_for_minddataset(self): + num_samples = self.num_samples if self.num_samples is not None else 0 + c_sampler = cde.MindrecordRandomSampler(num_samples, self.replacement, self.reshuffle_each_epoch) + c_child_sampler = self.create_child_for_minddataset() c_sampler.add_child(c_child_sampler) return c_sampler def is_shuffled(self): - if self.child_sampler is None: - return False - - return self.child_sampler.is_shuffled() + return True def is_sharded(self): if self.child_sampler is None: @@ -376,51 +422,54 @@ class SequentialSampler(BuiltinSampler): return self.child_sampler.is_sharded() -class SubsetSampler(BuiltinSampler): +class SequentialSampler(BuiltinSampler): """ - Samples a subset of elements consecutively from a given index. + Samples the dataset elements sequentially, same as not having a sampler. Args: - start_index (int): Index to start sampling at. - subset_size (int): How many samples to include in this subset. + start_index (int, optional): Index to start sampling at. (dafault=None starts at first id) + num_samples (int, optional): Number of elements to sample (default=None, all elements). Examples: >>> import mindspore.dataset as ds >>> >>> dataset_dir = "path/to/imagefolder_directory" >>> - >>> # creates a SubsetSampler, will sample the next 5 images from the 100th image. - >>> sampler = ds.SubsetSampler(100, 5) + >>> # creates a SequentialSampler + >>> sampler = ds.SequentialSampler() >>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler) - - Raises: - ValueError: If start_index is not a positive int. 
- ValueError: If subset_size is not a positive int. """ - def __init__(self, start_index, subset_size): - if not isinstance(start_index, int): - raise ValueError("start_index should be an int.") - - if start_index < 0: - raise ValueError("start_index should not be negative.") - - if not isinstance(subset_size, int): - raise ValueError("start_index should be an int") + def __init__(self, start_index=None, num_samples=None): + if num_samples is not None: + if num_samples <= 0: + raise ValueError("num_samples should be a positive integer " + "value, but got num_samples={}".format(num_samples)) - if subset_size < 0: - raise ValueError("subset_size should not be negative.") + if start_index is not None: + if start_index < 0: + raise ValueError("start_index should be a positive integer " + "value or 0, but got start_index={}".format(start_index)) self.start_index = start_index - self.subset_size = subset_size - super().__init__() + super().__init__(num_samples) def create(self): - c_sampler = cde.SubsetSampler(self.start_index, self.subset_size) + start_index = self.start_index if self.start_index is not None else 0 + num_samples = self.num_samples if self.num_samples is not None else 0 + c_sampler = cde.SequentialSampler(num_samples, start_index) c_child_sampler = self.create_child() c_sampler.add_child(c_child_sampler) return c_sampler + def create_for_minddataset(self): + start_index = self.start_index if self.start_index is not None else 0 + num_samples = self.num_samples if self.num_samples is not None else 0 + c_sampler = cde.MindrecordSequentialSampler(num_samples, start_index) + c_child_sampler = self.create_child_for_minddataset() + c_sampler.add_child(c_child_sampler) + return c_sampler + def is_shuffled(self): if self.child_sampler is None: return False @@ -433,9 +482,6 @@ class SubsetSampler(BuiltinSampler): return self.child_sampler.is_sharded() - def get_dataset_size(self): - return self.subset_size - class SubsetRandomSampler(BuiltinSampler): """ @@ -443,6 
+489,7 @@ class SubsetRandomSampler(BuiltinSampler): Args: indices (list[int]): A sequence of indices. + num_samples (int, optional): Number of elements to sample (default=None, all elements). Examples: >>> import mindspore.dataset as ds @@ -456,15 +503,21 @@ class SubsetRandomSampler(BuiltinSampler): >>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler) """ - def __init__(self, indices): + def __init__(self, indices, num_samples=None): + if num_samples is not None: + if num_samples <= 0: + raise ValueError("num_samples should be a positive integer " + "value, but got num_samples={}".format(num_samples)) + if not isinstance(indices, list): indices = [indices] self.indices = indices - super().__init__() + super().__init__(num_samples) def create(self): - c_sampler = cde.SubsetRandomSampler(self.indices) + num_samples = self.num_samples if self.num_samples is not None else 0 + c_sampler = cde.SubsetRandomSampler(num_samples, self.indices) c_child_sampler = self.create_child() c_sampler.add_child(c_child_sampler) return c_sampler @@ -478,12 +531,18 @@ class SubsetRandomSampler(BuiltinSampler): return self.child_sampler.is_sharded() - def _create_for_minddataset(self): - return cde.MindrecordSubsetRandomSampler(self.indices) + def create_for_minddataset(self): + c_sampler = cde.MindrecordSubsetRandomSampler(self.indices) + c_child_sampler = self.create_child_for_minddataset() + c_sampler.add_child(c_child_sampler) + return c_sampler + def get_num_samples(self): + num_samples = super().get_num_samples() + if num_samples is None: + return len(self.indices) - def get_dataset_size(self): - return len(self.indices) + return min(len(self.indices), num_samples) class WeightedRandomSampler(BuiltinSampler): @@ -492,8 +551,8 @@ class WeightedRandomSampler(BuiltinSampler): Args: weights (list[float]): A sequence of weights, not necessarily summing up to 1. - num_samples (int): Number of elements to sample. 
- replacement (bool, optional): If True, put the sample ID back for the next draw (default=True). + num_samples (int, optional): Number of elements to sample (default=None, all elements). + replacement (bool): If True, put the sample ID back for the next draw (default=True). Examples: >>> import mindspore.dataset as ds @@ -511,24 +570,25 @@ class WeightedRandomSampler(BuiltinSampler): ValueError: If replacement is not boolean. """ - def __init__(self, weights, num_samples, replacement=True): + def __init__(self, weights, num_samples=None, replacement=True): if not isinstance(weights, list): weights = [weights] - if num_samples <= 0: - raise ValueError("num_samples should be a positive integer " - "value, but got num_samples={}".format(num_samples)) + if num_samples is not None: + if num_samples <= 0: + raise ValueError("num_samples should be a positive integer " + "value, but got num_samples={}".format(num_samples)) if not isinstance(replacement, bool): raise ValueError("replacement should be a boolean value, but got replacement={}".format(replacement)) self.weights = weights - self.num_samples = num_samples self.replacement = replacement - super().__init__() + super().__init__(num_samples) def create(self): - c_sampler = cde.WeightedRandomSampler(self.weights, self.num_samples, self.replacement) + num_samples = self.num_samples if self.num_samples is not None else 0 + c_sampler = cde.WeightedRandomSampler(num_samples, self.weights, self.replacement) c_child_sampler = self.create_child() c_sampler.add_child(c_child_sampler) return c_sampler @@ -541,6 +601,3 @@ class WeightedRandomSampler(BuiltinSampler): return False return self.child_sampler.is_sharded() - - def get_dataset_size(self): - return self.num_samples diff --git a/mindspore/dataset/engine/serializer_deserializer.py b/mindspore/dataset/engine/serializer_deserializer.py index 688ef16753..833f660f16 100644 --- a/mindspore/dataset/engine/serializer_deserializer.py +++ 
b/mindspore/dataset/engine/serializer_deserializer.py @@ -156,17 +156,37 @@ def traverse(node): serialize_operations(node_repr, k, v) elif k == 'sampler': serialize_sampler(node_repr, v) + elif k == 'padded_sample' and v: + v1 = {key: value for key, value in v.items() if not isinstance(value, bytes)} + node_repr[k] = json.dumps(v1, indent=2) + # return schema json str if its type is mindspore.dataset.Schema + elif k == 'schema' and isinstance(v, de.Schema): + node_repr[k] = v.to_json() elif k in set(['schema', 'dataset_files', 'dataset_dir', 'schema_file_path']): expand_path(node_repr, k, v) else: node_repr[k] = v + # If a sampler exists in this node, then the following 4 arguments must be set to None: + # num_samples, shard_id, num_shards, shuffle + # These arguments get moved into the sampler itself, so they are no longer needed to + # be set at the dataset level. + if 'sampler' in node_args.keys(): + if 'num_samples' in node_repr.keys(): + node_repr['num_samples'] = None + if 'shuffle' in node_repr.keys(): + node_repr['shuffle'] = None + if 'num_shards' in node_repr.keys(): + node_repr['num_shards'] = None + if 'shard_id' in node_repr.keys(): + node_repr['shard_id'] = None + # Leaf node doesn't have input attribute. - if not node.input: + if not node.children: return node_repr # Recursively traverse the child and assign it to the current node_repr['children']. - for child in node.input: + for child in node.children: node_repr["children"].append(traverse(child)) return node_repr @@ -206,11 +226,11 @@ def construct_pipeline(node): # Instantiate python Dataset object based on the current dictionary element dataset = create_node(node) # Initially it is not connected to any other object. - dataset.input = [] + dataset.children = [] # Construct the children too and add edge between the children and parent. 
for child in node['children']: - dataset.input.append(construct_pipeline(child)) + dataset.children.append(construct_pipeline(child)) return dataset @@ -285,6 +305,12 @@ def create_node(node): node.get('num_samples'), node.get('num_parallel_workers'), node.get('shuffle'), node.get('decode'), sampler, node.get('num_shards'), node.get('shard_id')) + elif dataset_op == 'CocoDataset': + sampler = construct_sampler(node.get('sampler')) + pyobj = pyclass(node['dataset_dir'], node.get('annotation_file'), node.get('task'), node.get('num_samples'), + node.get('num_parallel_workers'), node.get('shuffle'), node.get('decode'), sampler, + node.get('num_shards'), node.get('shard_id')) + elif dataset_op == 'CelebADataset': sampler = construct_sampler(node.get('sampler')) pyobj = pyclass(node['dataset_dir'], node.get('num_parallel_workers'), node.get('shuffle'), diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index 049931c80e..005f7072aa 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -19,10 +19,12 @@ import inspect as ins import os from functools import wraps from multiprocessing import cpu_count + import numpy as np from mindspore._c_expression import typing -from . import samplers + from . import datasets +from . 
import samplers INT32_MAX = 2147483647 valid_detype = [ @@ -31,169 +33,6 @@ valid_detype = [ ] -def check(method): - """Check the function parameters and return the function .""" - func_name = method.__name__ - # Required parameter - req_param_int = [] - req_param_bool = [] - # Non-required parameter - nreq_param_int = [] - nreq_param_bool = [] - - if func_name in 'repeat': - nreq_param_int = ['count', 'prefetch_size'] - - if func_name in 'take': - req_param_int = ['count'] - nreq_param_int = ['prefetch_size'] - - elif func_name in 'shuffle': - req_param_int = ['buffer_size'] - nreq_param_bool = ['reshuffle_each_iteration'] - nreq_param_int = ['prefetch_size', 'seed'] - - elif func_name in 'batch': - req_param_int = ['batch_size'] - nreq_param_int = ['num_parallel_workers', 'prefetch_size'] - nreq_param_bool = ['drop_remainder'] - - elif func_name in ('zip', 'filter', 'cache', 'rename', 'project'): - nreq_param_int = ['prefetch_size'] - - elif func_name in ('map', '__init__'): - nreq_param_int = ['num_parallel_workers', 'prefetch_size', 'seed'] - nreq_param_bool = ['block_reader'] - - @wraps(method) - def wrapper(*args, **kwargs): - - def _make_key(): - sig = ins.signature(method) - params = sig.parameters - keys = list(params.keys()) - param_dic = dict() - for name, value in enumerate(args): - param_dic[keys[name]] = value - param_dic.update(zip(params.keys(), args)) - param_dic.update(kwargs) - - for name, value in params.items(): - if name not in param_dic: - param_dic[name] = value.default - return param_dic - - # check type - def _check_param_type(arg, param_name, param_type=None): - if param_type is not None and not isinstance(arg, param_type): - raise ValueError( - "The %s function %s type error!" % (func_name, param_name)) - - # check range - def _check_param_range(arg, param_name): - if isinstance(arg, int) and param_name == "seed" and ( - arg < 0 or arg > 2147483647): - raise ValueError( - "The %s function %s exceeds the boundary!" 
% ( - func_name, param_name)) - if isinstance(arg, int) and param_name == "count" and ((arg <= 0 and arg != -1) or arg > 2147483647): - raise ValueError( - "The %s function %s exceeds the boundary!" % ( - func_name, param_name)) - if isinstance(arg, int) and param_name == "prefetch_size" and ( - arg <= 0 or arg > 1024): - raise ValueError( - "The %s function %s exceeds the boundary!" % ( - func_name, param_name)) - if isinstance(arg, int) and param_name == "num_parallel_workers" and ( - arg < 1 or arg > cpu_count()): - raise ValueError( - "The %s function %s exceeds the boundary(%s)!" % ( - func_name, param_name, cpu_count())) - if isinstance(arg, int) and param_name != "seed" \ - and param_name != "count" and param_name != "prefetch_size" \ - and param_name != "num_parallel_workers" and (arg < 1 or arg > 2147483647): - raise ValueError( - "The %s function %s exceeds the boundary!" % ( - func_name, param_name)) - - key = _make_key() - # check integer - for karg in req_param_int: - _check_param_type(key[karg], karg, int) - _check_param_range(key[karg], karg) - for karg in nreq_param_int: - if karg in key: - if key[karg] is not None: - _check_param_type(key[karg], karg, int) - _check_param_range(key[karg], karg) - # check bool - for karg in req_param_bool: - _check_param_type(key[karg], karg, bool) - for karg in nreq_param_bool: - if karg in key: - if key[karg] is not None: - _check_param_type(key[karg], karg, bool) - - if func_name in '__init__': - if 'columns_list' in key.keys(): - columns_list = key['columns_list'] - if columns_list is not None: - _check_param_type(columns_list, 'columns_list', list) - - if 'columns' in key.keys(): - columns = key['columns'] - if columns is not None: - _check_param_type(columns, 'columns', list) - - if 'partitions' in key.keys(): - partitions = key['partitions'] - if partitions is not None: - _check_param_type(partitions, 'partitions', list) - - if 'schema' in key.keys(): - schema = key['schema'] - if schema is not None: - 
check_filename(schema) - if not os.path.isfile(schema) or not os.access(schema, os.R_OK): - raise ValueError( - "The file %s does not exist or permission denied!" % schema) - - if 'dataset_dir' in key.keys(): - dataset_dir = key['dataset_dir'] - if dataset_dir is not None: - if not os.path.isdir(dataset_dir) or not os.access(dataset_dir, os.R_OK): - raise ValueError( - "The folder %s does not exist or permission denied!" % dataset_dir) - - if 'dataset_files' in key.keys(): - dataset_files = key['dataset_files'] - if not dataset_files: - raise ValueError( - "The dataset file does not exists!") - if dataset_files is not None: - _check_param_type(dataset_files, 'dataset_files', list) - for file in dataset_files: - if not os.path.isfile(file) or not os.access(file, os.R_OK): - raise ValueError( - "The file %s does not exist or permission denied!" % file) - - if 'dataset_file' in key.keys(): - dataset_file = key['dataset_file'] - if not dataset_file: - raise ValueError( - "The dataset file does not exists!") - check_filename(dataset_file) - if dataset_file is not None: - if not os.path.isfile(dataset_file) or not os.access(dataset_file, os.R_OK): - raise ValueError( - "The file %s does not exist or permission denied!" 
% dataset_file) - - return method(*args, **kwargs) - - return wrapper - - def check_valid_detype(type_): if type_ not in valid_detype: raise ValueError("Unknown column type") @@ -211,7 +50,7 @@ def check_filename(path): Exception: when error """ if not isinstance(path, str): - raise ValueError("path: {} is not string".format(path)) + raise TypeError("path: {} is not string".format(path)) filename = os.path.basename(path) # '#', ':', '|', ' ', '}', '"', '+', '!', ']', '[', '\\', '`', @@ -283,8 +122,8 @@ def check_num_parallel_workers(value): def check_num_samples(value): check_type(value, 'num_samples', int) - if value <= 0: - raise ValueError("num_samples must be greater than 0!") + if value < 0: + raise ValueError("num_samples cannot be less than 0!") def check_dataset_dir(dataset_dir): @@ -304,7 +143,7 @@ def check_sampler_shuffle_shard_options(param_dict): num_shards, shard_id = param_dict.get('num_shards'), param_dict.get('shard_id') if sampler is not None and not isinstance(sampler, (samplers.BuiltinSampler, samplers.Sampler)): - raise ValueError("sampler is not a valid Sampler type.") + raise TypeError("sampler is not a valid Sampler type.") if sampler is not None: if shuffle is not None: @@ -323,6 +162,27 @@ def check_sampler_shuffle_shard_options(param_dict): raise RuntimeError("shard_id is specified but num_shards is not.") +def check_padding_options(param_dict): + """ check for valid padded_sample and num_padded of padded samples""" + columns_list = param_dict.get('columns_list') + block_reader = param_dict.get('block_reader') + padded_sample, num_padded = param_dict.get('padded_sample'), param_dict.get('num_padded') + if padded_sample is not None: + if num_padded is None: + raise RuntimeError("padded_sample is specified and requires num_padded as well.") + if num_padded < 0: + raise ValueError("num_padded is invalid, num_padded={}.".format(num_padded)) + if columns_list is None: + raise RuntimeError("padded_sample is specified and requires columns_list 
as well.") + for column in columns_list: + if column not in padded_sample: + raise ValueError("padded_sample cannot match columns_list.") + if block_reader: + raise RuntimeError("block_reader and padded_sample cannot be specified at the same time.") + + if padded_sample is None and num_padded is not None: + raise RuntimeError("num_padded is specified but padded_sample is not.") + def check_imagefolderdatasetv2(method): """A wrapper that wrap a parameter checker to the original Dataset(ImageFolderDatasetV2).""" @@ -468,13 +328,13 @@ def check_vocdataset(method): if task is None: raise ValueError("task is not provided.") if not isinstance(task, str): - raise ValueError("task is not str type.") + raise TypeError("task is not str type.") # check mode; required argument mode = param_dict.get('mode') if mode is None: raise ValueError("mode is not provided.") if not isinstance(mode, str): - raise ValueError("mode is not str type.") + raise TypeError("mode is not str type.") imagesets_file = "" if task == "Segmentation": @@ -501,6 +361,52 @@ def check_vocdataset(method): return new_method +def check_cocodataset(method): + """A wrapper that wrap a parameter checker to the original Dataset(CocoDataset).""" + + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] + nreq_param_bool = ['shuffle', 'decode'] + + # check dataset_dir; required argument + dataset_dir = param_dict.get('dataset_dir') + if dataset_dir is None: + raise ValueError("dataset_dir is not provided.") + check_dataset_dir(dataset_dir) + + # check annotation_file; required argument + annotation_file = param_dict.get('annotation_file') + if annotation_file is None: + raise ValueError("annotation_file is not provided.") + check_dataset_file(annotation_file) + + # check task; required argument + task = param_dict.get('task') + if task is None: + raise ValueError("task is not 
provided.") + if not isinstance(task, str): + raise TypeError("task is not str type.") + + if task not in {'Detection', 'Stuff', 'Panoptic', 'Keypoint'}: + raise ValueError("Invalid task type") + + check_param_type(nreq_param_int, param_dict, int) + + check_param_type(nreq_param_bool, param_dict, bool) + + sampler = param_dict.get('sampler') + if sampler is not None and isinstance(sampler, samplers.PKSampler): + raise ValueError("CocoDataset doesn't support PKSampler") + check_sampler_shuffle_shard_options(param_dict) + + return method(*args, **kwargs) + + return new_method + + def check_celebadataset(method): """A wrapper that wrap a parameter checker to the original Dataset(CelebADataset).""" @@ -549,9 +455,10 @@ def check_minddataset(method): def new_method(*args, **kwargs): param_dict = make_param_dict(method, args, kwargs) - nreq_param_int = ['num_samples', 'num_parallel_workers', 'seed', 'num_shards', 'shard_id'] + nreq_param_int = ['num_samples', 'num_parallel_workers', 'seed', 'num_shards', 'shard_id', 'num_padded'] nreq_param_list = ['columns_list'] nreq_param_bool = ['block_reader'] + nreq_param_dict = ['padded_sample'] # check dataset_file; required argument dataset_file = param_dict.get('dataset_file') @@ -569,12 +476,11 @@ def check_minddataset(method): check_param_type(nreq_param_bool, param_dict, bool) - num_shards, shard_id = param_dict.get('num_shards'), param_dict.get('shard_id') - if (num_shards is not None and shard_id is None) or (num_shards is None and shard_id is not None): - raise ValueError("num_shards and shard_id need to be set or not set at the same time") + check_param_type(nreq_param_dict, param_dict, dict) check_sampler_shuffle_shard_options(param_dict) + check_padding_options(param_dict) return method(*args, **kwargs) return new_method @@ -599,6 +505,8 @@ def check_generatordataset(method): # check column_names or schema; required argument column_names = param_dict.get('column_names') + if column_names is not None: + 
check_columns(column_names, "column_names") schema = param_dict.get('schema') if column_names is None and schema is None: raise ValueError("Neither columns_names not schema are provided.") @@ -648,7 +556,7 @@ def check_generatordataset(method): def check_batch_size(batch_size): if not (isinstance(batch_size, int) or (callable(batch_size))): - raise ValueError("batch_size should either be an int or a callable.") + raise TypeError("batch_size should either be an int or a callable.") if callable(batch_size): sig = ins.signature(batch_size) if len(sig.parameters) != 1: @@ -683,7 +591,68 @@ def check_pad_info(key, val): check_type(dim, "dim in pad_shape", int) assert dim > 0, "pad shape should be positive integers" if val[1] is not None: - check_type(val[1], "pad_value", (int, float)) + check_type(val[1], "pad_value", (int, float, str, bytes)) + + +def check_bucket_batch_by_length(method): + """check the input arguments of bucket_batch_by_length.""" + + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + nreq_param_list = ['column_names', 'bucket_boundaries', 'bucket_batch_sizes'] + check_param_type(nreq_param_list, param_dict, list) + + # check column_names: must be list of string. 
+ column_names = param_dict.get("column_names") + all_string = all(isinstance(item, str) for item in column_names) + if not all_string: + raise TypeError("column_names should be a list of str.") + + element_length_function = param_dict.get("element_length_function") + if element_length_function is None and len(column_names) != 1: + raise ValueError("If element_length_function is not specified, exactly one column name should be passed.") + + # check bucket_boundaries: must be list of int, positive and strictly increasing + bucket_boundaries = param_dict.get('bucket_boundaries') + + if not bucket_boundaries: + raise ValueError("bucket_boundaries cannot be empty.") + + all_int = all(isinstance(item, int) for item in bucket_boundaries) + if not all_int: + raise TypeError("bucket_boundaries should be a list of int.") + + all_non_negative = all(item >= 0 for item in bucket_boundaries) + if not all_non_negative: + raise ValueError("bucket_boundaries cannot contain any negative numbers.") + + for i in range(len(bucket_boundaries) - 1): + if not bucket_boundaries[i + 1] > bucket_boundaries[i]: + raise ValueError("bucket_boundaries should be strictly increasing.") + + # check bucket_batch_sizes: must be list of int and positive + bucket_batch_sizes = param_dict.get('bucket_batch_sizes') + if len(bucket_batch_sizes) != len(bucket_boundaries) + 1: + raise ValueError("bucket_batch_sizes must contain one element more than bucket_boundaries.") + + all_int = all(isinstance(item, int) for item in bucket_batch_sizes) + if not all_int: + raise TypeError("bucket_batch_sizes should be a list of int.") + + all_non_negative = all(item >= 0 for item in bucket_batch_sizes) + if not all_non_negative: + raise ValueError("bucket_batch_sizes cannot contain any negative numbers.") + + if param_dict.get('pad_info') is not None: + check_type(param_dict["pad_info"], "pad_info", dict) + for k, v in param_dict.get('pad_info').items(): + check_pad_info(k, v) + + return method(*args, **kwargs) + + 
return new_method def check_batch(method): @@ -737,6 +706,7 @@ def check_batch(method): def check_sync_wait(method): """check the input arguments of sync_wait.""" + @wraps(method) def new_method(*args, **kwargs): param_dict = make_param_dict(method, args, kwargs) @@ -804,7 +774,7 @@ def check_filter(method): param_dict = make_param_dict(method, args, kwargs) predicate = param_dict.get("predicate") if not callable(predicate): - raise ValueError("Predicate should be a python function or a callable python object.") + raise TypeError("Predicate should be a python function or a callable python object.") nreq_param_int = ['num_parallel_workers'] check_param_type(nreq_param_int, param_dict, int) @@ -896,7 +866,7 @@ def check_zip_dataset(method): raise ValueError("datasets is not provided.") if not isinstance(ds, (tuple, datasets.Dataset)): - raise ValueError("datasets is not tuple or of type Dataset.") + raise TypeError("datasets is not tuple or of type Dataset.") return method(*args, **kwargs) @@ -916,7 +886,7 @@ def check_concat(method): raise ValueError("datasets is not provided.") if not isinstance(ds, (list, datasets.Dataset)): - raise ValueError("datasets is not list or of type Dataset.") + raise TypeError("datasets is not list or of type Dataset.") return method(*args, **kwargs) @@ -995,7 +965,7 @@ def check_add_column(method): de_type = param_dict.get("de_type") if de_type is not None: if not isinstance(de_type, typing.Type) and not check_valid_detype(de_type): - raise ValueError("Unknown column type.") + raise TypeError("Unknown column type.") else: raise TypeError("Expected non-empty string.") @@ -1009,6 +979,41 @@ def check_add_column(method): return new_method +def check_cluedataset(method): + """A wrapper that wrap a parameter checker to the original Dataset(CLUEDataset).""" + + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 
'shard_id'] + + # check dataset_files; required argument + dataset_files = param_dict.get('dataset_files') + if dataset_files is None: + raise ValueError("dataset_files is not provided.") + if not isinstance(dataset_files, (str, list)): + raise TypeError("dataset_files should be of type str or a list of strings.") + + # check task + task_param = param_dict.get('task') + if task_param not in ['AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC', 'CSL']: + raise ValueError("task should be AFQMC, TNEWS, IFLYTEK, CMNLI, WSC or CSL") + + # check usage + usage_param = param_dict.get('usage') + if usage_param not in ['train', 'test', 'eval']: + raise ValueError("usage should be train, test or eval") + + check_param_type(nreq_param_int, param_dict, int) + + check_sampler_shuffle_shard_options(param_dict) + + return method(*args, **kwargs) + + return new_method + + def check_textfiledataset(method): """A wrapper that wrap a parameter checker to the original Dataset(TextFileDataset).""" @@ -1130,6 +1135,36 @@ def check_gnn_get_all_nodes(method): return new_method +def check_gnn_get_all_edges(method): + """A wrapper that wrap a parameter checker to the GNN `get_all_edges` function.""" + + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + # check node_type; required argument + check_type(param_dict.get("edge_type"), 'edge_type', int) + + return method(*args, **kwargs) + + return new_method + + +def check_gnn_get_nodes_from_edges(method): + """A wrapper that wrap a parameter checker to the GNN `get_nodes_from_edges` function.""" + + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + # check edge_list; required argument + check_gnn_list_or_ndarray(param_dict.get("edge_list"), 'edge_list') + + return method(*args, **kwargs) + + return new_method + + def check_gnn_get_all_neighbors(method): """A wrapper that wrap a parameter checker to the GNN `get_all_neighbors` function.""" @@ 
-1148,6 +1183,79 @@ def check_gnn_get_all_neighbors(method): return new_method +def check_gnn_get_sampled_neighbors(method): + """A wrapper that wrap a parameter checker to the GNN `get_sampled_neighbors` function.""" + + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + # check node_list; required argument + check_gnn_list_or_ndarray(param_dict.get("node_list"), 'node_list') + + # check neighbor_nums; required argument + neighbor_nums = param_dict.get("neighbor_nums") + check_gnn_list_or_ndarray(neighbor_nums, 'neighbor_nums') + if len(neighbor_nums) > 6: + raise ValueError("Wrong number of input members for {0}, should be less than or equal to 6, got {1}".format( + 'neighbor_nums', len(neighbor_nums))) + + # check neighbor_types; required argument + neighbor_types = param_dict.get("neighbor_types") + check_gnn_list_or_ndarray(neighbor_types, 'neighbor_types') + if len(neighbor_nums) > 6: + raise ValueError("Wrong number of input members for {0}, should be less than or equal to 6, got {1}".format( + 'neighbor_types', len(neighbor_types))) + + if len(neighbor_nums) != len(neighbor_types): + raise ValueError( + "The number of members of neighbor_nums and neighbor_types is inconsistent") + + return method(*args, **kwargs) + + return new_method + + +def check_gnn_get_neg_sampled_neighbors(method): + """A wrapper that wrap a parameter checker to the GNN `get_neg_sampled_neighbors` function.""" + + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + # check node_list; required argument + check_gnn_list_or_ndarray(param_dict.get("node_list"), 'node_list') + + # check neg_neighbor_num; required argument + check_type(param_dict.get("neg_neighbor_num"), 'neg_neighbor_num', int) + + # check neg_neighbor_type; required argument + check_type(param_dict.get("neg_neighbor_type"), + 'neg_neighbor_type', int) + + return method(*args, **kwargs) + + return new_method + + 
+def check_gnn_random_walk(method): + """A wrapper that wrap a parameter checker to the GNN `random_walk` function.""" + + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + # check node_list; required argument + check_gnn_list_or_ndarray(param_dict.get("target_nodes"), 'target_nodes') + + # check meta_path; required argument + check_gnn_list_or_ndarray(param_dict.get("meta_path"), 'meta_path') + + return method(*args, **kwargs) + + return new_method + + def check_aligned_list(param, param_name, membor_type): """Check whether the structure of each member of the list is the same.""" @@ -1205,3 +1313,48 @@ def check_gnn_get_node_feature(method): return method(*args, **kwargs) return new_method + + +def check_numpyslicesdataset(method): + """A wrapper that wrap a parameter checker to the original Dataset(NumpySlicesDataset).""" + + @wraps(method) + def new_method(*args, **kwargs): + param_dict = make_param_dict(method, args, kwargs) + + # check data; required argument + data = param_dict.get('data') + if not isinstance(data, (list, tuple, dict, np.ndarray)): + raise TypeError("Unsupported data type: {}, only support some common python data type, " + "like list, tuple, dict, and numpy array.".format(type(data))) + if isinstance(data, tuple) and not isinstance(data[0], (list, np.ndarray)): + raise TypeError("Unsupported data type: when input is tuple, only support some common python " + "data type, like tuple of lists and tuple of numpy arrays.") + if not data: + raise ValueError("Input data is empty.") + + # check column_names + column_names = param_dict.get('column_names') + if column_names is not None: + check_columns(column_names, "column_names") + + # check num of input column in column_names + column_num = 1 if isinstance(column_names, str) else len(column_names) + if isinstance(data, dict): + data_column = len(list(data.keys())) + if column_num != data_column: + raise ValueError("Num of input column names is 
{0}, but required is {1}." + .format(column_num, data_column)) + + elif isinstance(data, tuple): + if column_num != len(data): + raise ValueError("Num of input column names is {0}, but required is {1}." + .format(column_num, len(data))) + else: + if column_num != 1: + raise ValueError("Num of input column names is {0}, but required is {1} as data is list." + .format(column_num, 1)) + + return method(*args, **kwargs) + + return new_method diff --git a/mindspore/dataset/text/__init__.py b/mindspore/dataset/text/__init__.py index b90f912a98..7c43a2888c 100644 --- a/mindspore/dataset/text/__init__.py +++ b/mindspore/dataset/text/__init__.py @@ -11,9 +11,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ -mindspore.dataset.text +This module is to support text processing for nlp. It includes two parts: +transforms and utils. transforms is a high performance +nlp text processing module which is developed with icu4c and cppjieba. +utils provides some general methods for nlp text processing. 
""" -from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer -from .utils import to_str, to_bytes, JiebaMode, Vocab +import platform +from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, TruncateSequencePair, \ + ToNumber +from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm + +__all__ = [ + "Lookup", "JiebaTokenizer", "UnicodeCharTokenizer", "Ngram", + "to_str", "to_bytes", "JiebaMode", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber", + "PythonTokenizer" +] + +if platform.system().lower() != 'windows': + from .transforms import UnicodeScriptTokenizer, WhitespaceTokenizer, CaseFold, NormalizeUTF8, \ + RegexReplace, RegexTokenizer, BasicTokenizer, BertTokenizer, PythonTokenizer + + __all__.append(["UnicodeScriptTokenizer", "WhitespaceTokenizer", "CaseFold", "NormalizeUTF8", + "RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer", "NormalizeForm"]) diff --git a/mindspore/dataset/text/transforms.py b/mindspore/dataset/text/transforms.py index 79a5b744c9..fe970e06cc 100644 --- a/mindspore/dataset/text/transforms.py +++ b/mindspore/dataset/text/transforms.py @@ -12,25 +12,60 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -c transforms for all text related operators -""" +The module text.transforms is inheritted from _c_dataengine +which is implemented basing on icu4c and cppjieba in C++. +It's a high performance module to process nlp text. +Users can use Vocab to build their own dictionary, +use appropriate tokenizers to split sentences into different tokens, +and use Lookup to find the index of tokens in Vocab. + +.. Note:: + Constructor's arguments for every class in this module must be saved into the + class attributes (self.xxx) to support save() and load(). 
+Examples: + >>> import mindspore.dataset as ds + >>> import mindspore.dataset.text as text + >>> dataset_file = "path/to/text_file_path" + >>> # sentences as line data saved in a file + >>> dataset = ds.TextFileDataset(dataset_file, shuffle=False) + >>> # tokenize sentence to unicode characters + >>> tokenizer = text.UnicodeCharTokenizer() + >>> # load vocabulary form list + >>> vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您']) + >>> # lookup is an operation for mapping tokens to ids + >>> lookup = text.Lookup(vocab) + >>> dataset = dataset.map(operations=[tokenizer, lookup]) + >>> for i in dataset.create_dict_iterator(): + >>> print(i) + >>> # if text line in dataset_file is: + >>> # 深圳欢迎您 + >>> # then the output will be: + >>> # {'text': array([0, 1, 2, 3, 4], dtype=int32)} +""" import os import re +import platform +import numpy as np import mindspore._c_dataengine as cde -from .utils import JiebaMode +from .utils import JiebaMode, NormalizeForm, to_str from .validators import check_lookup, check_jieba_add_dict, \ - check_jieba_add_word, check_jieba_init + check_jieba_add_word, check_jieba_init, check_ngram, check_pair_truncate, \ + check_to_number, check_python_tokenizer +from ..core.datatypes import mstype_to_detype class Lookup(cde.LookupOp): """ - Lookup operator that looks up a word to an id + Lookup operator that looks up a word to an id. + Args: - vocab(Vocab): a Vocab object - unknown(None,int): default id to lookup a word that is out of vocab + vocab(Vocab): a Vocab object. + unknown(int, optional): default id to lookup a word that is out of vocab. If no argument is passed, 1 will be + used to be the default id which is the convention for unknown_token . Otherwise, user is strongly + encouraged to pass in the id for (default=None). """ @check_lookup @@ -41,6 +76,33 @@ class Lookup(cde.LookupOp): super().__init__(vocab, unknown) +class Ngram(cde.NgramOp): + """ + TensorOp to generate n-gram from a 1-D string Tensor. 
+ + Refer to https://en.wikipedia.org/wiki/N-gram#Examples for an overview of what n-gram is and how it works. + + Args: + n (list of int): n in n-gram, n >= 1. n is a list of positive integers, for e.g. n=[4,3], The result + would be a 4-gram followed by a 3-gram in the same tensor. If number of words is not enough to make up for + a n-gram, an empty string would be returned. For e.g. 3 grams on ["mindspore","best"] would result in an + empty string be produced. + left_pad (tuple, optional): ("pad_token", pad_width). Padding performed on left side of the sequence. pad_width + will be capped at n-1. left_pad=("_",2) would pad left side of the sequence with "__" (default=None). + right_pad (tuple, optional): ("pad_token", pad_width). Padding performed on right side of the sequence. + pad_width will be capped at n-1. right_pad=("-":2) would pad right side of the sequence with "--" + (default=None). + separator (str, optional): symbol used to join strings together. for e.g. if 2-gram the ["mindspore", "amazing"] + with separator="-" the result would be ["mindspore-amazing"] (default=None, which means whitespace is + used). + """ + + @check_ngram + def __init__(self, n, left_pad=None, right_pad=None, separator=None): + super().__init__(ngrams=n, l_pad_len=left_pad[1], r_pad_len=right_pad[1], l_pad_token=left_pad[0], + r_pad_token=right_pad[0], separator=separator) + + DE_C_INTER_JIEBA_MODE = { JiebaMode.MIX: cde.JiebaMode.DE_JIEBA_MIX, JiebaMode.MP: cde.JiebaMode.DE_JIEBA_MP, @@ -55,11 +117,12 @@ class JiebaTokenizer(cde.JiebaTokenizerOp): Args: hmm_path (str): the dictionary file is used by HMMSegment algorithm, the dictionary can be obtained on the official website of cppjieba. - mp_path(str): the dictionary file is used by MPSegment algorithm, + mp_path (str): the dictionary file is used by MPSegment algorithm, the dictionary can be obtained on the official website of cppjieba. 
- mode (Enum): [Default "MIX"], "MP" model will tokenize with MPSegment algorithm, + mode (JiebaMode, optional): "MP" model will tokenize with MPSegment algorithm, "HMM" mode will tokenize with Hiddel Markov Model Segment algorithm, - "MIX" model will tokenize with a mix of MPSegment and HMMSegment algorithm. + "MIX" model will tokenize with a mix of MPSegment and HMMSegment algorithm + (default="MIX"). """ @check_jieba_init @@ -73,13 +136,15 @@ class JiebaTokenizer(cde.JiebaTokenizerOp): @check_jieba_add_word def add_word(self, word, freq=None): """ - Add user defined word to JiebaTokenizer's dictionary + Add user defined word to JiebaTokenizer's dictionary. + Args: - word(required, string): The word to be added to the JiebaTokenizer instance. + word (str): The word to be added to the JiebaTokenizer instance. The added word will not be written into the built-in dictionary on disk. - freq(optional, int): The frequency of the word to be added, The higher the frequency, - the better change the word will be tokenized(default None, use default frequency). + freq (int, optional): The frequency of the word to be added, The higher the frequency, + the better chance the word will be tokenized(default=None, use default frequency). """ + if freq is None: super().add_word(word, 0) else: @@ -88,15 +153,20 @@ class JiebaTokenizer(cde.JiebaTokenizerOp): @check_jieba_add_dict def add_dict(self, user_dict): """ - Add user defined word to JiebaTokenizer's dictionary + Add user defined word to JiebaTokenizer's dictionary. + Args: - user_dict(path/dict):Dictionary to be added, file path or Python dictionary, - Python Dict format: {word1:freq1, word2:freq2,...} - Jieba dictionary format : word(required), freq(optional), such as: - word1 freq1 - word2 - word3 freq3 + user_dict (str or dict): Dictionary to be added, file path or Python dictionary, + Python Dict format: {word1:freq1, word2:freq2,...}. + Jieba dictionary format : word(required), freq(optional), such as: + + ..
code-block:: + + word1 freq1 + word2 + word3 freq3 """ + if isinstance(user_dict, str): self.__add_dict_py_file(user_dict) elif isinstance(user_dict, dict): @@ -153,3 +223,249 @@ class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp): """ Tokenize a scalar tensor of UTF-8 string to Unicode characters. """ + + +class WordpieceTokenizer(cde.WordpieceTokenizerOp): + """ + Tokenize scalar token or 1-D tokens to 1-D subword tokens. + + Args: + vocab (Vocab): a Vocab object. + suffix_indicator (str, optional): Used to show that the subword is the last part of a word(default='##'). + max_bytes_per_token (int, optional): Tokens exceeding this length will not be further split(default=100). + unknown_token (str, optional): When we can not found the token: if 'unknown_token' is empty string, + return the token directly, else return 'unknown_token'(default='[UNK]'). + """ + + def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, unknown_token='[UNK]'): + self.vocab = vocab + self.suffix_indicator = suffix_indicator + self.max_bytes_per_token = max_bytes_per_token + self.unknown_token = unknown_token + super().__init__(self.vocab, self.suffix_indicator, self.max_bytes_per_token, self.unknown_token) + + +if platform.system().lower() != 'windows': + class WhitespaceTokenizer(cde.WhitespaceTokenizerOp): + """ + Tokenize a scalar tensor of UTF-8 string on ICU defined whitespaces(such as: ' ', '\\\\t', '\\\\r', '\\\\n'). + """ + + + class UnicodeScriptTokenizer(cde.UnicodeScriptTokenizerOp): + """ + Tokenize a scalar tensor of UTF-8 string on Unicode script boundaries. + + Args: + keep_whitespace (bool, optional): If or not emit whitespace tokens (default=False). + """ + + def __init__(self, keep_whitespace=False): + self.keep_whitespace = keep_whitespace + super().__init__(self.keep_whitespace) + + + class CaseFold(cde.CaseFoldOp): + """ + Apply case fold operation on utf-8 string tensor. 
+ """ + + + DE_C_INTER_NORMALIZE_FORM = { + NormalizeForm.NONE: cde.NormalizeForm.DE_NORMALIZE_NONE, + NormalizeForm.NFC: cde.NormalizeForm.DE_NORMALIZE_NFC, + NormalizeForm.NFKC: cde.NormalizeForm.DE_NORMALIZE_NFKC, + NormalizeForm.NFD: cde.NormalizeForm.DE_NORMALIZE_NFD, + NormalizeForm.NFKD: cde.NormalizeForm.DE_NORMALIZE_NFKD + } + + + class NormalizeUTF8(cde.NormalizeUTF8Op): + """ + Apply normalize operation on utf-8 string tensor. + + Args: + normalize_form (NormalizeForm, optional): Valid values are "NONE", "NFC", "NFKC", "NFD", "NFKD". + If set "NONE", will do nothing for input string tensor. + If set to any of "NFC", "NFKC", "NFD", "NFKD", will apply normalize operation(default="NFKC"). + See http://unicode.org/reports/tr15/ for details. + """ + + def __init__(self, normalize_form=NormalizeForm.NFKC): + self.normalize_form = DE_C_INTER_NORMALIZE_FORM[normalize_form] + super().__init__(self.normalize_form) + + + class RegexReplace(cde.RegexReplaceOp): + """ + Replace utf-8 string tensor with 'replace' according to regular expression 'pattern'. + + See http://userguide.icu-project.org/strings/regexp for support regex pattern. + + Args: + pattern(str): the regex expression patterns. + replace(str): the string to replace matched element. + replace_all(bool, optional): If False, only replace first matched element; + if True, replace all matched elements(default=True). + """ + + def __init__(self, pattern, replace, replace_all=True): + self.pattern = pattern + self.replace = replace + self.replace_all = replace_all + super().__init__(self.pattern, self.replace, self.replace_all) + + + class RegexTokenizer(cde.RegexTokenizerOp): + """ + Tokenize a scalar tensor of UTF-8 string by regex expression pattern. + + See http://userguide.icu-project.org/strings/regexp for support regex pattern. + + Args: + delim_pattern(str): The pattern of regex delimiters. + The original string will be split by matched elements. 
+ keep_delim_pattern(str, optional): The string matched by 'delim_pattern' can be kept as a token + if it can be matched by 'keep_delim_pattern'. And the default value is empty str(''), + in this situation, delimiters will not be kept as an output token(default=''). + """ + + def __init__(self, delim_pattern, keep_delim_pattern=''): + self.delim_pattern = delim_pattern + self.keep_delim_pattern = keep_delim_pattern + super().__init__(self.delim_pattern, self.keep_delim_pattern) + + + class BasicTokenizer(cde.BasicTokenizerOp): + """ + Tokenize a scalar tensor of UTF-8 string by specific rules. + + Args: + lower_case(bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation + on input text to make the text to lower case and strip accents characters; If False, only apply + NormalizeUTF8('normalization_form' mode) operation on input text(default=False). + keep_whitespace(bool, optional): If True, the whitespace will be kept in out tokens(default=False). + normalization_form(NormalizeForm, optional): Used to specify a specific normalize mode, + only effective when 'lower_case' is False. See NormalizeUTF8 for details(default='NONE'). + preserve_unused_token(bool, optional): If True, do not split special tokens like + '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'(default=True). + """ + + def __init__(self, lower_case=False, keep_whitespace=False, + normalization_form=NormalizeForm.NONE, preserve_unused_token=True): + self.lower_case = lower_case + self.keep_whitespace = keep_whitespace + self.normalization_form = DE_C_INTER_NORMALIZE_FORM[normalization_form] + self.preserve_unused_token = preserve_unused_token + super().__init__(self.lower_case, self.keep_whitespace, + self.normalization_form, self.preserve_unused_token) + + + class BertTokenizer(cde.BertTokenizerOp): + """ + Tokenizer used for Bert text process. + + Args: + vocab(Vocab): a Vocab object.
+ suffix_indicator(str, optional): Used to show that the subword is the last part of a word(default='##'). + max_bytes_per_token(int, optional): Tokens exceeding this length will not be further split(default=100). + unknown_token(str, optional): When we can not found the token: if 'unknown_token' is empty string, + return the token directly, else return 'unknown_token'(default='[UNK]'). + lower_case(bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation + on input text to make the text to lower case and strip accents characters; If False, only apply + NormalizeUTF8('normalization_form' mode) operation on input text(default=False). + keep_whitespace(bool, optional): If True, the whitespace will be kept in out tokens(default=False). + normalization_form(NormalizeForm, optional): Used to specify a specific normlaize mode, + only effective when 'lower_case' is False. See NormalizeUTF8 for details(default='NONE'). + preserve_unused_token(bool, optional): If True, do not split special tokens like + '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'(default=True). 
+ """ + + def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, + unknown_token='[UNK]', lower_case=False, keep_whitespace=False, + normalization_form=NormalizeForm.NONE, preserve_unused_token=True): + self.vocab = vocab + self.suffix_indicator = suffix_indicator + self.max_bytes_per_token = max_bytes_per_token + self.unknown_token = unknown_token + self.lower_case = lower_case + self.keep_whitespace = keep_whitespace + self.normalization_form = DE_C_INTER_NORMALIZE_FORM[normalization_form] + self.preserve_unused_token = preserve_unused_token + super().__init__(self.vocab, self.suffix_indicator, self.max_bytes_per_token, self.unknown_token, + self.lower_case, self.keep_whitespace, self.normalization_form, self.preserve_unused_token) + + +class TruncateSequencePair(cde.TruncateSequencePairOp): + """ + Truncate a pair of rank-1 tensors such that the total length is less than max_length. + + This operation takes two input tensors and returns two output Tenors. + + Args: + max_length(int): Maximum length required. + + Examples: + >>> # Data before + >>> # | col1 | col2 | + >>> # +---------+---------| + >>> # | [1,2,3] | [4,5] | + >>> # +---------+---------+ + >>> data = data.map(operations=TruncateSequencePair(4)) + >>> # Data after + >>> # | col1 | col2 | + >>> # +---------+---------+ + >>> # | [1,2] | [4,5] | + >>> # +---------+---------+ + """ + + @check_pair_truncate + def __init__(self, max_length): + super().__init__(max_length) + + +class ToNumber(cde.ToNumberOp): + """ + Tensor operation to convert every element of a string tensor to a number. + + Strings are casted according to the rules specified in the following links: + https://en.cppreference.com/w/cpp/string/basic_string/stof, + https://en.cppreference.com/w/cpp/string/basic_string/stoul, + except that any strings which represent negative numbers cannot be casted to an + unsigned integer type. + + Args: + data_type (mindspore.dtype): mindspore.dtype to be casted to. 
Must be + a numeric type. + + Raises: + RuntimeError: If strings are invalid to cast, or are out of range after being casted. + """ + + @check_to_number + def __init__(self, data_type): + data_type = mstype_to_detype(data_type) + self.data_type = str(data_type) + super().__init__(data_type) + + +class PythonTokenizer: + """ + Callable class to be used for user-defined string tokenizer. + Args: + tokenizer (Callable): Python function that takes a `str` and returns a list of `str` as tokens. + + Examples: + >>> def my_tokenizer(line): + >>> return line.split() + >>> data = data.map(operations=PythonTokenizer(my_tokenizer)) + """ + + @check_python_tokenizer + def __init__(self, tokenizer): + self.tokenizer = np.vectorize(lambda x: np.array(tokenizer(x), dtype='U'), signature='()->(n)') + + def __call__(self, in_array): + in_array = to_str(in_array) + tokens = self.tokenizer(in_array) + return tokens diff --git a/mindspore/dataset/text/utils.py b/mindspore/dataset/text/utils.py index f3f442f238..766de76e01 100644 --- a/mindspore/dataset/text/utils.py +++ b/mindspore/dataset/text/utils.py @@ -12,55 +12,113 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -Some basic function for nlp +The module text.utils provides some general methods for nlp text processing. +For example, you can use Vocab to build a dictionary, +use to_bytes and to_str to encode and decode strings into a specified format. """ from enum import IntEnum -import mindspore._c_dataengine as cde +import copy import numpy as np +import mindspore._c_dataengine as cde -from .validators import check_from_file, check_from_list, check_from_dict +from .validators import check_from_file, check_from_list, check_from_dict, check_from_dataset class Vocab(cde.Vocab): """ - Vocab object that is used for lookup word - Args: + Vocab object that is used to lookup a word. + + It contains a map that maps each word(str) to an id (int). 
""" - def __init__(self): - pass + @classmethod + @check_from_dataset + def from_dataset(cls, dataset, columns=None, freq_range=None, top_k=None, special_tokens=None, + special_first=None): + """ + Build a vocab from a dataset. + + This would collect all unique words in a dataset and return a vocab within + the frequency range specified by user in freq_range. User would be warned if no words fall into the frequency. + Words in vocab are ordered from highest frequency to lowest frequency. Words with the same frequency would be + ordered lexicographically. + + Args: + dataset(Dataset): dataset to build vocab from. + columns(list of str, optional): column names to get words from. It can be a list of column names. + (default=None, where all columns will be used. If any column isn't string type, will return error). + freq_range(tuple, optional): A tuple of integers (min_frequency, max_frequency). Words within the frequency + range would be kept. 0 <= min_frequency <= max_frequency <= total_words. min_frequency=0 is the same as + min_frequency=1. max_frequency > total_words is the same as max_frequency = total_words. + min_frequency/max_frequency can be None, which corresponds to 0/total_words separately + (default=None, all words are included). + top_k(int, optional): top_k > 0. Number of words to be built into vocab. top_k most frequent words are + taken. top_k is taken after freq_range. If not enough top_k, all words will be taken (default=None, + all words are included). + special_tokens(list, optional): a list of strings, each one is a special token. for example + special_tokens=["",""] (default=None, no special tokens will be added). + special_first(bool, optional): whether special_tokens will be prepended/appended to vocab. If special_tokens + is specified and special_first is set to None, special_tokens will be prepended (default=None). + + Returns: + Vocab, Vocab object built from dataset. 
+ """ + + vocab = Vocab() + root = copy.deepcopy(dataset).build_vocab(vocab, columns, freq_range, top_k, special_tokens, special_first) + for d in root.create_dict_iterator(): + if d is not None: + raise ValueError("from_dataset should receive data other than None.") + return vocab @classmethod @check_from_list - def from_list(cls, word_list): + def from_list(cls, word_list, special_tokens=None, special_first=None): """ - build a vocab object from a list of word + Build a vocab object from a list of word. + Args: - word_list(list): a list of string where each element is a word + word_list(list): a list of string where each element is a word of type string. + special_tokens(list, optional): a list of strings, each one is a special token. for example + special_tokens=["",""] (default=None, no special tokens will be added). + special_first(bool, optional): whether special_tokens will be prepended/appended to vocab, If special_tokens + is specified and special_first is set to None, special_tokens will be prepended (default=None). """ - return super().from_list(word_list) + + return super().from_list(word_list, special_tokens, special_first) @classmethod @check_from_file - def from_file(cls, file_path, delimiter=None, vocab_size=None): + def from_file(cls, file_path, delimiter=None, vocab_size=None, special_tokens=None, special_first=None): """ - build a vocab object from a list of word + Build a vocab object from a list of word. + Args: - file_path(str): path to the file which contains the vocab list - delimiter(None, str): a delimiter to break up each line in file, the first element is taken to be the word - vocab_size(None, int): number of words to read from file_path + file_path (str): path to the file which contains the vocab list. + delimiter (str, optional): a delimiter to break up each line in file, the first element is taken to be + the word (default=None). + vocab_size (int, optional): number of words to read from file_path (default=None, all words are taken). 
+ special_tokens (list, optional): a list of strings, each one is a special token. for example + special_tokens=["",""] (default=None, no special tokens will be added). + special_first (bool, optional): whether special_tokens will be prepended/appended to vocab, + If special_tokens is specified and special_first is set to None, + special_tokens will be prepended (default=None). """ - return super().from_file(file_path, delimiter, vocab_size) + + return super().from_file(file_path, delimiter, vocab_size, special_tokens, special_first) @classmethod @check_from_dict def from_dict(cls, word_dict): """ - build a vocab object from a dict. + Build a vocab object from a dict. + Args: - word_dict(dict): dict contains word, id pairs. id should start from 2 and continuous + word_dict (dict): dict contains word, id pairs where word should be str and id int. id is recommended to + start from 0 and be continuous. ValueError will be raised if id is negative. """ + return super().from_dict(word_dict) @@ -69,15 +127,15 @@ def to_str(array, encoding='utf8'): Convert numpy array of `bytes` to array of `str` by decoding each element based on charset `encoding`. Args: - array (numpy array): Array of type `bytes` representing strings. + array (numpy.ndarray): Array of type `bytes` representing strings. encoding (string): Indicating the charset for decoding. - Returns: - Numpy array of `str`. + Returns: + numpy.ndarray, numpy array of `str`. """ if not isinstance(array, np.ndarray): - raise ValueError('input should be a numpy array') + raise ValueError('input should be a numpy array.') return np.char.decode(array, encoding) @@ -87,20 +145,30 @@ def to_bytes(array, encoding='utf8'): Convert numpy array of `str` to array of `bytes` by encoding each element based on charset `encoding`. Args: - array (numpy array): Array of type `str` representing strings. - encoding (string): Indicating the charset for encoding. - Returns: - Numpy array of `bytes`. 
+ array (numpy.ndarray): Array of type `str` representing strings. + encoding (str): Indicating the charset for encoding. + Returns: + numpy.ndarray, numpy array of `bytes`. """ if not isinstance(array, np.ndarray): - raise ValueError('input should be a numpy array') + raise ValueError('input should be a numpy array.') return np.char.encode(array, encoding) class JiebaMode(IntEnum): + """An enumeration for JiebaTokenizer, effective enumeration types are MIX, MP, HMM.""" MIX = 0 MP = 1 HMM = 2 + + +class NormalizeForm(IntEnum): + """An enumeration for NormalizeUTF8, effective enumeration types are NONE, NFC, NFKC, NFD, NFKD.""" + NONE = 0 + NFC = 1 + NFKC = 2 + NFD = 3 + NFKD = 4 diff --git a/mindspore/dataset/text/validators.py b/mindspore/dataset/text/validators.py index 479b90d1f0..afab8665cd 100644 --- a/mindspore/dataset/text/validators.py +++ b/mindspore/dataset/text/validators.py @@ -19,12 +19,29 @@ validators for text ops from functools import wraps import mindspore._c_dataengine as cde +import mindspore.common.dtype as mstype -from ..transforms.validators import check_uint32 +from mindspore._c_expression import typing +from ..transforms.validators import check_uint32, check_pos_int64 + + +def check_unique_list_of_words(words, arg_name): + """Check that words is a list and each element is a str without any duplication""" + + if not isinstance(words, list): + raise ValueError(arg_name + " needs to be a list of words of type string.") + words_set = set() + for word in words: + if not isinstance(word, str): + raise ValueError("each word in " + arg_name + " needs to be type str.") + if word in words_set: + raise ValueError(arg_name + " contains duplicate word: " + word + ".") + words_set.add(word) + return words_set def check_lookup(method): - """A wrapper that wrap a parameter checker to the original function(crop operation).""" + """A wrapper that wrap a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): @@ 
-34,9 +51,11 @@ def check_lookup(method): if "unknown" in kwargs: unknown = kwargs.get("unknown") if unknown is not None: - assert isinstance(unknown, int) and unknown >= 0, "unknown needs to be a non-negative integer" + if not (isinstance(unknown, int) and unknown >= 0): + raise ValueError("unknown needs to be a non-negative integer.") - assert isinstance(vocab, cde.Vocab), "vocab is not an instance of cde.Vocab" + if not isinstance(vocab, cde.Vocab): + raise ValueError("vocab is not an instance of cde.Vocab.") kwargs["vocab"] = vocab kwargs["unknown"] = unknown @@ -46,65 +65,109 @@ def check_lookup(method): def check_from_file(method): - """A wrapper that wrap a parameter checker to the original function(crop operation).""" + """A wrapper that wrap a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - file_path, delimiter, vocab_size = (list(args) + 3 * [None])[:3] + file_path, delimiter, vocab_size, special_tokens, special_first = (list(args) + 5 * [None])[:5] if "file_path" in kwargs: file_path = kwargs.get("file_path") if "delimiter" in kwargs: delimiter = kwargs.get("delimiter") if "vocab_size" in kwargs: vocab_size = kwargs.get("vocab_size") + if "special_tokens" in kwargs: + special_tokens = kwargs.get("special_tokens") + if "special_first" in kwargs: + special_first = kwargs.get("special_first") + + if not isinstance(file_path, str): + raise ValueError("file_path needs to be str.") - assert isinstance(file_path, str), "file_path needs to be str" if delimiter is not None: - assert isinstance(delimiter, str), "delimiter needs to be str" + if not isinstance(delimiter, str): + raise ValueError("delimiter needs to be str.") else: delimiter = "" if vocab_size is not None: - assert isinstance(vocab_size, int) and vocab_size > 0, "vocab size needs to be a positive integer" + if not (isinstance(vocab_size, int) and vocab_size > 0): + raise ValueError("vocab size needs to be a positive integer.") else: vocab_size = 
-1 + + if special_first is None: + special_first = True + + if not isinstance(special_first, bool): + raise ValueError("special_first needs to be a boolean value") + + if special_tokens is None: + special_tokens = [] + + check_unique_list_of_words(special_tokens, "special_tokens") + kwargs["file_path"] = file_path kwargs["delimiter"] = delimiter kwargs["vocab_size"] = vocab_size + kwargs["special_tokens"] = special_tokens + kwargs["special_first"] = special_first + return method(self, **kwargs) return new_method def check_from_list(method): - """A wrapper that wrap a parameter checker to the original function(crop operation).""" + """A wrapper that wrap a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - word_list, = (list(args) + [None])[:1] + word_list, special_tokens, special_first = (list(args) + 3 * [None])[:3] if "word_list" in kwargs: word_list = kwargs.get("word_list") - assert isinstance(word_list, list), "word_list needs to be a list of words" - for word in word_list: - assert isinstance(word, str), "each word in word list needs to be type str" + if "special_tokens" in kwargs: + special_tokens = kwargs.get("special_tokens") + if "special_first" in kwargs: + special_first = kwargs.get("special_first") + if special_tokens is None: + special_tokens = [] + word_set = check_unique_list_of_words(word_list, "word_list") + token_set = check_unique_list_of_words(special_tokens, "special_tokens") + + intersect = word_set.intersection(token_set) + + if intersect != set(): + raise ValueError("special_tokens and word_list contain duplicate word :" + str(intersect) + ".") + + if special_first is None: + special_first = True + + if not isinstance(special_first, bool): + raise ValueError("special_first needs to be a boolean value.") kwargs["word_list"] = word_list + kwargs["special_tokens"] = special_tokens + kwargs["special_first"] = special_first return method(self, **kwargs) return new_method def 
check_from_dict(method): - """A wrapper that wrap a parameter checker to the original function(crop operation).""" + """A wrapper that wrap a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): word_dict, = (list(args) + [None])[:1] if "word_dict" in kwargs: word_dict = kwargs.get("word_dict") - assert isinstance(word_dict, dict), "word_dict needs to be a list of word,id pairs" + if not isinstance(word_dict, dict): + raise ValueError("word_dict needs to be a list of word,id pairs.") for word, word_id in word_dict.items(): - assert isinstance(word, str), "each word in word_dict needs to be type str" - assert isinstance(word_id, int) and word_id >= 0, "each word id needs to be positive integer" + if not isinstance(word, str): + raise ValueError("Each word in word_dict needs to be type string.") + if not (isinstance(word_id, int) and word_id >= 0): + raise ValueError("Each word id needs to be positive integer.") kwargs["word_dict"] = word_dict return method(self, **kwargs) @@ -124,11 +187,11 @@ def check_jieba_init(method): mp_path = kwargs.get("mp_path") if hmm_path is None: raise ValueError( - "the dict of HMMSegment in cppjieba is not provided") + "The dict of HMMSegment in cppjieba is not provided.") kwargs["hmm_path"] = hmm_path if mp_path is None: raise ValueError( - "the dict of MPSegment in cppjieba is not provided") + "The dict of MPSegment in cppjieba is not provided.") kwargs["mp_path"] = mp_path if model is not None: kwargs["model"] = model @@ -149,7 +212,7 @@ def check_jieba_add_word(method): if "freq" in kwargs: freq = kwargs.get("freq") if word is None: - raise ValueError("word is not provided") + raise ValueError("word is not provided.") kwargs["word"] = word if freq is not None: check_uint32(freq) @@ -160,7 +223,7 @@ def check_jieba_add_word(method): def check_jieba_add_dict(method): - """Wrapper method to check the parameters of add dict""" + """Wrapper method to check the parameters of add dict.""" 
@wraps(method) def new_method(self, *args, **kwargs): @@ -168,8 +231,205 @@ def check_jieba_add_dict(method): if "user_dict" in kwargs: user_dict = kwargs.get("user_dict") if user_dict is None: - raise ValueError("user_dict is not provided") + raise ValueError("user_dict is not provided.") kwargs["user_dict"] = user_dict return method(self, **kwargs) return new_method + + +def check_from_dataset(method): + """A wrapper that wrap a parameter checker to the original function.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + + dataset, columns, freq_range, top_k, special_tokens, special_first = (list(args) + 6 * [None])[:6] + if "dataset" in kwargs: + dataset = kwargs.get("dataset") + if "columns" in kwargs: + columns = kwargs.get("columns") + if "freq_range" in kwargs: + freq_range = kwargs.get("freq_range") + if "top_k" in kwargs: + top_k = kwargs.get("top_k") + if "special_tokens" in kwargs: + special_tokens = kwargs.get("special_tokens") + if "special_first" in kwargs: + special_first = kwargs.get("special_first") + + if columns is None: + columns = [] + + if not isinstance(columns, list): + columns = [columns] + + for column in columns: + if not isinstance(column, str): + raise ValueError("columns need to be a list of strings.") + + if freq_range is None: + freq_range = (None, None) + + if not isinstance(freq_range, tuple) or len(freq_range) != 2: + raise ValueError("freq_range needs to be either None or a tuple of 2 integers or an int and a None.") + + for num in freq_range: + if num is not None and (not isinstance(num, int)): + raise ValueError("freq_range needs to be either None or a tuple of 2 integers or an int and a None.") + + if isinstance(freq_range[0], int) and isinstance(freq_range[1], int): + if freq_range[0] > freq_range[1] or freq_range[0] < 0: + raise ValueError("frequency range [a,b] should be 0 <= a <= b (a,b are inclusive).") + + if top_k is not None and (not isinstance(top_k, int)): + raise ValueError("top_k needs to be a 
positive integer.") + + if isinstance(top_k, int) and top_k <= 0: + raise ValueError("top_k needs to be a positive integer.") + + if special_first is None: + special_first = True + + if special_tokens is None: + special_tokens = [] + + if not isinstance(special_first, bool): + raise ValueError("special_first needs to be a boolean value.") + + check_unique_list_of_words(special_tokens, "special_tokens") + + kwargs["dataset"] = dataset + kwargs["columns"] = columns + kwargs["freq_range"] = freq_range + kwargs["top_k"] = top_k + kwargs["special_tokens"] = special_tokens + kwargs["special_first"] = special_first + + return method(self, **kwargs) + + return new_method + + +def check_ngram(method): + """A wrapper that wrap a parameter checker to the original function.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + n, left_pad, right_pad, separator = (list(args) + 4 * [None])[:4] + if "n" in kwargs: + n = kwargs.get("n") + if "left_pad" in kwargs: + left_pad = kwargs.get("left_pad") + if "right_pad" in kwargs: + right_pad = kwargs.get("right_pad") + if "separator" in kwargs: + separator = kwargs.get("separator") + + if isinstance(n, int): + n = [n] + + if not (isinstance(n, list) and n != []): + raise ValueError("n needs to be a non-empty list of positive integers.") + + for gram in n: + if not (isinstance(gram, int) and gram > 0): + raise ValueError("n in ngram needs to be a positive number.") + + if left_pad is None: + left_pad = ("", 0) + + if right_pad is None: + right_pad = ("", 0) + + if not (isinstance(left_pad, tuple) and len(left_pad) == 2 and isinstance(left_pad[0], str) and isinstance( + left_pad[1], int)): + raise ValueError("left_pad needs to be a tuple of (str, int) str is pad token and int is pad_width.") + + if not (isinstance(right_pad, tuple) and len(right_pad) == 2 and isinstance(right_pad[0], str) and isinstance( + right_pad[1], int)): + raise ValueError("right_pad needs to be a tuple of (str, int) str is pad token and int is 
pad_width.") + + if not (left_pad[1] >= 0 and right_pad[1] >= 0): + raise ValueError("padding width need to be positive numbers.") + + if separator is None: + separator = " " + + if not isinstance(separator, str): + raise ValueError("separator needs to be a string.") + + kwargs["n"] = n + kwargs["left_pad"] = left_pad + kwargs["right_pad"] = right_pad + kwargs["separator"] = separator + + return method(self, **kwargs) + + return new_method + + +def check_pair_truncate(method): + """Wrapper method to check the parameters of number of pair truncate.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + max_length = (list(args) + [None])[0] + if "max_length" in kwargs: + max_length = kwargs.get("max_length") + if max_length is None: + raise ValueError("max_length is not provided.") + + check_pos_int64(max_length) + kwargs["max_length"] = max_length + + return method(self, **kwargs) + + return new_method + + +def check_to_number(method): + """A wrapper that wraps a parameter check to the original function (ToNumber).""" + + @wraps(method) + def new_method(self, *args, **kwargs): + data_type = (list(args) + [None])[0] + if "data_type" in kwargs: + data_type = kwargs.get("data_type") + + if data_type is None: + raise ValueError("data_type is a mandatory parameter but was not provided.") + + if not isinstance(data_type, typing.Type): + raise TypeError("data_type is not a MindSpore data type.") + + if data_type not in mstype.number_type: + raise TypeError("data_type is not numeric data type.") + + kwargs["data_type"] = data_type + + return method(self, **kwargs) + + return new_method + + +def check_python_tokenizer(method): + """A wrapper that wraps a parameter check to the original function (PythonTokenizer).""" + + @wraps(method) + def new_method(self, *args, **kwargs): + tokenizer = (list(args) + [None])[0] + if "tokenizer" in kwargs: + tokenizer = kwargs.get("tokenizer") + + if tokenizer is None: + raise ValueError("tokenizer is a mandatory parameter.") + + 
if not callable(tokenizer): + raise TypeError("tokenizer is not a callable python function") + + kwargs["tokenizer"] = tokenizer + + return method(self, **kwargs) + + return new_method diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index 91fb486531..ffe711b106 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ b/mindspore/dataset/transforms/c_transforms.py @@ -15,9 +15,14 @@ """ This module c_transforms provides common operations, including OneHotOp and TypeCast. """ +from enum import IntEnum +import numpy as np + +import mindspore.common.dtype as mstype import mindspore._c_dataengine as cde -from .validators import check_num_classes, check_de_type +from .validators import check_num_classes, check_de_type, check_fill_value, check_slice_op, check_mask_op, \ + check_pad_end, check_concat_type from ..core.datatypes import mstype_to_detype @@ -35,6 +40,21 @@ class OneHot(cde.OneHotOp): super().__init__(num_classes) +class Fill(cde.FillOp): + """ + Tensor operation to create a tensor filled with passed scalar value. + The output tensor will have the same shape and type as the input tensor. + + Args: + fill_value (python types (str, bytes, int, float, or bool)) : scalar value + to fill created tensor with. + """ + + @check_fill_value + def __init__(self, fill_value): + super().__init__(cde.Tensor(np.array(fill_value))) + + class TypeCast(cde.TypeCastOp): """ Tensor operation to cast to a given MindSpore data type. @@ -48,3 +68,165 @@ class TypeCast(cde.TypeCastOp): data_type = mstype_to_detype(data_type) self.data_type = str(data_type) super().__init__(data_type) + + +class Slice(cde.SliceOp): + """ + Slice operation to extract a tensor out using the given n slices. + + The functionality of Slice is similar to NumPy indexing feature. + (Currently only rank 1 Tensors are supported) + + Args: + *slices(Variable length argument list): Maximum `n` number of arguments to slice a tensor of rank `n`. 
+ One object in slices can be one of: + 1. int: slice this index only. Negative index is supported. + 2. slice object: slice the generated indices from the slice object. Similar to `start:stop:step`. + 3. None: slice the whole dimension. Similar to `:` in python indexing. + 4. Ellipses ...: slice all dimensions between the two slices. + + Examples: + >>> # Data before + >>> # | col | + >>> # +---------+ + >>> # | [1,2,3] | + >>> # +---------| + >>> data = data.map(operations=Slice(slice(1,3))) # slice indices 1 and 2 only + >>> # Data after + >>> # | col | + >>> # +------------+ + >>> # | [1,2] | + >>> # +------------| + """ + + @check_slice_op + def __init__(self, *slices): + dim0 = slices[0] + if isinstance(dim0, int): + dim0 = [dim0] + elif dim0 is None: + dim0 = True + elif isinstance(dim0, slice): + dim0 = (dim0.start, dim0.stop, dim0.step) + elif dim0 is Ellipsis: + dim0 = True + super().__init__(dim0) + + +class Relational(IntEnum): + EQ = 0 + NE = 1 + GT = 2 + GE = 3 + LT = 4 + LE = 5 + + +DE_C_RELATIONAL = {Relational.EQ: cde.RelationalOp.EQ, + Relational.NE: cde.RelationalOp.NE, + Relational.GT: cde.RelationalOp.GT, + Relational.GE: cde.RelationalOp.GE, + Relational.LT: cde.RelationalOp.LT, + Relational.LE: cde.RelationalOp.LE} + + +class Mask(cde.MaskOp): + """ + Mask content of the input tensor with the given predicate. + Any element of the tensor that matches the predicate will be evaluated to True, otherwise False. + + Args: + operator (Relational): One of the relational operator EQ, NE LT, GT, LE or GE + constant (python types (str, int, float, or bool): constant to be compared to. + Constant will be casted to the type of the input tensor + dtype (optional, mindspore.dtype): type of the generated mask. 
Default to bool + + Examples: + >>> # Data before + >>> # | col1 | + >>> # +---------+ + >>> # | [1,2,3] | + >>> # +---------+ + >>> data = data.map(operations=Mask(Relational.EQ, 2)) + >>> # Data after + >>> # | col1 | + >>> # +--------------------+ + >>> # | [False,True,False] | + >>> # +--------------------+ + """ + + @check_mask_op + def __init__(self, operator, constant, dtype=mstype.bool_): + dtype = mstype_to_detype(dtype) + constant = cde.Tensor(np.array(constant)) + super().__init__(DE_C_RELATIONAL[operator], constant, dtype) + + +class PadEnd(cde.PadEndOp): + """ + Pad input tensor according to `pad_shape`, need to have same rank. + + Args: + pad_shape (list of `int`): list on integers representing the shape needed. Dimensions that set to `None` will + not be padded (i.e., original dim will be used). Shorter dimensions will truncate the values. + pad_value (python types (str, bytes, int, float, or bool), optional): value used to pad. Default to 0 or empty + string in case of Tensors of strings. + + Examples: + >>> # Data before + >>> # | col | + >>> # +---------+ + >>> # | [1,2,3] | + >>> # +---------| + >>> data = data.map(operations=PadEnd(pad_shape=[4], pad_value=10)) + >>> # Data after + >>> # | col | + >>> # +------------+ + >>> # | [1,2,3,10] | + >>> # +------------| + """ + + @check_pad_end + def __init__(self, pad_shape, pad_value=None): + if pad_value is not None: + pad_value = cde.Tensor(np.array(pad_value)) + super().__init__(cde.TensorShape(pad_shape), pad_value) + + +class Concatenate(cde.ConcatenateOp): + """ + Tensor operation to prepend and append to a tensor. + + Args: + axis (int, optional): axis to concatenate the tensors along (Default=0). + prepend (np.array, optional): numpy array to be prepended to the already concatenated tensors (Default=None). + append (np.array, optional): numpy array to be appended to the already concatenated tensors (Default=None). 
+ """ + + @check_concat_type + def __init__(self, axis=0, prepend=None, append=None): + if prepend is not None: + prepend = cde.Tensor(np.array(prepend)) + if append is not None: + append = cde.Tensor(np.array(append)) + super().__init__(axis, prepend, append) + + +class Duplicate(cde.DuplicateOp): + """ + Duplicate the input tensor to a new output tensor. The input tensor is carried over to the output list. + + Examples: + >>> # Data before + >>> # | x | + >>> # +---------+ + >>> # | [1,2,3] | + >>> # +---------+ + >>> data = data.map(input_columns=["x"], operations=Duplicate(), + >>> output_columns=["x", "y"], columns_order=["x", "y"]) + >>> # Data after + >>> # | x | y | + >>> # +---------+---------+ + >>> # | [1,2,3] | [1,2,3] | + >>> # +---------+---------+ + """ diff --git a/mindspore/dataset/transforms/validators.py b/mindspore/dataset/transforms/validators.py index 5572e5285e..6b5760e0c5 100644 --- a/mindspore/dataset/transforms/validators.py +++ b/mindspore/dataset/transforms/validators.py @@ -15,8 +15,9 @@ """Validators for TensorOps. 
""" from functools import wraps -from mindspore._c_expression import typing +import numpy as np +from mindspore._c_expression import typing # POS_INT_MIN is used to limit values from starting from 0 POS_INT_MIN = 1 @@ -159,6 +160,25 @@ def check_num_classes(method): return new_method +def check_fill_value(method): + """Wrapper method to check the parameters of fill value.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + fill_value = (list(args) + [None])[0] + if "fill_value" in kwargs: + fill_value = kwargs.get("fill_value") + if fill_value is None: + raise ValueError("fill_value is not provided.") + if not isinstance(fill_value, (str, float, bool, int, bytes)): + raise TypeError("fill_value must be either a primitive python str, float, bool, bytes or int") + kwargs["fill_value"] = fill_value + + return method(self, **kwargs) + + return new_method + + def check_de_type(method): """Wrapper method to check the parameters of data type.""" @@ -177,3 +197,130 @@ def check_de_type(method): return method(self, **kwargs) return new_method + + +def check_slice_op(method): + """Wrapper method to check the parameters of slice.""" + + @wraps(method) + def new_method(self, *args): + for i, arg in enumerate(args): + if arg is not None and arg is not Ellipsis and not isinstance(arg, (int, slice, list)): + raise TypeError("Indexing of dim " + str(i) + "is not of valid type") + if isinstance(arg, list): + for a in arg: + if not isinstance(a, int): + raise TypeError("Index " + a + " is not an int") + return method(self, *args) + + return new_method + + +def check_mask_op(method): + """Wrapper method to check the parameters of mask.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + operator, constant, dtype = (list(args) + 3 * [None])[:3] + if "operator" in kwargs: + operator = kwargs.get("operator") + if "constant" in kwargs: + constant = kwargs.get("constant") + if "dtype" in kwargs: + dtype = kwargs.get("dtype") + + if operator is None: + raise 
ValueError("operator is not provided.") + + if constant is None: + raise ValueError("constant is not provided.") + + from .c_transforms import Relational + if not isinstance(operator, Relational): + raise TypeError("operator is not a Relational operator enum.") + + if not isinstance(constant, (str, float, bool, int, bytes)): + raise TypeError("constant must be either a primitive python str, float, bool, bytes or int") + + if dtype is not None: + if not isinstance(dtype, typing.Type): + raise TypeError("dtype is not a MindSpore data type.") + kwargs["dtype"] = dtype + + kwargs["operator"] = operator + kwargs["constant"] = constant + + return method(self, **kwargs) + + return new_method + + +def check_pad_end(method): + """Wrapper method to check the parameters of PadEnd.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + pad_shape, pad_value = (list(args) + 2 * [None])[:2] + if "pad_shape" in kwargs: + pad_shape = kwargs.get("pad_shape") + if "pad_value" in kwargs: + pad_value = kwargs.get("pad_value") + + if pad_shape is None: + raise ValueError("pad_shape is not provided.") + + if pad_value is not None: + if not isinstance(pad_value, (str, float, bool, int, bytes)): + raise TypeError("pad_value must be either a primitive python str, float, bool, int or bytes") + kwargs["pad_value"] = pad_value + + if not isinstance(pad_shape, list): + raise TypeError("pad_shape must be a list") + + for dim in pad_shape: + if dim is not None: + if isinstance(dim, int): + check_pos_int64(dim) + else: + raise TypeError("a value in the list is not an integer.") + + kwargs["pad_shape"] = pad_shape + + return method(self, **kwargs) + + return new_method + + +def check_concat_type(method): + """Wrapper method to check the parameters of concatenation op.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + axis, prepend, append = (list(args) + 3 * [None])[:3] + if "prepend" in kwargs: + prepend = kwargs.get("prepend") + if "append" in kwargs: + append = 
kwargs.get("append") + if "axis" in kwargs: + axis = kwargs.get("axis") + + if axis is not None: + if not isinstance(axis, int): + raise TypeError("axis type is not valid, must be an integer.") + if axis not in (0, -1): + raise ValueError("only 1D concatenation supported.") + kwargs["axis"] = axis + + if prepend is not None: + if not isinstance(prepend, (type(None), np.ndarray)): + raise ValueError("prepend type is not valid, must be None for no prepend tensor or a numpy array.") + kwargs["prepend"] = prepend + + if append is not None: + if not isinstance(append, (type(None), np.ndarray)): + raise ValueError("append type is not valid, must be None for no append tensor or a numpy array.") + kwargs["append"] = append + + return method(self, **kwargs) + + return new_method diff --git a/mindspore/dataset/transforms/vision/c_transforms.py b/mindspore/dataset/transforms/vision/c_transforms.py index 5676a8408c..c2497f9629 100644 --- a/mindspore/dataset/transforms/vision/c_transforms.py +++ b/mindspore/dataset/transforms/vision/c_transforms.py @@ -45,7 +45,7 @@ import mindspore._c_dataengine as cde from .utils import Inter, Border from .validators import check_prob, check_crop, check_resize_interpolation, check_random_resize_crop, \ check_normalize_c, check_random_crop, check_random_color_adjust, check_random_rotation, \ - check_resize, check_rescale, check_pad, check_cutout, check_uniform_augment_cpp + check_resize, check_rescale, check_pad, check_cutout, check_uniform_augment_cpp, check_bounding_box_augment_cpp DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR, Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR, @@ -149,6 +149,54 @@ class RandomCrop(cde.RandomCropOp): super().__init__(*size, *padding, border_type, pad_if_needed, *fill_value) +class RandomCropWithBBox(cde.RandomCropWithBBoxOp): + """ + Crop the input image at a random location and adjust bounding boxes for crop area + + Args: + size (int or sequence): The output size of 
the cropped image. + If size is an int, a square crop of size (size, size) is returned. + If size is a sequence of length 2, it should be (height, width). + padding (int or sequence, optional): The number of pixels to pad the image (default=None). + If padding is not None, pad image firstly with padding values. + If a single number is provided, it pads all borders with this value. + If a tuple or list of 2 values are provided, it pads the (left and top) + with the first value and (right and bottom) with the second value. + If 4 values are provided as a list or tuple,it pads the left, top, right and bottom respectively. + pad_if_needed (bool, optional): Pad the image if either side is smaller than + the given output size (default=False). + fill_value (int or tuple, optional): The pixel intensity of the borders if + the padding_mode is Border.CONSTANT (default=0). If it is a 3-tuple, it is used to + fill R, G, B channels respectively. + padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of + [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC]. + + - Border.CONSTANT, means it fills the border with constant values. + + - Border.EDGE, means it pads with the last value on the edge. + + - Border.REFLECT, means it reflects the values on the edge omitting the last + value of edge. + + - Border.SYMMETRIC, means it reflects the values on the edge repeating the last + value of edge. 
+ """ + + @check_random_crop + def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT): + self.size = size + self.padding = padding + self.pad_if_needed = pad_if_needed + self.fill_value = fill_value + self.padding_mode = padding_mode.value + if padding is None: + padding = (0, 0, 0, 0) + if isinstance(fill_value, int): # temporary fix + fill_value = tuple([fill_value] * 3) + border_type = DE_C_BORDER_TYPE[padding_mode] + super().__init__(*size, *padding, border_type, pad_if_needed, *fill_value) + + class RandomHorizontalFlip(cde.RandomHorizontalFlipOp): """ Flip the input image horizontally, randomly with a given probability. @@ -163,6 +211,21 @@ class RandomHorizontalFlip(cde.RandomHorizontalFlipOp): super().__init__(prob) +class RandomHorizontalFlipWithBBox(cde.RandomHorizontalFlipWithBBoxOp): + """ + Flip the input image horizontally, randomly with a given probability. + Maintains data integrity by also flipping bounding boxes in an object detection pipeline. + + Args: + prob (float): Probability of the image being flipped (default=0.5). + """ + + @check_prob + def __init__(self, prob=0.5): + self.prob = prob + super().__init__(prob) + + class RandomVerticalFlip(cde.RandomVerticalFlipOp): """ Flip the input image vertically, randomly with a given probability. @@ -177,6 +240,38 @@ class RandomVerticalFlip(cde.RandomVerticalFlipOp): super().__init__(prob) +class RandomVerticalFlipWithBBox(cde.RandomVerticalFlipWithBBoxOp): + """ + Flip the input image vertically, randomly with a given probability and adjust bounding boxes as well + + Args: + prob (float, optional): Probability of the image being flipped (default=0.5). + """ + + @check_prob + def __init__(self, prob=0.5): + self.prob = prob + super().__init__(prob) + + +class BoundingBoxAugment(cde.BoundingBoxAugmentOp): + """ + Apply a given image transform on a random selection of bounding box regions + of a given image. 
+ + Args: + transform: C++ transformation function to be applied on random selection + of bounding box regions of a given image. + ratio (float, optional): Ratio of bounding boxes to apply augmentation on. + Range: [0,1] (default=0.3). + """ + @check_bounding_box_augment_cpp + def __init__(self, transform, ratio=0.3): + self.ratio = ratio + self.transform = transform + super().__init__(transform, ratio) + + class Resize(cde.ResizeOp): """ Resize the input image to the given size. @@ -207,6 +302,42 @@ class Resize(cde.ResizeOp): super().__init__(*size, interpoltn) +class RandomResizedCropWithBBox(cde.RandomCropAndResizeWithBBoxOp): + """ + Crop the input image to a random size and aspect ratio and adjust the Bounding Boxes accordingly + + Args: + size (int or sequence): The size of the output image. + If size is an int, a square crop of size (size, size) is returned. + If size is a sequence of length 2, it should be (height, width). + scale (tuple, optional): Range (min, max) of respective size of the original + size to be cropped (default=(0.08, 1.0)). + ratio (tuple, optional): Range (min, max) of aspect ratio to be cropped + (default=(3. / 4., 4. / 3.)). + interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR). + It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. + + - Inter.BILINEAR, means interpolation method is bilinear interpolation. + + - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. + + - Inter.BICUBIC, means interpolation method is bicubic interpolation. + + max_attempts (int, optional): The maximum number of attempts to propose a valid + crop_area (default=10). If exceeded, fall back to use center_crop instead. + """ + @check_random_resize_crop + def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. 
/ 3.), + interpolation=Inter.BILINEAR, max_attempts=10): + self.size = size + self.scale = scale + self.ratio = ratio + self.interpolation = interpolation + self.max_attempts = max_attempts + interpoltn = DE_C_INTER_MODE[interpolation] + super().__init__(*size, *scale, *ratio, interpoltn, max_attempts) + + class RandomResizedCrop(cde.RandomCropAndResizeOp): """ Crop the input image to a random size and aspect ratio. diff --git a/mindspore/dataset/transforms/vision/py_transforms.py b/mindspore/dataset/transforms/vision/py_transforms.py index ee5a4b09fd..b252c3434b 100644 --- a/mindspore/dataset/transforms/vision/py_transforms.py +++ b/mindspore/dataset/transforms/vision/py_transforms.py @@ -606,7 +606,7 @@ class RandomRotation: class RandomOrder: """ - Perform a series of transforms to the input PIL image in a random oreder. + Perform a series of transforms to the input PIL image in a random order. Args: transforms (list): List of the transformations to be applied. @@ -1087,7 +1087,7 @@ class RandomAffine: The horizontal and vertical shift is selected randomly from the range: (-tx*width, tx*width) and (-ty*height, ty*height), respectively. If None, no translations gets applied. - scale (sequence, optional): Scaling factor interval (default=None, riginal scale is used). + scale (sequence, optional): Scaling factor interval (default=None, original scale is used). shear (int or float or sequence, optional): Range of shear factor (default=None). If a number 'shear', then a shear parallel to the x axis in the range of (-shear, +shear) is applied. 
If a tuple or list of size 2, then a shear parallel to the x axis in the range of (shear[0], shear[1]) diff --git a/mindspore/dataset/transforms/vision/py_transforms_util.py b/mindspore/dataset/transforms/vision/py_transforms_util.py index ac77624bf8..d076109ff4 100644 --- a/mindspore/dataset/transforms/vision/py_transforms_util.py +++ b/mindspore/dataset/transforms/vision/py_transforms_util.py @@ -455,6 +455,9 @@ def random_crop(img, size, padding, pad_if_needed, fill_value, padding_mode): def _input_to_factor(img, size): img_width, img_height = img.size height, width = size + if height > img_height or width > img_width: + raise ValueError("Crop size {} is larger than input image size {}".format(size, (img_height, img_width))) + if width == img_width and height == img_height: return 0, 0, img_height, img_width @@ -551,26 +554,28 @@ def adjust_hue(img, hue_factor): Returns: img (PIL Image), Hue adjusted image. """ - if not -0.5 <= hue_factor <= 0.5: - raise ValueError('hue_factor {} is not in [-0.5, 0.5].'.format(hue_factor)) + image = img + image_hue_factor = hue_factor + if not -0.5 <= image_hue_factor <= 0.5: + raise ValueError('image_hue_factor {} is not in [-0.5, 0.5].'.format(image_hue_factor)) - if not is_pil(img): - raise TypeError(augment_error_message.format(type(img))) + if not is_pil(image): + raise TypeError(augment_error_message.format(type(image))) - input_mode = img.mode - if input_mode in {'L', '1', 'I', 'F'}: - return img + mode = image.mode + if mode in {'L', '1', 'I', 'F'}: + return image - h, s, v = img.convert('HSV').split() + hue, saturation, value = img.convert('HSV').split() - np_h = np.array(h, dtype=np.uint8) + np_hue = np.array(hue, dtype=np.uint8) with np.errstate(over='ignore'): - np_h += np.uint8(hue_factor * 255) - h = Image.fromarray(np_h, 'L') + np_hue += np.uint8(image_hue_factor * 255) + hue = Image.fromarray(np_hue, 'L') - img = Image.merge('HSV', (h, s, v)).convert(input_mode) - return img + image = Image.merge('HSV', (hue, 
saturation, value)).convert(mode) + return image def to_type(img, output_type): diff --git a/mindspore/dataset/transforms/vision/validators.py b/mindspore/dataset/transforms/vision/validators.py index 20239232b5..b49116349b 100644 --- a/mindspore/dataset/transforms/vision/validators.py +++ b/mindspore/dataset/transforms/vision/validators.py @@ -852,6 +852,32 @@ def check_uniform_augment_cpp(method): return new_method +def check_bounding_box_augment_cpp(method): + """Wrapper method to check the parameters of BoundingBoxAugment cpp op.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + transform, ratio = (list(args) + 2 * [None])[:2] + if "transform" in kwargs: + transform = kwargs.get("transform") + if "ratio" in kwargs: + ratio = kwargs.get("ratio") + if not isinstance(ratio, float) and not isinstance(ratio, int): + raise ValueError("Ratio should be an int or float.") + if ratio is not None: + check_value(ratio, [0., 1.]) + kwargs["ratio"] = ratio + else: + ratio = 0.3 + if not isinstance(transform, TensorOp): + raise ValueError("Transform can only be a C++ operation.") + kwargs["transform"] = transform + kwargs["ratio"] = ratio + return method(self, **kwargs) + + return new_method + + def check_uniform_augment_py(method): """Wrapper method to check the parameters of python UniformAugment op.""" diff --git a/mindspore/mindrecord/__init__.py b/mindspore/mindrecord/__init__.py index 31fb801c46..ee23b68cb6 100644 --- a/mindspore/mindrecord/__init__.py +++ b/mindspore/mindrecord/__init__.py @@ -29,9 +29,11 @@ from .common.exceptions import * from .shardutils import SUCCESS, FAILED from .tools.cifar10_to_mr import Cifar10ToMR from .tools.cifar100_to_mr import Cifar100ToMR +from .tools.csv_to_mr import CsvToMR from .tools.imagenet_to_mr import ImageNetToMR from .tools.mnist_to_mr import MnistToMR +from .tools.tfrecord_to_mr import TFRecordToMR __all__ = ['FileWriter', 'FileReader', 'MindPage', - 'Cifar10ToMR', 'Cifar100ToMR', 'ImageNetToMR', 'MnistToMR', + 
'Cifar10ToMR', 'Cifar100ToMR', 'CsvToMR', 'ImageNetToMR', 'MnistToMR', 'TFRecordToMR', 'SUCCESS', 'FAILED'] diff --git a/mindspore/mindrecord/tools/csv_to_mr.py b/mindspore/mindrecord/tools/csv_to_mr.py new file mode 100644 index 0000000000..4bc8f37b47 --- /dev/null +++ b/mindspore/mindrecord/tools/csv_to_mr.py @@ -0,0 +1,168 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Csv format convert tool for MindRecord. +""" +from importlib import import_module +import os + +from mindspore import log as logger +from ..filewriter import FileWriter +from ..shardutils import check_filename + +try: + pd = import_module("pandas") +except ModuleNotFoundError: + pd = None + +__all__ = ['CsvToMR'] + +class CsvToMR: + """ + Class is for transformation from csv to MindRecord. + + Args: + source (str): the file path of csv. + destination (str): the MindRecord file path to transform into. + columns_list(list[str], optional): List of columns to be read(default=None). + partition_number (int, optional): partition size (default=1). + + Raises: + ValueError: If source, destination, partition_number is invalid. + RuntimeError: If columns_list is invalid. 
+ """ + + def __init__(self, source, destination, columns_list=None, partition_number=1): + if not pd: + raise Exception("Module pandas is not found, please use pip install it.") + if isinstance(source, str): + check_filename(source) + self.source = source + else: + raise ValueError("The parameter source must be str.") + + self._check_columns(columns_list, "columns_list") + self.columns_list = columns_list + + if isinstance(destination, str): + check_filename(destination) + self.destination = destination + else: + raise ValueError("The parameter destination must be str.") + + if partition_number is not None: + if not isinstance(partition_number, int): + raise ValueError("The parameter partition_number must be int") + self.partition_number = partition_number + else: + raise ValueError("The parameter partition_number must be int") + + self.writer = FileWriter(self.destination, self.partition_number) + + def _check_columns(self, columns, columns_name): + if columns: + if isinstance(columns, list): + for col in columns: + if not isinstance(col, str): + raise ValueError("The parameter {} must be list of str.".format(columns_name)) + else: + raise ValueError("The parameter {} must be list of str.".format(columns_name)) + + def _get_schema(self, df): + """ + Construct schema from df columns + """ + if self.columns_list: + for col in self.columns_list: + if col not in df.columns: + raise RuntimeError("The parameter columns_list is illegal, column {} does not exist.".format(col)) + else: + self.columns_list = df.columns + + schema = {} + for col in self.columns_list: + if str(df[col].dtype) == 'int64': + schema[col] = {"type": "int64"} + elif str(df[col].dtype) == 'float64': + schema[col] = {"type": "float64"} + elif str(df[col].dtype) == 'bool': + schema[col] = {"type": "int32"} + else: + schema[col] = {"type": "string"} + if not schema: + raise RuntimeError("Failed to generate schema from csv file.") + return schema + + def _get_row_of_csv(self, df): + """Get row data 
from csv file.""" + for _, r in df.iterrows(): + row = {} + for col in self.columns_list: + if str(df[col].dtype) == 'bool': + row[col] = int(r[col]) + else: + row[col] = r[col] + yield row + + def transform(self): + """ + Executes transformation from csv to MindRecord. + + Returns: + SUCCESS/FAILED, whether successfully written into MindRecord. + """ + if not os.path.exists(self.source): + raise IOError("Csv file {} does not exist.".format(self.source)) + + pd.set_option('display.max_columns', None) + df = pd.read_csv(self.source) + + csv_schema = self._get_schema(df) + + logger.info("transformed MindRecord schema is: {}".format(csv_schema)) + + # set the header size + self.writer.set_header_size(1 << 24) + + # set the page size + self.writer.set_page_size(1 << 26) + + # create the schema + self.writer.add_schema(csv_schema, "csv_schema") + + # add the index + self.writer.add_index(list(self.columns_list)) + + csv_iter = self._get_row_of_csv(df) + batch_size = 256 + transform_count = 0 + while True: + data_list = [] + try: + for _ in range(batch_size): + data_list.append(csv_iter.__next__()) + transform_count += 1 + self.writer.write_raw_data(data_list) + logger.info("transformed {} record...".format(transform_count)) + except StopIteration: + if data_list: + self.writer.write_raw_data(data_list) + logger.info( + "transformed {} record...".format(transform_count)) + break + + ret = self.writer.commit() + + return ret diff --git a/mindspore/mindrecord/tools/tfrecord_to_mr.py b/mindspore/mindrecord/tools/tfrecord_to_mr.py new file mode 100644 index 0000000000..e8c52001fd --- /dev/null +++ b/mindspore/mindrecord/tools/tfrecord_to_mr.py @@ -0,0 +1,266 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +TFRecord convert tool for MindRecord +""" + +from importlib import import_module +from string import punctuation +import numpy as np + +from mindspore import log as logger +from ..filewriter import FileWriter +from ..shardutils import check_filename + +try: + tf = import_module("tensorflow") # just used to convert tfrecord to mindrecord +except ModuleNotFoundError: + tf = None + +__all__ = ['TFRecordToMR'] + +SupportedTensorFlowVersion = '2.1.0' + +def _cast_type(value): + """ + Cast complex data type to basic datatype for MindRecord to recognize. + + Args: + value: the TFRecord data type + + Returns: + str, which is MindRecord field type. 
+ """ + tf_type_to_mr_type = {tf.string: "string", + tf.int8: "int32", + tf.int16: "int32", + tf.int32: "int32", + tf.int64: "int64", + tf.uint8: "int32", + tf.uint16: "int32", + tf.uint32: "int64", + tf.uint64: "int64", + tf.float16: "float32", + tf.float32: "float32", + tf.float64: "float64", + tf.double: "float64", + tf.bool: "int32"} + unsupport_tf_type_to_mr_type = {tf.complex64: "None", + tf.complex128: "None"} + + if value in tf_type_to_mr_type: + return tf_type_to_mr_type[value] + + raise ValueError("Type " + value + " is not supported in MindRecord.") + +def _cast_string_type_to_np_type(value): + """Cast string type like: int32/int64/float32/float64 to np.int32/np.int64/np.float32/np.float64""" + string_type_to_np_type = {"int32": np.int32, + "int64": np.int64, + "float32": np.float32, + "float64": np.float64} + + if value in string_type_to_np_type: + return string_type_to_np_type[value] + + raise ValueError("Type " + value + " is not supported cast to numpy type in MindRecord.") + +def _cast_name(key): + """ + Cast schema names which contain special characters to valid names. + + Here special characters means any characters in + '!"#$%&\'()*+,./:;<=>?@[\\]^`{|}~ + Valid names can only contain a-z, A-Z, and 0-9 and _ + + Args: + key (str): original key that might contain special characters. + + Returns: + str, casted key that replaces the special characters with "_". i.e. if + key is "a b" then returns "a_b". + """ + special_symbols = set('{}{}'.format(punctuation, ' ')) + special_symbols.remove('_') + new_key = ['_' if x in special_symbols else x for x in key] + casted_key = ''.join(new_key) + return casted_key + +class TFRecordToMR: + """ + Class is for transformation from TFRecord to MindRecord. + + Args: + source (str): the TFRecord file to be transformed. + destination (str): the MindRecord file path to transform into. + feature_dict (dict): a dictionary that states the feature type, i.e.
+ feature_dict = {"xxxx": tf.io.FixedLenFeature([], tf.string), \ + "yyyy": tf.io.FixedLenFeature([], tf.int64)} + + **Follow case which uses VarLenFeature not support** + + feature_dict = {"context": {"xxxx": tf.io.FixedLenFeature([], tf.string), \ + "yyyy": tf.io.VarLenFeature(tf.int64)}, \ + "sequence": {"zzzz": tf.io.FixedLenSequenceFeature([], tf.float32)}} + bytes_fields (list): the bytes fields which are in feature_dict. + + Raises: + ValueError: If parameter is invalid. + Exception: when tensorflow module not found or version is not correct. + """ + def __init__(self, source, destination, feature_dict, bytes_fields=None): + if not tf: + raise Exception("Module tensorflow is not found, please use pip install it.") + + if tf.__version__ < SupportedTensorFlowVersion: + raise Exception("Module tensorflow version must be greater or equal {}.".format(SupportedTensorFlowVersion)) + + if not isinstance(source, str): + raise ValueError("Parameter source must be string.") + check_filename(source) + + if not isinstance(destination, str): + raise ValueError("Parameter destination must be string.") + check_filename(destination) + + self.source = source + self.destination = destination + + if feature_dict is None or not isinstance(feature_dict, dict): + raise ValueError("Parameter feature_dict is None or not dict.") + + for key, val in feature_dict.items(): + if not isinstance(val, tf.io.FixedLenFeature): + raise ValueError("Parameter feature_dict: {} only support FixedLenFeature.".format(feature_dict)) + + self.feature_dict = feature_dict + + bytes_fields_list = [] + if bytes_fields: + if not isinstance(bytes_fields, list): + raise ValueError("Parameter bytes_fields: {} must be list(str).".format(bytes_fields)) + for item in bytes_fields: + if not isinstance(item, str): + raise ValueError("Parameter bytes_fields's item: {} is not str.".format(item)) + + if item not in self.feature_dict: + raise ValueError("Parameter bytes_fields's item: {} is not in feature_dict: {}." 
+ .format(item, self.feature_dict)) + + if not isinstance(self.feature_dict[item].shape, list): + raise ValueError("Parameter feature_dict[{}].shape should be a list.".format(item)) + + casted_bytes_field = _cast_name(item) + bytes_fields_list.append(casted_bytes_field) + + self.bytes_fields_list = bytes_fields_list + self.scalar_set = set() + self.list_set = set() + + mindrecord_schema = {} + for key, val in self.feature_dict.items(): + if not val.shape: + self.scalar_set.add(_cast_name(key)) + if key in self.bytes_fields_list: + mindrecord_schema[_cast_name(key)] = {"type": "bytes"} + else: + mindrecord_schema[_cast_name(key)] = {"type": _cast_type(val.dtype)} + else: + if len(val.shape) != 1: + raise ValueError("Parameter len(feature_dict[{}].shape) should be 1.") + if val.shape[0] < 1: + raise ValueError("Parameter feature_dict[{}].shape[0] should > 0".format(key)) + if val.dtype == tf.string: + raise ValueError("Parameter feature_dict[{}].dtype is tf.string which shape[0] \ + is not None.
It is not supported.".format(key)) + self.list_set.add(_cast_name(key)) + mindrecord_schema[_cast_name(key)] = {"type": _cast_type(val.dtype), "shape": [val.shape[0]]} + self.mindrecord_schema = mindrecord_schema + + def _parse_record(self, example): + """Returns features for a single example""" + features = tf.io.parse_single_example(example, features=self.feature_dict) + return features + + def _get_data_when_scalar_field(self, ms_dict, cast_key, key, val): + """put data in ms_dict when field type is string""" + if isinstance(val.numpy(), (np.ndarray, list)): + raise ValueError("The response key: {}, value: {} from TFRecord should be a scalar.".format(key, val)) + if self.feature_dict[key].dtype == tf.string: + if cast_key in self.bytes_fields_list: + ms_dict[cast_key] = val.numpy() + else: + ms_dict[cast_key] = str(val.numpy(), encoding="utf-8") + elif _cast_type(self.feature_dict[key].dtype).startswith("int"): + ms_dict[cast_key] = int(val.numpy()) + else: + ms_dict[cast_key] = float(val.numpy()) + + def tfrecord_iterator(self): + """Yield a dict with key to be fields in schema, and value to be data.""" + dataset = tf.data.TFRecordDataset(self.source) + dataset = dataset.map(self._parse_record) + iterator = dataset.__iter__() + index_id = 0 + try: + for features in iterator: + ms_dict = {} + index_id = index_id + 1 + for key, val in features.items(): + cast_key = _cast_name(key) + if key in self.scalar_set: + self._get_data_when_scalar_field(ms_dict, cast_key, key, val) + else: + if not isinstance(val.numpy(), np.ndarray) and not isinstance(val.numpy(), list): + raise ValueError("The response key: {}, value: {} from TFRecord should be a ndarray or list."
+ .format(key, val)) + # list set + ms_dict[cast_key] = \ + np.asarray(val, _cast_string_type_to_np_type(self.mindrecord_schema[cast_key]["type"])) + yield ms_dict + except tf.errors.InvalidArgumentError: + raise ValueError("TFRecord feature_dict parameter error.") + + def transform(self): + """ + Executes transformation from TFRecord to MindRecord. + + Returns: + SUCCESS/FAILED, whether successfully written into MindRecord. + """ + writer = FileWriter(self.destination) + logger.info("Transformed MindRecord schema is: {}, TFRecord feature dict is: {}" + .format(self.mindrecord_schema, self.feature_dict)) + + writer.add_schema(self.mindrecord_schema, "TFRecord to MindRecord") + + tf_iter = self.tfrecord_iterator() + batch_size = 256 + + transform_count = 0 + while True: + data_list = [] + try: + for _ in range(batch_size): + data_list.append(tf_iter.__next__()) + transform_count += 1 + + writer.write_raw_data(data_list) + logger.info("Transformed {} records...".format(transform_count)) + except StopIteration: + if data_list: + writer.write_raw_data(data_list) + logger.info("Transformed {} records...".format(transform_count)) + break + return writer.commit() diff --git a/mindspore/model_zoo/mobilenetV2.py b/mindspore/model_zoo/mobilenetV2.py deleted file mode 100644 index df35c5f369..0000000000 --- a/mindspore/model_zoo/mobilenetV2.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
-# ============================================================================ -"""MobileNetV2 model define""" -import numpy as np -import mindspore.nn as nn -from mindspore.ops import operations as P -from mindspore.ops.operations import TensorAdd -from mindspore import Parameter, Tensor -from mindspore.common.initializer import initializer - -__all__ = ['mobilenet_v2'] - - -def _make_divisible(v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class GlobalAvgPooling(nn.Cell): - """ - Global avg pooling definition. - - Args: - - Returns: - Tensor, output tensor. - - Examples: - >>> GlobalAvgPooling() - """ - - def __init__(self): - super(GlobalAvgPooling, self).__init__() - self.mean = P.ReduceMean(keep_dims=False) - - def construct(self, x): - x = self.mean(x, (2, 3)) - return x - - -class DepthwiseConv(nn.Cell): - """ - Depthwise Convolution warpper definition. - - Args: - in_planes (int): Input channel. - kernel_size (int): Input kernel size. - stride (int): Stride size. - pad_mode (str): pad mode in (pad, same, valid) - channel_multiplier (int): Output channel multiplier - has_bias (bool): has bias or not - - Returns: - Tensor, output tensor. 
- - Examples: - >>> DepthwiseConv(16, 3, 1, 'pad', 1, channel_multiplier=1) - """ - - def __init__(self, in_planes, kernel_size, stride, pad_mode, pad, channel_multiplier=1, has_bias=False): - super(DepthwiseConv, self).__init__() - self.has_bias = has_bias - self.in_channels = in_planes - self.channel_multiplier = channel_multiplier - self.out_channels = in_planes * channel_multiplier - self.kernel_size = (kernel_size, kernel_size) - self.depthwise_conv = P.DepthwiseConv2dNative(channel_multiplier=channel_multiplier, - kernel_size=self.kernel_size, - stride=stride, pad_mode=pad_mode, pad=pad) - self.bias_add = P.BiasAdd() - weight_shape = [channel_multiplier, in_planes, *self.kernel_size] - self.weight = Parameter(initializer('ones', weight_shape), name='weight') - - if has_bias: - bias_shape = [channel_multiplier * in_planes] - self.bias = Parameter(initializer('zeros', bias_shape), name='bias') - else: - self.bias = None - - def construct(self, x): - output = self.depthwise_conv(x, self.weight) - if self.has_bias: - output = self.bias_add(output, self.bias) - return output - - -class ConvBNReLU(nn.Cell): - """ - Convolution/Depthwise fused with Batchnorm and ReLU block definition. - - Args: - in_planes (int): Input channel. - out_planes (int): Output channel. - kernel_size (int): Input kernel size. - stride (int): Stride size for the first convolutional layer. Default: 1. - groups (int): channel group. Convolution is 1 while Depthiwse is input channel. Default: 1. - - Returns: - Tensor, output tensor. 
- - Examples: - >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1) - """ - - def __init__(self, platform, in_planes, out_planes, kernel_size=3, stride=1, groups=1): - super(ConvBNReLU, self).__init__() - padding = (kernel_size - 1) // 2 - if groups == 1: - conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=padding) - else: - if platform == "Ascend": - conv = DepthwiseConv(in_planes, kernel_size, stride, pad_mode='pad', pad=padding) - elif platform == "GPU": - conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, - group=in_planes, pad_mode='pad', padding=padding) - - layers = [conv, nn.BatchNorm2d(out_planes), nn.ReLU6()] - self.features = nn.SequentialCell(layers) - - def construct(self, x): - output = self.features(x) - return output - - -class InvertedResidual(nn.Cell): - """ - Mobilenetv2 residual block definition. - - Args: - inp (int): Input channel. - oup (int): Output channel. - stride (int): Stride size for the first convolutional layer. Default: 1. - expand_ratio (int): expand ration of input channel - - Returns: - Tensor, output tensor. 
- - Examples: - >>> ResidualBlock(3, 256, 1, 1) - """ - - def __init__(self, platform, inp, oup, stride, expand_ratio): - super(InvertedResidual, self).__init__() - assert stride in [1, 2] - - hidden_dim = int(round(inp * expand_ratio)) - self.use_res_connect = stride == 1 and inp == oup - - layers = [] - if expand_ratio != 1: - layers.append(ConvBNReLU(platform, inp, hidden_dim, kernel_size=1)) - layers.extend([ - # dw - ConvBNReLU(platform, hidden_dim, hidden_dim, - stride=stride, groups=hidden_dim), - # pw-linear - nn.Conv2d(hidden_dim, oup, kernel_size=1, - stride=1, has_bias=False), - nn.BatchNorm2d(oup), - ]) - self.conv = nn.SequentialCell(layers) - self.add = TensorAdd() - self.cast = P.Cast() - - def construct(self, x): - identity = x - x = self.conv(x) - if self.use_res_connect: - return self.add(identity, x) - return x - - -class MobileNetV2(nn.Cell): - """ - MobileNetV2 architecture. - - Args: - class_num (Cell): number of classes. - width_mult (int): Channels multiplier for round to 8/16 and others. Default is 1. - has_dropout (bool): Is dropout used. Default is false - inverted_residual_setting (list): Inverted residual settings. Default is None - round_nearest (list): Channel round to . Default is 8 - Returns: - Tensor, output tensor. 
- - Examples: - >>> MobileNetV2(num_classes=1000) - """ - - def __init__(self, platform, num_classes=1000, width_mult=1., - has_dropout=False, inverted_residual_setting=None, round_nearest=8): - super(MobileNetV2, self).__init__() - block = InvertedResidual - input_channel = 32 - last_channel = 1280 - # setting of inverted residual blocks - self.cfgs = inverted_residual_setting - if inverted_residual_setting is None: - self.cfgs = [ - # t, c, n, s - [1, 16, 1, 1], - [6, 24, 2, 2], - [6, 32, 3, 2], - [6, 64, 4, 2], - [6, 96, 3, 1], - [6, 160, 3, 2], - [6, 320, 1, 1], - ] - - # building first layer - input_channel = _make_divisible(input_channel * width_mult, round_nearest) - self.out_channels = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) - features = [ConvBNReLU(platform, 3, input_channel, stride=2)] - # building inverted residual blocks - for t, c, n, s in self.cfgs: - output_channel = _make_divisible(c * width_mult, round_nearest) - for i in range(n): - stride = s if i == 0 else 1 - features.append(block(platform, input_channel, output_channel, stride, expand_ratio=t)) - input_channel = output_channel - # building last several layers - features.append(ConvBNReLU(platform, input_channel, self.out_channels, kernel_size=1)) - # make it nn.CellList - self.features = nn.SequentialCell(features) - # mobilenet head - head = ([GlobalAvgPooling(), nn.Dense(self.out_channels, num_classes, has_bias=True)] if not has_dropout else - [GlobalAvgPooling(), nn.Dropout(0.2), nn.Dense(self.out_channels, num_classes, has_bias=True)]) - self.head = nn.SequentialCell(head) - - self._initialize_weights() - - def construct(self, x): - x = self.features(x) - x = self.head(x) - return x - - def _initialize_weights(self): - """ - Initialize weights. - - Args: - - Returns: - None. 
- - Examples: - >>> _initialize_weights() - """ - for _, m in self.cells_and_names(): - if isinstance(m, (nn.Conv2d, DepthwiseConv)): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. / n), - m.weight.data.shape()).astype("float32"))) - if m.bias is not None: - m.bias.set_parameter_data( - Tensor(np.zeros(m.bias.data.shape(), dtype="float32"))) - elif isinstance(m, nn.BatchNorm2d): - m.gamma.set_parameter_data( - Tensor(np.ones(m.gamma.data.shape(), dtype="float32"))) - m.beta.set_parameter_data( - Tensor(np.zeros(m.beta.data.shape(), dtype="float32"))) - elif isinstance(m, nn.Dense): - m.weight.set_parameter_data(Tensor(np.random.normal( - 0, 0.01, m.weight.data.shape()).astype("float32"))) - if m.bias is not None: - m.bias.set_parameter_data( - Tensor(np.zeros(m.bias.data.shape(), dtype="float32"))) - - -def mobilenet_v2(**kwargs): - """ - Constructs a MobileNet V2 model - """ - return MobileNetV2(**kwargs) diff --git a/mindspore/model_zoo/mobilenetV3.py b/mindspore/model_zoo/mobilenetV3.py deleted file mode 100644 index 820e60493f..0000000000 --- a/mindspore/model_zoo/mobilenetV3.py +++ /dev/null @@ -1,390 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""MobileNetV3 model define""" -from functools import partial -import numpy as np -import mindspore.nn as nn -from mindspore.ops import operations as P -from mindspore import Tensor - - -__all__ = ['mobilenet_v3_large', - 'mobilenet_v3_small'] - - -def _make_divisible(x, divisor=8): - return int(np.ceil(x * 1. / divisor) * divisor) - - -class Activation(nn.Cell): - """ - Activation definition. - - Args: - act_func(string): activation name. - - Returns: - Tensor, output tensor. - """ - - def __init__(self, act_func): - super(Activation, self).__init__() - if act_func == 'relu': - self.act = nn.ReLU() - elif act_func == 'relu6': - self.act = nn.ReLU6() - elif act_func in ('hsigmoid', 'hard_sigmoid'): - self.act = nn.HSigmoid() - elif act_func in ('hswish', 'hard_swish'): - self.act = nn.HSwish() - else: - raise NotImplementedError - - def construct(self, x): - return self.act(x) - - -class GlobalAvgPooling(nn.Cell): - """ - Global avg pooling definition. - - Args: - - Returns: - Tensor, output tensor. - - Examples: - >>> GlobalAvgPooling() - """ - - def __init__(self, keep_dims=False): - super(GlobalAvgPooling, self).__init__() - self.mean = P.ReduceMean(keep_dims=keep_dims) - - def construct(self, x): - x = self.mean(x, (2, 3)) - return x - - -class SE(nn.Cell): - """ - SE warpper definition. - - Args: - num_out (int): Output channel. - ratio (int): middle output ratio. - - Returns: - Tensor, output tensor. 
- - Examples: - >>> SE(4) - """ - - def __init__(self, num_out, ratio=4): - super(SE, self).__init__() - num_mid = _make_divisible(num_out // ratio) - self.pool = GlobalAvgPooling(keep_dims=True) - self.conv1 = nn.Conv2d(in_channels=num_out, out_channels=num_mid, - kernel_size=1, has_bias=True, pad_mode='pad') - self.act1 = Activation('relu') - self.conv2 = nn.Conv2d(in_channels=num_mid, out_channels=num_out, - kernel_size=1, has_bias=True, pad_mode='pad') - self.act2 = Activation('hsigmoid') - self.mul = P.Mul() - - def construct(self, x): - out = self.pool(x) - out = self.conv1(out) - out = self.act1(out) - out = self.conv2(out) - out = self.act2(out) - out = self.mul(x, out) - return out - - -class Unit(nn.Cell): - """ - Unit warpper definition. - - Args: - num_in (int): Input channel. - num_out (int): Output channel. - kernel_size (int): Input kernel size. - stride (int): Stride size. - padding (int): Padding number. - num_groups (int): Output num group. - use_act (bool): Used activation or not. - act_type (string): Activation type. - - Returns: - Tensor, output tensor. - - Examples: - >>> Unit(3, 3) - """ - - def __init__(self, num_in, num_out, kernel_size=1, stride=1, padding=0, num_groups=1, - use_act=True, act_type='relu'): - super(Unit, self).__init__() - self.conv = nn.Conv2d(in_channels=num_in, - out_channels=num_out, - kernel_size=kernel_size, - stride=stride, - padding=padding, - group=num_groups, - has_bias=False, - pad_mode='pad') - self.bn = nn.BatchNorm2d(num_out) - self.use_act = use_act - self.act = Activation(act_type) if use_act else None - - def construct(self, x): - out = self.conv(x) - out = self.bn(out) - if self.use_act: - out = self.act(out) - return out - - -class ResUnit(nn.Cell): - """ - ResUnit warpper definition. - - Args: - num_in (int): Input channel. - num_mid (int): Middle channel. - num_out (int): Output channel. - kernel_size (int): Input kernel size. - stride (int): Stride size. - act_type (str): Activation type. 
- use_se (bool): Use SE warpper or not. - - Returns: - Tensor, output tensor. - - Examples: - >>> ResUnit(16, 3, 1, 1) - """ - def __init__(self, num_in, num_mid, num_out, kernel_size, stride=1, act_type='relu', use_se=False): - super(ResUnit, self).__init__() - self.use_se = use_se - self.first_conv = (num_out != num_mid) - self.use_short_cut_conv = True - - if self.first_conv: - self.expand = Unit(num_in, num_mid, kernel_size=1, - stride=1, padding=0, act_type=act_type) - else: - self.expand = None - self.conv1 = Unit(num_mid, num_mid, kernel_size=kernel_size, stride=stride, - padding=self._get_pad(kernel_size), act_type=act_type, num_groups=num_mid) - if use_se: - self.se = SE(num_mid) - self.conv2 = Unit(num_mid, num_out, kernel_size=1, stride=1, - padding=0, act_type=act_type, use_act=False) - if num_in != num_out or stride != 1: - self.use_short_cut_conv = False - self.add = P.TensorAdd() if self.use_short_cut_conv else None - - def construct(self, x): - if self.first_conv: - out = self.expand(x) - else: - out = x - out = self.conv1(out) - if self.use_se: - out = self.se(out) - out = self.conv2(out) - if self.use_short_cut_conv: - out = self.add(x, out) - return out - - def _get_pad(self, kernel_size): - """set the padding number""" - pad = 0 - if kernel_size == 1: - pad = 0 - elif kernel_size == 3: - pad = 1 - elif kernel_size == 5: - pad = 2 - elif kernel_size == 7: - pad = 3 - else: - raise NotImplementedError - return pad - - -class MobileNetV3(nn.Cell): - """ - MobileNetV3 architecture. - - Args: - model_cfgs (Cell): number of classes. - num_classes (int): Output number classes. - multiplier (int): Channels multiplier for round to 8/16 and others. Default is 1. - final_drop (float): Dropout number. - round_nearest (list): Channel round to . Default is 8. - Returns: - Tensor, output tensor. 
- - Examples: - >>> MobileNetV3(num_classes=1000) - """ - - def __init__(self, model_cfgs, num_classes=1000, multiplier=1., final_drop=0., round_nearest=8): - super(MobileNetV3, self).__init__() - self.cfgs = model_cfgs['cfg'] - self.inplanes = 16 - self.features = [] - first_conv_in_channel = 3 - first_conv_out_channel = _make_divisible(multiplier * self.inplanes) - - self.features.append(nn.Conv2d(in_channels=first_conv_in_channel, - out_channels=first_conv_out_channel, - kernel_size=3, padding=1, stride=2, - has_bias=False, pad_mode='pad')) - self.features.append(nn.BatchNorm2d(first_conv_out_channel)) - self.features.append(Activation('hswish')) - for layer_cfg in self.cfgs: - self.features.append(self._make_layer(kernel_size=layer_cfg[0], - exp_ch=_make_divisible(multiplier * layer_cfg[1]), - out_channel=_make_divisible(multiplier * layer_cfg[2]), - use_se=layer_cfg[3], - act_func=layer_cfg[4], - stride=layer_cfg[5])) - output_channel = _make_divisible(multiplier * model_cfgs["cls_ch_squeeze"]) - self.features.append(nn.Conv2d(in_channels=_make_divisible(multiplier * self.cfgs[-1][2]), - out_channels=output_channel, - kernel_size=1, padding=0, stride=1, - has_bias=False, pad_mode='pad')) - self.features.append(nn.BatchNorm2d(output_channel)) - self.features.append(Activation('hswish')) - self.features.append(GlobalAvgPooling(keep_dims=True)) - self.features.append(nn.Conv2d(in_channels=output_channel, - out_channels=model_cfgs['cls_ch_expand'], - kernel_size=1, padding=0, stride=1, - has_bias=False, pad_mode='pad')) - self.features.append(Activation('hswish')) - if final_drop > 0: - self.features.append((nn.Dropout(final_drop))) - - # make it nn.CellList - self.features = nn.SequentialCell(self.features) - self.output = nn.Conv2d(in_channels=model_cfgs['cls_ch_expand'], - out_channels=num_classes, - kernel_size=1, has_bias=True, pad_mode='pad') - self.squeeze = P.Squeeze(axis=(2, 3)) - - self._initialize_weights() - - def construct(self, x): - x = 
self.features(x) - x = self.output(x) - x = self.squeeze(x) - return x - - def _make_layer(self, kernel_size, exp_ch, out_channel, use_se, act_func, stride=1): - mid_planes = exp_ch - out_planes = out_channel - #num_in, num_mid, num_out, kernel_size, stride=1, act_type='relu', use_se=False): - layer = ResUnit(self.inplanes, mid_planes, out_planes, - kernel_size, stride=stride, act_type=act_func, use_se=use_se) - self.inplanes = out_planes - return layer - - def _initialize_weights(self): - """ - Initialize weights. - - Args: - - Returns: - None. - - Examples: - >>> _initialize_weights() - """ - for _, m in self.cells_and_names(): - if isinstance(m, (nn.Conv2d)): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. / n), - m.weight.data.shape()).astype("float32"))) - if m.bias is not None: - m.bias.set_parameter_data( - Tensor(np.zeros(m.bias.data.shape(), dtype="float32"))) - elif isinstance(m, nn.BatchNorm2d): - m.gamma.set_parameter_data( - Tensor(np.ones(m.gamma.data.shape(), dtype="float32"))) - m.beta.set_parameter_data( - Tensor(np.zeros(m.beta.data.shape(), dtype="float32"))) - elif isinstance(m, nn.Dense): - m.weight.set_parameter_data(Tensor(np.random.normal( - 0, 0.01, m.weight.data.shape()).astype("float32"))) - if m.bias is not None: - m.bias.set_parameter_data( - Tensor(np.zeros(m.bias.data.shape(), dtype="float32"))) - - -def mobilenet_v3(model_name, **kwargs): - """ - Constructs a MobileNet V2 model - """ - model_cfgs = { - "large": { - "cfg": [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, 'relu', 1], - [3, 64, 24, False, 'relu', 2], - [3, 72, 24, False, 'relu', 1], - [5, 72, 40, True, 'relu', 2], - [5, 120, 40, True, 'relu', 1], - [5, 120, 40, True, 'relu', 1], - [3, 240, 80, False, 'hswish', 2], - [3, 200, 80, False, 'hswish', 1], - [3, 184, 80, False, 'hswish', 1], - [3, 184, 80, False, 'hswish', 1], - [3, 480, 112, True, 'hswish', 1], - [3, 672, 112, True, 'hswish', 1], - 
[5, 672, 160, True, 'hswish', 2], - [5, 960, 160, True, 'hswish', 1], - [5, 960, 160, True, 'hswish', 1]], - "cls_ch_squeeze": 960, - "cls_ch_expand": 1280, - }, - "small": { - "cfg": [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, 'relu', 2], - [3, 72, 24, False, 'relu', 2], - [3, 88, 24, False, 'relu', 1], - [5, 96, 40, True, 'hswish', 2], - [5, 240, 40, True, 'hswish', 1], - [5, 240, 40, True, 'hswish', 1], - [5, 120, 48, True, 'hswish', 1], - [5, 144, 48, True, 'hswish', 1], - [5, 288, 96, True, 'hswish', 2], - [5, 576, 96, True, 'hswish', 1], - [5, 576, 96, True, 'hswish', 1]], - "cls_ch_squeeze": 576, - "cls_ch_expand": 1280, - } - } - return MobileNetV3(model_cfgs[model_name], **kwargs) - - -mobilenet_v3_large = partial(mobilenet_v3, model_name="large") -mobilenet_v3_small = partial(mobilenet_v3, model_name="small") diff --git a/mindspore/nn/__init__.py b/mindspore/nn/__init__.py index f3f59edcbf..8d5e7d3b0a 100644 --- a/mindspore/nn/__init__.py +++ b/mindspore/nn/__init__.py @@ -18,14 +18,14 @@ Neural Networks Cells. Pre-defined building blocks or computing units to construct Neural Networks. """ from . import layer, loss, optim, metrics, wrap -from .cell import Cell +from .cell import Cell, GraphKernel from .layer import * from .loss import * from .optim import * from .metrics import * from .wrap import * -__all__ = ["Cell"] +__all__ = ["Cell", "GraphKernel"] __all__.extend(layer.__all__) __all__.extend(loss.__all__) __all__.extend(optim.__all__) diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py index dd8c4dac27..c046c2e1bf 100755 --- a/mindspore/nn/cell.py +++ b/mindspore/nn/cell.py @@ -19,7 +19,7 @@ from collections import OrderedDict from mindspore import log as logger from .. 
import context from ..common import dtype as mstype -from ..common.api import _executor +from ..common.api import _executor, _pynative_exec from .._checkparam import _check_str_by_regular from ..common.parameter import Parameter, ParameterTuple from .._c_expression import init_backend @@ -60,6 +60,7 @@ class Cell: self._params = OrderedDict() self._cells = OrderedDict() self.training = False + self.requires_grad = False self.pynative = False self._param_prefix = '' self._auto_prefix = auto_prefix @@ -79,6 +80,15 @@ class Cell: self._backward_hook = None self.enable_hook = False self._bprop_debug = False + self._is_run = False + + @property + def is_run(self): + return self._is_run + + @is_run.setter + def is_run(self, value): + self._is_run = value @property def create_time(self): @@ -176,6 +186,7 @@ class Cell: raise AttributeError("'{}' object has no attribute '{}'.".format(type(self).__name__, name)) def __del__(self): + _pynative_exec.clear("resource") if hasattr(self, "_create_time"): _executor.del_net_res(str(self._create_time)) @@ -192,9 +203,26 @@ class Cell: out = self.compile_and_run(*inputs) return out self.init_parameters_data() - output = self.construct(*inputs) + orign_grad = [] + if self.requires_grad is True: + _pynative_exec.set_grad_flag(True) + _pynative_exec.new_graph(self, *inputs) + for cell in self.cells(): + orign_grad.append(cell.requires_grad) + cell.set_grad(True) + else: + _pynative_exec.set_grad_flag(False) + if self.enable_hook: + output = self._hook_construct(*inputs) + else: + output = self.construct(*inputs) if isinstance(output, Parameter): output = output.data + if self.requires_grad is True: + _pynative_exec.end_graph(self, output, *inputs) + for i, cell in enumerate(self.cells()): + cell.set_grad(orign_grad[i]) + self._is_run = True return output def __setattr__(self, name, value): @@ -227,9 +255,12 @@ class Cell: value.update_parameters_name(name + '.') cells[name] = value elif params and name in params: - if value is not None: 
+ if isinstance(value, Tensor) and self._params[name] is not None: + self._params[name].set_parameter_data(value) + elif value is not None: raise TypeError("Expected type in (Parameter, ParameterTuple), but got {}.".format(type(value))) - self.insert_param_to_cell(name, None) + else: + self.insert_param_to_cell(name, None) elif cells and name in cells: if value is not None: raise TypeError("Expected type is cell, but got {}.".format(type(value))) @@ -278,7 +309,7 @@ class Cell: logger.info("layout dict does not contain the key %s", key) continue if self.parameters_dict()[key].sliced: - logger.info("Param %s is already sliced.", key) + logger.debug("Param %s is already sliced.", key) continue layout = self.parameter_layout_dict[key] new_tensor = _load_tensor_by_layout(tensor, layout) @@ -291,7 +322,7 @@ class Cell: logger.info("layout dict does not contain the key %s", key) continue if params[key].sliced: - logger.info("Param %s is already sliced.", key) + logger.debug("Param %s is already sliced.", key) continue layout = self.parameter_layout_dict[key] new_tensor = _load_tensor_by_layout(tensor, layout) @@ -457,7 +488,7 @@ class Cell: if not auto_parallel_mode: param.init_data() elif param.name not in self.parameter_layout_dict: - logger.info("Layout dict does not contain the key %s.", param.name) + logger.debug("Layout dict does not contain the key %s.", param.name) param.init_data(set_sliced=True) else: layout = self.parameter_layout_dict[param.name] @@ -676,9 +707,6 @@ class Cell: return cells def add_flags(self, **flags): - for x in flags: - if not isinstance(flags[x], bool): - raise TypeError(f"Flags (f{x}) must be bool but {type(flags[x])}.") if not hasattr(self, "_mindspore_flags"): self._mindspore_flags = {} self._mindspore_flags.update({**flags}) @@ -722,6 +750,10 @@ class Cell: self.add_flags_recursive(**flags) return self + def set_grad(self, mode=True): + self.requires_grad = mode + return self + def set_train(self, mode=True): """ Sets the cell to 
training mode. @@ -762,9 +794,9 @@ class Cell: self.add_flags(auto_parallel=True) self._get_construct_inputs_number_and_name() - def _hook_construct(self, inputs): + def _hook_construct(self, *inputs): """Hook construct method to replace original construct method when hook function enabled.""" - inputs = self._backward_hook(inputs) + inputs = self._backward_hook(*inputs) inputs = self.construct(inputs) outputs = self._backward_hook(inputs) return outputs @@ -784,4 +816,28 @@ class Cell: """ self._backward_hook = HookBackward(fn, self.cls_name + "(" + str(id(self)) + ")") - self._enable_hook = True + self.enable_hook = True + +class GraphKernel(Cell): + """ + Base class for GraphKernel. + + A `GraphKernel` a composite of basic primitives and can be compiled into a fused kernel automaticly when + context.set_context(enable_graph_kernel=True). + + Examples: + >>> class Relu(GraphKernel): + >>> def __init__(self): + >>> super(Relu, self).__init__() + >>> self.max = P.Maximum() + >>> + >>> def construct(self, x): + >>> return self.max(P.Fill()(P.DType()(x), P.Shape()(x), 0.0), x) + """ + def __init__(self, auto_prefix=True, pips=None): + super(GraphKernel, self).__init__(auto_prefix, pips) + class_name = self.__class__.__name__ + self.add_flags(graph_kernel=class_name) + + def construct(self): + raise NotImplementedError diff --git a/mindspore/nn/graph_kernels/__init__.py b/mindspore/nn/graph_kernels/__init__.py new file mode 100644 index 0000000000..8128f2db60 --- /dev/null +++ b/mindspore/nn/graph_kernels/__init__.py @@ -0,0 +1,30 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +GraphKernel. + +GraphKernel provides a unified style to express graph and kernel for user. +It breaks the boundary between graph and kernel and provides more opportunities to do compile optimization. +""" +from .graph_kernels import MaximumGrad, MinimumGrad, AbsGrad, ApplyMomentum, BiasAdd, EqualCount, \ + ReduceMean, ReLU, SoftmaxCrossEntropyWithLogits, LayerNorm, LayerNormXBackprop, \ + LayerNormBetaGammaBackprop, LogSoftmax, Tanh, TanhGrad, Gelu, Softmax, BiasAddGrad, \ + LambUpdateWithLR, LambNextMV + +__all__ = ['MaximumGrad', 'MinimumGrad', 'AbsGrad', 'ApplyMomentum', 'BiasAdd', 'EqualCount', + 'ReduceMean', 'ReLU', 'SoftmaxCrossEntropyWithLogits', 'LayerNorm', + 'LayerNormXBackprop', 'LayerNormBetaGammaBackprop', 'LogSoftmax', 'Tanh', 'TanhGrad', + 'Gelu', 'Softmax', 'BiasAddGrad', 'LambUpdateWithLR', 'LambNextMV' + ] diff --git a/mindspore/nn/graph_kernels/graph_kernels.py b/mindspore/nn/graph_kernels/graph_kernels.py new file mode 100644 index 0000000000..21cc4f8710 --- /dev/null +++ b/mindspore/nn/graph_kernels/graph_kernels.py @@ -0,0 +1,1201 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Graph kernels. They are composites of basic primitives and can be compiled into +a fused kernel automaticly when context.set_context(enable_graph_kernel=True). +""" +from ...common import dtype as mstype +from ...ops import operations as P +from ...ops.primitive import PrimitiveWithInfer, prim_attr_register +from ...ops.composite import multitype_ops as C +from ...ops.operations import _grad_ops as G +from ..._checkparam import ParamValidator as validator +from ..cell import Cell, GraphKernel + + +class InplaceAssign(PrimitiveWithInfer): + """ + Inplace assign `Parameter` with a value. + + This primitive can only use in graph kernel. + + Inputs: + - **variable** (Parameter) - The `Parameter`. + - **value** (Tensor) - The value to assign. + - **depend** (Tensor) - The dependent tensor to keep this op connected in graph. + + Outputs: + Tensor, has the same type as original `variable`. 
+ + Examples: + >>> def construct(self, x): + >>> val = x - 1.0 + >>> ret = x + 2.0 + >>> return InplaceAssign()(x, val, ret) + >>> x = Tensor([2.0], mindspore.float32) + >>> net = Net() + >>> net(x) + """ + @prim_attr_register + def __init__(self): + self.init_prim_io_names(inputs=['x', 'y', 'z'], outputs=['output']) + + def infer_shape(self, x, y, z): + return z + + def infer_dtype(self, x, y, z): + return z + + def get_bprop(self): + def bprop(x, y, z, out, dout): + return (x, C.zeros_like(y), dout) + return bprop + + +class MaximumGrad(GraphKernel): + """ + + Backprop function for Maximum operator. + + Inputs: + - **x** (Tensor) - The first input tensor of maximum. + - **y** (Tensor) - The second input tensor of maximum. + - **dout** (Tensor) - has the same shape as x and y, next operator's backprop output. + + Outputs: + dx (Tensor): has the same shape as x and y, returns dout element if + `x >= y` returns true at the same position, or returns zero at that + position + dy (Tensor): has the same shape as x and y, dy = dout - dx + + Examples: + >>> layer = MaximumGrad() + >>> output = layer(Tensor([1,2,3], [3, 2, 1], [4, 5, 6])) + """ + + def __init__(self, grad_x=True, grad_y=True): + super(MaximumGrad, self).__init__() + self.grad_x = grad_x + self.grad_y = grad_y + self.select = P.Select() + self.greater_equal = P.GreaterEqual() + self.zeros_like = P.ZerosLike() + self.sub = P.Sub() + + def construct(self, x, y, dout): + cmp_result = self.greater_equal(x, y) + dx = self.select(cmp_result, dout, self.zeros_like(dout)) + dy = dout - dx + + return dx, dy + + +class MinimumGrad(GraphKernel): + """ + Backprop function for Minimum operator. + + Compares x and y elementwise, dout should has the same shape with x and y. 
+ + Inputs: + - **x** (Tensor) - The first input + - **y** (Tensor) - x and y should have same shape + - **dout** (Tensor) - Has the same shape as x and y, next operator's backprop output + + Outputs: + - dx (Tensor) - Has the same shape as x and y, returns dout element if + `x <= y` returns true at the same position, or returns zero at that + position + - dy (Tensor) - Has the same shape as x and y, dy = dout - dx + + Examples: + >>> layer = MinimumGrad() + >>> output = layer(Tensor([1,2,3], [3, 2, 1], [4, 5, 6])) + """ + + def __init__(self, grad_x=True, grad_y=True): + super(MinimumGrad, self).__init__() + self.grad_x = grad_x + self.grad_y = grad_y + self.select = P.Select() + self.less_equal = P.LessEqual() + self.zeros_like = P.ZerosLike() + self.sub = P.Sub() + + def construct(self, x, y, dout): + cmp_result = self.less_equal(x, y) + dx = self.select(cmp_result, dout, self.zeros_like(dout)) + # dy = self.select(cmp_result, self.zeros_like(dout), dout) + dy = dout - dx + + return dx, dy + + +class AbsGrad(GraphKernel): + """ + Abs's backprop function. + + Inputs: + **input_x** (Tensor) - input data of this operator. + **dout** (Tensor) - output of the next operator's backprop function. + + Outputs: + Tensor, has the same shape as input_x. + + Examples: + >>> back = AbsGrad() + >>> output = back(Tensor([1, 2, 3]), Tensor([4, 5, 6])) + """ + + def __init__(self): + super(AbsGrad, self).__init__() + self.mul = P.Mul() + self.abs = P.Abs() + self.add = P.TensorAdd() + self.div = P.RealDiv() + self.round = P.Round() + + def construct(self, input_x, dout): + NUM_MAX = 32768 + mul_max = self.mul(input_x, P.Fill()(P.DType()(input_x), (1,), NUM_MAX)) + res_abs = self.abs(mul_max) + res_div = self.div(mul_max, res_abs) + res_round = self.round(res_div) + res = self.mul(res_round, dout) + return res + + +class ApplyMomentum(GraphKernel): + """ + Update parameter according to the ApplyMomentum algorithm. 
+ + Inputs: + variable (Tensor): mutable tensor var + accumulation (Tensor): mutable tensor accum + learning_rate (float32): learning rate + gradient (float32): The gradient + momentum (float32): Momentum + + Outputs: updated accumulation and variable + """ + + def __init__(self, + use_nesterov=False, + use_locking=False, + gradient_scale=1.0): + super(ApplyMomentum, self).__init__() + self.gradient_scale = validator.check_type('gradient_scale', gradient_scale, [float]) + self.fake_output_assign_1 = InplaceAssign() + self.fake_output_assign_1.add_prim_attr("fake_output", True) + self.fake_output_assign_2 = InplaceAssign() + self.fake_output_assign_2.add_prim_attr("fake_output", True) + + def construct(self, variable, accumulation, learning_rate, gradient, momentum): + gradient = gradient * self.gradient_scale + momt_accumulation = accumulation * momentum + accumulation_inplace = momt_accumulation + gradient + + sum_gradient = accumulation_inplace * learning_rate + variable_inplace = variable - sum_gradient + + accumulation_inplace = self.fake_output_assign_1(accumulation, accumulation_inplace, accumulation_inplace) + variable_inplace = self.fake_output_assign_2(variable, variable_inplace, variable_inplace) + return accumulation_inplace, variable_inplace + + +class BiasAdd(GraphKernel): + """ + Return the sum of x and bias. + + Inputs: + x (Tensor): Tensor of input data. + bias (Tensor): The bias tensor. + + Output: + Tensor, the sum of x and bias. + + Example: + >>> layer = BiasGrad() + >>> output = BiasAdd(Tensor([1, 2, 3]), Tensor([1,])) + """ + + def __init__(self): + super(BiasAdd, self).__init__() + + def construct(self, x, bias): + shape = P.Shape()(x) + if len(shape) == 4: + bias_shape = (1, P.Shape()(bias)[0], 1, 1) # NCHW + else: + bias_shape = (1, P.Shape()(bias)[0]) + res = x + P.Reshape()(bias, bias_shape) + return res + +class BiasAddGrad(GraphKernel): + """ + Computes gradients of BiasAdd. + + Inputs: + x (Tensor): the gradients of bias add output. 
+ + Output: + Tensor, the gradients of bias add input. + + Examples: + >>> dout = Tensor(np.ones(shape=[1, 2, 3, 4]), mindspore.float32) + >>> bias_add_grad = BiasAddGrad() + >>> dx = bias_add_grad(dout) + """ + def __init__(self): + super(BiasAddGrad, self).__init__() + + def construct(self, x): + shape_x = P.Shape()(x) + reduce_axis = [0] + for i in range(2, len(shape_x)): + reduce_axis.append(i) + + res = P.ReduceSum()(x, reduce_axis) + return res + + +class EqualCount(GraphKernel): + """ + Computes the number of the same elements of two tensors. + + The two input tensors should have same shape and data type. + + Inputs: + x (Tensor): the first input tensor. + y (Tensor): the second input tensor. + + Outputs: + Tensor, the type is same as input tensor and size as (1,). + + Examples: + >>> x = Tensor(np.array([1, 2, 3]), mindspore.int32) + >>> y = Tensor(np.array([1, 2, 4]), mindspore.int32) + >>> equal_count = EqualCount() + >>> equal_count(x, y) + """ + def __init__(self): + super(EqualCount, self).__init__() + + def construct(self, x, y): + equal_bool = P.Equal()(P.Cast()(x, mstype.float32), P.Cast()(y, mstype.float32)) + equal_count = P.Cast()(equal_bool, mstype.float16) + + axes = (0,) + res = P.ReduceSum()(equal_count, axes) + res = P.Cast()(res, P.DType()(x)) + return res + + +class ReduceMean(GraphKernel): + """ + Reduce a dimension of a tensor by averaging all elements in the dimension. + + The dtype of the tensor to be reduced is number. + + Args: + keep_dims (bool): If True, keep these reduced dimensions and the length is 1. + If False, don't keep these dimensions. Default : False. + + Inputs: + - **input_x** (Tensor[Number]) - The input tensor. + - **axis** (Union[int, tuple(int), list(int)]) - The dimensions to reduce. Default: (), reduce all dimensions. + Only constant value is allowed. + + Outputs: + Tensor, has the same dtype as the 'input_x'. 
+ + - If axis is (), and keep_dims is false, + the output is a 0-D tensor representing the sum of all elements in the input tensor. + - If axis is int, set as 2, and keep_dims is false, + the shape of output is :math:`(x_1, x_3, ..., x_R)`. + - If axis is tuple(int), set as (2, 3), and keep_dims is false, + the shape of output is :math:`(x_1, x_4, ..., x_R)`. + + Examples: + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> op = ReduceMean(keep_dims=True) + >>> output = op(input_x, 1) + """ + + def __init__(self, keep_dims=True): + super(ReduceMean, self).__init__() + self.keep_dims = validator.check_type('keep_dims', keep_dims, [bool]) + self.sum = P.ReduceSum(self.keep_dims) + + def construct(self, x, axis): + shape = P.Shape()(x) + value_num = 1 + for i in axis: + value_num *= shape[i] + + data_sum = self.sum(x, axis) + avg = 1.0 / P.Fill()(P.DType()(x), (1,), value_num) + res = data_sum * avg + return res + + +class ReLU(GraphKernel): + r""" + Computes ReLU(Rectified Linear Unit) of input tensor element-wise. + + It returns :math:`\max(x,\ 0)` element-wise. + + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, with the same type and shape as the `input_x`. + + Examples: + >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32) + >>> relu = ReLU() + >>> result = relu(input_x) + [[0, 4.0, 0.0], [2.0, 0.0, 9.0]] + """ + def __init__(self): + super(ReLU, self).__init__() + self.max = P.Maximum() + + def construct(self, x): + return self.max(P.Fill()(P.DType()(x), P.Shape()(x), 0.0), x) + + +class SoftmaxCrossEntropyWithLogits(GraphKernel): + r""" + Gets the softmax cross-entropy value between logits and labels which shoule be one-hot encoding. + + Note: + Sets input logits as `X`, input label as `Y`, output as `loss`. Then, + + .. math:: + p_{ij} = softmax(X_{ij}) = \frac{exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)} + + .. 
math:: + loss_{ij} = -\sum_j{Y_{ij} * ln(p_{ij})} + + Inputs: + - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. + - **labels** (Tensor) - Ground truth labels, with shape :math:`(N, C)`. + + Outputs: + Tuple of 2 Tensor, the loss shape is `(N,)`, and the dlogits with the same shape as `logits`. + + Examples: + >>> logits = Tensor([[2, 4, 1, 4, 5], [2, 1, 2, 4, 3]], mindspore.float32) + >>> labels = Tensor([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0]], mindspore.float32) + >>> softmax_cross = SoftmaxCrossEntropyWithLogits() + >>> loss, backprop = softmax_cross(logits, labels) + """ + + def __init__(self): + super(SoftmaxCrossEntropyWithLogits, self).__init__() + self.max = P.ReduceMax(keep_dims=True) + self.sum_keep_dims = P.ReduceSum(keep_dims=True) + + def construct(self, features, labels): + data_max = self.max(features, (1,)) + data_sub = features - data_max + data_exp = P.Exp()(data_sub) + data_sum = self.sum_keep_dims(data_exp, (1,)) + data_div = data_exp / data_sum + data_log_tmp = P.Log()(data_sum) + data_log = data_sub - data_log_tmp + data_mul = labels * data_log + data_muls = P.Neg()(data_mul) + loss = P.ReduceSum()(data_muls, (1,)) + backprop = data_div - labels + return loss, backprop + + def bprop(self, features, labels, out, dout): + grad = out[1] + grad = grad * P.ExpandDims()(dout[0], -1) + return grad, P.ZerosLike()(labels) + + +class LayerNormForward(GraphKernel): + """ Forward function of the LayerNorm operator. 
""" + def __init__(self, begin_norm_axis=1, begin_params_axis=1): + super(LayerNormForward, self).__init__() + self.begin_norm_axis = validator.check_type('begin_norm_axis', begin_norm_axis, [int]) + self.begin_params_axis = validator.check_type('begin_params_axis', begin_params_axis, [int]) + self.mul = P.Mul() + self.sum_keep_dims = P.ReduceSum(keep_dims=True) + self.sub = P.Sub() + self.add = P.TensorAdd() + self.log = P.Log() + self.exp = P.Exp() + self.eps = P.Eps() + + def construct(self, input_x, input_gamma, input_beta): + shape_x = P.Shape()(input_x) + + # Calculate the scaling ratio of the average + begin_norm_axis = self.begin_norm_axis + if begin_norm_axis < 0: + begin_norm_axis += len(shape_x) + reduce_axis = () + for i in range(len(shape_x)): + if i > begin_norm_axis or i == begin_norm_axis: + reduce_axis = reduce_axis + (i,) + + reduce_elts = 1.0 + for i in reduce_axis: + reduce_elts *= shape_x[i] + mean_cof = 1.0 / reduce_elts + + # Calculate mean + mean_muls = self.mul(input_x, mean_cof) + mean = self.sum_keep_dims(mean_muls, reduce_axis) + + # Calculate variance + variance_sub = self.sub(input_x, mean) + variance_mul = self.mul(variance_sub, variance_sub) + variance_muls = self.mul(variance_mul, mean_cof) + variance = self.sum_keep_dims(variance_muls, reduce_axis) + + # Calculate normalize + normalize_sub = self.sub(input_x, mean) + epsilon = self.eps(input_x) + normalize_add = self.add(variance, epsilon) + normalize_log = self.log(normalize_add) + normalize_log_mul = self.mul(normalize_log, -0.5) + normalize_exp = self.exp(normalize_log_mul) + normalize_mul = self.mul(normalize_sub, normalize_exp) + + # Calculate scale and translate + if self.begin_params_axis == 0: + scale_mul = self.mul(input_gamma, normalize_mul) + res = self.add(scale_mul, input_beta) + else: + scale_mul = self.mul(input_gamma, normalize_mul) + res = self.add(scale_mul, input_beta) + + return res, mean, variance + + +class LayerNormXBackprop(GraphKernel): + r""" + Together 
with LayerNormBetaGammaBackprop, to supply the backprop + functionality for LayerNorm. + + Note: + Sets input_x as :math:`x_i`, variance as :math:`\sigma^2`, mean as :math:`\mu`, + input_gamma as :math:`\gamma`. Then, + .. math:: + \begin{array}{ll} \\ + \hat{x_i} = \frac{x_i - \mu}{\sqrt{\sigma^2 + \epsilon}} \\ + \frac {\partial L} {\partial x_i} = + \frac{\gamma}{\sqrt{\sigma^2+\epsilon}} + ( \frac{\partial L}{\partial y_i} + - \frac{1}{m} \cdot \frac{\partial L}{\partial \beta} + - \frac{\hat{x_i}}{m} \cdot \frac{\partial L}{\partial \gamma}) + \end{array} + + Inputs: + - **dy**(Tensor) - The first item of the next operator's backprop's output. + - **input_x**(Tensor) - The first input of the forward function of LayerNorm. + - **variance**(Tensor) - The second input of the forward function of LayerNorm. + - **mean**(Tensor) - The third input of the forward function of LayerNorm. + - **input_gamma**(Tensor) - The fourth input of the forward function of LayerNorm. + + Outputs: + Tensor, the output of this operator, will be used as the first item of the result of + LayerNorm's backprop function, has the same shape and data type as 'input_x'. 
+ + Examples: + >>> dy = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> variance = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> mean = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_gamma = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> op = LayerNormXBackprop(keep_dims=False) + >>> output = op(dy, input_x, variance, mean, input_gamma) + """ + + def __init__(self): + super(LayerNormXBackprop, self).__init__() + self.sum_keep_dims = P.ReduceSum(keep_dims=True) + self.log = P.Log() + self.exp = P.Exp() + self.eps = P.Eps() + + def construct(self, dy, input_x, variance, mean, input_gamma): + shape_x = P.Shape()(input_x) + shape_mean = P.Shape()(mean) + reduce_axis = () + flag = -1 + min_l = 0 + if len(shape_x) > len(shape_mean): + min_l = len(shape_x) + else: + min_l = len(shape_mean) + for i in range(min_l): + if (shape_x[i] != shape_mean[i]) and (flag == -1): + flag = i + if flag != -1: + for i in range(flag, len(shape_x)): + reduce_axis = reduce_axis + (i,) + else: + reduce_axis = reduce_axis + (len(shape_x) - 1,) + mean_num = 1.0 + for i in reduce_axis: + mean_num *= shape_x[i] + pd_xl = input_gamma * dy + epsilon = self.eps(input_x) + var_elta = variance + epsilon + var_elta_log = self.log(var_elta) + var_elta_mul = var_elta_log * -0.5 + var_elta_2 = P.Exp()(var_elta_mul) + pdvar1_mul = var_elta_2 * var_elta_2 + pd_var_1 = pdvar1_mul * var_elta_2 + sub_x_mean = input_x - mean + pdvar_mul1 = pd_xl * sub_x_mean + pdvar_sum = self.sum_keep_dims(pdvar_mul1, reduce_axis) + pdvar_mul3 = pdvar_sum * pd_var_1 + pd_var = pdvar_mul3 * -0.5 + pdmean1_sum = self.sum_keep_dims(pd_xl, reduce_axis) + pdmean1_mul = pdmean1_sum * var_elta_2 + pd_mean_1 = pdmean1_mul * -1.0 + pdmean2_mul1 = sub_x_mean * -2.0 + pdmean2_sum = self.sum_keep_dims(pdmean2_mul1, reduce_axis) + pdmean2_mul3 = pdmean2_sum * (1.0 / mean_num) + pd_mean_2 = pd_var * 
pdmean2_mul3 + pd_mean = pd_mean_2 + pd_mean_1 + pd_x_1 = var_elta_2 * pd_xl + pdx2_mul = pd_var * sub_x_mean + pd_x_2 = pdx2_mul * (2.0 * (1.0 / mean_num)) + pd_x_3 = pd_mean * (1.0 / mean_num) + pdx_add = pd_x_1 + pd_x_2 + pd_x = pdx_add + pd_x_3 + return pd_x + + +class LayerNormBetaGammaBackprop(GraphKernel): + r""" + Together with LayerNormXBackprop, to supply the backprop functionality for + LayerNorm. + Note: + Sets input_x as :math:`x_i`, variance as :math:`\sigma^2`, mean as :math:`\mu`, + input_gamma as :math:`\gamma`. Then, + .. math:: + \begin{array}{ll} \\ + \hat{x_i} = \frac{x_i - \mu}{\sqrt{\sigma^2 + \epsilon}} \\ + \frac {\partial L} {\partial \beta} = + \sum_{i=1}^m \\frac{\\partial L}{\partial y_i} \\ + \frac {\partial L} {\partial \gamma} = + \sum_{i=1}^m \\frac{\partial L}{\partial y_i} \cdot \hat{x_i} + \end{array} + + Inputs: + - **dy**(Tensor) - The first item of the next operator's backprop's output. + - **input_x**(Tensor) - The first input of the forward function of LayerNorm. + - **variance**(Tensor) - The second input of the forward function of LayerNorm. + - **mean**(Tensor) - The third input of the forward function of LayerNorm. + - **input_gamma**(Tensor) - The fourth input of the forward function of LayerNorm. + + Outputs: + Tuple of 2 Tensor, the backprop outputs. + + - **pd_beta**(Tensor) - The first item of return value of this operator, will be used as + the second item of the LayerNorm's backprop function. + - **pd_gamma**(Tensor) - The second item of return value of this operator, will be used as + the third item of the LayerNorm's backprop function. 
+ + Examples: + >>> dy = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> variance = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> mean = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> input_gamma = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) + >>> op = LayerNormBetaGammaBackprop(keep_dims=False) + >>> pd_beta, pd_gamma = op(dy, input_x, variance, mean, input_gamma) + """ + def __init__(self): + super(LayerNormBetaGammaBackprop, self).__init__() + self.sum_not_keep_dims = P.ReduceSum(keep_dims=False) + self.log = P.Log() + self.exp = P.Exp() + self.eps = P.Eps() + + def construct(self, dy, input_x, variance, mean, shape_gamma): + shape_x = P.Shape()(input_x) + params_axis = () + + if len(shape_x) != len(shape_gamma): + sub = len(shape_x) - len(shape_gamma) + for i in range(sub): + params_axis = params_axis + (i,) + + pd_beta = self.sum_not_keep_dims(dy, params_axis) + epsilon = self.eps(input_x) + var_elta = variance + epsilon + var_elta_log = self.log(var_elta) + var_elta_mul = var_elta_log * -0.5 + var_elta_2 = P.Exp()(var_elta_mul) + sub_x_mean = input_x - mean + var_elta_2_cast = var_elta_2 + xl_mul = var_elta_2_cast * sub_x_mean + pdga_mul = dy * xl_mul + pd_gamma = self.sum_not_keep_dims(pdga_mul, params_axis) + return pd_beta, pd_gamma + + +class LogSoftmax(GraphKernel): + r""" + Log Softmax activation function. + + Applies the Log Softmax function to the input tensor on the specified axis. + Suppose a slice along the given aixs :math:`x` then for each element :math:`x_i` + the Log Softmax function is shown as follows: + + .. math:: + \text{output}(x_i) = \log \left(\frac{exp(x_i)} {\sum_{j = 0}^{N-1}\exp(x_j)}\right), + + where :math:`N` is the length of the Tensor. + + Args: + axis (int): The axis to do the Log softmax operation. Default: -1. + + Inputs: + logits (Tensor): The input of Log Softmax. 
+ + Outputs: + Tensor, with the same type and shape as the logits. + + Examples: + >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32) + >>> log_softmax = LogSoftmax() + >>> log_softmax(input_x) + [-4.4519143, -3.4519143, -2.4519143, -1.4519144, -0.4519144] + """ + + def __init__(self, axis=-1): + super(LogSoftmax, self).__init__() + self.axis = validator.check_type('axis', axis, [int]) + self.max_keep_dims = P.ReduceMax(keep_dims=True) + self.sub = P.Sub() + self.exp = P.Exp() + self.sum_keep_dims = P.ReduceSum(keep_dims=True) + self.log = P.Log() + self.mul = P.Mul() + + def construct(self, input_x): + data_max = self.max_keep_dims(input_x, (self.axis,)) + data_sub = self.sub(input_x, data_max) + + data_exp = self.exp(data_sub) + data_sum = self.sum_keep_dims(data_exp, (self.axis,)) + data_log = self.log(data_sum) + + res = self.sub(data_sub, data_log) + return res + + def bprop(self, input_x, out, dout): + input_x = out + input_dy = dout + + data_exp = self.exp(input_x) + data_sum = self.sum_keep_dims(input_dy, (self.axis,)) + data_softmax = self.mul(data_exp, data_sum) + + res = self.sub(input_dy, data_softmax) + return (res,) + + +class Tanh(GraphKernel): + r""" + Tanh activation function. + + Computes hyperbolic tangent of input element-wise. The Tanh function is defined as: + + .. math:: + tanh(x_i) = \frac{\exp(x_i) - \exp(-x_i)}{\exp(x_i) + \exp(-x_i)} = \frac{\exp(2x_i) - 1}{\exp(2x_i) + 1}, + + where :math:`x_i` is an element of the input Tensor. + + Inputs: + - **input_x** (Tensor) - The input of Tanh. + + Outputs: + Tensor, with the same type and shape as the input_x. 
+ + Examples: + >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32) + >>> tanh = Tanh() + >>> tanh(input_x) + [0.7615941, 0.9640276, 0.9950548, 0.9993293, 0.99990916] + """ + def __init__(self): + super(Tanh, self).__init__() + self.abs = P.Abs() + self.add = P.TensorAdd() + self.div = P.RealDiv() + self.mul = P.Mul() + self.mul_fp16 = P.Mul() + self.mul_fp16.add_prim_attr("output_precision", "float16") + self.exp = P.Exp() + + def construct(self, input_x): + input_abs = self.abs(input_x) + sign_flag = self.div(input_x, input_abs) + sign_flag_neg = self.mul(sign_flag, -1.0) + + power_val = self.mul(input_abs, -2.0) + exp_val = self.exp(power_val) + up_val = self.add(exp_val, -1.0) + down_val = self.add(exp_val, 1.0) + + div_val = self.div(up_val, down_val) + res = self.mul(sign_flag_neg, div_val) + return res + + def bprop(self, input_x, out, dout): + input_y = out + input_dy = dout + + data_square = self.mul(input_y, input_y) + data_mul = self.mul(data_square, -1.0) + anuminate = self.add(data_mul, 1.0) + res = self.mul_fp16(anuminate, input_dy) + + return (res,) + +class TanhGrad(GraphKernel): + """ + Backprop function of Tanh + + Mathematical calculating: + result = Tanh(out) + result = 1 - result * result + result = result * dout + Inputs: + out (Tensor): Tanh's output + dout (Tensor): next layer's backward function's output, has same shape as out + + Outputs: + result (Tensor): result of (1 - tanh(out)^2) * dout + + Examples: + >>> x_np = np.random.randn(5, 3, 6).astype(np.float16) + >>> dy_np = np.random.randn(5, 3, 6).astype(np.float16) + >>> x_ms = Tensor(x_np) + >>> dy_ms = Tensor(dy_np) + >>> tanh_grad = TanhGrad() + >>> out = tanh_grad(x_np, dy_np) + """ + def __init__(self): + super(TanhGrad, self).__init__() + self.add = P.TensorAdd() + self.mul = P.Mul() + self.mul_fp16 = P.Mul() + self.mul_fp16.add_prim_attr("output_precision", "float16") + + def construct(self, out, dout): + input_y = out + input_dy = dout + + data_square = 
self.mul(input_y, input_y) + data_mul = self.mul(data_square, -1.0) + anuminate = self.add(data_mul, 1.0) + res = self.mul_fp16(anuminate, input_dy) + + return res + +class Gelu(GraphKernel): + r""" + Gaussian Error Linear Units activation function. + + GeLU is described in the paper `Gaussian Error Linear Units (GELUs) `_. + And also please refer to `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. + `_. + + Defined as follows: + + .. math:: + \text{output} = 0.5 * x * (1 + erf(x / \sqrt{2})), + + where :math:`erf` is the "Gauss error function" . + + Inputs: + - **input_x** (Tensor) - Input to compute the Gelu. + + Outputs: + Tensor, with the same type and shape as input. + + Examples: + >>> tensor = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32) + >>> gelu = Gelu() + >>> result = gelu(tensor) + """ + + def __init__(self): + super(Gelu, self).__init__() + self.add = P.TensorAdd() + self.abs = P.Abs() + self.exp = P.Exp() + self.neg = P.Neg() + self.minimum = P.Minimum() + self.div = P.RealDiv() + self.mul = P.Mul() + self.CSVALUE = 0.044715 + self.CSVALUE_A = 1.59576912 + self.CSVALUE_5 = 0.3989422804 + self.CSVALUE_3B = 0.2140644488 + + def construct(self, input_x): + def _tanh_parameter_compute(data_x): + """ + compute the parameter of tanh: + return: result equal (x+0.044715*tf.pow(x,3)) + """ + mul_0 = self.mul(data_x, data_x) + pow_0 = self.mul(mul_0, data_x) + mul_1 = self.mul(pow_0, self.CSVALUE) + result = self.add(data_x, mul_1) + + return result + + tanh_parameter = _tanh_parameter_compute(input_x) + mul_0 = self.mul(tanh_parameter, 1.5957691) + + mul_0_min = self.minimum(mul_0, 0.0) + right_mul = self.exp(mul_0_min) + + mul_0_abs = self.abs(mul_0) + mul_0_abs_neg = self.mul(mul_0_abs, -1.0) + mul_0_abs_neg_exp = self.exp(mul_0_abs_neg) + + mul_0_abs_neg_exp_add = self.add(mul_0_abs_neg_exp, 1.0) + left_mul = self.div(input_x, mul_0_abs_neg_exp_add) + + result = self.mul(left_mul, right_mul) + return result + + def 
bprop(self, input_x, out, dout): + """ register backprop function for Gelu """ + data_x = input_x + data_gelu = out + data_dy = dout + + def _math_four_compute(data_x): + """ + return: math_four equal 2*(np(sqrt(2 / np.pi)*(x + 0.044715*tf.pow(x, 3))) + """ + datax_pow = data_x * data_x * data_x + datax_muls_c = self.mul(datax_pow, self.CSVALUE) + datax_addx = self.add(datax_muls_c, data_x) + datax_muls_s = self.mul(datax_addx, self.CSVALUE_A) + + return datax_muls_s + + # common part + math_four = _math_four_compute(data_x) + math_four_abs = self.abs(math_four) + math_four_abs_neg = self.mul(math_four_abs, -1.0) + math_four_abs_neg_exp = self.exp(math_four_abs_neg) + math_four_min = self.minimum(math_four, 0.0) + + # dividend part + datax_pow = self.mul(data_x, data_x) + datax_pow_mul = self.mul(datax_pow, self.CSVALUE_3B) + datax_pow_mul_add = self.add(datax_pow_mul, self.CSVALUE_A) + data_gelu_mul = self.mul(data_gelu, datax_pow_mul_add) + math_four_min_2 = self.mul(math_four_min, 2.0) + div_right = self.mul(data_gelu_mul, math_four_abs_neg_exp) + div_left = self.exp(math_four_min_2) + dividend = self.add(div_left, div_right) + + # divisor part + div_0 = self.add(math_four_abs_neg_exp, 1.0) + div_1 = self.exp(math_four_min) + divisor = self.mul(div_1, div_0) + res_grad = self.div(dividend, divisor) + + result = self.mul(res_grad, data_dy) + return (result,) + + +class Softmax(GraphKernel): + """ + Operator Softmax + .. 
math: `exp(x-max(x)) / sum(exp(x-max(x)))` + + Args: + axis (int, tuple): Axis along which the softmax normalization is applied + + Inputs: + x (Tensor): input data for softmax + + Outputs: + output (Tensor): a tensor with the same shape of the input + + Examples: + >>> layer = Softmax(1) + >>> x = Tensor(np.array([1.2, 2.1], [2.2, 3.2]), mindspore.float32) + >>> output = layer(x) + """ + + def __init__(self, axis): + super(Softmax, self).__init__() + validator.check_type("axis", axis, [int, tuple]) + if isinstance(axis, int): + self.axis = (axis,) + else: + self.axis = axis + for item in self.axis: + validator.check_type("item of axis", item, [int]) + self.max = P.ReduceMax(keep_dims=True) + self.sub = P.Sub() + self.exp = P.Exp() + self.sum = P.ReduceSum(keep_dims=True) + self.mul = P.Mul() + + def construct(self, x): + max_x = self.max(x, self.axis) + data_sub = self.sub(x, max_x) + data_exp = self.exp(data_sub) + data_expsum = self.sum(data_exp, self.axis) + output = data_exp / data_expsum + return output + + def bprop(self, x, out, dout): + mul_res = self.mul(dout, out) + sum_res = self.sum(mul_res, self.axis) + sub_res = self.sub(dout, sum_res) + res = self.mul(sub_res, out) + return (res,) + + +class LayerNorm(Cell): + r""" + Applies Layer Normalization over a mini-batch of inputs. + + Layer normalization is widely used in recurrent neural networks. It applies + normalization over a mini-batch of inputs for each single training case as described + in the paper `Layer Normalization `_. Unlike batch + normalization, layer normalization performs exactly the same computation at training and + testing times. It can be described using the following formula. It is applied across all channels + and pixel but only one batch size. + + .. math:: + y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta + + Args: + normalized_shape (Union(tuple[int], list[int]): The normalization is performed over axis + `begin_norm_axis ... R - 1`. 
+ begin_norm_axis (int): It first normalization dimension: normalization will be performed along dimensions + `begin_norm_axis: rank(inputs)`, the value should be in [-1, rank(input)). Default: -1. + begin_params_axis (int): The first parameter(beta, gamma)dimension: scale and centering parameters + will have dimensions `begin_params_axis: rank(inputs)` and will be broadcast with + the normalized inputs accordingly, the value should be in [-1, rank(input)). Default: -1. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform', + 'he_uniform', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform', + 'he_uniform', etc. Default: 'zeros'. + + Inputs: + - **input_x** (Tensor) - The shape of 'input_x' is :math:`(x_1, x_2, ..., x_R)`, + and `input_shape[begin_norm_axis:]` is equal to `normalized_shape`. + + Outputs: + Tensor, the normalized and scaled offset tensor, has the same shape and data type as the `input_x`. 
+ + Examples: + >>> x = Tensor(np.ones([20, 5, 10, 10]), mindspore.float32) + >>> shape1 = x.shape()[1:] + >>> m = G.LayerNorm(shape1, begin_norm_axis=1, begin_params_axis=1) + >>> m(x) + """ + + def __init__(self, + begin_norm_axis=-1, + begin_params_axis=-1 + ): + super(LayerNorm, self).__init__() + self.begin_norm_axis = begin_norm_axis + self.begin_params_axis = begin_params_axis + self.layer_norm = LayerNormForward(begin_norm_axis, begin_params_axis) + self.layer_norm_x_grad = LayerNormXBackprop() + self.layer_norm_beta_gamma = LayerNormBetaGammaBackprop() + self.layer_norm_grad = G.LayerNormGrad(self.begin_norm_axis, self.begin_params_axis) + + def construct(self, input_x, input_gamma, input_beta): + return self.layer_norm(input_x, input_gamma, input_beta) + + # case 1 + def bprop(self, input_x, input_gamma, input_beta, out, dout): + dx, d_gamma, d_beta = self.layer_norm_grad(input_x, dout[0], out[2], dout[1], input_gamma) + return dx, d_gamma, d_beta + + +class LambUpdateWithLR(GraphKernel): + r""" + Part of Lamb optimizer. + + .. math:: + s_1 = select(i_1 \gt y_g, select(i_0 \gt y_g, \frac{i_1}{i_2}, se), se) + i_5 = i_5 - max(min(s_1, y_m), y_g) \times i_3 \times i_4 + + Inputs: + - **input0** (Tensor) - The first tensor to be computed. + - **input1** (Tensor) - The second tensor to be computed. + - **input2** (Tensor) - The third tensor to be computed. + - **input3** (Tensor) - The fourth tensor to be computed. + - **input4** (Tensor) - The fifth tensor to be computed. + - **input5** (Tensor) - The sixth tensor to be computed. It will be updated by result. + - **greater_y** (Tensor) - The seventh tensor to be computed. + - **select_e** (Tensor) - The eighth tensor to be computed. + - **minimum_y** (Tensor) - The ninth tensor to be computed. + + Outputs: + A fake output tensor. 
+ + Examples: + >>> lamb_update = LambUpdateWithLR() + >>> i0 = np.random.normal(0, 1, [1, 16]).astype(np.float32) + >>> i1 = np.random.normal(0, 1, [1]).astype(np.float32) + >>> i2 = np.random.normal(0, 1, [1]).astype(np.float32) + >>> i3 = np.random.normal(0, 1, [1]).astype(np.float32) + >>> i4 = np.random.normal(0, 1, [1, 16]).astype(np.float32) + >>> i5 = np.random.normal(0, 1, [1, 16]).astype(np.float32) + >>> yg = np.random.normal(0, 1, [1]).astype(np.float32) + >>> se = np.random.normal(0, 1, [1]).astype(np.float32) + >>> ym = np.random.normal(0, 1, [1]).astype(np.float32) + >>> lamb_update(i0, i1, i2, i3, i4, i5, yg, se, ym) + + """ + + def __init__(self): + super(LambUpdateWithLR, self).__init__() + self.greater = P.Greater() + self.select = P.Select() + self.div = P.RealDiv() + self.min = P.Minimum() + self.max = P.Maximum() + self.mul = P.Mul() + self.sub = P.Sub() + self.fake_output_assign = InplaceAssign() + self.fake_output_assign.add_prim_attr("fake_output", True) + + def construct(self, input0, input1, input2, input3, input4, input5, greater_y, select_e, minimum_y): + greater0 = self.greater(input0, greater_y) + greater1 = self.greater(input1, greater_y) + real_div0 = self.div(input1, input2) + select0 = self.select(greater0, real_div0, select_e) + select1 = self.select(greater1, select0, select_e) + min0 = self.min(select1, minimum_y) + max0 = self.max(min0, greater_y) + mul0 = self.mul(max0, input3) + mul1 = self.mul(mul0, input4) + sub0 = self.sub(input5, mul1) + sub0 = self.fake_output_assign(input5, sub0, sub0) + return sub0 + +class LambNextMV(GraphKernel): + r""" + Part of Lamb optimizer. + + .. 
math:: + rd_0 = \frac{i_8 \times i_5 + i_9 \times i_4}{i6} + rd_1 = \frac{x_0 \times i_2 + x_1 \times i_1}{i3} + y_2 = \frac{rd_0}{\sqrt{rd_1 + x3}} + x_2 \times i_7 + y_3 = \frac{rd_0}{\sqrt{rd_1} + x3} + i5 = i_8 \times i_5 + i_9 \times i_4 + i2 = x_0 \times i_2 + x_1 \times i_1 + + Inputs: + - **inputs1** (Tensor) - The first input tensor to be computed. + - **inputs2** (Tensor) - The second input tensor to be computed. It will be updated by result. + - **inputs3** (Tensor) - The third input tensor to be computed. + - **inputs4** (Tensor) - The fourth input tensor to be computed. + - **inputs5** (Tensor) - The fifth input tensor to be computed. It will be updated by result. + - **inputs6** (Tensor) - The sixth input tensor to be computed. + - **inputs7** (Tensor) - The seventh input tensor to be computed. + - **inputs8** (Tensor) - The eighth input tensor to be computed. + - **inputs9** (Tensor) - The ninth input tensor to be computed. + - **inputsx0** (Tensor) - The tenth input tensor to be computed. + - **inputsx1** (Tensor) - The eleventh input tensor to be computed. + - **inputsx2** (Tensor) - The twelfth input tensor to be computed. + - **inputsx3** (Tensor) - The thirteenth input tensor to be computed. + + Outputs: + Tuple of 2 Tensor. + + - **add3** (Tensor) - The shape is same as the shape after broadcasting, and the data type is + the one with high precision or high digits among the inputs. + - **realdiv4** (Tensor) - The shape is same as the shape after broadcasting, and the data type is + the one with high precision or high digits among the inputs. 
+ + Examples: + >>> lamb_next_mv = LambNextMV() + >>> i1 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i2 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i3 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i4 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i5 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i6 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i7 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i8 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> i9 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> x0 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> x1 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> x2 = Tensor(np.random.normal(0, 1, [1, 16]).astype(np.float32)) + >>> x3 = Tensor(np.ones([1, 16]).astype(np.float32) * 1e-6) + >>> lamb_next_mv(i1, i2, i3, i4, i5, i6, i7, i8, i9, x0, x1, x2, x3) + + """ + + def __init__(self): + super(LambNextMV, self).__init__() + self.mul = P.Mul() + self.add = P.TensorAdd() + self.div = P.RealDiv() + self.sqrt = P.Sqrt() + self.rsqrt = P.Rsqrt() + self.fake_output_assign_1 = InplaceAssign() + self.fake_output_assign_1.add_prim_attr("fake_output", False) + self.fake_output_assign_2 = InplaceAssign() + self.fake_output_assign_2.add_prim_attr("fake_output", False) + + + def construct(self, input1, input2, input3, input4, input5, input6, input7, + input8, input9, inputx0, inputx1, inputx2, inputx3): + mul3 = self.mul(inputx1, input1) + mul2 = self.mul(inputx0, input2) + add1 = self.add(mul2, mul3) + realdiv1 = self.div(add1, input3) + add2 = self.add(realdiv1, inputx3) + sqrt0 = self.rsqrt(add2) + sqrt1 = self.sqrt(realdiv1) + add4 = self.add(sqrt1, inputx3) + mul1 = self.mul(input9, input4) + mul0 = self.mul(input8, input5) + add0 = self.add(mul0, mul1) + realdiv0 = self.div(add0, input6) + realdiv2 = self.mul(realdiv0, 
sqrt0) + realdiv4 = self.div(realdiv0, add4) + mul4 = self.mul(inputx2, input7) + add3 = self.add(realdiv2, mul4) + + add3 = self.fake_output_assign_1(input5, add0, add3) + add3 = self.fake_output_assign_2(input2, add1, add3) + + return add3, realdiv4 diff --git a/mindspore/nn/layer/activation.py b/mindspore/nn/layer/activation.py index 3a754e4c03..14a1aa8554 100644 --- a/mindspore/nn/layer/activation.py +++ b/mindspore/nn/layer/activation.py @@ -20,8 +20,10 @@ from mindspore.common.parameter import Parameter from mindspore.common.initializer import initializer from mindspore.common.tensor import Tensor from mindspore._extends import cell_attr_register +from mindspore.ops import _selected_ops from ..cell import Cell + __all__ = ['Softmax', 'LogSoftmax', 'ReLU', @@ -73,7 +75,7 @@ class Softmax(Cell): def __init__(self, axis=-1): super(Softmax, self).__init__() - self.softmax = P.Softmax(axis) + self.softmax = _selected_ops.Softmax(axis) def construct(self, x): return self.softmax(x) @@ -110,7 +112,7 @@ class LogSoftmax(Cell): def __init__(self, axis=-1): super(LogSoftmax, self).__init__() - self.log_softmax = P.LogSoftmax(axis) + self.log_softmax = _selected_ops.LogSoftmax(axis) def construct(self, x): return self.log_softmax(x) @@ -249,11 +251,11 @@ class LeakyReLU(Cell): self.alpha = alpha def construct(self, x): - alpha = P.Cast()(F.scalar_to_array(self.alpha), P.DType()(x)) + alpha_array = P.Cast()(F.scalar_to_array(self.alpha), P.DType()(x)) if self.alpha <= 1: - out = P.Maximum()(alpha * x, x) + out = P.Maximum()(alpha_array * x, x) else: - out = P.Minimum()(alpha * x, x) + out = P.Minimum()(alpha_array * x, x) return out @@ -286,7 +288,7 @@ class Tanh(Cell): def __init__(self): super(Tanh, self).__init__() - self.tanh = P.Tanh() + self.tanh = _selected_ops.Tanh() def construct(self, x): return self.tanh(x) @@ -318,7 +320,7 @@ class GELU(Cell): def __init__(self): super(GELU, self).__init__() - self.gelu = P.Gelu() + self.gelu = _selected_ops.Gelu() def 
construct(self, x): return self.gelu(x) @@ -378,7 +380,7 @@ class PReLU(Cell): Tensor, with the same type and shape as the `input_data`. Examples: - >>> input_x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float32) + >>> input_x = Tensor(np.random.rand(1, 10, 4, 4), mindspore.float32) >>> prelu = nn.PReLU() >>> prelu(input_x) @@ -503,6 +505,7 @@ class LogSigmoid(Cell): [-3.1326166e-01, -1.2692806e-01, -4.8587345e-02] """ + def __init__(self): super(LogSigmoid, self).__init__() self.mul = P.Mul() @@ -549,9 +552,9 @@ def get_activation(name): Examples: >>> sigmoid = nn.get_activation('sigmoid') """ - if not name: + if name is None: return None if name not in _activation: - raise KeyError("Unknown activation type") + raise KeyError(f"Unknown activation type '{name}'") return _activation[name]() diff --git a/mindspore/nn/layer/basic.py b/mindspore/nn/layer/basic.py index 8f4e468e0b..b1d5af48c9 100644 --- a/mindspore/nn/layer/basic.py +++ b/mindspore/nn/layer/basic.py @@ -22,15 +22,21 @@ from mindspore.ops import operations as P from mindspore.ops import functional as F from mindspore.ops.functional import identity from mindspore.ops.operations import _inner_ops as inner +from mindspore.ops.primitive import constexpr from mindspore.common.parameter import Parameter from mindspore._extends import cell_attr_register from mindspore.common.api import ms_function from mindspore import context +from mindspore.ops import _selected_ops from ..cell import Cell from .activation import get_activation from ..._checkparam import Validator as validator +from ..._checkparam import Rel + + +__all__ = ['Dropout', 'Flatten', 'Dense', 'ClipByNorm', 'Norm', 'OneHot', 'Pad', 'Unfold', + 'MatrixDiag', 'MatrixDiagPart', 'MatrixSetDiag'] -__all__ = ['Dropout', 'Flatten', 'Dense', 'ClipByNorm', 'Norm', 'OneHot', 'Pad', 'Unfold'] class Dropout(Cell): r""" @@ -73,6 +79,7 @@ class Dropout(Cell): >>> net = nn.Dropout(keep_prob=0.8) >>> net(x) """ + def __init__(self, keep_prob=0.5, seed0=0, 
seed1=0, dtype=mstype.float32): super(Dropout, self).__init__() if keep_prob <= 0 or keep_prob > 1: @@ -130,12 +137,13 @@ class Flatten(Cell): Examples: >>> net = nn.Flatten() >>> input = Tensor(np.array([[[1.2, 1.2], [2.1, 2.1]], [[2.2, 2.2], [3.2, 3.2]]]), mindspore.float32) - >>> input.shape() + >>> input.shape (2, 2, 2) >>> net(input) [[1.2 1.2 2.1 2.1] [2.2 2.2 3.2 3.2]] """ + def __init__(self): super(Flatten, self).__init__() @@ -197,21 +205,21 @@ class Dense(Cell): self.has_bias = check_bool(has_bias) if isinstance(weight_init, Tensor): - if weight_init.dim() != 2 or weight_init.shape()[0] != out_channels or \ - weight_init.shape()[1] != in_channels: + if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \ + weight_init.shape[1] != in_channels: raise ValueError("weight_init shape error") self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight") if self.has_bias: if isinstance(bias_init, Tensor): - if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels: + if bias_init.dim() != 1 or bias_init.shape[0] != out_channels: raise ValueError("bias_init shape error") self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias") self.matmul = P.MatMul(transpose_b=True) - self.bias_add = P.BiasAdd() + self.bias_add = _selected_ops.BiasAdd() self.activation = get_activation(activation) self.activation_flag = self.activation is not None @@ -236,6 +244,13 @@ class Dense(Cell): return str_info +@constexpr +def _is_equal_one(x): + if x is None: + return False + return bool(x.asnumpy().mean() == 1.0) + + class ClipByNorm(Cell): r""" Clips tensor values to a maximum :math:`L_2`-norm. 
@@ -263,6 +278,7 @@ class ClipByNorm(Cell): >>> net(input, clip_norm) """ + def __init__(self): super(ClipByNorm, self).__init__() self.reduce_sum = P.ReduceSum(keep_dims=True) @@ -290,7 +306,11 @@ class ClipByNorm(Cell): l2sum_safe = self.select_(cond, l2sum, self.cast(ones_, self.dtype(l2sum))) l2norm = self.select_(cond, self.sqrt(l2sum_safe), l2sum) - intermediate = x * clip_norm + if _is_equal_one(clip_norm): + intermediate = x + else: + intermediate = x * clip_norm + max_norm = self.max_op(l2norm, clip_norm) values_clip = self.cast(intermediate, mstype.float32) / self.expand_dims(max_norm, -1) values_clip = self.reshape(values_clip, self.shape(x)) @@ -319,6 +339,7 @@ class Norm(Cell): >>> input = Tensor(np.random.randint(0, 10, [4, 16]), mindspore.float32) >>> net(input) """ + def __init__(self, axis=(), keep_dims=False): super(Norm, self).__init__() self.axis = axis @@ -381,6 +402,7 @@ class OneHot(Cell): [0. 1.] [0. 0.]]] """ + def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, dtype=mstype.float32): super(OneHot, self).__init__() self.onehot = P.OneHot(axis) @@ -495,6 +517,7 @@ class Unfold(Cell): Tensor ([[[[1, 1] [1, 1]] [[1, 1], [1, 1]] [[1, 1] [1, 1]], [[1, 1], [1, 1]]]], shape=(1, 4, 2, 2), dtype=mstype.float16) """ + def __init__(self, ksizes, strides, rates, padding="valid"): super(Unfold, self).__init__() self.extract_image_patches = inner.ExtractImagePatches(ksizes, strides, rates, padding) @@ -507,3 +530,112 @@ class Unfold(Cell): ret = self.extract_image_patches(x_transpose) ret_transpose = self.transpose(ret, self.format_NCHW) return ret_transpose + + +@constexpr +def _get_matrix_diag_assist(x_shape, x_dtype): + validator.check_integer("x rank", len(x_shape), 1, Rel.GE, "_get_matrix_diag_assist") + base_eye = np.eye(x_shape[-1], x_shape[-1]).reshape(-1) + assist = np.tile(base_eye, x_shape[:-1]).reshape(x_shape + (x_shape[-1],)) + return Tensor(assist, x_dtype) + + +@constexpr +def _get_matrix_diag_part_assist(x_shape, x_dtype): 
+ validator.check_integer("x rank", len(x_shape), 2, Rel.GE, "_get_matrix_diag_part_assist") + base_eye = np.eye(x_shape[-2], x_shape[-1]).reshape(-1) + assist = np.tile(base_eye, x_shape[:-2]).reshape(x_shape) + return Tensor(assist, x_dtype) + + +class MatrixDiag(Cell): + """ + Returns a batched diagonal tensor with a given batched diagonal values. + + Inputs: + - **x** (Tensor) - The diagonal values. It can be of the following data types: + float32, float16, int32, int8, uint8. + + Outputs: + Tensor, same type as input `x`. The shape should be x.shape + (x.shape[-1], ). + + Examples: + >>> x = Tensor(np.array([1, -1]), mstype.float32) + >>> matrix_diag = nn.MatrixDiag() + >>> result = matrix_diag(x) + [[1. 0.] + [0. -1.]] + """ + def __init__(self): + super(MatrixDiag, self).__init__() + self.matrix_diag = inner.MatrixDiag() + self.dtype = P.DType() + + def construct(self, input_x): + x_shape = F.shape(input_x) + x_dtype = self.dtype(input_x) + assist = _get_matrix_diag_assist(x_shape, x_dtype) + out_matrix_diag = self.matrix_diag(input_x, assist) + return out_matrix_diag + + +class MatrixDiagPart(Cell): + r""" + Returns the batched diagonal part of a batched tensor. + + Inputs: + - **x** (Tensor) - The batched tensor. It can be of the following data types: + float32, float16, int32, int8, uint8. + + Outputs: + Tensor, same type as input `x`. The shape should be x.shape[:-2] + [min(x.shape[-2:])]. 
+ + Examples: + >>> x = Tensor([[[-1, 0], [0, 1]], [-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) + >>> matrix_diag_part = nn.MatrixDiagPart() + >>> result = matrix_diag_part(x) + [[-1., 1.], [-1., 1.], [-1., 1.]] + """ + def __init__(self): + super(MatrixDiagPart, self).__init__() + self.matrix_diag_part = inner.MatrixDiagPart() + self.dtype = P.DType() + + def construct(self, input_x): + x_shape = F.shape(input_x) + x_dtype = self.dtype(input_x) + assist = _get_matrix_diag_part_assist(x_shape, x_dtype) + out_matrix_diag_part = self.matrix_diag_part(input_x, assist) + return out_matrix_diag_part + + +class MatrixSetDiag(Cell): + r""" + Modify the batched diagonal part of a batched tensor. + + Inputs: + - **x** (Tensor) - The batched tensor. It can be of the following data types: + float32, float16, int32, int8, uint8. + - **diagonal** (Tensor) - The diagonal values. + + Outputs: + Tensor, same type as input `x`. The shape same as `x`. + + Examples: + >>> x = Tensor([[[-1, 0], [0, 1]], [-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) + >>> diagonal = Tensor([[-1., 2.], [-1., 1.], [-1., 1.]], mindspore.float32) + >>> matrix_set_diag = nn.MatrixSetDiag() + >>> result = matrix_set_diag(x, diagonal) + [[[-1, 0], [0, 2]], [-1, 0], [0, 1]], [[-1, 0], [0, 1]]] + """ + def __init__(self): + super(MatrixSetDiag, self).__init__() + self.matrix_set_diag = inner.MatrixSetDiag() + self.dtype = P.DType() + + def construct(self, input_x, diagonal): + x_shape = F.shape(input_x) + x_dtype = self.dtype(input_x) + assist = _get_matrix_diag_part_assist(x_shape, x_dtype) + out_matrix_set_diag = self.matrix_set_diag(input_x, diagonal, assist) + return out_matrix_set_diag diff --git a/mindspore/nn/layer/combined.py b/mindspore/nn/layer/combined.py deleted file mode 100644 index 671365e393..0000000000 --- a/mindspore/nn/layer/combined.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 
(the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Use combination of Conv, Dense, Relu, Batchnorm.""" - -from .normalization import BatchNorm2d -from .activation import get_activation -from ..cell import Cell -from . import conv, basic -from ..._checkparam import ParamValidator as validator - - -__all__ = ['Conv2d', 'Dense'] - -class Conv2d(Cell): - r""" - A combination of convolution, Batchnorm, activation layer. - - For a more Detailed overview of Conv2d op. - - Args: - in_channels (int): The number of input channel :math:`C_{in}`. - out_channels (int): The number of output channel :math:`C_{out}`. - kernel_size (Union[int, tuple]): The data type is int or tuple with 2 integers. Specifies the height - and width of the 2D convolution window. Single int means the value if for both height and width of - the kernel. A tuple of 2 ints means the first value is for the height and the other is for the - width of the kernel. - stride (int): Specifies stride for all spatial dimensions with the same value. Value of stride should be - greater or equal to 1 but bounded by the height and width of the input. Default: 1. - pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". - padding (int): Implicit paddings on both sides of the input. Default: 0. - dilation (int): Specifying the dilation rate to use for dilated convolution. 
If set to be :math:`k > 1`, - there will be :math:`k - 1` pixels skipped for each sampling location. Its value should be greater - or equal to 1 and bounded by the height and width of the input. Default: 1. - group (int): Split filter into groups, `in_ channels` and `out_channels` should be - divisible by the number of groups. Default: 1. - has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. - It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, - values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well - as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' - and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of - Initializer for more details. Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible - Initializer and string are the same as 'weight_init'. Refer to the values of - Initializer for more details. Default: 'zeros'. - batchnorm (bool): Specifies to used batchnorm or not. Default: None. - activation (string): Specifies activation type. The optional values are as following: - 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid', - 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None. - - Inputs: - - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - - Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. 
- - Examples: - >>> net = combined.Conv2d(120, 240, 4, batchnorm=True, activation='ReLU') - >>> input = Tensor(np.ones([1, 120, 1024, 640]), mindspore.float32) - >>> net(input).shape() - (1, 240, 1024, 640) - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - has_bias=False, - weight_init='normal', - bias_init='zeros', - batchnorm=None, - activation=None): - super(Conv2d, self).__init__() - self.conv = conv.Conv2d( - in_channels, - out_channels, - kernel_size, - stride, - pad_mode, - padding, - dilation, - group, - has_bias, - weight_init, - bias_init) - self.has_bn = batchnorm is not None - self.has_act = activation is not None - self.batchnorm = batchnorm - if batchnorm is True: - self.batchnorm = BatchNorm2d(out_channels) - elif batchnorm is not None: - validator.check_isinstance('batchnorm', batchnorm, (BatchNorm2d,)) - self.activation = get_activation(activation) - - def construct(self, x): - x = self.conv(x) - if self.has_bn: - x = self.batchnorm(x) - if self.has_act: - x = self.activation(x) - return x - - -class Dense(Cell): - r""" - A combination of Dense, Batchnorm, activation layer. - - For a more Detailed overview of Dense op. - - Args: - in_channels (int): The number of channels in the input space. - out_channels (int): The number of channels in the output space. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype - is same as input x. The values of str refer to the function `initializer`. Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is - same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. - has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. - activation (str): Regularizer function applied to the output of the layer, eg. 'relu'. Default: None. 
- batchnorm (bool): Specifies to used batchnorm or not. Default: None. - activation (string): Specifies activation type. The optional values are as following: - 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid', - 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None. - - Inputs: - - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`. - - Outputs: - Tensor of shape :math:`(N, out\_channels)`. - - Examples: - >>> net = nn.Dense(3, 4) - >>> input = Tensor(np.random.randint(0, 255, [2, 3]), mindspore.float32) - >>> net(input) - """ - - def __init__(self, - in_channels, - out_channels, - weight_init='normal', - bias_init='zeros', - has_bias=True, - batchnorm=None, - activation=None): - super(Dense, self).__init__() - self.dense = basic.Dense( - in_channels, - out_channels, - weight_init, - bias_init, - has_bias) - self.has_bn = batchnorm is not None - self.has_act = activation is not None - if batchnorm is True: - self.batchnorm = BatchNorm2d(out_channels) - elif batchnorm is not None: - validator.check_isinstance('batchnorm', batchnorm, (BatchNorm2d,)) - self.activation = get_activation(activation) - - def construct(self, x): - x = self.dense(x) - if self.has_bn: - x = self.batchnorm(x) - if self.has_act: - x = self.activation(x) - return x diff --git a/mindspore/nn/layer/container.py b/mindspore/nn/layer/container.py index b9ce230aec..48871401bf 100644 --- a/mindspore/nn/layer/container.py +++ b/mindspore/nn/layer/container.py @@ -140,6 +140,11 @@ class SequentialCell(Cell): def __len__(self): return len(self._cells) + def set_grad(self, flag=True): + self.requires_grad = flag + for cell in self._cells.values(): + cell.set_grad(flag) + def construct(self, input_data): for cell in self.cell_list: input_data = cell(input_data) @@ -150,8 +155,9 @@ class CellList(_CellListBase, Cell): """ Holds Cells in a list. 
- CellList can be indexed like a regular Python list, but cells it - contains are properly registered, and will be visible by all Cell methods. + CellList can be used like a regular Python list, support + '__getitem__', '__setitem__', '__delitem__', '__len__', '__iter__' and '__iadd__', + but cells it contains are properly registered, and will be visible by all Cell methods. Args: args (list, optional): List of subclass of Cell. @@ -245,5 +251,10 @@ class CellList(_CellListBase, Cell): self._cells[str(len(self))] = cell return self + def set_grad(self, flag=True): + self.requires_grad = flag + for cell in self._cells.values(): + cell.set_grad(flag) + def construct(self, *inputs): raise NotImplementedError diff --git a/mindspore/nn/layer/conv.py b/mindspore/nn/layer/conv.py index e02908aed3..b2a0de9cbe 100644 --- a/mindspore/nn/layer/conv.py +++ b/mindspore/nn/layer/conv.py @@ -168,7 +168,7 @@ class Conv2d(_Conv): Examples: >>> net = nn.Conv2d(120, 240, 4, has_bias=False, weight_init='normal') >>> input = Tensor(np.ones([1, 120, 1024, 640]), mindspore.float32) - >>> net(input).shape() + >>> net(input).shape (1, 240, 1024, 640) """ @cell_attr_register diff --git a/mindspore/nn/layer/embedding.py b/mindspore/nn/layer/embedding.py index 5df38b6845..c8873039ab 100755 --- a/mindspore/nn/layer/embedding.py +++ b/mindspore/nn/layer/embedding.py @@ -44,10 +44,12 @@ class Embedding(Cell): dtype (:class:`mindspore.dtype`): Data type of input. Default: mindspore.float32. Inputs: - - **input** (Tensor) - Tensor of shape :math:`(\text{vocab_size})`. + - **input** (Tensor) - Tensor of shape :math:`(\text{batch_size}, \text{input_length})`. The element of + the Tensor should be integer and not larger than vocab_size. else the corresponding embedding vector is zero + if larger than vocab_size. Outputs: - Tensor of shape :math:`(\text{vocab_size}, \text{embedding_size})`. + Tensor of shape :math:`(\text{batch_size}, \text{input_length}, \text{embedding_size})`. 
Examples: >>> net = nn.Embedding(20000, 768, True) @@ -55,12 +57,13 @@ class Embedding(Cell): >>> >>> # Maps the input word IDs to word embedding. >>> output = net(input_data) - >>> output.shape() + >>> output.shape (8, 128, 768) """ def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal', dtype=mstype.float32): super(Embedding, self).__init__() validator.check_subclass("dtype", dtype, mstype.number_type, self.cls_name) + validator.check_value_type('use_one_hot', use_one_hot, [bool], self.cls_name) self.vocab_size = vocab_size self.embedding_size = embedding_size self.use_one_hot = use_one_hot diff --git a/mindspore/nn/layer/image.py b/mindspore/nn/layer/image.py index 39cc7895f3..b23f20deb8 100644 --- a/mindspore/nn/layer/image.py +++ b/mindspore/nn/layer/image.py @@ -23,7 +23,7 @@ from mindspore._checkparam import Validator as validator from mindspore._checkparam import Rel from ..cell import Cell -__all__ = ['ImageGradients', 'SSIM', 'PSNR'] +__all__ = ['ImageGradients', 'SSIM', 'PSNR', 'CentralCrop'] class ImageGradients(Cell): r""" @@ -264,3 +264,72 @@ class PSNR(Cell): psnr = 10 * P.Log()(F.square(max_val) / mse) / F.scalar_log(10.0) return psnr + + +@constexpr +def _raise_dims_rank_error(input_shape, param_name, func_name): + """raise error if input is not 3d or 4d""" + raise ValueError(f"{func_name} {param_name} should be 3d or 4d, but got shape {input_shape}") + +@constexpr +def _get_bbox(rank, shape, central_fraction): + """get bbox start and size for slice""" + if rank == 3: + c, h, w = shape + else: + n, c, h, w = shape + + central_fraction = central_fraction.asnumpy()[0] + bbox_h_start = int((float(h) - float(h) * central_fraction) / 2) + bbox_w_start = int((float(w) - float(w) * central_fraction) / 2) + bbox_h_size = h - bbox_h_start * 2 + bbox_w_size = w - bbox_w_start * 2 + + if rank == 3: + bbox_begin = (0, bbox_h_start, bbox_w_start) + bbox_size = (c, bbox_h_size, bbox_w_size) + else: + bbox_begin = (0, 0, 
bbox_h_start, bbox_w_start) + bbox_size = (n, c, bbox_h_size, bbox_w_size) + + return bbox_begin, bbox_size + +class CentralCrop(Cell): + """ + Crop the central region of the images with the central_fraction. + + Args: + central_fraction (float): Fraction of size to crop. It must be float and in range (0.0, 1.0]. + + Inputs: + - **image** (Tensor) - A 3-D tensor of shape [C, H, W], or a 4-D tensor of shape [N, C, H, W]. + + Outputs: + Tensor, 3-D or 4-D float tensor, according to the input. + + Examples: + >>> net = nn.CentralCrop(central_fraction=0.5) + >>> image = Tensor(np.random.random((4, 3, 4, 4)), mindspore.float32) + >>> output = net(image) + """ + + def __init__(self, central_fraction): + super(CentralCrop, self).__init__() + validator.check_value_type("central_fraction", central_fraction, [float], self.cls_name) + self.central_fraction = validator.check_number_range('central_fraction', central_fraction, + 0.0, 1.0, Rel.INC_RIGHT, self.cls_name) + self.central_fraction_tensor = Tensor(np.array([central_fraction]).astype(np.float64)) + self.slice = P.Slice() + + def construct(self, image): + image_shape = F.shape(image) + rank = len(image_shape) + if not rank in (3, 4): + return _raise_dims_rank_error(image_shape, "image", self.cls_name) + if self.central_fraction == 1.0: + return image + + bbox_begin, bbox_size = _get_bbox(rank, image_shape, self.central_fraction_tensor) + image = self.slice(image, bbox_begin, bbox_size) + + return image diff --git a/mindspore/nn/layer/lstm.py b/mindspore/nn/layer/lstm.py index 6122e82aaa..71c2920850 100755 --- a/mindspore/nn/layer/lstm.py +++ b/mindspore/nn/layer/lstm.py @@ -13,15 +13,17 @@ # limitations under the License.
# ============================================================================ """lstm""" -from mindspore.ops import operations as P -from mindspore.nn.cell import Cell -from mindspore.common.parameter import Parameter -from mindspore.common.initializer import initializer -from mindspore._checkparam import Validator as validator -from mindspore import context +import math +import numpy as np import mindspore.nn as nn +from mindspore import context +from mindspore._checkparam import Validator as validator +from mindspore.common.initializer import initializer +from mindspore.common.parameter import Parameter, ParameterTuple from mindspore.common.tensor import Tensor -import numpy as np +from mindspore.nn.cell import Cell +from mindspore.ops import operations as P +from ..._checkparam import Rel __all__ = ['LSTM', 'LSTMCell'] @@ -122,6 +124,8 @@ class LSTM(Cell): self.num_layers = num_layers self.has_bias = has_bias self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name) + self.hidden_size = validator.check_integer("hidden_size", hidden_size, 0, Rel.GT, self.cls_name) + self.num_layers = validator.check_integer("num_layers", num_layers, 0, Rel.GT, self.cls_name) self.dropout = float(dropout) self.bidirectional = bidirectional if self.batch_first: @@ -147,23 +151,31 @@ class LSTM(Cell): if self.has_bias: increment_size += 2 * gate_size weight_size += increment_size * num_directions - self.weight = Parameter(initializer(0.0, [weight_size, 1, 1]), name='weight') + stdv = 1 / math.sqrt(hidden_size) + w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32) + self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight') else: - layer = [] - layer.append(nn.LSTMCell(input_size=self.input_size, - hidden_size=self.hidden_size, - layer_index=0, - has_bias=self.has_bias, - bidirectional=self.bidirectional, - dropout=self.dropout)) - for i in range(num_layers - 1): - 
layer.append(nn.LSTMCell(input_size=self.hidden_size * num_directions, - hidden_size=self.hidden_size, - layer_index=i + 1, - has_bias=self.has_bias, - bidirectional=self.bidirectional, - dropout=self.dropout)) - self.lstms = layer + input_size_list = [] + input_size_list.append(self.input_size) + for i in range(self.num_layers - 1): + input_size_list.append(self.hidden_size * num_directions) + weights = [] + layers = [] + bias_size = 0 if not self.has_bias else num_directions * self.hidden_size * 4 + stdv = 1 / math.sqrt(hidden_size) + for i in range(num_layers): + weight_size = (input_size_list[i] + self.hidden_size) * num_directions * self.hidden_size * 4 + if has_bias: + weight_size = weight_size + bias_size + w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32) + weights.append(Parameter(initializer(Tensor(w_np), w_np.shape), name='weight' + str(i))) + layers.append(nn.LSTMCell(input_size=input_size_list[i], + hidden_size=self.hidden_size, + has_bias=self.has_bias, + bidirectional=self.bidirectional, + dropout=self.dropout)) + self.lstms = layers + self.weight = ParameterTuple(tuple(weights)) self.fill = P.Fill() self.shape = P.Shape() @@ -177,12 +189,12 @@ class LSTM(Cell): output = self.transpose2(output, (1, 0, 2)) return (output, (h, c)) h, c = hx - output, hn, cn, _, _ = self.lstms[0](x, h[0], c[0]) + output, hn, cn, _, _ = self.lstms[0](x, h[0], c[0], self.weight[0]) for i in range(1, self.num_layers): - output, hn, cn, _, _ = self.lstms[i](output, h[i], c[i]) + output, hn, cn, _, _ = self.lstms[i](output, h[i], c[i], self.weight[i]) if self.batch_first: output = self.transpose2(output, (1, 0, 2)) - return output, hn, cn, _, _ + return (output, (hn, cn)) class LSTMCell(Cell): @@ -271,11 +283,9 @@ class LSTMCell(Cell): >>> output, hn, cn, _, _ = net(input, h0, c0) """ - def __init__(self, input_size, hidden_size, - layer_index=0, has_bias=True, batch_first=False, dropout=0, @@ -283,8 +293,6 @@ class LSTMCell(Cell): super(LSTMCell, 
self).__init__() self.input_size = input_size self.hidden_size = hidden_size - self.num_layers = 1 - self.layer_index = layer_index self.has_bias = has_bias self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name) self.dropout = float(dropout) @@ -295,16 +303,7 @@ class LSTMCell(Cell): if self.batch_first: self.transpose1 = P.Transpose() self.transpose2 = P.Transpose() - w_np = np.ones([(self.input_size + self.hidden_size) * self.num_directions * self.hidden_size * 4, 1]).astype( - np.float32) * 0.01 - if has_bias: - b_np = np.ones([self.num_directions * self.hidden_size * 4, 1]).astype( - np.float32) * 0.01 - else: - b_np = np.zeros([self.num_directions * self.hidden_size * 4, 1]).astype( - np.float32) * 0.01 - wb_np = np.concatenate((w_np, b_np), axis=0).reshape([-1, 1, 1]) - self.w = Parameter(initializer(Tensor(wb_np), wb_np.shape), name='w' + str(self.layer_index)) + self.lstm = P.LSTM(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=1, @@ -312,10 +311,10 @@ class LSTMCell(Cell): bidirectional=self.bidirectional, dropout=self.dropout) - def construct(self, x, h, c): + def construct(self, x, h, c, w): if self.batch_first: x = self.transpose1(x, (1, 0, 2)) - output, hn, cn, _, _ = self.lstm(x, h, c, self.w) + output, hn, cn, _, _ = self.lstm(x, h, c, w) if self.batch_first: output = self.transpose2(output, (1, 0, 2)) return output, hn, cn, _, _ diff --git a/mindspore/nn/layer/math.py b/mindspore/nn/layer/math.py index 8a714c1cde..1ecb20056e 100644 --- a/mindspore/nn/layer/math.py +++ b/mindspore/nn/layer/math.py @@ -15,12 +15,16 @@ """math""" import math from mindspore.ops import operations as P +from mindspore.ops.operations import _inner_ops as inner from mindspore.common.tensor import Tensor from ..cell import Cell from ...common import dtype as mstype from ..._checkparam import Validator as validator +from ..._checkparam import Rel + + +__all__ = ['ReduceLogSumExp', 'Range', 'LinSpace'] -__all__ = 
['ReduceLogSumExp', 'Range'] class ReduceLogSumExp(Cell): r""" @@ -79,8 +83,8 @@ class Range(Cell): start (Union[int, float]): If `limit` is `None`, the value acts as limit in the range and first entry defaults to `0`. Otherwise, it acts as first entry in the range. limit (Union[int, float]): Acts as upper limit of sequence. If `None`, defaults to the value of `start` - while set the first entry of the range to `0`. - delta (Union[int, float]): Increment of the range. Default: 1. + while set the first entry of the range to `0`. It can not be equal to `start`. + delta (Union[int, float]): Increment of the range. It can not be equal to zero. Default: 1. Outputs: Tensor, the dtype is int if the dtype of `start`, `limit` and `delta` all are int. Otherwise, dtype is float. @@ -93,10 +97,12 @@ class Range(Cell): def __init__(self, start, limit=None, delta=1): super(Range, self).__init__() - validator.check_value_type("start", start, [int, float], None) - validator.check_value_type("delta", delta, [int, float], None) + validator.check_value_type("start", start, [int, float], self.cls_name) + validator.check_value_type("delta", delta, [int, float], self.cls_name) + if delta == 0: + raise ValueError("The input of `delta` can not be equal to zero.") if limit is not None: - validator.check_value_type("limit", limit, [int, float], None) + validator.check_value_type("limit", limit, [int, float], self.cls_name) if isinstance(start, int) and isinstance(limit, int) and isinstance(delta, int): self.dtype = mstype.int32 else: @@ -112,7 +118,7 @@ class Range(Cell): limit = float(limit) if isinstance(delta, int): delta = float(delta) - self.range_x = P.Range(start, limit, delta) + self.range_x = inner.Range(start, limit, delta) if limit is None: length_input = math.ceil(start / delta) else: @@ -122,3 +128,48 @@ class Range(Cell): def construct(self): range_out = self.range_x(self.input_tensor) return range_out + + +class LinSpace(Cell): + r""" + Generates values in an interval. 
And return the corresponding interpolation according to assist. + + Args: + - **start** (Union[int, float]) - The start of interval, With shape of 0-D. + - **stop** (Union[int, float]) - The end of interval, With shape of 0-D. + - **num** (int) - ticks number in the interval, the ticks include start and stop value. + With shape of 0-D. + + Outputs: + Tensor, With type same as `start`. The shape is 1-D with length of `num`. + + Examples: + >>> linspace = nn.LinSpace() + >>> start = Tensor(1, mindspore.float32) + >>> stop = Tensor(10, mindspore.float32) + >>> num = Tensor(5, mindspore.int32) + >>> output = linspace(start, stop, num) + [1, 3.25, 5.5, 7.75, 10] + """ + + def __init__(self, start, stop, num): + super(LinSpace, self).__init__() + validator.check_value_type("start", start, [int, float], self.cls_name) + validator.check_value_type("stop", stop, [int, float], self.cls_name) + validator.check_value_type("num", num, [int], self.cls_name) + validator.check_integer("num", num, 0, Rel.GT, self.cls_name) + + self.is_single = bool(num == 1) + self.lin_space = inner.LinSpace() + self.start = Tensor(start, mstype.float32) + self.stop = Tensor(stop, mstype.float32) + self.assist = Tensor(list(range(num)), mstype.float32) + self.num = Tensor(num, mstype.int32) + self.start_array = Tensor([start], mstype.float32) + + def construct(self): + if self.is_single: + return self.start_array + + lin_space_out = self.lin_space(self.assist, self.start, self.stop, self.num) + return lin_space_out diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py index f90b8d28ed..4c7ea9d4d6 100644 --- a/mindspore/nn/layer/normalization.py +++ b/mindspore/nn/layer/normalization.py @@ -18,17 +18,17 @@ from mindspore.ops import functional as F from mindspore.common.parameter import Parameter from mindspore.common.initializer import initializer from mindspore.ops.primitive import constexpr -from mindspore.common.tensor import Tensor -import mindspore.common.dtype
as mstype import mindspore.context as context from mindspore._checkparam import check_bool, check_typename from mindspore._extends import cell_attr_register from mindspore.communication.management import get_group_size, get_rank from mindspore.communication import management from mindspore._checkparam import check_int_positive +from mindspore.ops import _selected_ops from ..cell import Cell + __all__ = ['BatchNorm1d', 'BatchNorm2d', 'LayerNorm', 'GroupNorm', 'GlobalBatchNorm'] class _BatchNorm(Cell): @@ -85,13 +85,12 @@ class _BatchNorm(Cell): self.reshape = P.Reshape() self.is_ascend = context.get_context("device_target") == "Ascend" self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE - + self.momentum = 1.0 - momentum if context.get_context("enable_ge"): self.is_ge_backend = True - self.momentum = Tensor(1.0 - momentum, mstype.float32) else: self.is_ge_backend = False - self.momentum = 1.0 - momentum + if self.is_graph_mode and (self.is_ge_backend or self.is_ascend): self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps) @@ -119,12 +118,11 @@ class _BatchNorm(Cell): "local_rank_size is {}".format(group_size, get_group_size())) if len(world_rank) % group_size != 0: raise ValueError("please make your group size correct.") - world_rank_list = zip(*(iter(world_rank),) *group_size) + world_rank_list = zip(*(iter(world_rank),) * group_size) group_list = [list(i) for i in world_rank_list] return group_list - def _global_sync(self, x, axes, re_shape): """calculate global batch normalization output""" x_mean = self.reduce_mean(x, axes) @@ -191,15 +189,19 @@ class _BatchNorm(Cell): return 'num_features={}, eps={}, momentum={}, gamma={}, beta={}, moving_mean={}, moving_variance={}'.format( self.num_features, self.eps, self.momentum, self.gamma, self.beta, self.moving_mean, self.moving_variance) + @constexpr def _channel_check(channel, num_channel): if channel != num_channel: raise ValueError("the input channel is not equal with num_channel") + 
@constexpr def _shape_check(in_shape): if len(in_shape) != 4: raise ValueError("The input must has 4 dims") + + @constexpr def _shape_infer(x_shape, num_feature): """global batch normalization shape and axes infer""" @@ -211,6 +213,7 @@ def _shape_infer(x_shape, num_feature): re_shape = (1, num_feature) return axes, re_shape + class BatchNorm1d(_BatchNorm): r""" Batch normalization layer over a 2D input. @@ -260,6 +263,7 @@ class BatchNorm1d(_BatchNorm): >>> input = Tensor(np.random.randint(0, 255, [3, 16]), mindspore.float32) >>> net(input) """ + def __init__(self, num_features, eps=1e-5, @@ -279,6 +283,7 @@ class BatchNorm1d(_BatchNorm): moving_mean_init, moving_var_init, use_batch_statistics) + def _check_data_dim(self, x): if x.dim() != 2: pass @@ -333,6 +338,7 @@ class BatchNorm2d(_BatchNorm): >>> input = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]), mindspore.float32) >>> net(input) """ + def __init__(self, num_features, eps=1e-5, @@ -352,6 +358,7 @@ class BatchNorm2d(_BatchNorm): moving_mean_init, moving_var_init, use_batch_statistics) + def _check_data_dim(self, x): if x.dim() != 4: pass @@ -375,7 +382,7 @@ class GlobalBatchNorm(_BatchNorm): Args: num_features (int): `C` from an expected input of size (N, C, H, W). - device_num_each_group (int): The number of devices in each group. + device_num_each_group (int): The number of devices in each group. Default: 1. eps (float): A value added to the denominator for numerical stability. Default: 1e-5. momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. Default: 0.9. 
@@ -407,6 +414,7 @@ class GlobalBatchNorm(_BatchNorm): >>> input = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]), mindspore.float32) >>> global_bn_op(input) """ + def __init__(self, num_features, eps=1e-5, @@ -431,10 +439,12 @@ class GlobalBatchNorm(_BatchNorm): self.group = check_int_positive(device_num_each_group) if self.group <= 1: raise ValueError("the number of group must be greater than 1.") + def _check_data_dim(self, x): if x.dim == 0: pass + class LayerNorm(Cell): r""" Applies Layer Normalization over a mini-batch of inputs. @@ -474,10 +484,11 @@ class LayerNorm(Cell): Examples: >>> x = Tensor(np.ones([20, 5, 10, 10]), mindspore.float32) - >>> shape1 = x.shape()[1:] + >>> shape1 = x.shape[1:] >>> m = nn.LayerNorm(shape1, begin_norm_axis=1, begin_params_axis=1) >>> m(x) """ + def __init__(self, normalized_shape, begin_norm_axis=-1, @@ -498,8 +509,8 @@ class LayerNorm(Cell): gamma_init, normalized_shape), name="gamma") self.beta = Parameter(initializer( beta_init, normalized_shape), name="beta") - self.layer_norm = P.LayerNorm(begin_norm_axis=self.begin_norm_axis, begin_params_axis=self.begin_params_axis, - epsilon=self.epsilon) + self.layer_norm = _selected_ops.LayerNorm(begin_norm_axis=self.begin_norm_axis, + begin_params_axis=self.begin_params_axis) def construct(self, input_x): y, _, _ = self.layer_norm(input_x, self.gamma, self.beta) @@ -511,6 +522,7 @@ class LayerNorm(Cell): self.normalized_shape, self.begin_norm_axis, self.begin_params_axis, self.gamma, self.beta) return s + class GroupNorm(Cell): r""" Group Normalization over a mini-batch of inputs. 
@@ -547,6 +559,7 @@ class GroupNorm(Cell): >>> x = Tensor(np.ones([1, 64, 256, 256], np.float32)) >>> goup_norm_op(x) """ + def __init__(self, num_groups, num_channels, eps=1e-05, affine=True, gamma_init='ones', beta_init='zeros'): super(GroupNorm, self).__init__() self.num_groups = check_int_positive(num_groups) diff --git a/mindspore/nn/layer/pooling.py b/mindspore/nn/layer/pooling.py index 89bc65bb09..6c26fcea67 100644 --- a/mindspore/nn/layer/pooling.py +++ b/mindspore/nn/layer/pooling.py @@ -113,7 +113,7 @@ class MaxPool2d(_PoolNd): [0. 0. 4. 0.] [1. 8. 7. 0.]]]] >>> output = pool(x) - >>> output.shape() + >>> output.shape (1, 2, 2, 2) >>> output [[[[7. 8.] @@ -195,7 +195,7 @@ class AvgPool2d(_PoolNd): [0. 8. 9. 7.] [2. 1. 4. 9.]]]] >>> output = pool(x) - >>> output.shape() + >>> output.shape (1, 2, 2, 2) >>> output [[[[4.888889 4.4444447] @@ -260,7 +260,7 @@ class AvgPool1d(_PoolNd): >>> pool = nn.AvgPool1d(kernel_size=6, strides=1) >>> x = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32) >>> output = pool(x) - >>> output.shape() + >>> output.shape (1, 3, 1) """ diff --git a/mindspore/nn/layer/quant.py b/mindspore/nn/layer/quant.py index 305a69800f..14731c6262 100644 --- a/mindspore/nn/layer/quant.py +++ b/mindspore/nn/layer/quant.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ -"""Aware quantization.""" +"""Quantization aware.""" +from functools import partial import numpy as np import mindspore.common.dtype as mstype from mindspore.ops import operations as P @@ -22,15 +23,21 @@ from mindspore.common.parameter import Parameter from mindspore.common.initializer import initializer from mindspore.common.tensor import Tensor from mindspore._checkparam import check_int_positive, check_bool, twice -from mindspore._checkparam import Validator as validator +from mindspore._checkparam import Validator as validator, Rel from mindspore.nn.cell import Cell from mindspore.nn.layer.activation import get_activation import mindspore.context as context - +from .normalization import BatchNorm2d +from .activation import get_activation +from ..cell import Cell +from . import conv, basic +from ..._checkparam import ParamValidator as validator +from ...ops.operations import _quant_ops as Q __all__ = [ + 'Conv2dBnAct', + 'DenseBnAct', 'FakeQuantWithMinMax', - 'DepthwiseConv2dBatchNormQuant', 'Conv2dBatchNormQuant', 'Conv2dQuant', 'DenseQuant', @@ -43,12 +50,171 @@ __all__ = [ ] +class Conv2dBnAct(Cell): + r""" + A combination of convolution, Batchnorm, activation layer. + + For a more Detailed overview of Conv2d op. + + Args: + in_channels (int): The number of input channel :math:`C_{in}`. + out_channels (int): The number of output channel :math:`C_{out}`. + kernel_size (Union[int, tuple]): The data type is int or tuple with 2 integers. Specifies the height + and width of the 2D convolution window. Single int means the value if for both height and width of + the kernel. A tuple of 2 ints means the first value is for the height and the other is for the + width of the kernel. + stride (int): Specifies stride for all spatial dimensions with the same value. Value of stride should be + greater or equal to 1 but bounded by the height and width of the input. Default: 1. 
+ pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". + padding (int): Implicit paddings on both sides of the input. Default: 0. + dilation (int): Specifying the dilation rate to use for dilated convolution. If set to be :math:`k > 1`, + there will be :math:`k - 1` pixels skipped for each sampling location. Its value should be greater + or equal to 1 and bounded by the height and width of the input. Default: 1. + group (int): Split filter into groups, `in_ channels` and `out_channels` should be + divisible by the number of groups. Default: 1. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible + Initializer and string are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + batchnorm (bool): Specifies to used batchnorm or not. Default: None. + activation (string): Specifies activation type. The optional values are as following: + 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid', + 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None. + + Inputs: + - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. + + Outputs: + Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. 
+ + Examples: + >>> net = Conv2dBnAct(120, 240, 4, batchnorm=True, activation='ReLU') + >>> input = Tensor(np.ones([1, 120, 1024, 640]), mindspore.float32) + >>> net(input).shape + (1, 240, 1024, 640) + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + pad_mode='same', + padding=0, + dilation=1, + group=1, + has_bias=False, + weight_init='normal', + bias_init='zeros', + batchnorm=None, + activation=None): + super(Conv2dBnAct, self).__init__() + self.conv = conv.Conv2d( + in_channels, + out_channels, + kernel_size, + stride, + pad_mode, + padding, + dilation, + group, + has_bias, + weight_init, + bias_init) + self.has_bn = batchnorm is not None + self.has_act = activation is not None + self.batchnorm = batchnorm + if batchnorm is True: + self.batchnorm = BatchNorm2d(out_channels) + elif batchnorm is not None: + validator.check_isinstance('batchnorm', batchnorm, (BatchNorm2d,)) + self.activation = get_activation(activation) + + def construct(self, x): + x = self.conv(x) + if self.has_bn: + x = self.batchnorm(x) + if self.has_act: + x = self.activation(x) + return x + + +class DenseBnAct(Cell): + r""" + A combination of Dense, Batchnorm, activation layer. + + For a more Detailed overview of Dense op. + + Args: + in_channels (int): The number of channels in the input space. + out_channels (int): The number of channels in the output space. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as input x. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + activation (str): Regularizer function applied to the output of the layer, eg. 'relu'. Default: None. 
+ batchnorm (bool): Specifies whether to use batchnorm. Default: None. + activation (str): Specifies activation type. The optional values are as following: + 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid', + 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None. + + Inputs: + - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`. + + Outputs: + Tensor of shape :math:`(N, out\_channels)`. + + Examples: + >>> net = nn.DenseBnAct(3, 4) + >>> input = Tensor(np.random.randint(0, 255, [2, 3]), mindspore.float32) + >>> net(input) + """ + + def __init__(self, + in_channels, + out_channels, + weight_init='normal', + bias_init='zeros', + has_bias=True, + batchnorm=None, + activation=None): + super(DenseBnAct, self).__init__() + self.dense = basic.Dense( + in_channels, + out_channels, + weight_init, + bias_init, + has_bias) + self.has_bn = batchnorm is not None + self.has_act = activation is not None + if batchnorm is True: + self.batchnorm = BatchNorm2d(out_channels) + elif batchnorm is not None: + validator.check_isinstance('batchnorm', batchnorm, (BatchNorm2d,)) + self.activation = get_activation(activation) + + def construct(self, x): + x = self.dense(x) + if self.has_bn: + x = self.batchnorm(x) + if self.has_act: + x = self.activation(x) + return x + + class BatchNormFoldCell(Cell): """ Batch normalization folded. Args: - momentum (float): Momentum value should be [0, 1]. Default: 0.1. + momentum (float): Momentum value should be [0, 1]. Default: 0.9. epsilon (float): A small float number to avoid dividing by 0. 1e-5 if dtype in float32 else 1e-3. Default: 1e-5. 
freeze_bn (int): Delay in steps at which computation switches from regular batch @@ -76,11 +242,11 @@ class BatchNormFoldCell(Cell): self.epsilon = epsilon self.is_gpu = context.get_context('device_target') == "GPU" if self.is_gpu: - self.bn_train = P.BatchNormFold(momentum, epsilon, is_training=True, freeze_bn=freeze_bn) - self.bn_infer = P.BatchNormFold(momentum, epsilon, is_training=False, freeze_bn=freeze_bn) + self.bn_train = Q.BatchNormFold(momentum, epsilon, is_training=True, freeze_bn=freeze_bn) + self.bn_infer = Q.BatchNormFold(momentum, epsilon, is_training=False, freeze_bn=freeze_bn) else: self.bn_reduce = P.BNTrainingReduce() - self.bn_update = P.BatchNormFoldD(momentum, epsilon, is_training=True, freeze_bn=freeze_bn) + self.bn_update = Q.BatchNormFoldD(momentum, epsilon, is_training=True, freeze_bn=freeze_bn) def construct(self, x, mean, variance, global_step): if self.is_gpu: @@ -103,124 +269,22 @@ class BatchNormFoldCell(Cell): return batch_mean, batch_std, running_mean, running_std -class FakeQuantWithMinMaxD(Cell): - r""" - Aware Quantization training op of ascend. This OP provide Fake quantization observer - function on data with min and max. - - Args: - min_init (int, list): The dimension of channel or 1(layer). Default: -6. - max_init (int, list): The dimension of channel or 1(layer). Default: 6. - num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. - ema (bool): Exponential Moving Average algorithm update min and max. Default: False. - ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.9999. - per_channel (bool): Quantization by layer or channel. Default: False. - out_channels (int): declarate the min and max channel size, Default: 1. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. - symmetric (bool): Quantization algorithm use symmetric or not. Default: False. - narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. 
- - Inputs: - - **x** (Tensor) - The input of FakeQuantWithMinMax. - - Outputs: - Tensor, with the same type and shape as the `x`. - - Examples: - >>> fake_quant = nn.FakeQuantWithMinMaxD() - >>> input_x = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32) - >>> result = fake_quant(input_x) - """ - def __init__(self, - min_init=-6, - max_init=6, - num_bits=8, - ema=False, - ema_decay=0.999, - per_channel=False, - channel_size=1, - quant_delay=0, - symmetric=False, - narrow_range=False, - training=True): - """init FakeQuantWithMinMax ascend layer""" - super(FakeQuantWithMinMaxD, self).__init__() - - self.min_init = min_init - self.num_bits = num_bits - self.max_init = max_init - self.ema = ema - self.ema_decay = ema_decay - self.per_channel = per_channel - self.channel_size = channel_size - self.quant_delay = quant_delay - self.symmetric = symmetric - self.narrow_range = narrow_range - self.training = training - - if not per_channel: - self.fake_quant = P.FakeQuantWithMinMax(num_bits=self.num_bits, - ema=self.ema, - ema_decay=self.ema_decay, - quant_delay=self.quant_delay, - symmetric=self.symmetric, - narrow_range=self.narrow_range, - training=training) - self.ema_update = P.FakeQuantWithMinMaxUpdate(num_bits=self.num_bits, - ema=self.ema, - ema_decay=self.ema_decay, - quant_delay=self.quant_delay, - symmetric=self.symmetric, - narrow_range=self.narrow_range, - training=training) - else: - raise RuntimeError("not support per channel") - - if isinstance(min_init, Parameter): - self.minq = min_init - self.maxq = max_init - else: - self.minq = Parameter(Tensor(np.array([min_init]).astype(np.float32)), - name='quant_min', - requires_grad=False) - self.maxq = Parameter(Tensor(np.array([max_init]).astype(np.float32)), - name='quant_max', - requires_grad=False) - self.reduce_min = P.ReduceMin() - self.reduce_max = P.ReduceMax() - - def extend_repr(self): - s = 'min_init={}, max_init={}, ema={}, ema_decay={}, per_channel={}, channel_size={}, 
quant_delay={}'.format( - self.min_init, self.max_init, self.ema, self.ema_decay, self.per_channel, self.channel_size, - self.quant_delay) - return s - - def construct(self, x, minq, maxq): - if self.training: - min_up, max_up = self.ema_update(x, minq, maxq) - out = self.fake_quant(x, min_up, max_up) - P.Assign()(self.minq, min_up) - P.Assign()(self.maxq, max_up) - else: - out = self.fake_quant(x, minq, maxq) - return out - - class FakeQuantWithMinMax(Cell): r""" - Aware Quantization training op. This OP provide Fake quantization observer function on data with min and max. + Quantization aware op. This OP provide Fake quantization observer function on data with min and max. Args: - min_init (int, list): The dimension of channel or 1(layer). Default: -6. - max_init (int, list): The dimension of channel or 1(layer). Default: 6. - num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. + min_init (int, float): The dimension of channel or 1(layer). Default: -6. + max_init (int, float): The dimension of channel or 1(layer). Default: 6. ema (bool): Exponential Moving Average algorithm update min and max. Default: False. - ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.9999. - per_channel (bool): Quantization by layer or channel. Default: False. - out_channels (int): declarate the min and max channel size, Default: 1. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. + ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. + per_channel (bool): Quantization granularity based on layer or on channel. Default: False. + channel_axis (int): Quantization by channel axis. Default: 1. + num_channels (int): declarate the min and max channel size, Default: 1. + num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. symmetric (bool): Quantization algorithm use symmetric or not. Default: False. 
narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. + quant_delay (int): Quantization delay parameters according by global step. Default: 0. Inputs: - **x** (Tensor) - The input of FakeQuantWithMinMax. @@ -237,289 +301,83 @@ class FakeQuantWithMinMax(Cell): def __init__(self, min_init=-6, max_init=6, - num_bits=8, ema=False, ema_decay=0.999, per_channel=False, - out_channels=1, - quant_delay=0, + channel_axis=1, + num_channels=1, + num_bits=8, symmetric=False, narrow_range=False, - training=True): + quant_delay=0): """init FakeQuantWithMinMax layer""" super(FakeQuantWithMinMax, self).__init__() - self.min_init = min_init - self.num_bits = num_bits self.max_init = max_init + self.num_bits = num_bits self.ema = ema self.ema_decay = ema_decay self.per_channel = per_channel - self.out_channels = out_channels + self.num_channels = num_channels + self.channel_axis = channel_axis self.quant_delay = quant_delay self.symmetric = symmetric self.narrow_range = narrow_range - self.training = training + self.is_ascend = context.get_context('device_target') == "Ascend" + # init tensor min and max for fake quant op + if self.per_channel: + min_array = np.array([self.min_init] * self.num_channels).astype(np.float32) + max_array = np.array([self.max_init] * self.num_channels).astype(np.float32) + else: + min_array = np.array([self.min_init]).astype(np.float32) + max_array = np.array([self.max_init]).astype(np.float32) + self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False) + self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False) + + # init fake quant relative op if per_channel: - min_array = np.array([self.min_init for i in range(0, self.out_channels)]).astype(np.float32) - max_array = np.array([self.max_init for i in range(0, self.channel_size)]).astype(np.float32) - self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False) - self.maxq = Parameter(Tensor(max_array), 
name='quant_max', requires_grad=False) - self.fake_quant_train = P.FakeQuantWithMinMaxPerChannel(num_bits=self.num_bits, - ema=self.ema, - ema_decay=self.ema_decay, - quant_delay=self.quant_delay, - symmetric=self.symmetric, - narrow_range=self.narrow_range, - training=True) - self.fake_quant_infer = P.FakeQuantWithMinMaxPerChannel(num_bits=self.num_bits, - ema=self.ema, - ema_decay=self.ema_decay, - quant_delay=self.quant_delay, - symmetric=self.symmetric, - narrow_range=self.narrow_range, - training=False) + quant_fun = partial(Q.FakeQuantPerChannel, channel_axis=self.channel_axis) + ema_fun = partial(Q.MinMaxUpdatePerChannel, channel_axis=self.channel_axis) else: - min_array = np.array([min_init]).reshape(1).astype(np.float32) - max_array = np.array([max_init]).reshape(1).astype(np.float32) - self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False) - self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False) - if context.get_context('device_target') == "Ascend": - self.fake_quant_train = FakeQuantWithMinMaxD(num_bits=self.num_bits, - ema=self.ema, - ema_decay=self.ema_decay, - quant_delay=self.quant_delay, - symmetric=self.symmetric, - narrow_range=self.narrow_range, - training=True, - min_init=self.minq, - max_init=self.maxq) - self.fake_quant_infer = FakeQuantWithMinMaxD(num_bits=self.num_bits, - ema=self.ema, - ema_decay=self.ema_decay, - quant_delay=self.quant_delay, - symmetric=self.symmetric, - narrow_range=self.narrow_range, - training=False, - min_init=self.minq, - max_init=self.maxq) - elif context.get_context('device_target') == "GPU": - self.fake_quant_train = P.FakeQuantWithMinMax(num_bits=self.num_bits, - ema=self.ema, - ema_decay=self.ema_decay, - quant_delay=self.quant_delay, - symmetric=self.symmetric, - narrow_range=self.narrow_range, - training=True) - self.fake_quant_infer = P.FakeQuantWithMinMax(num_bits=self.num_bits, - ema=self.ema, - ema_decay=ema_decay, - quant_delay=quant_delay, - 
symmetric=self.symmetric, - narrow_range=self.narrow_range, - training=False) - else: - raise ValueError("Not support platform.") + quant_fun = Q.FakeQuantPerLayer + ema_fun = Q.MinMaxUpdatePerLayer + + self.ema_update = ema_fun(ema=self.ema, ema_decay=self.ema_decay) + if self.is_ascend: + self.fake_quant_train = quant_fun(num_bits=self.num_bits, + symmetric=self.symmetric, + narrow_range=self.narrow_range) + self.fake_quant_infer = self.fake_quant_train + else: + quant_fun = partial(quant_fun, + ema=self.ema, + ema_decay=ema_decay, + num_bits=self.num_bits, + symmetric=self.symmetric, + narrow_range=self.narrow_range, + quant_delay=quant_delay) + self.fake_quant_train = quant_fun(training=True) + self.fake_quant_infer = quant_fun(training=False) def extend_repr(self): - s = 'min={}, max={}, ema={}, ema_decay={}, per_channel={}, quant_delay={}'.format( - self.min_init, self.max_init, self.ema, self.ema_decay, self.per_channel, self.quant_delay) + s = 'num_bits={}, symmetric={}, narrow_range={}, ema={}({}), per_channel={}({}, {}), ' \ + 'quant_delay={}, min_init={}, max_init={}'.format( + self.num_bits, self.symmetric, self.narrow_range, self.ema, self.ema_decay, self.per_channel, + self.channel_axis, self.num_channels, self.quant_delay, self.min_init, self.max_init) return s def construct(self, x): if self.training: + min_up, max_up = self.ema_update(x, self.minq, self.maxq) + P.Assign()(self.minq, min_up) + P.Assign()(self.maxq, max_up) out = self.fake_quant_train(x, self.minq, self.maxq) else: out = self.fake_quant_infer(x, self.minq, self.maxq) return out -class DepthwiseConv2dBatchNormQuant(Cell): - r""" - 2D depthwise convolution with BatchNormal op folded layer. - - For a more Detailed overview of Conv2d op. - - Args: - in_channels (int): The number of input channel :math:`C_{in}`. - out_channels (int): The number of output channel :math:`C_{out}`. - kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window. 
- stride (int): Specifies stride for all spatial dimensions with the same value. - pad_mode: (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". - padding: (int): Implicit paddings on both sides of the input. Default: 0. - eps (int): Parameters for BatchNormal. Default: 1e-5. - momentum (int): Parameters for BatchNormal op. Default: 0.9. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - convolution kernel. Default: 'None'. - beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - beta vector. Default: 'None'. - gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - gamma vector. Default: 'None'. - mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - mean vector. Default: 'None'. - var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - variance vector. Default: 'None'. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. - freeze_bn (int): Quantization freeze BatchNormal op according by global step. Default: 100000. - fake (bool): Conv2dBatchNormQuant Cell add FakeQuantWithMinMax op or not. Default: True. - num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. - per_channel (bool): FakeQuantWithMinMax Parameters. Default: False. - symmetric (bool): Quantization algorithm use symmetric or not. Default: False. - narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - - Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. 
- - Examples: - >>> quant = nn.DepthwiseConv2dBatchNormQuant(1, 6, - kernel_size= (2, 2), - stride=(1, 1), - pad_mode="valid", - >>> dilation=(1, 1)) - >>> input_x = Tensor(np.random.randint(-2, 2, (2, 1, 1, 3)), mindspore.float32) - >>> result = quant(input_x) - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - eps=1e-5, - momentum=0.997, - weight_init=None, - beta_init=None, - gamma_init=None, - mean_init=None, - var_init=None, - quant_delay=0, - freeze_bn=100000, - fake=True, - num_bits=8, - per_channel=False, - symmetric=False, - narrow_range=False): - """init DepthwiseConv2dBatchNormQuant layer""" - super(DepthwiseConv2dBatchNormQuant, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.pad_mode = pad_mode - self.padding = padding - self.dilation = twice(dilation) - self.stride = twice(stride) - self.group = group - self.fake = fake - self.freeze_bn = freeze_bn - self.momentum = momentum - self.quant_delay = quant_delay - if isinstance(kernel_size, int): - self.kernel_size = (kernel_size, kernel_size) - else: - self.kernel_size = kernel_size - if group > 1: - validator.check_integer('group', group, 'in_channels', in_channels, 'Conv2dBatchNormQuant') - validator.check_integer('group', group, 'in_channels', out_channels, 'Conv2dBatchNormQuant') - self.is_depthwise = group > 1 - - channel_multiplier = out_channels // in_channels - self.conv = P.DepthwiseConv2dNative(channel_multiplier=channel_multiplier, - kernel_size=kernel_size, - stride=stride, - pad_mode=pad_mode, - pad=padding) - - if weight_init is None: - weight_init = initializer('normal', [channel_multiplier, in_channels, *kernel_size]) - self.weight = Parameter(weight_init, name='weight') - if gamma_init is None: - gamma_init = initializer('ones', [out_channels]) - self.gamma = Parameter(gamma_init, name='gamma') - if beta_init is None: - beta_init = initializer('zeros', 
[out_channels]) - self.beta = Parameter(beta_init, name='beta') - if mean_init is None: - mean_init = initializer('zeros', [out_channels]) - self.moving_mean = Parameter( - mean_init, name='moving_mean', requires_grad=False) - if var_init is None: - var_init = initializer('ones', [out_channels]) - self.moving_variance = Parameter( - var_init, name='moving_variance', requires_grad=False) - - self.step = Parameter(initializer( - 'normal', [1], dtype=mstype.int32), name='step', requires_grad=False) - - self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6, - max_init=6, - ema=False, - num_bits=num_bits, - quant_delay=quant_delay, - per_channel=per_channel, - out_channels=out_channels, - symmetric=symmetric, - narrow_range=narrow_range) - self.batchnorm_fold = BatchNormFoldCell(epsilon=eps, momentum=momentum, freeze_bn=freeze_bn) - - self.correct_mul = P.CorrectionMul(self.is_depthwise) - if context.get_context('device_target') == "Ascend": - self.batchnorm_fold2_train = P.BatchNormFold2_D(freeze_bn=freeze_bn) - self.batchnorm_fold2_infer = P.BatchNormFold2_D(freeze_bn=0) - elif context.get_context('device_target') == "GPU": - self.batchnorm_fold2_train = P.BatchNormFold2(freeze_bn=freeze_bn) - self.batchnorm_fold2_infer = P.BatchNormFold2(freeze_bn=0) - else: - raise ValueError("Not support platform.") - self.one = Tensor(1, mstype.int32) - self.assignadd = P.AssignAdd() - self.is_gpu = context.get_context('device_target') == "GPU" - - def extend_repr(self): - s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ - 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'fake={}, freeze_bn={}, momentum={}, quant_delay={}'.format( - self.in_channels, self.out_channels, self.kernel_size, self.stride, - self.pad_mode, self.padding, self.dilation, self.group, - self.fake, self.freeze_bn, self.momentum, self.quant_delay) - return s - - def construct(self, x): - out_conv = self.conv(x, self.weight) - # BN fold1 - batch_mean, batch_std, running_mean, 
running_std = self.batchnorm_fold(out_conv, - self.moving_mean, - self.moving_variance, - self.step) - # fake weight - weight = self.correct_mul(self.weight, self.gamma, running_std) - if self.fake: - weight = self.fake_quant_weight(weight) - out = self.conv(x, weight) - # BN fold2 - if self.is_gpu: - if self.training: - out = self.batchnorm_fold2_train(out, self.beta, self.gamma, - batch_std, batch_mean, running_std, running_mean, self.step) - F.control_depend(out, self.assignadd(self.step, self.one)) - else: - out = self.batchnorm_fold2_infer(out, self.beta, self.gamma, - batch_std, batch_mean, running_std, running_mean, self.step) - else: - if self.training: - out = self.batchnorm_fold2_train(out, self.beta, self.gamma, batch_std, batch_mean, running_std) - F.control_depend(out, self.assignadd(self.step, self.one)) - else: - out = self.batchnorm_fold2_infer(out, self.beta, self.gamma, batch_std, batch_mean, running_std) - return out - - class Conv2dBatchNormQuant(Cell): r""" 2D convolution with BatchNormal op folded layer. @@ -533,25 +391,25 @@ class Conv2dBatchNormQuant(Cell): stride (int): Specifies stride for all spatial dimensions with the same value. pad_mode: (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". padding: (int): Implicit paddings on both sides of the input. Default: 0. - eps (int): Parameters for BatchNormal. Default: 1e-5. - momentum (int): Parameters for BatchNormal op. Default: 0.9. + eps (float): Parameters for BatchNormal. Default: 1e-5. + momentum (float): Parameters for BatchNormal op. Default: 0.997. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - convolution kernel. Default: 'None'. + convolution kernel. Default: 'normal'. beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - beta vector. Default: 'None'. + beta vector. Default: 'zeros'. 
gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - gamma vector. Default: 'None'. + gamma vector. Default: 'ones'. mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - mean vector. Default: 'None'. + mean vector. Default: 'zeros'. var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - variance vector. Default: 'None'. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. - freeze_bn (int): Quantization freeze BatchNormal op according by global step. Default: 100000. + variance vector. Default: 'ones'. fake (bool): Conv2dBatchNormQuant Cell add FakeQuantWithMinMax op or not. Default: True. - num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. per_channel (bool): FakeQuantWithMinMax Parameters. Default: False. + num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. symmetric (bool): Quantization algorithm use symmetric or not. Default: False. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. + quant_delay (int): Quantization delay parameters according by global step. Default: 0. + freeze_bn (int): Quantization freeze BatchNormal op according by global step. Default: 100000. Inputs: - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. @@ -559,7 +417,7 @@ class Conv2dBatchNormQuant(Cell): Outputs: Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. 
- Examples: + Examples: >>> batchnorm_quant = nn.Conv2dBatchNormQuant(1, 6, kernel_size= (2, 2), stride=(1, 1), pad_mode="valid", >>> dilation=(1, 1)) >>> input_x = Tensor(np.random.randint(-2, 2, (2, 1, 1, 3)), mindspore.float32) @@ -577,84 +435,92 @@ class Conv2dBatchNormQuant(Cell): group=1, eps=1e-5, momentum=0.997, - weight_init=None, - beta_init=None, - gamma_init=None, - mean_init=None, - var_init=None, - quant_delay=0, - freeze_bn=100000, + weight_init='normal', + beta_init='zeros', + gamma_init='ones', + mean_init='zeros', + var_init='ones', fake=True, - num_bits=8, per_channel=False, + num_bits=8, symmetric=False, - narrow_range=False): + narrow_range=False, + quant_delay=0, + freeze_bn=100000): """init Conv2dBatchNormQuant layer""" super(Conv2dBatchNormQuant, self).__init__() self.in_channels = in_channels self.out_channels = out_channels + self.kernel_size = twice(kernel_size) + self.stride = twice(stride) self.pad_mode = pad_mode self.padding = padding self.dilation = twice(dilation) - self.stride = twice(stride) self.group = group - self.fake = fake - self.freeze_bn = freeze_bn + self.eps = eps self.momentum = momentum self.quant_delay = quant_delay - if isinstance(kernel_size, int): - self.kernel_size = (kernel_size, kernel_size) - else: - self.kernel_size = kernel_size - if weight_init is None: - weight_init = initializer( - 'normal', [out_channels, in_channels // group, *self.kernel_size]) - self.weight = Parameter(weight_init, name='weight') - if gamma_init is None: - gamma_init = initializer('ones', [out_channels]) - self.gamma = Parameter(gamma_init, name='gamma') - if beta_init is None: - beta_init = initializer('zeros', [out_channels]) - self.beta = Parameter(beta_init, name='beta') - if mean_init is None: - mean_init = initializer('zeros', [out_channels]) - self.moving_mean = Parameter( - mean_init, name='moving_mean', requires_grad=False) - if var_init is None: - var_init = initializer('ones', [out_channels]) - self.moving_variance = 
Parameter( - var_init, name='moving_variance', requires_grad=False) - - self.step = Parameter(initializer( - 'normal', [1], dtype=mstype.int32), name='step', requires_grad=False) + self.freeze_bn = freeze_bn + self.fake = fake + self.num_bits = num_bits + self.per_channel = per_channel + self.symmetric = symmetric + self.narrow_range = narrow_range + self.is_gpu = context.get_context('device_target') == "GPU" + # initialize convolution op and Parameter + if context.get_context('device_target') == "Ascend" and group > 1: + validator.check_integer('group', group, in_channels, Rel.EQ) + validator.check_integer('group', group, out_channels, Rel.EQ) + self.conv = P.DepthwiseConv2dNative(channel_multiplier=1, + kernel_size=self.kernel_size, + pad_mode=pad_mode, + pad=padding, + stride=self.stride, + dilation=self.dilation) + weight_shape = [1, in_channels, *self.kernel_size] + channel_axis = 1 + else: + self.conv = P.Conv2D(out_channel=out_channels, + kernel_size=self.kernel_size, + pad_mode=pad_mode, + pad=padding, + stride=self.stride, + dilation=self.dilation, + group=group) + weight_shape = [out_channels, in_channels // group, *self.kernel_size] + channel_axis = 0 + self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') + + # initialize batchnorm Parameter + self.gamma = Parameter(initializer(gamma_init, [out_channels]), name='gamma') + self.beta = Parameter(initializer(beta_init, [out_channels]), name='beta') + self.moving_mean = Parameter(initializer(mean_init, [out_channels]), name='moving_mean', requires_grad=False) + self.moving_variance = Parameter(initializer(var_init, [out_channels]), name='moving_variance', + requires_grad=False) + + # initialize fake ops self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6, max_init=6, ema=False, - num_bits=num_bits, - quant_delay=quant_delay, per_channel=per_channel, - out_channels=out_channels, + channel_axis=channel_axis, + num_channels=out_channels, + num_bits=num_bits, symmetric=symmetric, - 
narrow_range=narrow_range) + narrow_range=narrow_range, + quant_delay=quant_delay) self.batchnorm_fold = BatchNormFoldCell(epsilon=eps, momentum=momentum, freeze_bn=freeze_bn) - self.conv = P.Conv2D(out_channel=out_channels, - kernel_size=kernel_size, - mode=1, - pad_mode=pad_mode, - pad=padding, - stride=stride, - dilation=1, - group=group) - self.correct_mul = P.CorrectionMul() + self.correct_mul = Q.CorrectionMul(channel_axis) if context.get_context('device_target') == "Ascend": - self.batchnorm_fold2_train = P.BatchNormFold2_D(freeze_bn=freeze_bn) - self.batchnorm_fold2_infer = P.BatchNormFold2_D(freeze_bn=0) + self.batchnorm_fold2_train = Q.BatchNormFold2_D(freeze_bn=freeze_bn) + self.batchnorm_fold2_infer = Q.BatchNormFold2_D(freeze_bn=0) elif context.get_context('device_target') == "GPU": - self.batchnorm_fold2_train = P.BatchNormFold2(freeze_bn=freeze_bn) - self.batchnorm_fold2_infer = P.BatchNormFold2(freeze_bn=0) + self.batchnorm_fold2_train = Q.BatchNormFold2(freeze_bn=freeze_bn) + self.batchnorm_fold2_infer = Q.BatchNormFold2(freeze_bn=0) else: - raise ValueError("Not support platform.") + raise ValueError("Unsupported platform: {}".format(context.get_context('device_target'))) + self.step = Parameter(initializer('normal', [1], dtype=mstype.int32), name='step', requires_grad=False) self.one = Tensor(1, mstype.int32) self.assignadd = P.AssignAdd() @@ -693,7 +559,7 @@ class Conv2dBatchNormQuant(Cell): out = self.batchnorm_fold2_train(out, self.beta, self.gamma, batch_std, batch_mean, running_std) F.control_depend(out, self.assignadd(self.step, self.one)) else: - out = self.batchnorm_fold2_infer(out, self.beta, self.gamma, batch_std, batch_mean, running_std) + out = self.batchnorm_fold2_infer(out, self.beta, self.gamma, running_std, running_mean, running_std) return out @@ -715,13 +581,13 @@ class Conv2dQuant(Cell): divisible by the number of groups. Default: 1. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. 
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. - Default: None. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: None. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. - num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. + Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. per_channel (bool): FakeQuantWithMinMax Parameters. Default: False. + num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. symmetric (bool): Quantization algorithm use symmetric or not. Default: False. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. + quant_delay (int): Quantization delay parameters according by global step. Default: 0. Inputs: - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. 
@@ -746,13 +612,13 @@ class Conv2dQuant(Cell): dilation=1, group=1, has_bias=False, - weight_init=None, - bias_init=None, - quant_delay=0, - num_bits=8, + weight_init='normal', + bias_init='zeros', per_channel=False, + num_bits=8, symmetric=False, - narrow_range=False): + narrow_range=False, + quant_delay=0): super(Conv2dQuant, self).__init__() if isinstance(kernel_size, int): self.kernel_size = (kernel_size, kernel_size) @@ -768,15 +634,14 @@ class Conv2dQuant(Cell): self.group = group self.quant_delay = quant_delay - if weight_init is None: - weight_init = initializer( - 'normal', [out_channels, in_channels // group, *self.kernel_size]) - self.weight = Parameter(weight_init, name='weight') - if bias_init is None: - bias_init = initializer('zeros', [out_channels]) - if has_bias: - self.bias = Parameter(bias_init, name='bias') - self.bias_add = P.BiasAdd() + weight_shape = [out_channels, in_channels // group, *self.kernel_size] + self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') + + self.bias_add = P.BiasAdd() + if check_bool(has_bias): + self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') + else: + self.bias = None self.conv = P.Conv2D(out_channel=self.out_channels, kernel_size=self.kernel_size, @@ -789,12 +654,13 @@ class Conv2dQuant(Cell): self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6, max_init=6, ema=False, - num_bits=num_bits, - quant_delay=quant_delay, per_channel=per_channel, - out_channels=out_channels, + channel_axis=0, + num_channels=out_channels, + num_bits=num_bits, symmetric=symmetric, - narrow_range=narrow_range) + narrow_range=narrow_range, + quant_delay=quant_delay) def construct(self, x): weight = self.fake_quant_weight(self.weight) @@ -828,11 +694,11 @@ class DenseQuant(Cell): same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. 
activation (str): Regularizer function applied to the output of the layer, eg. 'relu'. Default: None. - num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. per_channel (bool): FakeQuantWithMinMax Parameters. Default: False. + num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. symmetric (bool): Quantization algorithm use symmetric or not. Default: False. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. + quant_delay (int): Quantization delay parameters according by global step. Default: 0. Inputs: - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. @@ -854,11 +720,11 @@ class DenseQuant(Cell): bias_init='zeros', has_bias=True, activation=None, - num_bits=8, - quant_delay=0, per_channel=False, + num_bits=8, symmetric=False, - narrow_range=False): + narrow_range=False, + quant_delay=0): super(DenseQuant, self).__init__() self.in_channels = check_int_positive(in_channels) self.out_channels = check_int_positive(out_channels) @@ -888,12 +754,13 @@ class DenseQuant(Cell): self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6, max_init=6, ema=False, - num_bits=num_bits, - quant_delay=quant_delay, per_channel=per_channel, - out_channels=out_channels, + channel_axis=0, + num_channels=out_channels, + num_bits=num_bits, symmetric=symmetric, - narrow_range=narrow_range) + narrow_range=narrow_range, + quant_delay=quant_delay) def construct(self, x): """Use operators to construct to Dense layer.""" @@ -917,17 +784,28 @@ class DenseQuant(Cell): return str_info -class ReLUQuant(Cell): +class _QuantActivation(Cell): + r""" + Base class for Quant activation function. Add Fake Quant OP after activation OP. + """ + + def get_origin(self): + raise NotImplementedError + + +class ReLUQuant(_QuantActivation): r""" ReLUQuant activation function. Add Fake Quant OP after Relu OP. 
For a more Detailed overview of ReLU op. Args: + ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. + per_channel (bool): Quantization granularity based on layer or on channel. Default: False. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. symmetric (bool): Quantization algorithm use symmetric or not. Default: False. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. + quant_delay (int): Quantization delay parameters according by global step. Default: 0. Inputs: - **x** (Tensor) - The input of ReLUQuant. @@ -942,18 +820,22 @@ class ReLUQuant(Cell): """ def __init__(self, + ema_decay=0.999, + per_channel=False, num_bits=8, - quant_delay=0, symmetric=False, - narrow_range=False): + narrow_range=False, + quant_delay=0): super(ReLUQuant, self).__init__() self.fake_quant_act = FakeQuantWithMinMax(min_init=0, max_init=6, - num_bits=num_bits, - quant_delay=quant_delay, ema=True, + ema_decay=ema_decay, + per_channel=per_channel, + num_bits=num_bits, symmetric=symmetric, - narrow_range=narrow_range) + narrow_range=narrow_range, + quant_delay=quant_delay) self.relu = P.ReLU() def construct(self, x): @@ -961,8 +843,11 @@ class ReLUQuant(Cell): x = self.fake_quant_act(x) return x + def get_origin(self): + return self.relu -class ReLU6Quant(Cell): + +class ReLU6Quant(_QuantActivation): r""" ReLU6Quant activation function. @@ -971,10 +856,12 @@ class ReLU6Quant(Cell): For a more Detailed overview of ReLU6 op. Args: + ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. + per_channel (bool): Quantization granularity based on layer or on channel. Default: False. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. 
symmetric (bool): Quantization algorithm use symmetric or not. Default: False. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. + quant_delay (int): Quantization delay parameters according by global step. Default: 0. Inputs: - **x** (Tensor) - The input of ReLU6Quant. @@ -988,16 +875,23 @@ class ReLU6Quant(Cell): >>> result = relu6_quant(input_x) """ - def __init__(self, num_bits=8, quant_delay=0, symmetric=False, - narrow_range=False): + def __init__(self, + ema_decay=0.999, + per_channel=False, + num_bits=8, + symmetric=False, + narrow_range=False, + quant_delay=0): super(ReLU6Quant, self).__init__() self.fake_quant_act = FakeQuantWithMinMax(min_init=0, max_init=6, - num_bits=num_bits, - quant_delay=quant_delay, ema=True, + ema_decay=ema_decay, + per_channel=per_channel, + num_bits=num_bits, symmetric=symmetric, - narrow_range=narrow_range) + narrow_range=narrow_range, + quant_delay=quant_delay) self.relu6 = P.ReLU6() def construct(self, x): @@ -1005,18 +899,23 @@ class ReLU6Quant(Cell): x = self.fake_quant_act(x) return x + def get_origin(self): + return self.relu6 + -class HSwishQuant(Cell): +class HSwishQuant(_QuantActivation): r""" HSwishQuant activation function. Add Fake Quant OP after HSwish OP. For a more Detailed overview of HSwish op. Args: + ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. + per_channel (bool): Quantization granularity based on layer or on channel. Default: False. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. symmetric (bool): Quantization algorithm use symmetric or not. Default: False. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. + quant_delay (int): Quantization delay parameters according by global step. Default: 0. Inputs: - **x** (Tensor) - The input of HSwishQuant. 
@@ -1031,25 +930,31 @@ class HSwishQuant(Cell): """ def __init__(self, + ema_decay=0.999, + per_channel=False, num_bits=8, - quant_delay=0, symmetric=False, - narrow_range=False): + narrow_range=False, + quant_delay=0): super(HSwishQuant, self).__init__() self.fake_quant_act_before = FakeQuantWithMinMax(min_init=-6, max_init=6, - num_bits=num_bits, - quant_delay=quant_delay, ema=True, + ema_decay=ema_decay, + per_channel=per_channel, + num_bits=num_bits, symmetric=symmetric, - narrow_range=narrow_range) + narrow_range=narrow_range, + quant_delay=quant_delay) self.fake_quant_act_after = FakeQuantWithMinMax(min_init=-6, max_init=6, - num_bits=num_bits, - quant_delay=quant_delay, ema=True, + ema_decay=ema_decay, + per_channel=per_channel, + num_bits=num_bits, symmetric=symmetric, - narrow_range=narrow_range) + narrow_range=narrow_range, + quant_delay=quant_delay) self.act = P.HSwish() def construct(self, x): @@ -1058,18 +963,23 @@ class HSwishQuant(Cell): x = self.fake_quant_act_after(x) return x + def get_origin(self): + return self.act -class HSigmoidQuant(Cell): + +class HSigmoidQuant(_QuantActivation): r""" HSigmoidQuant activation function. Add Fake Quant OP before and after HSigmoid OP. For a more Detailed overview of HSigmoid op. Args: + ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. + per_channel (bool): Quantization granularity based on layer or on channel. Default: False. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. symmetric (bool): Quantization algorithm use symmetric or not. Default: False. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. + quant_delay (int): Quantization delay parameters according by global step. Default: 0. Inputs: - **x** (Tensor) - The input of HSigmoidQuant. 
@@ -1084,25 +994,31 @@ class HSigmoidQuant(Cell): """ def __init__(self, + ema_decay=0.999, + per_channel=False, num_bits=8, - quant_delay=0, symmetric=False, - narrow_range=False): + narrow_range=False, + quant_delay=0): super(HSigmoidQuant, self).__init__() self.fake_quant_act_before = FakeQuantWithMinMax(min_init=-6, max_init=6, - num_bits=num_bits, - quant_delay=quant_delay, ema=True, + ema_decay=ema_decay, + per_channel=per_channel, + num_bits=num_bits, symmetric=symmetric, - narrow_range=narrow_range) + narrow_range=narrow_range, + quant_delay=quant_delay) self.fake_quant_act_after = FakeQuantWithMinMax(min_init=-6, max_init=6, - num_bits=num_bits, - quant_delay=quant_delay, ema=True, + ema_decay=ema_decay, + per_channel=per_channel, + num_bits=num_bits, symmetric=symmetric, - narrow_range=narrow_range) + narrow_range=narrow_range, + quant_delay=quant_delay) self.act = P.HSigmoid() def construct(self, x): @@ -1111,6 +1027,9 @@ class HSigmoidQuant(Cell): x = self.fake_quant_act_after(x) return x + def get_origin(self): + return self.act + class TensorAddQuant(Cell): r""" @@ -1119,10 +1038,12 @@ class TensorAddQuant(Cell): For a more Detailed overview of TensorAdd op. Args: + ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. + per_channel (bool): Quantization granularity based on layer or on channel. Default: False. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. symmetric (bool): Quantization algorithm use symmetric or not. Default: False. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. + quant_delay (int): Quantization delay parameters according by global step. Default: 0. Inputs: - **x** (Tensor) - The input of TensorAddQuant. 
@@ -1138,18 +1059,22 @@ class TensorAddQuant(Cell): """ def __init__(self, + ema_decay=0.999, + per_channel=False, num_bits=8, - quant_delay=0, symmetric=False, - narrow_range=False): + narrow_range=False, + quant_delay=0): super(TensorAddQuant, self).__init__() self.fake_quant_act = FakeQuantWithMinMax(min_init=-6, max_init=6, - num_bits=num_bits, - quant_delay=quant_delay, ema=True, + ema_decay=ema_decay, + per_channel=per_channel, + num_bits=num_bits, symmetric=symmetric, - narrow_range=narrow_range) + narrow_range=narrow_range, + quant_delay=quant_delay) self.add = P.TensorAdd() def construct(self, x1, x2): @@ -1165,10 +1090,12 @@ class MulQuant(Cell): For a more Detailed overview of Mul op. Args: + ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. + per_channel (bool): Quantization granularity based on layer or on channel. Default: False. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. symmetric (bool): Quantization algorithm use symmetric or not. Default: False. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. + quant_delay (int): Quantization delay parameters according by global step. Default: 0. Inputs: - **x** (Tensor) - The input of MulQuant. 
@@ -1179,21 +1106,99 @@ class MulQuant(Cell): """ def __init__(self, + ema_decay=0.999, + per_channel=False, num_bits=8, - quant_delay=0, symmetric=False, - narrow_range=False): + narrow_range=False, + quant_delay=0): super(MulQuant, self).__init__() self.fake_quant_act = FakeQuantWithMinMax(min_init=-6, max_init=6, - num_bits=num_bits, - quant_delay=quant_delay, ema=True, + ema_decay=ema_decay, + per_channel=per_channel, + num_bits=num_bits, symmetric=symmetric, - narrow_range=narrow_range) + narrow_range=narrow_range, + quant_delay=quant_delay) self.mul = P.Mul() def construct(self, x1, x2): x = self.mul(x1, x2) x = self.fake_quant_act(x) return x + + +class QuantBlock(Cell): + r""" + A quant block of Conv/Dense, activation layer for Ascend deploy. + + Calculate Conv or Dense in Int8, with AscendQuant and AscendDeQuant. + + Notes: + This block is only for deploy, and not trainable. + + Args: + in_channels (int): The number of channels in the input space. + out_channels (int): The number of channels in the output space. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as input x. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + activation (str): Regularizer function applied to the output of the layer, eg. 'relu'. Default: None. + batchnorm (bool): Specifies to used batchnorm or not. Default: None. + activation (string): Specifies activation type. The optional values are as following: + 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid', + 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None. 
+ + Inputs: + - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`. + + Outputs: + Tensor of shape :math:`(N, out\_channels)`. + + Examples: + >>> net = nn.Dense(3, 4) + >>> input = Tensor(np.random.randint(0, 255, [2, 3]), mindspore.float32) + >>> net(input) + """ + + def __init__(self, + core_op, + weight, + quant_op, + dequant_op, + dequant_scale, + bias=None, + activation=None): + super(QuantBlock, self).__init__() + self.core_op = core_op + self.weight = weight + self.quant = quant_op + self.dequant = dequant_op + self.dequant_scale = dequant_scale + self.bias = bias + self.has_bias = bias is None + self.activation = activation + self.has_act = activation is None + + def construct(self, x): + x = self.quant(x) + x = self.core_op(x, self.weight) + if self.has_bias: + output = self.bias_add(output, self.bias) + if self.has_act: + x = self.activation(x) + x = self.dequant(x, self.dequant_scale) + return x + + def extend_repr(self): + str_info = f'quant={self.quant}, core_op={type(self.core_op)}' + if self.has_bias: + str_info = str_info + f', bias={self.bias}' + if self.has_act: + str_info = str_info + f', activation={self.activation}' + str_info = str_info + f', dequant={self.dequant}' + return str_info diff --git a/mindspore/nn/loss/loss.py b/mindspore/nn/loss/loss.py index c7e38fd943..4639229c41 100644 --- a/mindspore/nn/loss/loss.py +++ b/mindspore/nn/loss/loss.py @@ -18,6 +18,7 @@ from mindspore.common.tensor import Tensor from mindspore.ops import operations as P from mindspore.ops import functional as F from mindspore.ops.primitive import constexpr +from mindspore.ops import _selected_ops from mindspore.nn.cell import Cell from mindspore._checkparam import Validator as validator from mindspore._checkparam import Rel @@ -44,7 +45,7 @@ class _Loss(Cell): if reduction == 'none': self.reduce = False - self.reduce_mean = P.ReduceMean() + self.reduce_mean = _selected_ops.ReduceMean() self.reduce_sum = P.ReduceSum() def get_axis(self, x): @@ -245,11 
+246,11 @@ class SoftmaxCrossEntropyWithLogits(_Loss): super(SoftmaxCrossEntropyWithLogits, self).__init__(reduction) self.is_grad = is_grad self.sparse = sparse - validator.check_integer("num_classes", num_classes, 1, Rel.GT, self.cls_name) - validator.check_number_range("smooth_factor", smooth_factor, 0, 1, Rel.INC_BOTH, self.cls_name) + validator.check_number_range( + "smooth_factor", smooth_factor, 0, 1, Rel.INC_BOTH, self.cls_name) self.smooth_factor = smooth_factor self.num_classes = num_classes - self.softmax_cross_entropy = P.SoftmaxCrossEntropyWithLogits() + self.softmax_cross_entropy = _selected_ops.SoftmaxCrossEntropyWithLogits() self.one_hot = P.OneHot() self.on_value = Tensor(1.0 - self.smooth_factor, mstype.float32) self.off_value = Tensor(1.0 * self.smooth_factor / (self.num_classes - 1), mstype.float32) @@ -393,7 +394,7 @@ class CosineEmbeddingLoss(_Loss): pos_value = 1.0 - cosine neg_value = self.maximum(cosine - self.margin, 0.0) - zeros = F.zeros_like_tensor(cosine) + zeros = F.zeros_like(cosine) pos_part = F.select(y == 1, pos_value, zeros) neg_part = F.select(y == -1, neg_value, zeros) output_unreduced = pos_part + neg_part diff --git a/mindspore/nn/optim/__init__.py b/mindspore/nn/optim/__init__.py index 8f21179893..f1dac586bc 100644 --- a/mindspore/nn/optim/__init__.py +++ b/mindspore/nn/optim/__init__.py @@ -26,6 +26,8 @@ from .sgd import SGD from .lars import LARS from .ftrl import FTRL from .rmsprop import RMSProp +from .proximal_ada_grad import ProximalAdagrad +from .lazyadam import LazyAdam -__all__ = ['Optimizer', 'Momentum', 'LARS', 'Adam', 'AdamWeightDecay', - 'AdamWeightDecayDynamicLR', 'Lamb', 'SGD', 'FTRL', 'RMSProp'] +__all__ = ['Optimizer', 'Momentum', 'LARS', 'Adam', 'AdamWeightDecay', 'LazyAdam', + 'AdamWeightDecayDynamicLR', 'Lamb', 'SGD', 'FTRL', 'RMSProp', 'ProximalAdagrad'] diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py index 2138aed741..5a40d30d5a 100755 --- a/mindspore/nn/optim/adam.py +++ 
b/mindspore/nn/optim/adam.py @@ -26,12 +26,10 @@ from mindspore._checkparam import Validator as validator from mindspore._checkparam import Rel from .optimizer import Optimizer -_learning_rate_update_func = ['linear', 'cos', 'sin'] +_adam_opt = C.MultitypeFuncGraph("adam_opt") -adam_opt = C.MultitypeFuncGraph("adam_opt") - -@adam_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool") +@_adam_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool") def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, param, m, v, gradient, decay_flag): """ Update parameters. @@ -67,16 +65,16 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, param, m, v, grad next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2, op_square(gradient_fp32)) - update = next_m / (op_sqrt(next_v) + eps) + update = next_m / (eps + op_sqrt(next_v)) if decay_flag: - update = update + op_mul(weight_decay_tensor, param_fp32) + update = op_mul(weight_decay_tensor, param_fp32) + update update_with_lr = op_mul(lr, update) next_param = param_fp32 - op_reshape(update_with_lr, op_shape(param_fp32)) - next_v = F.depend(next_v, F.assign(param, next_param)) - next_v = F.depend(next_v, F.assign(m, next_m)) - next_v = F.depend(next_v, F.assign(v, next_v)) + next_v = F.depend(next_v, F.assign(param, op_cast(next_param, mstype.float16))) + next_v = F.depend(next_v, F.assign(m, op_cast(next_m, mstype.float16))) + next_v = F.depend(next_v, F.assign(v, op_cast(next_v, mstype.float16))) return next_v @@ -94,19 +92,30 @@ def _check_param_value(beta1, beta2, eps, weight_decay, prim_name): def _check_learning_rate_value(learning_rate, end_learning_rate, decay_steps, power, prim_name): """Check the type of inputs.""" - validator.check_float_positive('learning_rate', learning_rate, prim_name) - validator.check_float_legal_value('learning_rate', 
learning_rate, prim_name) - validator.check_float_positive('end_learning_rate', end_learning_rate, prim_name) - validator.check_float_legal_value('end_learning_rate', end_learning_rate, prim_name) + validator.check_value_type("learning_rate", learning_rate, [float], prim_name) + validator.check_number_range("learning_rate", learning_rate, 0.0, float("inf"), Rel.INC_LEFT, prim_name) + validator.check_value_type("end_learning_rate", end_learning_rate, [float], prim_name) + validator.check_number_range("end_learning_rate", end_learning_rate, 0.0, float("inf"), Rel.INC_LEFT, prim_name) validator.check_float_positive('power', power, prim_name) validator.check_float_legal_value('power', power, prim_name) validator.check_integer('decay_steps', decay_steps, 0, Rel.GT, prim_name) -@adam_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Number", "Tensor", "Tensor", "Tensor", "Tensor", - "Tensor") -def _run_opt_with_one_number(opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params, moment1, - moment2): +@_adam_opt.register("Function", "Function", "Tensor", "Tensor", "Tensor", "Tensor", "Number", "Tensor", "Tuple", + "Tensor", "Tensor", "Tensor") +def _run_opt_with_sparse(opt, sparse_opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params, + moment1, moment2): + """Apply sparse adam optimizer to the weight parameter when the gradient is sparse.""" + success = True + success = F.depend(success, sparse_opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, + eps, gradient[1], gradient[0])) + return success + + +@_adam_opt.register("Function", "Function", "Tensor", "Tensor", "Tensor", "Tensor", "Number", "Tensor", "Tensor", + "Tensor", "Tensor", "Tensor") +def _run_opt_with_one_number(opt, sparse_opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params, + moment1, moment2): """Apply adam optimizer to the weight parameter using Tensor.""" success = True success = F.depend(success, opt(params, moment1, 
moment2, beta1_power, beta2_power, lr, beta1, beta2, @@ -144,10 +153,16 @@ class Adam(Optimizer): value of weight_decay > 0. When not separating parameter groups, the `weight_decay` in the API will be applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters. + To improve parameter groups performance, the customized order of parameters can be supported. + + The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the + `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse + behavior is currently performed on the CPU, weight decay is not supported. + Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", - "lr" and "weight_decay" are the keys can be parsed. + "lr", "weight_decay" and "order_params" are the keys can be parsed. - params: Required. The value should be a list of `Parameter`. @@ -157,13 +172,19 @@ class Adam(Optimizer): - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. - learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is - Iterable or a Tensor and the dims of the Tensor is 1, - use dynamic learning rate, then the i-th step will - take the i-th value as the learning rate. - When the learning_rate is float or learning_rate is a Tensor - but the dims of the Tensor is 0, use fixed learning rate. - Other cases are not supported. Default: 1e-3. + - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + the order will be followed in optimizer. 
There are no other keys in the `dict` and the parameters which + in the value of 'order_params' but not in any group will use default learning rate and default weight + decay. + + learning_rate (Union[int, float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is + Iterable or a Tensor and the dims of the Tensor is 1, + use dynamic learning rate, then the i-th step will + take the i-th value as the learning rate. + When the learning_rate is float or learning_rate is a + Tensor but the dims of the Tensor is 0, use fixed learning + rate. Other cases are not supported. It should be equal to + or greater than 0. Default: 1e-3. beta1 (float): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0). Default: 0.9. beta2 (float): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0). Default: @@ -176,9 +197,8 @@ class Adam(Optimizer): use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. If True, updates the gradients using NAG. If False, updates the gradients without using NAG. Default: False. - weight_decay (float): Weight decay (L2 penalty). Default: 0.0. - loss_scale (float): A floating point value for the loss scale. Should be equal to or greater than 1. Default: - 1.0. + weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0. + loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0. Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. 
@@ -193,13 +213,16 @@ class Adam(Optimizer): >>> >>> #2) Use parameter groups and set different values >>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params())) - >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params())) - >>> group_params = [{'params': conv_params, 'weight_decay': 0.01, 'lr': 0.01}, - >>> {'params': no_conv_params}] + >>> bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params())) + >>> group_params = [{'params': conv_params, 'weight_decay': 0.01}, + >>> {'params': bias_params, 'lr': 0.01}, + >>> {'order_params': net.trainable_params()}] >>> opt = nn.Adam(group_params, learning_rate=0.1, weight_decay=0.0) - >>> # the conv_params's parameters will use a learning rate of 0.01 and a weight decay of 0.01 - >>> # the no_cov_params's parameters don't set learning and weight decay. So they will use a - >>> # learning rate of 0.1 and a weight decay of 0.0. + >>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01. + >>> # The bias_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0. + >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'. + >>> # The parameters which in the value of 'order_params' but not in any group will use a learning rate + >>> # of default value 0.1 and a weight decay of default value 0.0. 
>>> >>> loss = nn.SoftmaxCrossEntropyWithLogits() >>> model = Model(net, loss_fn=loss, optimizer=optim) @@ -211,8 +234,6 @@ class Adam(Optimizer): _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name) validator.check_value_type("use_locking", use_locking, [bool], self.cls_name) validator.check_value_type("use_nesterov", use_nesterov, [bool], self.cls_name) - validator.check_value_type("loss_scale", loss_scale, [float], self.cls_name) - validator.check_number_range("loss_scale", loss_scale, 1.0, float("inf"), Rel.INC_LEFT, self.cls_name) self.beta1 = Tensor(beta1, mstype.float32) self.beta2 = Tensor(beta2, mstype.float32) @@ -225,11 +246,7 @@ class Adam(Optimizer): self.hyper_map = C.HyperMap() self.opt = P.Adam(use_locking, use_nesterov) - - self.pow = P.Pow() - self.sqrt = P.Sqrt() - self.one = Tensor(np.array([1.0]).astype(np.float32)) - self.realdiv = P.RealDiv() + self.sparse_opt = P.SparseApplyAdam(use_locking, use_nesterov) def construct(self, gradients): params = self.parameters @@ -244,13 +261,13 @@ class Adam(Optimizer): beta2_power = self.beta2_power * self.beta2 self.beta2_power = beta2_power if self.is_group_lr: - success = self.hyper_map(F.partial(adam_opt, self.opt, beta1_power, beta2_power, self.beta1, - self.beta2, self.eps), - lr, gradients, params, moment1, moment2) + success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, beta1_power, beta2_power, + self.beta1, self.beta2, self.eps), + lr, gradients, params, moment1, moment2) else: - success = self.hyper_map(F.partial(adam_opt, self.opt, beta1_power, beta2_power, self.beta1, - self.beta2, self.eps, lr), - gradients, params, moment1, moment2) + success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, beta1_power, beta2_power, + self.beta1, self.beta2, self.eps, lr), + gradients, params, moment1, moment2) return success @@ -267,14 +284,15 @@ class AdamWeightDecay(Optimizer): take the i-th value as the learning rate. 
When the learning_rate is float or learning_rate is a Tensor but the dims of the Tensor is 0, use fixed learning rate. - Other cases are not supported. Default: 1e-3. + Other cases are not supported. It should be equal to or + greater than 0. Default: 1e-3. beta1 (float): The exponential decay rate for the 1st moment estimates. Default: 0.9. Should be in range (0.0, 1.0). beta2 (float): The exponential decay rate for the 2nd moment estimates. Default: 0.999. Should be in range (0.0, 1.0). eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6. Should be greater than 0. - weight_decay (float): Weight decay (L2 penalty). Default: 0.0. + weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0. decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default: lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name. @@ -310,7 +328,7 @@ class AdamWeightDecay(Optimizer): def construct(self, gradients): lr = self.get_lr() - updated_velocity = self.hyper_map(F.partial(adam_opt, self.beta1, self.beta2, self.eps, lr, + updated_velocity = self.hyper_map(F.partial(_adam_opt, self.beta1, self.beta2, self.eps, lr, self.weight_decay_tensor), self.params, self.moments1, self.moments2, gradients, self.decay_flag) @@ -324,17 +342,20 @@ class AdamWeightDecayDynamicLR(Optimizer): Args: params (list[Parameter]): A list of parameter, which will be updated. The element in `params` should be class mindspore.Parameter. - decay_steps (int): The steps of the decay. - learning_rate (float): A floating point value for the learning rate. Default: 0.001. - end_learning_rate (float): A floating point value for the end learning rate. Default: 0.0001. - power (float): Power. Default: 10.0. + decay_steps (int): The steps of the decay. It must be int and positive. + warmup_steps (int): The steps of lr warm up. Default: 0. 
+ learning_rate (float): A floating point value for the learning rate. It should be equal to or + greater than 0. Default: 0.001. + end_learning_rate (float): A floating point value for the end learning rate. It should be equal + to or greater than 0. Default: 0.0001. + power (float): The Power of the polynomial. It must be positive. Default: 10.0. beta1 (float): The exponential decay rate for the 1st moment estimates. Default: 0.9. Should be in range (0.0, 1.0). beta2 (float): The exponential decay rate for the 2nd moment estimates. Default: 0.999. Should be in range (0.0, 1.0). eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6. Should be greater than 0. - weight_decay (float): Weight decay (L2 penalty). Default: 0.0. + weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0. decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default: lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name. 
@@ -353,6 +374,7 @@ class AdamWeightDecayDynamicLR(Optimizer): def __init__(self, params, decay_steps, + warmup_steps=0, learning_rate=0.001, end_learning_rate=0.0001, power=10.0, @@ -360,13 +382,13 @@ class AdamWeightDecayDynamicLR(Optimizer): beta2=0.999, eps=1e-6, weight_decay=0.0, - decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name, - warmup_steps=0): - super(AdamWeightDecayDynamicLR, self).__init__(learning_rate, params) + decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name): + super(AdamWeightDecayDynamicLR, self).__init__(0.0, params) if self.is_group: raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.") _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name) _check_learning_rate_value(learning_rate, end_learning_rate, decay_steps, power, self.cls_name) + validator.check_integer('warmup_steps', warmup_steps, 0, Rel.GE, self.cls_name) # turn them to scalar when me support scalar/tensor mix operations self.global_step = Parameter(initializer(0, [1]), name="global_step") self.warmup_steps = Tensor(np.array([warmup_steps]).astype(np.float32)) @@ -402,7 +424,7 @@ class AdamWeightDecayDynamicLR(Optimizer): warmup_lr = self.start_learning_rate * warmup_percent is_warmup = self.cast(self.greater(self.warmup_steps, self.global_step), mstype.float32) lr = (self.one - is_warmup) * lr + is_warmup * warmup_lr - updated_velocity = self.hyper_map(F.partial(adam_opt, self.beta1, self.beta2, self.eps, lr, + updated_velocity = self.hyper_map(F.partial(_adam_opt, self.beta1, self.beta2, self.eps, lr, self.weight_decay_tensor), self.params, self.moments1, self.moments2, gradients, self.decay_flag) diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py index 33edafa4e2..a40d6737cb 100644 --- a/mindspore/nn/optim/ftrl.py +++ b/mindspore/nn/optim/ftrl.py @@ -18,28 +18,34 @@ from mindspore.common import Tensor import mindspore.common.dtype as mstype from mindspore._checkparam import 
Validator as validator from mindspore._checkparam import Rel -from .optimizer import Optimizer, apply_decay, grad_scale +from .optimizer import Optimizer, _apply_decay, _grad_scale -ftrl_opt = C.MultitypeFuncGraph("ftrl_opt") +_ftrl_opt = C.MultitypeFuncGraph("ftrl_opt") -@ftrl_opt.register("Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor") -def _tensor_run_opt(opt, learning_rate, l1, l2, lr_power, linear, gradient, weight, moment): +@_ftrl_opt.register("Function", "Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tuple", "Tensor", + "Tensor") +def _tensor_run_opt_with_sparse(opt, spars_opt, learning_rate, l1, l2, lr_power, linear, gradient, weight, moment): + """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse.""" + success = True + success = F.depend(success, spars_opt(weight, moment, linear, gradient[1], gradient[0])) + return success + + +@_ftrl_opt.register("Function", "Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor", + "Tensor") +def _tensor_run_opt(opt, spars_opt, learning_rate, l1, l2, lr_power, linear, gradient, weight, moment): """Apply ftrl optimizer to the weight parameter.""" success = True success = F.depend(success, opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power)) return success -def _check_param(initial_accum, learning_rate, lr_power, l1, l2, use_locking, loss_scale=1.0, weight_decay=0.0, - prim_name=None): +def _check_param(initial_accum, lr_power, l1, l2, use_locking, weight_decay=0.0, prim_name=None): """Check param.""" validator.check_value_type("initial_accum", initial_accum, [float], prim_name) validator.check_number("initial_accum", initial_accum, 0.0, Rel.GE, prim_name) - validator.check_value_type("learning_rate", learning_rate, [float], prim_name) - validator.check_number("learning_rate", learning_rate, 0.0, Rel.GT, prim_name) - validator.check_value_type("lr_power", lr_power, [float], prim_name) 
validator.check_number("lr_power", lr_power, 0.0, Rel.LE, prim_name) @@ -51,9 +57,6 @@ def _check_param(initial_accum, learning_rate, lr_power, l1, l2, use_locking, lo validator.check_value_type("use_locking", use_locking, [bool], prim_name) - validator.check_value_type("loss_scale", loss_scale, [float], prim_name) - validator.check_number("loss_scale", loss_scale, 1.0, Rel.GE, prim_name) - validator.check_value_type("weight_decay", weight_decay, [float], prim_name) validator.check_number("weight_decay", weight_decay, 0.0, Rel.GE, prim_name) @@ -67,6 +70,11 @@ class FTRL(Optimizer): `_. Refer to paper `Ad Click Prediction: a View from the Trenches `_ for engineering document. + Note: + The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the + `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse + behavior is currently performed on the CPU, weight decay is not supported. + Args: params (list[Parameter]): A list of parameter, which will be updated. The element in `params` should be Parameter. 
@@ -95,32 +103,30 @@ class FTRL(Optimizer): """ def __init__(self, params, initial_accum=0.1, learning_rate=0.001, lr_power=-0.5, l1=0.0, l2=0.0, use_locking=False, loss_scale=1.0, weight_decay=0.0): - super(FTRL, self).__init__(learning_rate, params) + super(FTRL, self).__init__(learning_rate, params, loss_scale=loss_scale) if self.is_group: raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.") - _check_param(initial_accum, learning_rate, lr_power, l1, l2, use_locking, loss_scale, weight_decay, - self.cls_name) + _check_param(initial_accum, lr_power, l1, l2, use_locking, weight_decay, self.cls_name) self.moments = self.parameters.clone(prefix="moments", init=initial_accum) self.linear = self.parameters.clone(prefix="linear", init='zeros') self.l1 = l1 self.l2 = l2 self.lr_power = lr_power - self.reciprocal_scale = 1.0 / loss_scale self.weight_decay = weight_decay self.decay_tf = tuple((lambda: True)() for x in self.parameters) self.hyper_map = C.HyperMap() self.opt = P.ApplyFtrl(use_locking=use_locking) - self.one = Tensor(1, mstype.int32) + self.sparse_opt = P.SparseApplyFtrl(learning_rate, l1, l2, lr_power, use_locking=use_locking) def construct(self, grads): params = self.parameters moments = self.moments linear = self.linear - if self.weight_decay > 0.0: - grads = self.hyper_map(F.partial(apply_decay, self.weight_decay), self.decay_tf, params, grads) - if self.reciprocal_scale != 1.0: - grads = self.hyper_map(F.partial(grad_scale, self.reciprocal_scale), grads) lr = self.learning_rate - success = self.hyper_map(F.partial(ftrl_opt, self.opt, lr, self.l1, self.l2, self.lr_power), - linear, grads, params, moments) + if self.weight_decay > 0.0: + grads = self.hyper_map(F.partial(_apply_decay, self.weight_decay), self.decay_tf, params, grads) + + grads = self.scale_grad(grads) + success = self.map_(F.partial(_ftrl_opt, self.opt, self.sparse_opt, lr, self.l1, self.l2, self.lr_power), + linear, grads, params, moments) return success diff 
--git a/mindspore/nn/optim/lamb.py b/mindspore/nn/optim/lamb.py index f189f1cd02..832b35d66f 100755 --- a/mindspore/nn/optim/lamb.py +++ b/mindspore/nn/optim/lamb.py @@ -14,6 +14,7 @@ # ============================================================================ """lamb""" import numpy as np +from mindspore import context from mindspore.common import dtype as mstype from mindspore.common.initializer import initializer from mindspore.ops import operations as P @@ -25,13 +26,15 @@ from mindspore._checkparam import Validator as validator from mindspore._checkparam import Rel from .optimizer import Optimizer from .. import layer +from .. import graph_kernels as G num_one = Tensor(np.ones([1]), mstype.float32) -lamb_opt = C.MultitypeFuncGraph("lamb_opt") +_lamb_opt = C.MultitypeFuncGraph("lamb_opt") -@lamb_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", - "Tensor", "Bool") + +@_lamb_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", + "Tensor", "Tensor", "Tensor", "Tensor", "Bool") def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, param, m, v, gradient, decay_flag): """ @@ -72,9 +75,11 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, para v_fp32 = op_cast(v, mstype.float32) gradient_fp32 = op_cast(gradient, mstype.float32) - next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(num_one, mstype.float32) - beta1, gradient_fp32) + next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(num_one, + mstype.float32) - beta1, gradient_fp32) - next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(num_one, mstype.float32) - beta2, op_square(gradient_fp32)) + next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(num_one, + mstype.float32) - beta2, op_square(gradient_fp32)) next_mm = next_m / (op_cast(num_one, mstype.float32) - op_pow(beta1, op_cast(global_step + num_one, mstype.float32))) @@ -83,8 +88,9 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, 
global_step, para w_norm = op_norm(param_fp32) g_norm = op_norm(gradient_fp32) - g_norm_hat = op_norm(op_mul(next_mm, op_rsqrt(next_vv + eps)) + weight_decay_tensor * param_fp32) - zeros = F.zeros_like_tensor(w_norm) + g_norm_hat = op_norm(op_mul(next_mm, op_rsqrt( + next_vv + eps)) + weight_decay_tensor * param_fp32) + zeros = F.zeros_like(w_norm) ones = op_fill(op_dtype(w_norm), op_shape(w_norm), 1.0) trust_ratio = op_select( op_greater(w_norm, zeros), @@ -108,13 +114,79 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, para return next_v +lamb_opt_graph_kernel = C.MultitypeFuncGraph("lamb_opt_graph_kernel") + + +@lamb_opt_graph_kernel.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", + "Tensor", "Tensor", "Tensor", "Tensor", "Bool") +def _update_run_op_graph_kernel(beta1, beta2, eps, lr, weight_decay_tensor, + global_step, param, m, v, gradient, decay_flag): + """ + Update parameters. + + Args: + beta1 (Tensor): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0). + beta2 (Tensor): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0). + eps (Tensor): Term added to the denominator to improve numerical stability. Should be greater than 0. + lr (Tensor): Learning rate. + weight_decay_tensor (Tensor): Weight decay. Should be equal to or greater than 0. + global_step (Tensor): Global step. + param (Tensor): Parameters. + m (Tensor): m value of parameters. + v (Tensor): v value of parameters. + gradient (Tensor): Gradient of parameters. + decay_flag (bool): Specifies whether param update with weight decay. + + Returns: + Tensor, the new value of v after updating. 
+ """ + op_mul = P.Mul() + op_square = P.Square() + op_cast = P.Cast() + op_shape = P.Shape() + op_pow = P.Pow() + op_norm = layer.Norm() + op_fill = P.Fill() + op_dtype = P.DType() + + param_fp32 = op_cast(param, mstype.float32) + gradient_fp32 = op_cast(gradient, mstype.float32) + + i6_ex = op_cast(global_step + num_one, mstype.float32) + i9 = op_cast(num_one, mstype.float32) - beta1 + x1 = op_cast(num_one, mstype.float32) - beta2 + i6 = op_cast(num_one, mstype.float32) - op_pow(beta1, i6_ex) + i3 = op_cast(num_one, mstype.float32) - op_pow(beta2, i6_ex) + i1 = op_square(gradient_fp32) + add3, update = G.LambNextMV()(i1, v, i3, gradient, m, i6, param, beta1, + i9, beta2, x1, weight_decay_tensor, eps) + + if decay_flag: + update = update + op_mul(weight_decay_tensor, param_fp32) + + w_norm = op_norm(param_fp32) + g_norm = op_norm(gradient_fp32) + g_norm_hat = op_norm(add3) + + zeros = F.zeros_like(w_norm) + ones = op_fill(op_dtype(w_norm), op_shape(w_norm), 1.0) + tens = op_fill(op_dtype(w_norm), op_shape(w_norm), 10.0) + + next_param = G.LambUpdateWithLR()(g_norm, w_norm, g_norm_hat, lr, update, + param, zeros, ones, tens) + next_v = F.control_depend(add3, next_param) + return next_v + + def _check_param_value(decay_steps, warmup_steps, start_learning_rate, end_learning_rate, power, beta1, beta2, eps, weight_decay, prim_name): """Check the type of inputs.""" - validator.check_float_positive('start_learning_rate', start_learning_rate, prim_name) - validator.check_float_legal_value('start_learning_rate', start_learning_rate, prim_name) + validator.check_value_type("start_learning_rate", start_learning_rate, [float], prim_name) + validator.check_number_range("start_learning_rate rate", start_learning_rate, 0.0, float("inf"), Rel.INC_LEFT, + prim_name) validator.check_value_type("end_learning_rate", end_learning_rate, [float], prim_name) - validator.check_float_legal_value('end_learning_rate', end_learning_rate, prim_name) + 
validator.check_number_range("end_learning_rate", end_learning_rate, 0.0, float("inf"), Rel.INC_LEFT, + prim_name) validator.check_float_positive('power', power, prim_name) validator.check_float_legal_value('power', power, prim_name) validator.check_integer('decay_steps', decay_steps, 0, Rel.GT, prim_name) @@ -122,11 +194,16 @@ def _check_param_value(decay_steps, warmup_steps, start_learning_rate, validator.check_value_type("beta1", beta1, [float], prim_name) validator.check_value_type("beta2", beta2, [float], prim_name) validator.check_value_type("eps", eps, [float], prim_name) - validator.check_value_type("weight_dacay", weight_decay, [float], prim_name) - validator.check_number_range("beta1", beta1, 0.0, 1.0, Rel.INC_NEITHER, prim_name) - validator.check_number_range("beta2", beta2, 0.0, 1.0, Rel.INC_NEITHER, prim_name) - validator.check_number_range("eps", eps, 0.0, float("inf"), Rel.INC_NEITHER, prim_name) - validator.check_number_range("weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, prim_name) + validator.check_value_type( + "weight_dacay", weight_decay, [float], prim_name) + validator.check_number_range( + "beta1", beta1, 0.0, 1.0, Rel.INC_NEITHER, prim_name) + validator.check_number_range( + "beta2", beta2, 0.0, 1.0, Rel.INC_NEITHER, prim_name) + validator.check_number_range( + "eps", eps, 0.0, float("inf"), Rel.INC_NEITHER, prim_name) + validator.check_number_range( + "weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, prim_name) class Lamb(Optimizer): @@ -141,10 +218,12 @@ class Lamb(Optimizer): params (list[Parameter]): A list of parameter, which will be updated. The element in `params` should be class mindspore.Parameter. decay_steps (int): The steps of the lr decay. Should be equal to or greater than 1. - warmup_steps (int): The steps of lr warm up. Default: 0. - start_learning_rate (float): A floating point value for the learning rate. Default: 0.1. 
- end_learning_rate (float): A floating point value for the end learning rate. Default: 0.0001. - power (float): The power of the polynomial. Default: 1.0. + warmup_steps (int): The steps of lr warm up. Should be equal to or greater than 0. Default: 0. + start_learning_rate (float): A floating point value for the learning rate. Should be equal to + or greater than 0. Default: 0.1. + end_learning_rate (float): A floating point value for the end learning rate. Should be equal to + or greater than 0. Default: 0.0001. + power (float): The power of the polynomial. It must be positive. Default: 1.0. beta1 (float): The exponential decay rate for the 1st moment estimates. Default: 0.9. Should be in range (0.0, 1.0). beta2 (float): The exponential decay rate for the 2nd moment estimates. Default: 0.999. @@ -180,10 +259,10 @@ class Lamb(Optimizer): eps=1e-6, weight_decay=0.0, decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()): - - super(Lamb, self).__init__(start_learning_rate, params) + super(Lamb, self).__init__(0.0, params) if self.is_group: - raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.") + raise RuntimeError( + f"The {self.cls_name} optimizer cannot support group setting.") _check_param_value(decay_steps, warmup_steps, start_learning_rate, end_learning_rate, power, beta1, beta2, eps, weight_decay, self.cls_name) @@ -195,14 +274,18 @@ class Lamb(Optimizer): if warmup_steps > 0: self.warmup_flag = True self.decay_steps = Tensor(np.array([decay_steps]).astype(np.float32)) - self.start_learning_rate = Tensor(np.array([start_learning_rate]).astype(np.float32)) - self.end_learning_rate = Tensor(np.array([end_learning_rate]).astype(np.float32)) - self.diff_learning_rate = Tensor(np.array([start_learning_rate - end_learning_rate]).astype(np.float32)) + self.start_learning_rate = Tensor( + np.array([start_learning_rate]).astype(np.float32)) + self.end_learning_rate = Tensor( + 
np.array([end_learning_rate]).astype(np.float32)) + self.diff_learning_rate = Tensor( + np.array([start_learning_rate - end_learning_rate]).astype(np.float32)) self.power = power self.beta1 = Tensor(np.array([beta1]).astype(np.float32)) self.beta2 = Tensor(np.array([beta2]).astype(np.float32)) self.eps = Tensor(np.array([eps]).astype(np.float32)) - self.weight_decay_tensor = Tensor(np.array([weight_decay]).astype(np.float32)) + self.weight_decay_tensor = Tensor( + np.array([weight_decay]).astype(np.float32)) self.params = self.parameters self.moments1 = self.params.clone(prefix="lamb_m", init='zeros') self.moments2 = self.params.clone(prefix="lamb_v", init='zeros') @@ -214,19 +297,29 @@ class Lamb(Optimizer): self.greater = P.Greater() self.one = Tensor(np.array([1.0]).astype(np.float32)) self.cast = P.Cast() + self.enable_graph_kernel = context.get_context("enable_graph_kernel") def construct(self, gradients): step = self.min(self.global_step, self.decay_steps) p = step / self.decay_steps - lr = self.diff_learning_rate * self.pow(self.one - p, self.power) + self.end_learning_rate + lr = self.diff_learning_rate * \ + self.pow(self.one - p, self.power) + self.end_learning_rate if self.warmup_flag: warmup_percent = self.global_step / self.warmup_steps warmup_lr = self.start_learning_rate * warmup_percent - is_warmup = self.cast(self.greater(self.warmup_steps, self.global_step), mstype.float32) + is_warmup = self.cast(self.greater( + self.warmup_steps, self.global_step), mstype.float32) lr = (self.one - is_warmup) * lr + is_warmup * warmup_lr - updated_velocity = self.hyper_map(F.partial(lamb_opt, self.beta1, self.beta2, self.eps, lr, - self.weight_decay_tensor, self.global_step), - self.params, self.moments1, self.moments2, gradients, self.decay_flag) + if self.enable_graph_kernel: + updated_velocity = self.hyper_map(F.partial(lamb_opt_graph_kernel, + self.beta1, self.beta2, self.eps, lr, + self.weight_decay_tensor, self.global_step), + self.params, self.moments1, 
self.moments2, gradients, self.decay_flag) + else: + updated_velocity = self.hyper_map(F.partial(_lamb_opt, + self.beta1, self.beta2, self.eps, lr, + self.weight_decay_tensor, self.global_step), + self.params, self.moments1, self.moments2, gradients, self.decay_flag) added_global_step = self.global_step + self.one F.control_depend(lr, added_global_step) diff --git a/mindspore/nn/optim/lars.py b/mindspore/nn/optim/lars.py index 3d85a05867..b55d1c5574 100755 --- a/mindspore/nn/optim/lars.py +++ b/mindspore/nn/optim/lars.py @@ -22,12 +22,12 @@ from mindspore.ops import operations as P from mindspore.ops import composite as C from mindspore.ops import functional as F from mindspore._checkparam import Validator as validator -from .optimizer import grad_scale, Optimizer +from .optimizer import _grad_scale, Optimizer -lars_opt = C.MultitypeFuncGraph("lars_opt") +_lars_opt = C.MultitypeFuncGraph("lars_opt") -@lars_opt.register("Function", "Number", "Tensor", "Tensor", "Tensor", "Bool", "Bool") +@_lars_opt.register("Function", "Number", "Tensor", "Tensor", "Tensor", "Bool", "Bool") def _tensor_run_opt(lars, weight_decay, learning_rate, gradient, weight, decay_flag, lars_flag): """Apply lars optimizer to the weight parameter.""" if lars_flag: @@ -59,13 +59,13 @@ class LARS(Optimizer): optimizer (Optimizer): MindSpore optimizer for which to wrap and modify gradients. epsilon (float): Term added to the denominator to improve numerical stability. Default: 1e-05. hyperpara (float): Trust coefficient for calculating the local learning rate. Default: 0.001. - weight_decay (float): Weight decay (L2 penalty). Default: 0.0. + weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0. use_clip (bool): Whether to use clip operation for calculating the local learning rate. Default: False. decay_filter (Function): A function to determine whether apply weight decay on parameters. 
Default: lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name. lars_filter (Function): A function to determine whether apply lars algorithm. Default: lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name. - loss_scale (float): A floating point value for the loss scale. Default: 1.0. + loss_scale (float): A floating point value for the loss scale. It should be greater than 0. Default: 1.0. Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params` in optimizer, the shape is @@ -94,7 +94,7 @@ class LARS(Optimizer): self.learning_rate = optimizer.learning_rate self.lars = P.LARSUpdate(epsilon, hyperpara, use_clip) self.reciprocal_scale = 1.0 / loss_scale - self.weight_decay = weight_decay * loss_scale + self.weight_decay = weight_decay self.cast = P.Cast() self.decay_flag = tuple(decay_filter(x) for x in self.parameters) self.lars_flag = tuple(lars_filter(x) for x in self.parameters) @@ -119,9 +119,9 @@ class LARS(Optimizer): else: lr = self.learning_rate if self.reciprocal_scale != 1.0: - gradients = self.hyper_map(F.partial(grad_scale, self.reciprocal_scale), gradients) + gradients = self.hyper_map(F.partial(_grad_scale, self.reciprocal_scale), gradients) - grad_t = self.hyper_map(F.partial(lars_opt, self.lars, self.weight_decay, lr), + grad_t = self.hyper_map(F.partial(_lars_opt, self.lars, self.weight_decay, lr), gradients, params, self.decay_flag, self.lars_flag) success = self.opt(grad_t) diff --git a/mindspore/nn/optim/lazyadam.py b/mindspore/nn/optim/lazyadam.py new file mode 100644 index 0000000000..48d33bf798 --- /dev/null +++ b/mindspore/nn/optim/lazyadam.py @@ -0,0 +1,199 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""lazy adam""" +from mindspore.common import dtype as mstype +from mindspore.common.initializer import initializer +from mindspore.ops import operations as P +from mindspore.ops import composite as C +from mindspore.ops import functional as F +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +from mindspore._checkparam import Validator as validator +from mindspore._checkparam import Rel +from .optimizer import Optimizer + +_lazy_adam_opt = C.MultitypeFuncGraph("lazy_adam_opt") + + +@_lazy_adam_opt.register("Function", "Function", "Tensor", "Tensor", "Tensor", "Tensor", "Number", "Tensor", "Tuple", + "Tensor", "Tensor", "Tensor") +def _run_opt_with_sparse(opt, sparse_opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params, + moment1, moment2): + """Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse.""" + success = True + success = F.depend(success, sparse_opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, + eps, gradient[1], gradient[0])) + return success + + +@_lazy_adam_opt.register("Function", "Function", "Tensor", "Tensor", "Tensor", "Tensor", "Number", "Tensor", "Tensor", + "Tensor", "Tensor", "Tensor") +def _run_opt_with_one_number(opt, sparse_opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params, + moment1, moment2): + """Apply adam optimizer to the weight parameter using Tensor.""" + success = True + success = F.depend(success, opt(params, 
moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2, + eps, gradient)) + return success + + +def _check_param_value(beta1, beta2, eps, weight_decay, prim_name): + """Check the type of inputs.""" + validator.check_value_type("beta1", beta1, [float], prim_name) + validator.check_value_type("beta2", beta2, [float], prim_name) + validator.check_value_type("eps", eps, [float], prim_name) + validator.check_value_type("weight_dacay", weight_decay, [float], prim_name) + validator.check_number_range("beta1", beta1, 0.0, 1.0, Rel.INC_NEITHER, prim_name) + validator.check_number_range("beta2", beta2, 0.0, 1.0, Rel.INC_NEITHER, prim_name) + validator.check_number_range("eps", eps, 0.0, float("inf"), Rel.INC_NEITHER, prim_name) + validator.check_number_range("weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, prim_name) + + +class LazyAdam(Optimizer): + r""" + Updates gradients by Adaptive Moment Estimation (Adam) algorithm. + + The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization `_. + + The updating formulas are as follows, + + .. math:: + \begin{array}{ll} \\ + m = \beta_1 * m + (1 - \beta_1) * g \\ + v = \beta_2 * v + (1 - \beta_2) * g * g \\ + l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ + w = w - l * \frac{m}{\sqrt{v} + \epsilon} + \end{array} + + :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`, + :math:`g` represents `gradients`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent + `beta1` and `beta2`, :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent + `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`, + :math:`\epsilon` represents `eps`. + + Note: + The LazyAdam optimizer supports separating parameter groups. Different parameter groups can set different + `learning_rate` and `weight_decay`. 
+ + When separating parameter groups, the weight decay in each group will be applied on the parameters if the + value of weight_decay > 0. When not separating parameter groups, the `weight_decay` in the API will be + applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters. + + The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the + `sparse_grad` of `Parameter` being set. The sparse behavior, to be noticed, is not equivalent to the + original Adam algorithm, as only the current indices params will be updated. The sparse feature is under + continuous development. The sparse behavior is currently performed on the CPU, weight decay is + not supported. + + Args: + params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, + the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", + "lr" and "weight_decay" are the keys can be parsed. + + - params: Required. The value should be a list of `Parameter`. + + - lr: Optional. If "lr" in the keys, the value of corresponding learning rate will be used. + If not, the `learning_rate` in the API will be used. + + - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay + will be used. If not, the `weight_decay` in the API will be used. + + learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is + Iterable or a Tensor and the dims of the Tensor is 1, + use dynamic learning rate, then the i-th step will + take the i-th value as the learning rate. + When the learning_rate is float or learning_rate is a Tensor + but the dims of the Tensor is 0, use fixed learning rate. + Other cases are not supported. Default: 1e-3. + beta1 (float): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0). Default: + 0.9. 
+ beta2 (float): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0). Default: + 0.999. + eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0. Default: + 1e-8. + use_locking (bool): Whether to enable a lock to protect updating variable tensors. + If True, updating of the var, m, and v tensors will be protected by a lock. + If False, the result is unpredictable. Default: False. + use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. + If True, updates the gradients using NAG. + If False, updates the gradients without using NAG. Default: False. + weight_decay (float): Weight decay (L2 penalty). Default: 0.0. + loss_scale (float): A floating point value for the loss scale. Should be equal to or greater than 1. Default: + 1.0. + + Inputs: + - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. + + Outputs: + Tensor[bool], the value is True. + + Examples: + >>> net = Net() + >>> #1) All parameters use the same learning rate and weight decay + >>> optim = nn.LazyAdam(params=net.trainable_params()) + >>> + >>> #2) Use parameter groups and set different values + >>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params())) + >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params())) + >>> group_params = [{'params': conv_params, 'weight_decay': 0.01, 'lr': 0.01}, + >>> {'params': no_conv_params}] + >>> opt = nn.LazyAdam(group_params, learning_rate=0.1, weight_decay=0.0) + >>> # the conv_params's parameters will use a learning rate of 0.01 and a weight decay of 0.01 + >>> # the no_conv_params's parameters don't set learning and weight decay. So they will use a + >>> # learning rate of 0.1 and a weight decay of 0.0. 
+ >>> + >>> loss = nn.SoftmaxCrossEntropyWithLogits() + >>> model = Model(net, loss_fn=loss, optimizer=optim) + """ + + def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, use_locking=False, + use_nesterov=False, weight_decay=0.0, loss_scale=1.0): + super(LazyAdam, self).__init__(learning_rate, params, weight_decay, loss_scale) + _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name) + validator.check_value_type("use_locking", use_locking, [bool], self.cls_name) + validator.check_value_type("use_nesterov", use_nesterov, [bool], self.cls_name) + + self.beta1 = Tensor(beta1, mstype.float32) + self.beta2 = Tensor(beta2, mstype.float32) + self.beta1_power = Parameter(initializer(1, [1], mstype.float32), name="beta1_power") + self.beta2_power = Parameter(initializer(1, [1], mstype.float32), name="beta2_power") + self.eps = eps + self.use_nesterov = use_nesterov + self.use_locking = use_locking + + self.moment1 = self.parameters.clone(prefix="moment1", init='zeros') + self.moment2 = self.parameters.clone(prefix="moment2", init='zeros') + + self.hyper_map = C.HyperMap() + self.opt = P.Adam(use_locking, use_nesterov) + self.sparse_opt = P.SparseApplyLazyAdam(use_locking, use_nesterov) + + def construct(self, gradients): + gradients = self.decay_weight(gradients) + gradients = self.scale_grad(gradients) + lr = self.get_lr() + + self.beta1_power = self.beta1_power * self.beta1 + self.beta2_power = self.beta2_power * self.beta2 + + if self.is_group_lr: + success = self.map_(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt, self.beta1_power, + self.beta2_power, self.beta1, self.beta2, self.eps), + lr, gradients, self.parameters, self.moment1, self.moment2) + else: + success = self.map_(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt, self.beta1_power, + self.beta2_power, self.beta1, self.beta2, self.eps, lr), + gradients, self.parameters, self.moment1, self.moment2) + return success diff --git a/mindspore/nn/optim/momentum.py 
b/mindspore/nn/optim/momentum.py index 080377b71d..ebdc5d86bf 100755 --- a/mindspore/nn/optim/momentum.py +++ b/mindspore/nn/optim/momentum.py @@ -13,17 +13,19 @@ # limitations under the License. # ============================================================================ """momentum""" -from mindspore.ops import functional as F, composite as C, operations as P +from mindspore.ops import functional as F, composite as C +from mindspore.ops import _selected_ops from mindspore.common.parameter import Parameter from mindspore.common.tensor import Tensor import mindspore.common.dtype as mstype from mindspore._checkparam import check_bool +from mindspore._checkparam import Validator as validator from .optimizer import Optimizer -momentum_opt = C.MultitypeFuncGraph("momentum_opt") +_momentum_opt = C.MultitypeFuncGraph("momentum_opt") -@momentum_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor") +@_momentum_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor") def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, moment): """Apply momentum optimizer to the weight parameter using Tensor.""" success = True @@ -45,10 +47,12 @@ class Momentum(Optimizer): value of weight_decay > 0. When not separating parameter groups, the `weight_decay` in the API will be applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters. + To improve parameter groups performance, the customized order of parameters can be supported. + Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", - "lr" and "weight_decay" are the keys can be parsed. + "lr", "weight_decay" and "order_params" are the keys can be parsed. - params: Required. The value should be a list of `Parameter`. 
@@ -58,16 +62,23 @@ class Momentum(Optimizer): - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. - learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is - Iterable or a Tensor and the dims of the Tensor is 1, - use dynamic learning rate, then the i-th step will - take the i-th value as the learning rate. - When the learning_rate is float or learning_rate is a Tensor - but the dims of the Tensor is 0, use fixed learning rate. - Other cases are not supported. + - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which + in the value of 'order_params' but not in any group will use default learning rate and default weight + decay. + + learning_rate (Union[int, float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is + Iterable or a Tensor and the dims of the Tensor is 1, + use dynamic learning rate, then the i-th step will + take the i-th value as the learning rate. + When the learning_rate is float or learning_rate is a + Tensor but the dims of the Tensor is 0, use fixed learning + rate. Other cases are not supported. It should be equal to + or greater than 0.0. momentum (float): Hyperparameter of type float, means momentum for the moving average. - weight_decay (float): Weight decay (L2 penalty). Default: 0.0. - loss_scale (float): A floating point value for the loss scale. Default: 1.0. + It should be at least 0.0. + weight_decay (int, float): Weight decay (L2 penalty). It should be equal to or greater than 0.0. Default: 0.0. + loss_scale (int, float): A floating point value for the loss scale. It should be greater than 0.0. Default: 1.0. use_nesterov (bool): Enable Nesterov momentum. Default: False. 
Inputs: @@ -86,19 +97,23 @@ class Momentum(Optimizer): >>> >>> #2) Use parameter groups and set different values >>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params())) - >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params())) - >>> group_params = [{'params': conv_params, 'weight_decay': 0.01, 'lr': 0.01}, - >>> {'params': no_conv_params}] + >>> bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params())) + >>> group_params = [{'params': conv_params, 'weight_decay': 0.01}, + >>> {'params': bias_params, 'lr': 0.01}, + >>> {'order_params': net.trainable_params()}] >>> opt = nn.Momentum(group_params, learning_rate=0.1, momentum=0.9, weight_decay=0.0) - >>> # the conv_params's parameters will use a learning rate of 0.01 and a weight decay of 0.01 - >>> # the no_cov_params's parameters don't set learning and weight decay. So they will use a - >>> # learning rate of 0.1 and a weight decay of 0.0. + >>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01. + >>> # The bias_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0. + >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'. + >>> # The parameters which in the value of 'order_params' but not in any group will use a learning rate + >>> # of default value 0.1 and a weight decay of default value 0.0. 
>>> >>> loss = nn.SoftmaxCrossEntropyWithLogits() >>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None) """ def __init__(self, params, learning_rate, momentum, weight_decay=0.0, loss_scale=1.0, use_nesterov=False): super(Momentum, self).__init__(learning_rate, params, weight_decay, loss_scale) + validator.check_value_type("momentum", momentum, [float], self.cls_name) if isinstance(momentum, float) and momentum < 0.0: raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum)) self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum") @@ -106,7 +121,7 @@ class Momentum(Optimizer): self.use_nesterov = check_bool(use_nesterov) self.moments = self.params.clone(prefix="moments", init='zeros') self.hyper_map = C.HyperMap() - self.opt = P.ApplyMomentum(use_nesterov=self.use_nesterov) + self.opt = _selected_ops.ApplyMomentum(use_nesterov=self.use_nesterov) def construct(self, gradients): params = self.params @@ -115,7 +130,7 @@ class Momentum(Optimizer): gradients = self.scale_grad(gradients) lr = self.get_lr() if self.is_group_lr: - success = self.hyper_map(F.partial(momentum_opt, self.opt, self.momentum), lr, gradients, params, moments) + success = self.hyper_map(F.partial(_momentum_opt, self.opt, self.momentum), lr, gradients, params, moments) else: - success = self.hyper_map(F.partial(momentum_opt, self.opt, self.momentum, lr), gradients, params, moments) + success = self.hyper_map(F.partial(_momentum_opt, self.opt, self.momentum, lr), gradients, params, moments) return success diff --git a/mindspore/nn/optim/optimizer.py b/mindspore/nn/optim/optimizer.py index 28c5d9e939..45eb604bf5 100755 --- a/mindspore/nn/optim/optimizer.py +++ b/mindspore/nn/optim/optimizer.py @@ -48,6 +48,8 @@ class Optimizer(Cell): value of weight_decay > 0. 
When not separating parameter groups, the `weight_decay` in the API will be applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters. + To improve parameter groups performance, the customized order of parameters can be supported. + Args: learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is Iterable or a Tensor and the dims of the Tensor is 1, @@ -55,12 +57,12 @@ class Optimizer(Cell): take the i-th value as the learning rate. When the learning_rate is float or learning_rate is a Tensor but the dims of the Tensor is 0, use fixed learning rate. - Other cases are not supported. Should be greater than 0. - If the type of `learning_rate` input is int, it will be + Other cases are not supported. It should be equal to or greater + than 0. If the type of `learning_rate` input is int, it will be converted to float. parameters (Union[list[Parameter], list[dict]]): When the `parameters` is a list of `Parameter` which will be updated, the element in `parameters` should be class `Parameter`. When the `parameters` is a list of `dict`, - the "params", "lr" and "weight_decay" are the keys can be parsed. + the "params", "lr", "weight_decay" and "order_params" are the keys can be parsed. - params: Required. The value should be a list of `Parameter`. @@ -70,6 +72,11 @@ class Optimizer(Cell): - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. + - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which + in the value of 'order_params' but not in any group will use default learning rate and default weight + decay. + weight_decay (float): A floating point value for the weight decay. It should be equal to or greater than 0. 
If the type of `weight_decay` input is int, it will be converted to float. Default: 0.0. loss_scale (float): A floating point value for the loss scale. It should be greater than 0. If the @@ -93,16 +100,17 @@ class Optimizer(Cell): if isinstance(loss_scale, int): loss_scale = float(loss_scale) - validator.check_value_type("loss_scale", loss_scale, [float], None) - validator.check_number_range("loss_scale", loss_scale, 0.0, float("inf"), Rel.INC_NEITHER, None) + validator.check_value_type("loss_scale", loss_scale, [float], self.cls_name) + validator.check_number_range("loss_scale", loss_scale, 0.0, float("inf"), Rel.INC_NEITHER, self.cls_name) if isinstance(weight_decay, int): weight_decay = float(weight_decay) - validator.check_value_type("weight_decay", weight_decay, [float], None) - validator.check_number_range("weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, None) + validator.check_value_type("weight_decay", weight_decay, [float], self.cls_name) + validator.check_number_range("weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, self.cls_name) self.is_group = False self.is_group_lr = False + self.is_group_params_ordered = False self.loss_scale = loss_scale if isinstance(learning_rate, int): learning_rate = float(learning_rate) @@ -145,6 +153,7 @@ class Optimizer(Cell): self.reciprocal_scale = 1.0 / loss_scale self.exec_weight_decay = any(self.decay_flags) self.param_length = len(self.parameters) + self.map_ = C.Map() def decay_weight(self, gradients): """ @@ -162,11 +171,11 @@ class Optimizer(Cell): params = self.parameters if self.is_group: if self.exec_weight_decay: - gradients = self.hyper_map(F.partial(apply_decay), self.weight_decay, self.decay_flags, + gradients = self.hyper_map(F.partial(_apply_decay), self.weight_decay, self.decay_flags, params, gradients) else: if self.weight_decay > 0: - gradients = self.hyper_map(F.partial(apply_decay, self.weight_decay), self.decay_flags, + gradients = self.hyper_map(F.partial(_apply_decay, 
self.weight_decay), self.decay_flags, params, gradients) return gradients @@ -187,7 +196,7 @@ class Optimizer(Cell): """ if self.reciprocal_scale != 1.0: - gradients = self.hyper_map(F.partial(grad_scale, self.reciprocal_scale), gradients) + gradients = self.map_(F.partial(_grad_scale, self.reciprocal_scale), gradients) return gradients @@ -210,9 +219,8 @@ class Optimizer(Cell): raise TypeError("Learning rate should be float, Tensor or Iterable.") return lr - def _init_group_params(self, parameters, learning_rate, weight_decay): - """Init learning rate or weight decay in group params.""" - origin_dynamic_lr = self.dynamic_lr + def _parse_group_params(self, parameters, learning_rate): + """Parse group params.""" if self.dynamic_lr: dynamic_lr_length = learning_rate.size() else: @@ -220,6 +228,15 @@ class Optimizer(Cell): for group_param in parameters: lr_length = dynamic_lr_length + if 'order_params' in group_param.keys(): + if len(group_param.keys()) > 1: + raise ValueError("The order params dict in group parameters should " + "only include the 'order_params' key.") + if not isinstance(group_param['order_params'], Iterable): + raise TypeError("The value of 'order_params' should be an Iterable type.") + self.is_group_params_ordered = True + continue + if 'lr' in group_param.keys(): self.is_group_lr = True self._get_single_lr(group_param['lr']) @@ -229,10 +246,20 @@ class Optimizer(Cell): elif isinstance(group_param['lr'], Tensor): lr_length = group_param['lr'].size() self.dynamic_lr = True + if dynamic_lr_length not in (lr_length, 0): raise ValueError("The dynamic learning rate in group should be the same size.") + + if not group_param['params']: + raise ValueError("Optimizer got an empty group parameter list.") + dynamic_lr_length = lr_length + self.dynamic_lr_length = dynamic_lr_length + def _init_group_params(self, parameters, learning_rate, weight_decay): + """Init learning rate or weight decay in group params.""" + origin_dynamic_lr = self.dynamic_lr + 
self._parse_group_params(parameters, learning_rate) if self.dynamic_lr and not origin_dynamic_lr: self.gather = P.GatherV2() self.assignadd = P.AssignAdd() @@ -240,20 +267,20 @@ class Optimizer(Cell): params_store = [] for group_param in parameters: - if not group_param['params']: - raise ValueError("Optimizer got an empty parameter list.") + if 'order_params' in group_param.keys(): + ordered_parameters = group_param['order_params'] + continue self.group_params += group_param['params'] if 'lr' in group_param.keys(): params_dynamic_lr = isinstance(group_param['lr'], (Iterable, Tensor)) - if self.dynamic_lr and not params_dynamic_lr: - lr = Tensor(np.array([group_param['lr']] * dynamic_lr_length).astype(np.float32)) + lr = Tensor(np.array([group_param['lr']] * self.dynamic_lr_length).astype(np.float32)) else: lr = self._get_single_lr(group_param['lr']) else: if self.dynamic_lr and not origin_dynamic_lr: - lr = Tensor(np.array([self.scalar_lr] * dynamic_lr_length).astype(np.float32)) + lr = Tensor(np.array([self.scalar_lr] * self.dynamic_lr_length).astype(np.float32)) else: lr = learning_rate @@ -273,10 +300,33 @@ class Optimizer(Cell): validator.check_value_type("parameter", param, [Parameter], self.cls_name) if param.name in params_store: raise RuntimeError(f"The {param.name} parameter has appeared in parameter groups.") + params_store.append(param.name) self.group_lr.append(Parameter(lr, name="lr_" + param.name)) self.group_weight_decay.append(weight_decay_) + if self.is_group_params_ordered: + self._order_and_adjust_group_params(ordered_parameters, learning_rate, weight_decay) + + def _order_and_adjust_group_params(self, ordered_parameters, learning_rate, weight_decay): + """ + Order group parameter, learning rate and weight decay in group params. And assign the parameters + which in the value of 'order_params' but not in any group to default value. 
+ """ + params_length = len(ordered_parameters) + ordered_learning_rate = [Parameter(learning_rate, name="lr_" + param.name) for param in ordered_parameters] + ordered_weight_decay = [weight_decay * self.loss_scale] * params_length + params_name = [param.name for param in ordered_parameters] + + for param, lr, wd in zip(self.group_params, self.group_lr, self.group_weight_decay): + index = params_name.index(param.name) + ordered_learning_rate[index] = lr + ordered_weight_decay[index] = wd + + self.group_params = list(ordered_parameters) + self.group_lr = ordered_learning_rate + self.group_weight_decay = ordered_weight_decay + def get_lr(self): """ Get the learning rate of current step. @@ -339,10 +389,10 @@ class Optimizer(Cell): op_add = P.AddN() -apply_decay = C.MultitypeFuncGraph("apply_decay") +_apply_decay = C.MultitypeFuncGraph("apply_decay") -@apply_decay.register("Number", "Bool", "Tensor", "Tensor") +@_apply_decay.register("Number", "Bool", "Tensor", "Tensor") def _tensor_apply_decay(weight_decay, if_apply, weight, gradient): """Get grad with weight_decay.""" if if_apply: @@ -350,12 +400,20 @@ def _tensor_apply_decay(weight_decay, if_apply, weight, gradient): return gradient -grad_scale = C.MultitypeFuncGraph("grad_scale") +_grad_scale = C.MultitypeFuncGraph("grad_scale") -@grad_scale.register("Number", "Tensor") +@_grad_scale.register("Number", "Tensor") def tensor_grad_scale(scale, grad): """Get grad with scale.""" if scale == 1.0: return grad return grad * scale + + +@_grad_scale.register("Number", "Tuple") +def tensor_grad_scale_with_sparse(scale, grad): + """Get grad with scale.""" + if scale == 1.0: + return grad + return grad[0], grad[1] * scale, grad[2] diff --git a/mindspore/nn/optim/proximal_ada_grad.py b/mindspore/nn/optim/proximal_ada_grad.py new file mode 100644 index 0000000000..380720404a --- /dev/null +++ b/mindspore/nn/optim/proximal_ada_grad.py @@ -0,0 +1,112 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""PROXIMAL_ADA_GRAD""" +from mindspore.ops import functional as F, composite as C, operations as P +from mindspore.common import Tensor +import mindspore.common.dtype as mstype +from mindspore._checkparam import Validator as validator +from mindspore._checkparam import Rel +from .optimizer import Optimizer + +_proximal_ada_grad_opt = C.MultitypeFuncGraph("proximal_ada_grad_opt") + +@_proximal_ada_grad_opt.register("Function", "Function", "Tensor", "Tensor", "Tensor", "Tuple", "Tensor", "Tensor") +def _tensor_run_opt_with_sparse(opt, sparse_opt, learning_rate, l1, l2, gradient, weight, accum): + """Apply sparse proximal_ada_grad optimizer to the weight parameter.""" + success = True + success = F.depend(success, sparse_opt(weight, accum, learning_rate, l1, l2, gradient[1], gradient[0])) + return success + + +@_proximal_ada_grad_opt.register("Function", "Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor") +def _tensor_run_opt(opt, sparse_opt, learning_rate, l1, l2, gradient, weight, accum): + """Apply proximal_ada_grad optimizer to the weight parameter.""" + success = True + success = F.depend(success, opt(weight, accum, learning_rate, l1, l2, gradient)) + return success + + +def _check_param_value(accum, l1, l2, use_locking, prim_name=None): + """Check inputs param.""" + validator.check_value_type("accum", accum, [float], prim_name) + 
validator.check_value_type("l1", l1, [float], prim_name) + validator.check_value_type("l2", l2, [float], prim_name) + validator.check_value_type("use_locking", use_locking, [bool], prim_name) + validator.check_number_range("accum", accum, 0.0, float("inf"), Rel.INC_LEFT, prim_name) + validator.check_number_range("l1", l1, 0.0, float("inf"), Rel.INC_LEFT, prim_name) + validator.check_number_range("l2", l2, 0.0, float("inf"), Rel.INC_LEFT, prim_name) + + +class ProximalAdagrad(Optimizer): + """ + Implement the ProximalAdagrad algorithm with ApplyProximalAdagrad Operator. + + ProximalAdagrad is an online Learning and Stochastic Optimization. + Refer to paper `Efficient Learning using Forward-Backward Splitting + `_. + + Note: + The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the + `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse + behavior is currently performed on the CPU, weight decay is not supported. + + Args: + params (list[Parameter]): A list of parameter, which will be updated. The element in `params` + should be Parameter. + accum (float): The starting value for accumulators, must be zero or positive values. Default: 0.1. + learning_rate (float): The learning rate value, must be greater than or equal to zero. Default: 0.001. + l1 (float): l1 regularization strength, must be greater than or equal to zero. Default: 0.0. + l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: 0.0. + use_locking (bool): If True use locks for update operation. Default: False. + loss_scale (float): Value for the loss scale. It should be equal to or greater than 1.0. Default: 1.0. + weight_decay (float): Weight decay value to multiply weight, must be zero or positive value. Default: 0.0. + + Inputs: + - **grads** (tuple[Tensor]) - The gradients of `params` in optimizer, the shape is as same as the `params` + in optimizer.
+ + Outputs: + Tensor[bool], the value is True. + + Examples: + >>> net = Net() + >>> loss = nn.SoftmaxCrossEntropyWithLogits() + >>> opt = nn.ProximalAdagrad(net.trainable_params()) + >>> model = Model(net, loss_fn=loss, optimizer=opt, metrics=None) + """ + + def __init__(self, params, accum=0.1, learning_rate=0.001, l1=0.0, l2=0.0, + use_locking=False, loss_scale=1.0, weight_decay=0.0): + super(ProximalAdagrad, self).__init__(learning_rate, params, weight_decay, loss_scale) + if self.is_group: + raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.") + _check_param_value(accum, l1, l2, use_locking, self.cls_name) + self.accum = self.parameters.clone(prefix="accum", init=accum) + self.l1 = Tensor(l1, mstype.float32) + self.l2 = Tensor(l2, mstype.float32) + self.weight_decay = weight_decay + self.hyper_map = C.HyperMap() + self.opt = P.ApplyProximalAdagrad(use_locking=use_locking) + self.sparse_opt = P.SparseApplyProximalAdagrad(use_locking=use_locking) + + def construct(self, grads): + params = self.parameters + accum = self.accum + grads = self.decay_weight(grads) + grads = self.scale_grad(grads) + lr = self.learning_rate + success = self.map_(F.partial(_proximal_ada_grad_opt, self.opt, self.sparse_opt, lr, self.l1, self.l2), + grads, params, accum) + return success diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py index 4d572574ae..05c42fb444 100644 --- a/mindspore/nn/optim/rmsprop.py +++ b/mindspore/nn/optim/rmsprop.py @@ -18,21 +18,21 @@ from mindspore._checkparam import Validator as validator from mindspore._checkparam import Rel from .optimizer import Optimizer -rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") -centered_rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") +_rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") +_centered_rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") -@rmsprop_opt.register("Function", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor") -def 
_rmsprop_opt(opt, decay, epsilon, momentum, learning_rate, weight, ms, mom, grad): +@_rmsprop_opt.register("Function", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor") +def _rmsprop_opt_(opt, decay, epsilon, momentum, learning_rate, weight, ms, mom, grad): """Apply rmsprop optimizer to the weight parameter using dynamic learning rate.""" success = True success = F.depend(success, opt(weight, ms, mom, learning_rate, grad, decay, momentum, epsilon)) return success -@centered_rmsprop_opt.register("Function", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor", - "Tensor", "Tensor") -def _centered_rmsprop_opt(opt, decay, epsilon, momentum, learning_rate, weight, mg, ms, mom, grad): +@_centered_rmsprop_opt.register("Function", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor", + "Tensor", "Tensor") +def _centered_rmsprop_opt_(opt, decay, epsilon, momentum, learning_rate, weight, mg, ms, mom, grad): """Apply centered rmsprop optimizer to the weight parameter using dynamic learning rate.""" success = True success = F.depend(success, opt(weight, mg, ms, mom, grad, learning_rate, decay, momentum, epsilon)) @@ -51,6 +51,8 @@ class RMSProp(Optimizer): value of weight_decay > 0. When not separating parameter groups, the `weight_decay` in the API will be applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters. + To improve parameter groups performance, the customized order of parameters can be supported. + Update `params` according to the RMSProp algorithm. The equation is as follows: @@ -93,7 +95,7 @@ class RMSProp(Optimizer): Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", - "lr" and "weight_decay" are the keys can be parsed. 
+ "lr", "weight_decay" and "order_params" are the keys can be parsed. - params: Required. The value should be a list of `Parameter`. @@ -103,6 +105,11 @@ class RMSProp(Optimizer): - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. + - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which + in the value of 'order_params' but not in any group will use default learning rate and default weight + decay. + learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is Iterable or a Tensor and the dims of the Tensor is 1, use dynamic learning rate, then the i-th step will @@ -133,13 +140,16 @@ class RMSProp(Optimizer): >>> >>> #2) Use parameter groups and set different values >>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params())) - >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params())) - >>> group_params = [{'params': conv_params, 'weight_decay': 0.01, 'lr': 0.01}, - >>> {'params': no_conv_params}] + >>> bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params())) + >>> group_params = [{'params': conv_params, 'weight_decay': 0.01}, + >>> {'params': bias_params, 'lr': 0.01}, + >>> {'order_params': net.trainable_params()}] >>> opt = nn.RMSProp(group_params, learning_rate=0.1, weight_decay=0.0) - >>> # the conv_params's parameters will use a learning rate of 0.01 and a weight decay of 0.01 - >>> # the no_cov_params's parameters don't set learning and weight decay. So they will use a - >>> # learning rate of 0.1 and a weight decay of 0.0. + >>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01. 
+ >>> # The bias_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0. + >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'. + >>> # The parameters which in the value of 'order_params' but not in any group will use a learning rate + >>> # of default value 0.1 and a weight decay of default value 0.0. >>> >>> loss = nn.SoftmaxCrossEntropyWithLogits() >>> model = Model(net, loss_fn=loss, optimizer=optim) @@ -177,17 +187,17 @@ class RMSProp(Optimizer): lr = self.get_lr() if self.centered: if self.is_group_lr: - success = self.hyper_map(F.partial(centered_rmsprop_opt, self.opt, self.decay, self.epsilon, + success = self.hyper_map(F.partial(_centered_rmsprop_opt, self.opt, self.decay, self.epsilon, self.momentum), lr, params, self.mg, self.ms, self.moment, gradients) else: - success = self.hyper_map(F.partial(centered_rmsprop_opt, self.opt, self.decay, self.epsilon, + success = self.hyper_map(F.partial(_centered_rmsprop_opt, self.opt, self.decay, self.epsilon, self.momentum, lr), params, self.mg, self.ms, self.moment, gradients) else: if self.is_group_lr: - success = self.hyper_map(F.partial(rmsprop_opt, self.opt, self.decay, self.epsilon, + success = self.hyper_map(F.partial(_rmsprop_opt, self.opt, self.decay, self.epsilon, self.momentum), lr, params, self.ms, self.moment, gradients) else: - success = self.hyper_map(F.partial(rmsprop_opt, self.opt, self.decay, self.epsilon, + success = self.hyper_map(F.partial(_rmsprop_opt, self.opt, self.decay, self.epsilon, self.momentum, lr), params, self.ms, self.moment, gradients) return success diff --git a/mindspore/nn/optim/sgd.py b/mindspore/nn/optim/sgd.py index bf49244550..d2680a38e5 100755 --- a/mindspore/nn/optim/sgd.py +++ b/mindspore/nn/optim/sgd.py @@ -20,10 +20,10 @@ import mindspore.common.dtype as mstype from mindspore._checkparam import Validator as validator from .optimizer import Optimizer -sgd_opt = 
C.MultitypeFuncGraph("sgd_opt") +_sgd_opt = C.MultitypeFuncGraph("sgd_opt") -@sgd_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor") +@_sgd_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor") def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, accum, stat): """Apply sgd optimizer to the weight parameter using Tensor.""" success = True @@ -47,10 +47,12 @@ class SGD(Optimizer): value of weight_decay > 0. When not separating parameter groups, the `weight_decay` in the API will be applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters. + To improve parameter groups performance, the customized order of parameters can be supported. + Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", - "lr" and "weight_decay" are the keys can be parsed. + "lr", "weight_decay" and "order_params" are the keys can be parsed. - params: Required. The value should be a list of `Parameter`. @@ -60,16 +62,22 @@ class SGD(Optimizer): - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. + - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which + in the value of 'order_params' but not in any group will use default learning rate and default weight + decay. + learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is Iterable or a Tensor and the dims of the Tensor is 1, use dynamic learning rate, then the i-th step will take the i-th value as the learning rate. 
When the learning_rate is float or learning_rate is a Tensor but the dims of the Tensor is 0, use fixed learning rate. - Other cases are not supported. Default: 0.1. - momentum (float): A floating point value the momentum. Default: 0.0. - dampening (float): A floating point value of dampening for momentum. Default: 0.0. - weight_decay (float): Weight decay (L2 penalty). Default: 0.0. + Other cases are not supported. It should be equal to or + greater than 0. Default: 0.1. + momentum (float): A floating point value the momentum. should be at least 0.0. Default: 0.0. + dampening (float): A floating point value of dampening for momentum. should be at least 0.0. Default: 0.0. + weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0. nesterov (bool): Enables the Nesterov momentum. Default: False. loss_scale (float): A floating point value for the loss scale, which should be larger than 0.0. Default: 1.0. @@ -90,13 +98,16 @@ class SGD(Optimizer): >>> >>> #2) Use parameter groups and set different values >>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params())) - >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params())) - >>> group_params = [{'params': conv_params, 'weight_decay': 0.01, 'lr': 0.01}, - >>> {'params': no_conv_params}] + >>> bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params())) + >>> group_params = [{'params': conv_params, 'weight_decay': 0.01}, + >>> {'params': bias_params, 'lr': 0.01}, + >>> {'order_params': net.trainable_params()}] >>> opt = nn.SGD(group_params, learning_rate=0.1, weight_decay=0.0) - >>> # the conv_params's parameters will use a learning rate of 0.01 and a weight decay of 0.01 - >>> # the no_cov_params's parameters don't set learning and weight decay. So they will use a - >>> # learning rate of 0.1 and a weight decay of 0.0. 
+ >>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01. + >>> # The bias_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0. + >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'. + >>> # The parameters which in the value of 'order_params' but not in any group will use a learning rate + >>> # of default value 0.1 and a weight decay of default value 0.0. >>> >>> loss = nn.SoftmaxCrossEntropyWithLogits() >>> model = Model(net, loss_fn=loss, optimizer=optim) @@ -143,7 +154,7 @@ class SGD(Optimizer): gradients = self.scale_grad(gradients) lr = self.get_lr() if self.is_group_lr: - success = self.hyper_map(F.partial(sgd_opt, self.opt, self.momentum), lr, gradients, params, accum, stat) + success = self.hyper_map(F.partial(_sgd_opt, self.opt, self.momentum), lr, gradients, params, accum, stat) else: - success = self.hyper_map(F.partial(sgd_opt, self.opt, self.momentum, lr), gradients, params, accum, stat) + success = self.hyper_map(F.partial(_sgd_opt, self.opt, self.momentum, lr), gradients, params, accum, stat) return success diff --git a/mindspore/nn/wrap/cell_wrapper.py b/mindspore/nn/wrap/cell_wrapper.py index fe69a2a6ea..f0d920f51f 100644 --- a/mindspore/nn/wrap/cell_wrapper.py +++ b/mindspore/nn/wrap/cell_wrapper.py @@ -21,7 +21,6 @@ from ...common.parameter import Parameter, ParameterTuple from ...ops import composite as C from ...ops import functional as F from ...ops import operations as P -from ...ops.composite.base import _mp_cast_helper from ...ops.operations.comm_ops import _VirtualDataset from ..cell import Cell from .grad_reducer import DistributedGradReducer @@ -166,6 +165,7 @@ class TrainOneStepCell(Cell): def __init__(self, network, optimizer, sens=1.0): super(TrainOneStepCell, self).__init__(auto_prefix=False) self.network = network + self.network.set_grad() self.network.add_flags(defer_inline=True) 
self.weights = optimizer.parameters self.optimizer = optimizer @@ -344,7 +344,7 @@ class WithEvalCell(Cell): def construct(self, data, label): outputs = self._network(data) if self.add_cast_fp32: - label = _mp_cast_helper(mstype.float32, label) + label = F.mixed_precision_cast(mstype.float32, label) outputs = F.cast(outputs, mstype.float32) loss = self._loss_fn(outputs, label) return loss, outputs, label diff --git a/mindspore/nn/wrap/grad_reducer.py b/mindspore/nn/wrap/grad_reducer.py index 8383910a60..c66bfbe646 100644 --- a/mindspore/nn/wrap/grad_reducer.py +++ b/mindspore/nn/wrap/grad_reducer.py @@ -52,6 +52,31 @@ def _tensors_allreduce_mean(mul, degree, allreduce_filter, grad): return grad +@reduce_opt.register("Function", "Number", "Bool", "Tuple") +def _tensors_allreduce_mean_with_sparse(mul, degree, allreduce_filter, grad): + """ + Apply mean and allgather on gradient instead of allreduce for sparse feature. + Allgather is a communication operation used for distributed deep learning. + + Args: + mul (Primitive): Div operation. + degree (int): The mean coefficient. + allreduce_filter (bool): When it is true, allgather would apply. + grad (Tuple): The indices, gradient tensor and tensor_shape before operation. + + Returns: + Tuple, include indices, the gradient tensor and tensor_shape after operation. + """ + if allreduce_filter: + indices = _all_gather(grad[0]) + degree = F.scalar_cast(degree, F.dtype(grad[1])) + dout = _all_gather(grad[1]) + cast_op = P.Cast() + dout = mul(dout, cast_op(F.scalar_to_array(1.0/degree), F.dtype(dout))) + grad = (indices, dout, dout[2]) + return grad + + @reduce_opt.register("Bool", "Tensor") def _tensors_allreduce(allreduce_filter, grad): """ @@ -69,6 +94,26 @@ def _tensors_allreduce(allreduce_filter, grad): return grad +@reduce_opt.register("Bool", "Tuple") +def _tensors_allreduce_with_sparse(allreduce_filter, grad): + """ + Apply mean and allgather on gradient instead of allreduce for sparse feature. 
+ Allgather is a communication operation used for distributed deep learning. + + Args: + allreduce_filter (bool): When it is true, allgather would apply. + grad (Tuple): The indices, gradient tensor and tensor_shape before operation. + + Returns: + Tuple, include indices, the gradient tensor and tensor_shape after operation. + """ + if allreduce_filter: + indices = _all_gather(grad[0]) + dout = _all_gather(grad[1]) + grad = (indices, dout, dout[2]) + return grad + + _get_datatype = C.MultitypeFuncGraph("_get_datatype") diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py index b7b7af8082..a2a808781e 100644 --- a/mindspore/ops/_grad/grad_array_ops.py +++ b/mindspore/ops/_grad/grad_array_ops.py @@ -19,6 +19,7 @@ from .. import operations as P from ..operations import _grad_ops as G from ..operations import _inner_ops as inner from ..composite.multitype_ops.zeros_like_impl import zeros_like +from ..functional import broadcast_gradient_args from .. import functional as F from .grad_base import bprop_getters from ..primitive import constexpr @@ -30,6 +31,7 @@ unsorted_segment_sum = P.UnsortedSegmentSum() transpose = P.Transpose() shape_op = P.Shape() reshape = P.Reshape() +size_op = P.Size() invert_permutation = P.InvertPermutation() logical_and = P.LogicalAnd() @@ -192,24 +194,27 @@ def get_bprop_tile(self): @bprop_getters.register(inner.EmbeddingLookup) def get_bprop_embedding_lookup(self): """Generate bprop for EmbeddingLookup""" - host_sub = P.Sub().add_prim_attr('primitive_target', 'CPU') + sub_op = P.Sub() + reshape_op = P.Reshape() host_reshape = P.Reshape().add_prim_attr('primitive_target', 'CPU') def bprop_sparse(x, indices, offset, reduce_scatter_flag, split_num, out, dout): x_shp = shape_op(x) - if reduce_scatter_flag is True: - elu_grad = G.EmbeddingLookupCommGrad() - actual_dout = elu_grad(dout, split_num) - else: - actual_dout = dout - new_indices = host_sub(indices - offset) + new_indices = sub_op(indices, offset) # 
Reshape the 'new_indices' new_indices_shape_changed = (size_op(new_indices),) - new_indices = host_reshape(new_indices, new_indices_shape_changed) - # Reshape the 'actual_dout' + new_indices = reshape_op(new_indices, new_indices_shape_changed) x_shp_tail = x_shp[1:] actual_dout_shape_changed = new_indices_shape_changed + x_shp_tail - actual_dout = host_reshape(actual_dout, actual_dout_shape_changed) - return (new_indices, actual_dout, x_shp), zeros_like(new_indices), zeros_like(axis), \ + if reduce_scatter_flag is True: + # On host + elu_grad = G.EmbeddingLookupCommGrad() + actual_dout = elu_grad(dout, split_num) + # Reshape the 'actual_dout' on host + actual_dout = host_reshape(actual_dout, actual_dout_shape_changed) + else: + # Reshape the 'actual_dout' on device + actual_dout = reshape_op(dout, actual_dout_shape_changed) + return (new_indices, actual_dout, x_shp), zeros_like(indices), zeros_like(offset), \ zeros_like(reduce_scatter_flag), zeros_like(split_num) return bprop_sparse @@ -309,7 +314,38 @@ def get_bprop_gather_v2(self): return bprop -@bprop_getters.register(P.Range) +@bprop_getters.register(P.SparseGatherV2) +def get_bprop_sparse_gather_v2(self): + """Generate bprop for SparseGatherV2""" + + def bprop(x, indices, axis, out, dout): + x_shp = shape_op(x) + if axis == 0: + indices_size = (size_op(indices),) + x_tail_shp = x_shp[1:] + values_shape = indices_size + x_tail_shp + values = reshape(dout, values_shape) + indices = reshape(indices, indices_size) + return (indices, values, x_shp), zeros_like(indices), zeros_like(axis) + if F.rank(dout) == 0: + dout = P.ExpandDims()(dout, -1) + if F.rank(indices) == 0: + indices = P.ExpandDims()(indices, -1) + out_shp = shape_op(dout) + ind_shp = shape_op(indices) + # Example: out_shape:(3,2,3) axis 1 -> (1,0,2) + perm_1 = _generate_shape_index(out_shp, ind_shp, axis) + values_transpose = transpose(dout, perm_1) + params_grad = unsorted_segment_sum(values_transpose, indices, shape_op(x)[axis]) + # Example: 
out_shape:(3,2,3) axis 2 -> (1,2,0) + perm_2 = _generate_inverse_index(x_shp, axis) + params_grad = transpose(params_grad, perm_2) + return params_grad, zeros_like(indices), zeros_like(axis) + + return bprop + + +@bprop_getters.register(inner.Range) def get_bprop_range(self): """Generate bprop for Range""" @@ -449,6 +485,31 @@ def get_bprop_scatter_nd_update(self): return bprop +@bprop_getters.register(P.TensorScatterUpdate) +def get_bprop_tensor_scatter_update(self): + """Generate bprop for TensorScatterUpdate""" + gather_nd = P.GatherNd() + tensor_scatter_update = P.TensorScatterUpdate() + + def bprop(x, indices, update, out, dout): + x_grad = tensor_scatter_update(dout, indices, zeros_like(update)) + update_grad = gather_nd(dout, indices) + return x_grad, zeros_like(indices), update_grad + + return bprop + + +@bprop_getters.register(P.ScatterMax) +def get_bprop_scatter_max(self): + """Generate bprop for ScatterMax""" + gather = P.GatherV2() + + def bprop(x, indices, update, out, dout): + return dout, zeros_like(indices), gather(dout, indices, 0) + + return bprop + + @bprop_getters.register(P.Argmax) def get_bprop_argmax(self): """Generate bprop for Argmax""" @@ -607,6 +668,24 @@ def get_bprop_batch_to_space_nd(self): return (dx,) return bprop +@bprop_getters.register(P.BroadcastTo) +def get_bprop_broadcast_to(self): + """Generate bprop for BroadcastTo""" + reduce_keep_dim = P.ReduceSum(keep_dims=True) + broadcast_shape = self.shape + + def bprop(x, out, dout): + x_shape = shape_op(x) + dout_shape = shape_op(dout) + + if x_shape == dout_shape: + return (dout,) + _, reduction_axes = broadcast_gradient_args(broadcast_shape, x_shape) + reduced_grad = reduce_keep_dim(dout, reduction_axes) + dx = reshape(reduced_grad, x_shape) + return (dx,) + return bprop + @bprop_getters.register(P.ReverseSequence) def get_bprop_reverse_sequence(self): diff --git a/mindspore/ops/_grad/grad_comm_ops.py b/mindspore/ops/_grad/grad_comm_ops.py index 057d150be1..7477d50895 100644 --- 
a/mindspore/ops/_grad/grad_comm_ops.py +++ b/mindspore/ops/_grad/grad_comm_ops.py @@ -26,9 +26,10 @@ from .grad_base import bprop_getters @bprop_getters.register(AllReduce) def get_bprop_all_reduce(self): - """Generate bprop for AllReduce.""" + """Generate bprop for AllReduce, do allreduce or allgather, allgather for sparse feature.""" all_reduce_grad = AllReduce(ReduceOp.SUM, self.group) + all_gather = AllGather(group=self.group) if self.instance_name: instance_name = "grad" + self.instance_name all_reduce_grad.set_prim_instance_name(instance_name) @@ -42,15 +43,28 @@ def get_bprop_all_reduce(self): if self.op == ReduceOp.SUM: def bprop(x, out, dout): - dx = all_reduce_grad(dout) + if F.issubclass_(F.typeof(dout), mstype.tensor): + dx = all_reduce_grad(dout) + else: + indices = all_gather(dout[0]) + grad = all_gather(dout[1]) + dx = (indices, grad, dout[2]) return (dx,) else: def bprop(x, out, dout): - dx = all_reduce_grad(dout) - z = equal(x, out) - z = cast(z, dtype(dx)) - dx = mul(dx, z) + if F.issubclass_(F.typeof(dout), mstype.tensor): + dx = all_reduce_grad(dout) + z = equal(x, out) + z = cast(z, dtype(dx)) + dx = mul(dx, z) + else: + indices = all_gather(dout[0]) + grad = all_gather(dout[1]) + z = equal(x, out) + z = cast(z, dtype(grad)) + grad = mul(grad, z) + dx = (indices, grad, dout[2]) return (dx,) return bprop @@ -147,12 +161,16 @@ def get_bprop_all_to_all(self): @bprop_getters.register(_MirrorOperator) def get_bprop_mirror_operator(self): - """Backpropagator for _MirrorOperator, do allreduce for the devices in group(only for one group).""" + """ + Backpropagator for _MirrorOperator, do allreduce or allgather for the devices in group(only for one group), + allgather for sparse feature. 
+ """ group = self.group dev_num = self.dev_num mean_flag = self.mean_flag all_reduce = AllReduce(group=group) + all_gather = AllGather(group=group) mul = P.Mul() cast = P.Cast() @@ -170,12 +188,25 @@ def get_bprop_mirror_operator(self): def bprop(x, out, dout): if mean_flag: - dx = all_reduce(dout) - float_one = F.scalar_cast(1.0, F.dtype(dx)) - num = F.scalar_cast(dev_num, F.dtype(dx)) - dx = mul(dx, cast(F.scalar_to_array(float_one/num), F.dtype(dx))) + if F.issubclass_(F.typeof(dout), mstype.tensor): + dx = all_reduce(dout) + float_one = F.scalar_cast(1.0, F.dtype(dx)) + num = F.scalar_cast(dev_num, F.dtype(dx)) + dx = mul(dx, cast(F.scalar_to_array(float_one/num), F.dtype(dx))) + else: + indices = all_gather(dout[0]) + grad = all_gather(dout[1]) + float_one = F.scalar_cast(1.0, F.dtype(grad)) + num = F.scalar_cast(dev_num, F.dtype(grad)) + grad = mul(grad, cast(F.scalar_to_array(float_one/num), F.dtype(grad))) + dx = (indices, grad, dout[2]) else: - dx = all_reduce(dout) + if F.issubclass_(F.typeof(dout), mstype.tensor): + dx = all_reduce(dout) + else: + indices = all_gather(dout[0]) + grad = all_gather(dout[1]) + dx = (indices, grad, dout[2]) return (dx,) return bprop diff --git a/mindspore/ops/_grad/grad_implementations.py b/mindspore/ops/_grad/grad_implementations.py index ee3117c83a..87566b1110 100644 --- a/mindspore/ops/_grad/grad_implementations.py +++ b/mindspore/ops/_grad/grad_implementations.py @@ -195,7 +195,7 @@ def bprop_array_reduce(fn, x, shp, out, dout): return F.distribute(dout, F.shape(x)), C.zeros_like(shp) -@bprops.register("depend") +@bprops.register("Depend") def bprop_depend(x, y, out, dout): """Backpropagator for primitive `depend`.""" return dout, C.zeros_like(y) @@ -236,7 +236,6 @@ def bprop_control_depend(x, y, out, dout): """Backpropagator for primitive `Control_depend`.""" return C.zeros_like(x), C.zeros_like(y) - @bprops.register("switch") def bprop_switch(cond, tb, fb, out, dout): """Backpropagator for primitive `switch`.""" diff 
--git a/mindspore/ops/_grad/grad_math_ops.py b/mindspore/ops/_grad/grad_math_ops.py index ffd79e49b9..1e4f932442 100755 --- a/mindspore/ops/_grad/grad_math_ops.py +++ b/mindspore/ops/_grad/grad_math_ops.py @@ -17,15 +17,18 @@ from functools import reduce import numpy as np +from mindspore.ops import _selected_grad_ops as SG from .. import functional as F from .. import operations as P from ..operations import _grad_ops as G +from ..operations import _inner_ops as inner from ..composite.multitype_ops.zeros_like_impl import zeros_like from ..functional import broadcast_gradient_args, reduced_shape, tuple_div from .grad_base import bprop_getters from ..primitive import constexpr from ..composite.multitype_ops import _constexpr_utils as const_utils + shape_op = P.Shape() reduce_sum = P.ReduceSum() reshape = P.Reshape() @@ -232,6 +235,21 @@ def get_bprop_div(self): return bprop +@bprop_getters.register(P.DivNoNan) +def get_bprop_div_no_nan(self): + """Grad definition for `DivNoNan` operation.""" + div_no_nan_op = P.DivNoNan() + neg = P.Neg() + mul_op = P.Mul() + + def bprop(x, y, out, dout): + bc_x = div_no_nan_op(dout, y) + bc_y = neg(mul_op(bc_x, out)) + return binop_grad_common(x, y, bc_x, bc_y) + + return bprop + + @bprop_getters.register(P.Floor) def get_bprop_floor(self): """Grad definition for `floor` operation.""" @@ -239,6 +257,21 @@ def get_bprop_floor(self): shape_ = P.Shape() dtype_ = P.DType() + def bprop(x, out, dout): + bc_x = fill_(dtype_(x), shape_(x), 0.) + return (bc_x,) + + + return bprop + + +@bprop_getters.register(P.Ceil) +def get_bprop_ceil(self): + """Grad definition for `ceil` operation.""" + fill_ = P.Fill() + shape_ = P.Shape() + dtype_ = P.DType() + def bprop(x, out, dout): bc_x = fill_(dtype_(x), shape_(x), 0.) 
return (bc_x,) @@ -422,10 +455,23 @@ def get_bprop_exp(self): return bprop +@bprop_getters.register(P.Expm1) +def get_bprop_expm1(self): + """Grad definition for `Expm1` operation.""" + exp_ = P.Exp() + + def bprop(x, out, dout): + g = exp_(x) + dx = g * dout + return (dx,) + + return bprop + + @bprop_getters.register(P.Minimum) def get_bprop_minimum(self): """Grad definition for `Minimum` operation.""" - input_grad = G.MinimumGrad() + input_grad = SG.MinimumGrad() def bprop(x, y, out, dout): dx, dy = input_grad(x, y, dout) @@ -437,7 +483,7 @@ def get_bprop_minimum(self): @bprop_getters.register(P.Maximum) def get_bprop_maximum(self): """Grad definition for `Maximum` operation.""" - input_grad = G.MaximumGrad() + input_grad = SG.MaximumGrad() def bprop(x, y, out, dout): dx, dy = input_grad(x, y, dout) @@ -639,6 +685,16 @@ def get_bprop_not_equal(self): return bprop +@bprop_getters.register(P.ApproximateEqual) +def get_bprop_approximate_equal(self): + """Grad definition for `ApproximateEqual` operation.""" + + def bprop(x, y, out, dout): + return zeros_like(x), zeros_like(y) + + return bprop + + @bprop_getters.register(P.Greater) def get_bprop_greater(self): """Grad definition for `Greater` operation.""" @@ -793,6 +849,18 @@ def get_bprop_asinh(self): return bprop +@bprop_getters.register(P.Sinh) +def get_bprop_sinh(self): + """Grad definition for `Sinh` operation.""" + cosh = P.Cosh() + + def bprop(x, out, dout): + dx = cosh(x) * dout + return (dx,) + + return bprop + + @bprop_getters.register(P.Cos) def get_bprop_cos(self): """Grad definition for `Cos` operation.""" @@ -830,10 +898,22 @@ def get_bprop_acosh(self): return bprop +@bprop_getters.register(P.Cosh) +def get_bprop_cosh(self): + """Grad definition for `Cosh` operation.""" + sinh = P.Sinh() + + def bprop(x, out, dout): + dx = sinh(x) * dout + return (dx,) + + return bprop + + @bprop_getters.register(P.Abs) def get_bprop_abs(self): """Grad definition for `Abs` operation.""" - abs_grad = G.AbsGrad() + 
abs_grad = SG.AbsGrad() def bprop(x, out, dout): dx = abs_grad(x, dout) @@ -852,6 +932,18 @@ def get_bprop_scalar_cast(self): return bprop +@bprop_getters.register(P.AccumulateNV2) +def get_bprop_scalar_accumulatenv2(self): + """Generate bprop for AccumulateNV2""" + + def bprop(x, out, dout): + dx = () + for _ in range(len(x)): + dx = dx + (dout,) + return dx + return bprop + + @bprop_getters.register(P.AddN) def get_bprop_scalar_addn(self): """Generate bprop for AddN""" @@ -934,15 +1026,16 @@ def get_bprop_bessel_i1e(self): reciprocal = P.Reciprocal() cast = P.Cast() dtype = P.DType() + abs_ops = P.Abs() def bprop(x, out, dout): zeros = zeros_like(x) np_eps = const_utils.get_np_eps(dtype(x)) eps = cast(np_eps, dtype(x)) - x_is_valid = less(eps, x) + x_is_valid = less(eps, abs_ops(x)) x_safe = select(x_is_valid, x, eps + zeros) - tmp = bessel_i0e(x_safe) - out * (sign(x) + reciprocal(x_safe)) - dx = select(x_is_valid, tmp, 0.5 + zeros) + tmp = bessel_i0e(x_safe) - out * (sign(x_safe) + reciprocal(x_safe)) + dx = select(x_is_valid, tmp, cast(0.5, dtype(x)) + zeros) * dout return (dx,) return bprop @@ -958,3 +1051,24 @@ def get_bprop_atanh(self): dx = div(1, tmp) * dout return (dx,) return bprop + + +@bprop_getters.register(P.Inv) +def get_bprop_inv(self): + """Grad definition for 'Inv' operation""" + inv_grad = G.InvGrad() + + def bprop(x, out, dout): + dx = inv_grad(out, dout) + return (dx,) + return bprop + + +@bprop_getters.register(inner.LinSpace) +def get_bprop_lin_space(self): + """Grad definition for `LinSpace` operation.""" + + def bprop(assist, start, stop, num, out, dout): + return zeros_like(assist), zeros_like(start), zeros_like(stop), zeros_like(num) + + return bprop diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py index 4c4acb802c..1254f9e7a2 100755 --- a/mindspore/ops/_grad/grad_nn_ops.py +++ b/mindspore/ops/_grad/grad_nn_ops.py @@ -14,6 +14,7 @@ # 
============================================================================ """Define the grad rules of neural network related operations.""" +from mindspore.ops import _selected_grad_ops as SG from .grad_base import bprop_getters from .. import functional as F from .. import operations as P @@ -23,10 +24,11 @@ from ..operations import _inner_ops as inner from ... import context + @bprop_getters.register(P.BiasAdd) def get_bprop_bias_add(self): """Grad definition for `BiasAdd` operation.""" - bias_grad = G.BiasAddGrad() + bias_grad = SG.BiasAddGrad() def bprop(x, w, out, dout): return dout, bias_grad(dout) @@ -303,7 +305,6 @@ def get_bprop_softmax(self): sub = P.Sub() mul = P.Mul() axis = self.axis - def bprop(x, out, dout): dx = mul(out, sub(dout, sum_func(mul(out, dout), axis))) return (dx,) @@ -338,10 +339,10 @@ def get_bprop_softplus(self): @bprop_getters.register(P.Tanh) def get_bprop_tanh(self): """Grad definition for `Tanh` operation.""" - logsoftmax_grad = G.TanhGrad() + tanh_grad = SG.TanhGrad() def bprop(x, out, dout): - dx = logsoftmax_grad(out, dout) + dx = tanh_grad(out, dout) return (dx,) return bprop @@ -404,7 +405,8 @@ def get_bprop_layer_norm(self): layer_norm_grad = G.LayerNormGrad(self.begin_norm_axis, self.begin_params_axis) def bprop(x, gamma, beta, out, dout): - dx, d_gamma, d_beta = layer_norm_grad(x, dout[0], out[2], out[1], gamma) + dx, d_gamma, d_beta = layer_norm_grad( + x, dout[0], out[2], out[1], gamma) return dx, d_gamma, d_beta return bprop @@ -687,7 +689,7 @@ def get_bprop_binary_cross_entropy(self): @bprop_getters.register(P.Dropout) def get_bprop_dropout(self): """Grad definition for `Dropout` operation.""" - grad = P.DropoutGrad(self.drop_prob) + grad = P.DropoutGrad(self.keep_prob) def bprop(x, out, dout): _, mask = out diff --git a/mindspore/ops/_grad/grad_quant_ops.py b/mindspore/ops/_grad/grad_quant_ops.py index 1e694a7dba..a2b0ba8d97 100644 --- a/mindspore/ops/_grad/grad_quant_ops.py +++ 
b/mindspore/ops/_grad/grad_quant_ops.py @@ -13,17 +13,20 @@ # limitations under the License. # ============================================================================ -"""Generate bprop for aware quantization ops""" +"""Generate bprop for quantization aware ops""" from .. import operations as P +from ..operations import _quant_ops as Q from .grad_base import bprop_getters from ..composite.multitype_ops.zeros_like_impl import zeros_like +from ... import context -@bprop_getters.register(P.FakeQuantWithMinMax) +@bprop_getters.register(Q.FakeQuantPerLayer) def get_bprop_fakequant_with_minmax(self): - """Generate bprop for FakeQuantWithMinMax for GPU and Ascend""" - op = P.FakeQuantWithMinMaxGrad(num_bits=self.num_bits, quant_delay=self.quant_delay) + """Generate bprop for FakeQuantPerLayer for GPU and Ascend""" + op = Q.FakeQuantPerLayerGrad( + num_bits=self.num_bits, quant_delay=self.quant_delay) def bprop(x, x_min, x_max, out, dout): dx = op(dout, x, x_min, x_max) @@ -32,10 +35,14 @@ def get_bprop_fakequant_with_minmax(self): return bprop -@bprop_getters.register(P.FakeQuantWithMinMaxPerChannel) +@bprop_getters.register(Q.FakeQuantPerChannel) def get_bprop_fakequant_with_minmax_perchannel(self): - """Generate bprop for FakeQuantWithMinMaxPerChannel for GPU""" - op = P.FakeQuantWithMinMaxPerChannelGrad(num_bits=self.num_bits, quant_delay=self.quant_delay) + """Generate bprop for FakeQuantPerChannel""" + op = Q.FakeQuantPerChannelGrad(num_bits=self.num_bits, + quant_delay=self.quant_delay, + symmetric=self.symmetric, + narrow_range=self.symmetric, + channel_axis=self.channel_axis) def bprop(x, x_min, x_max, out, dout): dx = op(dout, x, x_min, x_max) @@ -44,10 +51,10 @@ def get_bprop_fakequant_with_minmax_perchannel(self): return bprop -@bprop_getters.register(P.BatchNormFold) +@bprop_getters.register(Q.BatchNormFold) def get_bprop_batchnorm_fold(self): """Generate bprop for BatchNormFold for GPU""" - op = P.BatchNormFoldGrad(self.epsilon, self.is_training, 
self.freeze_bn) + op = Q.BatchNormFoldGrad(self.epsilon, self.is_training, self.freeze_bn) def bprop(x, mean, variance, global_step, out, dout): dx = op(dout[0], dout[1], x, out[0], out[1], global_step) @@ -56,36 +63,45 @@ def get_bprop_batchnorm_fold(self): return bprop -@bprop_getters.register(P.CorrectionMul) +@bprop_getters.register(Q.CorrectionMul) def get_bprop_correction_mul(self): """Generate bprop for CorrectionMul for Ascend and GPU""" - grad = P.CorrectionMulGrad(self.channel_axis) + grad_dx = Q.CorrectionMulGrad(self.channel_axis) + grad_d_batch_std = Q.CorrectionMulGradReduce(self.channel_axis) def bprop(x, batch_std, running_std, out, dout): - dx, d_batch_std = grad(dout, x, batch_std, running_std) + dx, d_batch_std = grad_dx(dout, x, batch_std, running_std) return dx, d_batch_std, zeros_like(running_std) + def bprop_npu(x, batch_std, running_std, out, dout): + dx, mul_dx = grad_dx(dout, x, batch_std, running_std) + d_batch_std = grad_d_batch_std(mul_dx) + return dx, d_batch_std, zeros_like(running_std) + + if context.get_context('device_target') == "Ascend": + return bprop_npu + return bprop -@bprop_getters.register(P.BatchNormFold2) +@bprop_getters.register(Q.BatchNormFold2) def get_bprop_batchnorm_fold2(self): """Generate bprop for BatchNormFold2 for GPU""" - op_f = P.BatchNormFold2Grad(freeze_bn=self.freeze_bn) + op_f = Q.BatchNormFold2Grad(freeze_bn=self.freeze_bn) def bprop(x, beta, gamma, batch_std, batch_mean, running_std, running_mean, global_step, out, dout): d_batch_std, d_batch_mean, d_beta, d_gamma, d_x = op_f(dout, x, gamma, batch_std, batch_mean, running_std, running_mean, global_step) return d_x, d_beta, d_gamma, d_batch_std, d_batch_mean, zeros_like(running_std), zeros_like(running_mean), \ - zeros_like(global_step) + zeros_like(global_step) return bprop -@bprop_getters.register(P.BatchNormFoldD) +@bprop_getters.register(Q.BatchNormFoldD) def get_bprop_BatchNormFold(self): """Generate bprop for BatchNormFold for Ascend""" - op = 
P.BatchNormFoldGrad_(self.epsilon, self.is_training, self.freeze_bn) + op = Q.BatchNormFoldGradD(self.epsilon, self.is_training, self.freeze_bn) def bprop(x, x_sum, x_square_sum, mean, variance, out, dout): dx = op(dout[1], dout[2], x, out[1], out[2]) @@ -102,11 +118,11 @@ def get_bprop_BNTrainingReduce(self): return bprop -@bprop_getters.register(P.BatchNormFold2_D) +@bprop_getters.register(Q.BatchNormFold2_D) def get_bprop_batchnorm_fold2_(self): """Generate bprop for BatchNormFold2 for Ascend""" - op_reduce = P.BatchNormFold2GradReduce(freeze_bn=self.freeze_bn) - op_f = P.BatchNormFold2GradD(freeze_bn=self.freeze_bn) + op_reduce = Q.BatchNormFold2GradReduce(freeze_bn=self.freeze_bn) + op_f = Q.BatchNormFold2GradD(freeze_bn=self.freeze_bn) def bprop(x, beta, gamma, batch_std, batch_mean, running_std, out, dout): dout_reduce, dout_x_reduce = op_reduce(dout, x) @@ -117,9 +133,19 @@ def get_bprop_batchnorm_fold2_(self): return bprop -@bprop_getters.register(P.FakeQuantWithMinMaxUpdate) -def get_bprop_fakequant_with_minmax_update(self): - """Generate bprop for FakeQuantWithMinMaxUpdate for Ascend""" +@bprop_getters.register(Q.MinMaxUpdatePerLayer) +def get_bprop_fakequant_with_minmax_per_layer_update(self): + """Generate bprop for MinMaxUpdatePerLayer for Ascend""" + + def bprop(x, x_min, x_max, out, dout): + return zeros_like(x), zeros_like(x_min), zeros_like(x_max) + + return bprop + + +@bprop_getters.register(Q.MinMaxUpdatePerChannel) +def get_bprop_fakequant_with_minmax_per_channel_update(self): + """Generate bprop for MinMaxUpdatePerChannel for Ascend""" def bprop(x, x_min, x_max, out, dout): return zeros_like(x), zeros_like(x_min), zeros_like(x_max) diff --git a/mindspore/ops/_op_impl/__init__.py b/mindspore/ops/_op_impl/__init__.py index 725977877d..65a12cd73c 100644 --- a/mindspore/ops/_op_impl/__init__.py +++ b/mindspore/ops/_op_impl/__init__.py @@ -19,6 +19,5 @@ from .aicpu import * if "Windows" not in platform.system(): from .akg.gpu import * from .tbe 
import * - from ._custom_op import * __all__ = [] diff --git a/mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py b/mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py index 63b9e2b7d2..11434223d3 100644 --- a/mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +++ b/mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py @@ -16,6 +16,7 @@ """_BatchNormFold op""" from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType +import te from te import tvm from topi import generic from topi.cce import util @@ -64,7 +65,6 @@ def batchnorm_fold(x, x_sum, x_square_sum, mean, variance, momentum=0.9, epsilon=1e-5, is_training=True, freeze_bn=0, data_format="NCHW", kernel_name="batchnorm_fold"): """batchnorm_fold TBE op""" - momentum = 1.0 - momentum util.check_kernel_name(kernel_name) data_format = data_format.upper() if data_format != "NCHW": @@ -119,13 +119,12 @@ def batchnorm_fold(x, x_sum, x_square_sum, mean, variance, variance_div = te.lang.cce.vmuls(x_square_sum, num_rec) mean_square = te.lang.cce.vmul(batch_mean, batch_mean) batch_var_biased = te.lang.cce.vsub(variance_div, mean_square) - + batch_std = te.lang.cce.vsqrt(te.lang.cce.vadds(batch_var_biased, epsilon)) if num == 1: batch_var_scaler = 0.0 else: batch_var_scaler = float(num) / (num - 1) - batch_variance = te.lang.cce.vmuls(batch_var_biased, batch_var_scaler) - batch_std = te.lang.cce.vsqrt(te.lang.cce.vadds(batch_variance, epsilon)) + batch_var_unbiased = te.lang.cce.vmuls(batch_var_biased, batch_var_scaler) factor = 1.0 - momentum factor_reverse = momentum @@ -133,7 +132,7 @@ def batchnorm_fold(x, x_sum, x_square_sum, mean, variance, mean_mul_rev = te.lang.cce.vmuls(mean, factor_reverse) mean_updated = te.lang.cce.vadd(mean_mul, mean_mul_rev) - var_mul = te.lang.cce.vmuls(batch_variance, factor) + var_mul = te.lang.cce.vmuls(batch_var_unbiased, factor) var_mul_rev = te.lang.cce.vmuls(variance, factor_reverse) variance_updated = te.lang.cce.vadd(var_mul, var_mul_rev) diff --git 
a/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py b/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py index 810ce7323c..da3a634454 100644 --- a/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +++ b/mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py @@ -37,13 +37,7 @@ correction_mul_grad_op_info = TBERegOp("CorrectionMulGrad") \ .input(2, "batch_std", None, "required", None) \ .input(3, "running_std", None, "required", None) \ .output(0, "dx", True, "required", "all") \ - .output(1, "d_batch_std", True, "required", "all") \ - .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, - DataType.F16_Default, DataType.F16_Default) \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, - DataType.F16_5HD, DataType.F16_5HD) \ - .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_Default) \ + .output(1, "mul_dx", True, "required", "all") \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() @@ -62,21 +56,14 @@ def correction_mul_grad_compute(dout, x, batch_std, running_std, channel, data_f factor = te.lang.cce.vdiv(batch_std, running_std) factor_b = te.lang.cce.broadcast(factor, shape_x) dx = te.lang.cce.vmul(dout, factor_b) - mul_data = te.lang.cce.vmul(dout, x) - if channel == 0: - if data_format == "NCHW": - axis = [1, 2, 3] - else: - axis = [1, 2, 3, 4] - else: - axis = [2, 3] - red_data = te.lang.cce.sum(mul_data, axis, keepdims=True) - d_batch_std = te.lang.cce.vdiv(red_data, running_std) - return [dx, d_batch_std] + mul_dx = te.lang.cce.vmul(dout, x) + running_std_b = te.lang.cce.broadcast(running_std, shape_x) + mul_dx = te.lang.cce.vdiv(mul_dx, running_std_b) + return [dx, mul_dx] @util.check_input_type(dict, dict, dict, dict, dict, dict, int, str) -def correction_mul_grad(dout, x, 
batch_std, running_std, dx, d_batch_std, channel, kernel_name="correction_mul_grad"): +def correction_mul_grad(dout, x, batch_std, running_std, dx, mul_dx, channel, kernel_name="correction_mul_grad"): """CorrectionMulGrad op""" shape_dout = dout.get("shape") shape_x = dout.get("shape") @@ -93,13 +80,13 @@ def correction_mul_grad(dout, x, batch_std, running_std, dx, d_batch_std, channe util.check_dtype_rule(inp_dtype_dout, ("float16", "float32")) util.check_dtype_rule(inp_dtype_x, ("float16", "float32")) - util.check_dtype_rule(inp_dtype_batch_std, ("float32",)) - util.check_dtype_rule(inp_dtype_running_std, ("float32",)) + util.check_dtype_rule(inp_dtype_batch_std, ("float16", "float32")) + util.check_dtype_rule(inp_dtype_running_std, ("float16", "float32")) util.compare_tensor_dict_key(dout, x, "dtype") util.compare_tensor_dict_key(dout, x, "shape") util.compare_tensor_dict_key(dx, x, "shape") util.compare_tensor_dict_key(batch_std, running_std, "shape") - util.compare_tensor_dict_key(batch_std, d_batch_std, "shape") + util.compare_tensor_dict_key(dx, mul_dx, "shape") util.check_kernel_name(kernel_name) util.check_shape_rule(shape_x) @@ -126,7 +113,84 @@ def correction_mul_grad(dout, x, batch_std, running_std, dx, d_batch_std, channe with tvm.target.cce(): sch = generic.auto_schedule(res_list) - tensor_list = [dout_t, x_t, batch_std_t, running_std_t] + list(res_list) + tensor_list = [dout_t, x_t, batch_std_t, running_std_t] + res_list + config = {"print_ir": False, + "name": kernel_name, + "tensor_list": tensor_list} + + te.lang.cce.cce_build_code(sch, config) + + +correction_mul_grad_reduce_op_info = TBERegOp("CorrectionMulGradReduce") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("correction_mul_grad_reduce.so") \ + .compute_cost(10) \ + .kernel_name("correction_mul_grad_reduce") \ + .partial_flag(True) \ + .op_pattern("formatAgnostic") \ + .attr("channel_axis", "optional", "int", "all") \ + .input(0, "dout", None, "required", None) \ + 
.output(0, "d_batch_std", True, "required", "all") \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ + .get_op_info() + + +@op_info_register(correction_mul_grad_reduce_op_info) +def _correction_mul_grad_reduce_tbe(): + """CorrectionMulGradReduce TBE register""" + return + + +@fusion_manager.register("correction_mul_grad_reduce") +def correction_mul_grad_reduce_compute(mul_dx, channel, data_format, kernel_name="correction_mul"): + """CorrectionMulGradReduce compute""" + if channel == 0: + if data_format == "NCHW": + axis = [1, 2, 3] + else: + axis = [1, 2, 3, 4] + else: + axis = [2, 3] + d_batch_std = te.lang.cce.sum(mul_dx, axis, keepdims=True) + return d_batch_std + + +@util.check_input_type(dict, dict, int, str) +def correction_mul_grad_reduce(mul_dx, d_batch_std, channel, kernel_name="correction_mul_grad_reduce"): + """CorrectionMulGradReduce op""" + shape_dout = mul_dx.get("shape") + shape_x = mul_dx.get("shape") + + dtype_dout = mul_dx.get("dtype") + + inp_dtype_dout = dtype_dout.lower() + + util.check_dtype_rule(inp_dtype_dout, ("float16", "float32")) + + util.check_kernel_name(kernel_name) + util.check_shape_rule(shape_x) + util.check_shape_size(shape_x, SHAPE_SIZE_LIMIT) + + data_format = mul_dx.get("format") + ori_format = mul_dx.get("format") + if data_format.upper() not in ("NC1HWC0", "NCHW"): + raise RuntimeError("Un supported data format {}".format(data_format)) + if data_format.upper() == "NCHW" and ori_format != "NCHW": + raise RuntimeError("data_format(NCHW) must same as ori_format") + + shape_c = [1] * len(shape_x) + shape_c[channel] = d_batch_std.get("ori_shape")[0] + if data_format == "NC1HWC0" and channel == 1: + shape_c = d_batch_std.get("shape") + + dout_t = tvm.placeholder(shape_dout, name="dout", dtype=inp_dtype_dout) + res = correction_mul_grad_reduce_compute(dout_t, channel, data_format, kernel_name) + + with tvm.target.cce(): + sch = generic.auto_schedule(res) + + tensor_list = [dout_t, res] config = {"print_ir": False, "name": 
kernel_name, "tensor_list": tensor_list} diff --git a/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py b/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py new file mode 100644 index 0000000000..f6c133c808 --- /dev/null +++ b/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py @@ -0,0 +1,146 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""FakeQuantPerChannel op""" +import te.lang.cce +from te import tvm +from te.platform.fusion_manager import fusion_manager +from topi import generic +from topi.cce import util +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +fake_quant_perchannel_op_info = TBERegOp("FakeQuantPerChannel") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("fake_quant_perchannel.so") \ + .compute_cost(10) \ + .kernel_name("fake_quant_perchannel") \ + .partial_flag(True) \ + .attr("symmetric", "optional", "bool", "all") \ + .attr("narrow_range", "optional", "bool", "all") \ + .attr("num_bits", "optional", "int", "all") \ + .attr("channel_axis", "optional", "int", "all") \ + .input(0, "x", None, "required", None) \ + .input(1, "min", None, "required", None) \ + .input(2, "max", None, "required", None) \ + .output(0, "y", True, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, 
DataType.F16_Default) \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ + .get_op_info() + + +@op_info_register(fake_quant_perchannel_op_info) +def _fake_quant_perchannel_tbe(): + """FakeQuantPerChannel TBE register""" + return + + +@fusion_manager.register("fake_quant_perchannel") +def fake_quant_perchannel_compute(x, min_val, max_val, y, quant_min, quant_max, + kernel_name="fake_quant_perchannel"): + """FakeQuantPerChannel""" + x_shape = te.lang.cce.util.shape_to_list(x.shape) + minmax_shape = te.lang.cce.util.shape_to_list(min_val.shape) + quant_min = tvm.const(quant_min, x.dtype) + quant_max = tvm.const(quant_max, x.dtype) + quant_min = te.lang.cce.broadcast(quant_min, minmax_shape, x.dtype) + quant_max = te.lang.cce.broadcast(quant_max, minmax_shape, x.dtype) + + # CalNudge(NudgeMinMax) + scale = te.lang.cce.vdiv(te.lang.cce.vsub( + max_val, min_val), te.lang.cce.vsub(quant_max, quant_min)) + zp_from_min = te.lang.cce.vsub(quant_min, te.lang.cce.vdiv(min_val, scale)) + + # Nudge zero point + nudge_zp_ = te.lang.cce.vmin( + quant_max, te.lang.cce.vmax(quant_min, zp_from_min)) + nudge_zp = te.lang.cce.floor(te.lang.cce.vadds(nudge_zp_, 0.5)) + nudge_min = te.lang.cce.vmul(te.lang.cce.vsub(quant_min, nudge_zp), scale) + nudge_max = te.lang.cce.vmul(te.lang.cce.vsub(quant_max, nudge_zp), scale) + + # FakeQuant + nudge_min_b = te.lang.cce.broadcast(nudge_min, x_shape) + nudge_max_b = te.lang.cce.broadcast(nudge_max, x_shape) + scale_b = te.lang.cce.broadcast(scale, x_shape) + + input_x = te.lang.cce.vmin(nudge_max_b, te.lang.cce.vmax(nudge_min_b, x)) + nudge_input_ = te.lang.cce.vdiv( + te.lang.cce.vsub(input_x, nudge_min_b), scale_b) + nudge_input = te.lang.cce.floor(te.lang.cce.vadds(nudge_input_, 0.5)) + res = 
te.lang.cce.vadd(te.lang.cce.vmul(nudge_input, scale_b), nudge_min_b) + + return res + + +@util.check_input_type(dict, dict, dict, dict, bool, bool, int, int, str) +def fake_quant_perchannel(x, min_val, max_val, y, + symmetric, narrow_range, num_bits, channel_axis, + kernel_name="fake_quant_perchannel"): + """FakeQuantPerChannel""" + x_shape = x.get("shape") + x_shape_ = x.get("ori_shape") + x_format = x.get("format") + x_dtype = x.get("dtype") + min_shape = min_val.get("ori_shape") + min_dtype = min_val.get("dtype") + max_shape = max_val.get("ori_shape") + max_dtype = max_val.get("dtype") + + util.check_kernel_name(kernel_name) + util.check_shape_rule(x_shape) + util.check_shape_rule(min_shape, 1, 1, x_shape_[channel_axis]) + util.check_shape_rule(max_shape, 1, 1, x_shape_[channel_axis]) + util.check_tensor_shape_size(x_shape) + util.check_tensor_shape_size(min_shape) + util.check_tensor_shape_size(max_shape) + + check_list = ["float32", "float16"] + x_dtype = x_dtype.lower() + min_dtype = min_dtype.lower() + max_dtype = max_dtype.lower() + util.check_dtype_rule(x_dtype, check_list) + util.check_dtype_rule(min_dtype, check_list) + util.check_dtype_rule(max_dtype, check_list) + + if symmetric: + quant_min = 0 - 2 ** (num_bits - 1) + quant_max = 2 ** (num_bits - 1) - 1 + else: + quant_min = 0 + quant_max = 2 ** num_bits - 1 + if narrow_range: + quant_min = quant_min + 1 + + shape_c = [1] * len(x_shape) + shape_c[channel_axis] = min_val.get("ori_shape")[0] + if x_format == "NC1HWC0" and channel_axis == 1: + shape_c = min_val.get("shape") + input_data = tvm.placeholder(x_shape, name="x", dtype=x_dtype) + min_data = tvm.placeholder(shape_c, name="min_val", dtype=x_dtype) + max_data = tvm.placeholder(shape_c, name="max_val", dtype=x_dtype) + res = fake_quant_perchannel_compute(input_data, min_data, max_data, y, + quant_min, quant_max, kernel_name) + + with tvm.target.cce(): + sch = generic.auto_schedule(res) + + tensor_list = [input_data, min_data, max_data, res] + 
config = {"print_ir": False, + "name": kernel_name, + "tensor_list": tensor_list} + + te.lang.cce.cce_build_code(sch, config) diff --git a/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py b/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py new file mode 100644 index 0000000000..4e9053fcb1 --- /dev/null +++ b/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py @@ -0,0 +1,172 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""FakeQuantPerChannelGrad op""" +import te.lang.cce +from te import tvm +from te.platform.fusion_manager import fusion_manager +from topi import generic +from topi.cce import util +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +SHAPE_SIZE_LIMIT = 2147483648 +D_TYPE = 'float32' + +fake_quant_perchannel_grad_op_info = TBERegOp("FakeQuantPerChannelGrad") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("fake_quant_perchannel_grad.so") \ + .compute_cost(10) \ + .kernel_name("fake_quant_perchannel_grad") \ + .partial_flag(True) \ + .attr("symmetric", "optional", "bool", "all") \ + .attr("narrow_range", "optional", "bool", "all") \ + .attr("num_bits", "optional", "int", "all") \ + .attr("channel_axis", "optional", "int", "all") \ + .input(0, "dout", None, "required", None) \ + .input(1, "x", None, "required", None) \ + .input(2, "min", None, "required", None) \ + .input(3, "max", None, "required", None) \ + .output(0, "dx", True, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, + DataType.F16_Default) \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ + .get_op_info() + + +def _less_compare_float32(data_x, data_y): + """_less_compare_float32 compute""" + input_shape = te.lang.cce.util.shape_to_list(data_x.shape) + min_value = tvm.const(2 ** (-126), dtype=D_TYPE) + max_value = tvm.const(2 ** 62, dtype=D_TYPE) + factor_value = tvm.const(2 ** 2, dtype=D_TYPE) + data_zero = te.lang.cce.broadcast( + tvm.const(0, dtype=D_TYPE), input_shape, D_TYPE) + min_value_tensor = 
te.lang.cce.vadds(data_zero, min_value) + + res_sub = te.lang.cce.vsub(data_y, data_x) + res_min = te.lang.cce.vmin(res_sub, min_value_tensor) + res_max = te.lang.cce.vmax(res_min, data_zero) + + res_max_mul = te.lang.cce.vmuls(res_max, max_value) + res_max_mul_max = te.lang.cce.vmuls(res_max_mul, max_value) + res = te.lang.cce.vmuls(res_max_mul_max, factor_value) + + return res + + +@op_info_register(fake_quant_perchannel_grad_op_info) +def _fake_quant_perchannel_grad_tbe(): + """FakeQuantPerChannelGrad TBE register""" + return + + +@fusion_manager.register("fake_quant_perchannel_grad") +def fake_quant_perchannel_grad_compute(dout, x, min_val, max_val, quant_min, quant_max, + kernel_name="fake_quant_perchannel_grad"): + """FakeQuantPerChannelGrad""" + x_shape = te.lang.cce.util.shape_to_list(x.shape) + minmax_shape = te.lang.cce.util.shape_to_list(min_val.shape) + quant_min = tvm.const(quant_min, x.dtype) + quant_max = tvm.const(quant_max, x.dtype) + quant_min = te.lang.cce.broadcast(quant_min, minmax_shape, x.dtype) + quant_max = te.lang.cce.broadcast(quant_max, minmax_shape, x.dtype) + + # CalNudge(NudgeMinMax) + scale = te.lang.cce.vdiv(te.lang.cce.vsub( + max_val, min_val), te.lang.cce.vsub(quant_max, quant_min)) + zp_from_min = te.lang.cce.vsub(quant_min, te.lang.cce.vdiv(min_val, scale)) + + # Nudge zero point + nudge_zp_ = te.lang.cce.vmin( + quant_max, te.lang.cce.vmax(quant_min, zp_from_min)) + nudge_zp = te.lang.cce.floor(te.lang.cce.vadds(nudge_zp_, 0.5)) + nudge_min = te.lang.cce.vmul(te.lang.cce.vsub(quant_min, nudge_zp), scale) + nudge_max = te.lang.cce.vmul(te.lang.cce.vsub(quant_max, nudge_zp), scale) + + # FakeQuant Grad + nudge_min_b = te.lang.cce.broadcast(nudge_min, x_shape) + nudge_max_b = te.lang.cce.broadcast(nudge_max, x_shape) + + bool_over_min = _less_compare_float32(nudge_min_b, x) + bool_less_max = _less_compare_float32(x, nudge_max_b) + bool_between = te.lang.cce.vmul(bool_over_min, bool_less_max) + res = te.lang.cce.vmul(dout, 
bool_between) + + return res + + +@util.check_input_type(dict, dict, dict, dict, dict, bool, bool, int, int, str) +def fake_quant_perchannel_grad(dout, x, min_val, max_val, dx, + symmetric, narrow_range, num_bits, channel_axis, + kernel_name="fake_quant_perchannel_grad"): + """FakeQuantPerChannelGrad""" + x_shape = x.get("shape") + x_shape_ = x.get("ori_shape") + x_format = x.get("format") + x_dtype = x.get("dtype") + min_shape = min_val.get("ori_shape") + min_dtype = min_val.get("dtype") + max_shape = max_val.get("ori_shape") + max_dtype = max_val.get("dtype") + + util.check_kernel_name(kernel_name) + util.check_shape_rule(x_shape) + util.check_shape_rule(min_shape, 1, 1, x_shape_[channel_axis]) + util.check_shape_rule(max_shape, 1, 1, x_shape_[channel_axis]) + util.check_tensor_shape_size(x_shape) + util.check_tensor_shape_size(min_shape) + util.check_tensor_shape_size(max_shape) + + check_list = ["float32", "float16"] + x_dtype = x_dtype.lower() + min_dtype = min_dtype.lower() + max_dtype = max_dtype.lower() + util.check_dtype_rule(x_dtype, check_list) + util.check_dtype_rule(min_dtype, check_list) + util.check_dtype_rule(max_dtype, check_list) + + if symmetric: + quant_min = 0 - 2 ** (num_bits - 1) + quant_max = 2 ** (num_bits - 1) - 1 + else: + quant_min = 0 + quant_max = 2 ** num_bits - 1 + if narrow_range: + quant_min = quant_min + 1 + + shape_c = [1] * len(x_shape) + shape_c[channel_axis] = min_val.get("ori_shape")[0] + if x_format == "NC1HWC0" and channel_axis == 1: + shape_c = min_val.get("shape") + dout_data = tvm.placeholder(x_shape, name="dout", dtype=x_dtype) + input_data = tvm.placeholder(x_shape, name="x", dtype=x_dtype) + min_data = tvm.placeholder(shape_c, name="min_val", dtype=x_dtype) + max_data = tvm.placeholder(shape_c, name="max_val", dtype=x_dtype) + res = fake_quant_perchannel_grad_compute(dout_data, input_data, min_data, max_data, + quant_min, quant_max, kernel_name) + + with tvm.target.cce(): + sch = generic.auto_schedule(res) + + 
tensor_list = [dout_data, input_data, min_data, max_data, res] + config = {"print_ir": False, + "name": kernel_name, + "tensor_list": tensor_list} + + te.lang.cce.cce_build_code(sch, config) diff --git a/mindspore/ops/_op_impl/_custom_op/fake_quant_with_min_max.py b/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py similarity index 69% rename from mindspore/ops/_op_impl/_custom_op/fake_quant_with_min_max.py rename to mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py index 4afdf3a051..3e75e9e0a5 100644 --- a/mindspore/ops/_op_impl/_custom_op/fake_quant_with_min_max.py +++ b/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py @@ -13,8 +13,7 @@ # limitations under the License. # ============================================================================ -"""FakeQuantWithMinMax op""" - +"""FakeQuantPerLayer op""" from functools import reduce as functools_reduce import te.lang.cce from te import tvm @@ -23,20 +22,16 @@ from topi import generic from topi.cce import util from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType -fake_quant_op_info = TBERegOp("FakeQuantWithMinMax") \ +fake_quant_per_layer_op_info = TBERegOp("FakeQuantPerLayer") \ .fusion_type("ELEMWISE") \ .async_flag(False) \ - .binfile_name("fake_quant_with_min_max_vars_ema.so") \ + .binfile_name("fake_quant_per_layer.so") \ .compute_cost(10) \ - .kernel_name("fake_quant_with_min_max_vars_ema") \ + .kernel_name("fake_quant_per_layer") \ .partial_flag(True) \ - .attr("ema", "optional", "bool", "all") \ - .attr("ema_decay", "optional", "float", "all") \ .attr("symmetric", "optional", "bool", "all") \ .attr("narrow_range", "optional", "bool", "all") \ - .attr("training", "optional", "bool", "all") \ .attr("num_bits", "optional", "int", "all") \ - .attr("quant_delay", "optional", "int", "all") \ .input(0, "x", None, "required", None) \ .input(1, "min", None, "required", None) \ .input(2, "max", None, "required", None) \ @@ -48,28 +43,32 @@ fake_quant_op_info = 
TBERegOp("FakeQuantWithMinMax") \ .get_op_info() -@op_info_register(fake_quant_op_info) -def _fake_quant_tbe(): - """FakeQuantWithMinMax TBE register""" +@op_info_register(fake_quant_per_layer_op_info) +def _fake_quant_per_layer_tbe(): + """FakeQuantPerLayer TBE register""" return -@fusion_manager.register("fake_quant_with_min_max_vars_ema") -def fake_quant_with_min_max_vars_ema_compute(x, min_val, max_val, y, quant_min, quant_max, - kernel_name="correction_mul"): - """FakeQuantWithMinMax""" +@fusion_manager.register("fake_quant_per_layer") +def fake_quant_per_layer_compute(x, min_val, max_val, y, quant_min, quant_max, symmetric, + kernel_name="fake_quant_per_layer"): + """FakeQuantPerLayer""" shape = te.lang.cce.util.shape_to_list(x.shape) shape_min = te.lang.cce.util.shape_to_list(min_val.shape) quant_min = te.lang.cce.broadcast(quant_min, shape_min, x.dtype) quant_max = te.lang.cce.broadcast(quant_max, shape_min, x.dtype) - min_val = te.lang.cce.broadcast(min_val, shape_min, x.dtype) - max_val = te.lang.cce.broadcast(max_val, shape_min, x.dtype) + if symmetric: + max_val = te.lang.cce.vmax(te.lang.cce.vmuls(min_val, -1.), max_val) + min_val = te.lang.cce.vmuls(max_val, -1.) 
# CalNudge(NudgeMinMax) - scale = te.lang.cce.vdiv(te.lang.cce.vsub(max_val, min_val), te.lang.cce.vsub(quant_max, quant_min)) + scale = te.lang.cce.vdiv(te.lang.cce.vsub( + max_val, min_val), te.lang.cce.vsub(quant_max, quant_min)) zp_from_min = te.lang.cce.vsub(quant_min, te.lang.cce.vdiv(min_val, scale)) # Nudge zero point - nudge_zp = te.lang.cce.round(te.lang.cce.vmin(quant_max, te.lang.cce.vmax(quant_min, zp_from_min))) + nudge_zp_ = te.lang.cce.vmin( + quant_max, te.lang.cce.vmax(quant_min, zp_from_min)) + nudge_zp = te.lang.cce.floor(te.lang.cce.vadds(nudge_zp_, 0.5)) nudge_min = te.lang.cce.vmul(te.lang.cce.vsub(quant_min, nudge_zp), scale) nudge_max = te.lang.cce.vmul(te.lang.cce.vsub(quant_max, nudge_zp), scale) @@ -80,18 +79,19 @@ def fake_quant_with_min_max_vars_ema_compute(x, min_val, max_val, y, quant_min, # FakeQuant input_x = te.lang.cce.vmin(nudge_max, te.lang.cce.vmax(nudge_min, x)) - nudge_input = te.lang.cce.floor(te.lang.cce.vadds(te.lang.cce.vdiv(te.lang.cce.vsub(input_x, nudge_min), scale), - 0.5)) + nudge_input_ = te.lang.cce.vdiv( + te.lang.cce.vsub(input_x, nudge_min), scale) + nudge_input = te.lang.cce.floor(te.lang.cce.vadds(nudge_input_, 0.5)) res = te.lang.cce.vadd(te.lang.cce.vmul(nudge_input, scale), nudge_min) return res -@util.check_input_type(dict, dict, dict, dict, bool, float, bool, bool, bool, int, int, str) -def fake_quant_with_min_max_vars_ema(x, min_val, max_val, y, - ema, ema_decay, symmetric, narrow_range, training, num_bits, quant_delay, - kernel_name="fake_quant"): - """FakeQuantWithMinMax""" +@util.check_input_type(dict, dict, dict, dict, bool, bool, int, str) +def fake_quant_per_layer(x, min_val, max_val, y, + symmetric, narrow_range, num_bits, + kernel_name="fake_quant_per_layer"): + """FakeQuantPerLayer""" input_shape = x.get("shape") input_dtype = x.get("dtype") min_shape = min_val.get("ori_shape") @@ -120,20 +120,16 @@ def fake_quant_with_min_max_vars_ema(x, min_val, max_val, y, input_shape = 
(functools_reduce(lambda x, y: x * y, input_shape[:]),) shape_min, _, _ = util.produce_shapes(min_shape, input_shape) - if symmetric: - quant_min = 0 - 2 ** (num_bits - 1) - quant_max = 2 ** (num_bits - 1) - 1 - else: - quant_min = 0 - quant_max = 2 ** num_bits - 1 + quant_min = 0 + quant_max = 2 ** num_bits - 1 if narrow_range: quant_min = quant_min + 1 input_data = tvm.placeholder(input_shape, name="x", dtype=x_dtype) min_data = tvm.placeholder(shape_min, name="min_data", dtype=min_dtype) max_data = tvm.placeholder(shape_min, name="max_data", dtype=max_dtype) - res = fake_quant_with_min_max_vars_ema_compute(input_data, min_data, max_data, y, - quant_min, quant_max, kernel_name) + res = fake_quant_per_layer_compute(input_data, min_data, max_data, y, + quant_min, quant_max, symmetric, kernel_name) with tvm.target.cce(): sch = generic.auto_schedule(res) diff --git a/mindspore/ops/_op_impl/_custom_op/fake_quant_with_min_max_grad.py b/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py similarity index 75% rename from mindspore/ops/_op_impl/_custom_op/fake_quant_with_min_max_grad.py rename to mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py index be5dcb6591..a78effcc4f 100644 --- a/mindspore/ops/_op_impl/_custom_op/fake_quant_with_min_max_grad.py +++ b/mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py @@ -13,7 +13,7 @@ # limitations under the License. 
# ============================================================================ -"""FakeQuantWithMinMaxGrad op""" +"""FakeQuantPerLayerGrad op""" from functools import reduce as functools_reduce import te.lang.cce @@ -26,15 +26,16 @@ from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType SHAPE_SIZE_LIMIT = 2147483648 D_TYPE = 'float32' -fake_quant_grad_op_info = TBERegOp("FakeQuantWithMinMaxGrad") \ +fake_quant_per_layer_grad_op_info = TBERegOp("FakeQuantPerLayerGrad") \ .fusion_type("OPAQUE") \ .async_flag(False) \ - .binfile_name("fake_quant_with_min_max_grad.so") \ + .binfile_name("fake_quant_per_layer_grad.so") \ .compute_cost(10) \ - .kernel_name("fake_quant_with_min_max_grad") \ + .kernel_name("fake_quant_per_layer_grad") \ .partial_flag(True) \ .attr("num_bits", "optional", "int", "all") \ - .attr("quant_delay", "optional", "int", "all") \ + .attr("symmetric", "optional", "bool", "all") \ + .attr("narrow_range", "optional", "bool", "all") \ .input(0, "dout", None, "required", None) \ .input(1, "x", None, "required", None) \ .input(2, "min", None, "required", None) \ @@ -55,7 +56,8 @@ def _less_compare_float32(data_x, data_y): min_value = tvm.const(2 ** (-126), dtype=D_TYPE) max_value = tvm.const(2 ** 62, dtype=D_TYPE) factor_value = tvm.const(2 ** 2, dtype=D_TYPE) - data_zero = te.lang.cce.broadcast(tvm.const(0, dtype=D_TYPE), shape_inputs, D_TYPE) + data_zero = te.lang.cce.broadcast( + tvm.const(0, dtype=D_TYPE), shape_inputs, D_TYPE) min_value_tensor = te.lang.cce.vadds(data_zero, min_value) res_sub = te.lang.cce.vsub(data_y, data_x) @@ -69,16 +71,16 @@ def _less_compare_float32(data_x, data_y): return res -@op_info_register(fake_quant_grad_op_info) -def _fake_quant_grad_tbe(): - """FakeQuantWithMinMaxGrad TBE register""" +@op_info_register(fake_quant_per_layer_grad_op_info) +def _fake_quant_per_layer_grad_tbe(): + """FakeQuantPerLayerGrad TBE register""" return -@fusion_manager.register("fake_quant_with_min_max_grad") -def 
fake_quant_with_min_max_grad_compute(dout, x, min_val, max_val, quant_min, quant_max, - kernel_name="fake_quant_with_min_max_grad"): - """FakeQuantWithMinMaxGrad""" +@fusion_manager.register("fake_quant_per_layer_grad") +def fake_quant_per_layer_grad_compute(dout, x, min_val, max_val, quant_min, quant_max, symmetric, + kernel_name="fake_quant_per_layer_grad"): + """FakeQuantPerLayerGrad""" shape = te.lang.cce.util.shape_to_list(x.shape) shape_min = te.lang.cce.util.shape_to_list(min_val.shape) quant_min = tvm.const(quant_min, x.dtype) @@ -86,11 +88,18 @@ def fake_quant_with_min_max_grad_compute(dout, x, min_val, max_val, quant_min, q quant_min = te.lang.cce.broadcast(quant_min, shape_min) quant_max = te.lang.cce.broadcast(quant_max, shape_min) + if symmetric: + max_val = te.lang.cce.vmax(te.lang.cce.vmuls(min_val, -1.), max_val) + min_val = te.lang.cce.vmuls(max_val, -1.) + # CalNudge(NudgeMinMax) - scale = te.lang.cce.vdiv(te.lang.cce.vsub(max_val, min_val), te.lang.cce.vsub(quant_max, quant_min)) + scale = te.lang.cce.vdiv(te.lang.cce.vsub( + max_val, min_val), te.lang.cce.vsub(quant_max, quant_min)) zp_from_min = te.lang.cce.vsub(quant_min, te.lang.cce.vdiv(min_val, scale)) # Nudge zero point - nudge_zp = te.lang.cce.round(te.lang.cce.vmin(quant_max, te.lang.cce.vmax(quant_min, zp_from_min))) + nudge_zp_ = te.lang.cce.vmin( + quant_max, te.lang.cce.vmax(quant_min, zp_from_min)) + nudge_zp = te.lang.cce.floor(te.lang.cce.vadds(nudge_zp_, 0.5)) nudge_min = te.lang.cce.vmul(te.lang.cce.vsub(quant_min, nudge_zp), scale) nudge_max = te.lang.cce.vmul(te.lang.cce.vsub(quant_max, nudge_zp), scale) nudge_min = te.lang.cce.broadcast(nudge_min, shape) @@ -104,10 +113,11 @@ def fake_quant_with_min_max_grad_compute(dout, x, min_val, max_val, quant_min, q return res -@util.check_input_type(dict, dict, dict, dict, dict, int, int, str) -def fake_quant_with_min_max_grad(dout, x, min_val, max_val, dx, num_bits, quant_delay, - kernel_name="fake_quant_with_min_max_grad"): - 
"""FakeQuantWithMinMaxGrad""" +@util.check_input_type(dict, dict, dict, dict, dict, int, bool, bool, str) +def fake_quant_per_layer_grad(dout, x, min_val, max_val, dx, + num_bits, symmetric, narrow_range, + kernel_name="fake_quant_per_layer_grad"): + """FakeQuantPerLayerGrad""" input_shape = x.get("shape") input_dtype = x.get("dtype") min_shape = min_val.get("ori_shape") @@ -138,12 +148,15 @@ def fake_quant_with_min_max_grad(dout, x, min_val, max_val, dx, num_bits, quant_ quant_min = 0 quant_max = 2 ** num_bits - 1 + if narrow_range: + quant_min = quant_min + 1 + dout_data = tvm.placeholder(input_shape, name="dout", dtype=x_dtype) input_data = tvm.placeholder(input_shape, name="x", dtype=x_dtype) min_data = tvm.placeholder(shape_min, name="min_data", dtype=min_dtype) max_data = tvm.placeholder(shape_min, name="max_data", dtype=max_dtype) - res = fake_quant_with_min_max_grad_compute(dout_data, input_data, min_data, max_data, quant_min, - quant_max, kernel_name) + res = fake_quant_per_layer_grad_compute(dout_data, input_data, min_data, max_data, + quant_min, quant_max, symmetric, kernel_name) with tvm.target.cce(): sch = generic.auto_schedule(res) diff --git a/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py b/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py new file mode 100644 index 0000000000..1ff63464c3 --- /dev/null +++ b/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py @@ -0,0 +1,126 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""MinMaxUpdatePerChannel op""" +import te.lang.cce +from te import tvm +from te.platform.fusion_manager import fusion_manager +from topi import generic +from topi.cce import util +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +minmax_update_perchannel_op_info = TBERegOp("MinMaxUpdatePerChannel") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("minmax_update_perchannel.so") \ + .compute_cost(10) \ + .kernel_name("minmax_update_perchannel") \ + .partial_flag(True) \ + .attr("ema", "optional", "bool", "all") \ + .attr("ema_decay", "optional", "float", "all") \ + .attr("channel_axis", "optional", "int", "all") \ + .input(0, "x", None, "required", None) \ + .input(1, "min", None, "required", None) \ + .input(2, "max", None, "required", None) \ + .output(0, "min_up", True, "required", "all") \ + .output(1, "max_up", True, "required", "all") \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, + DataType.F32_5HD) \ + .get_op_info() + + +@op_info_register(minmax_update_perchannel_op_info) +def _minmax_update_perchannel_tbe(): + """MinMaxUpdatePerChannel TBE register""" + return + + +@fusion_manager.register("minmax_update_perchannel") +def minmax_update_perchannel_compute(x, min_val, max_val, + ema, ema_decay, channel_axis): + """MinMaxUpdatePerChannel compute""" + shape_min = te.lang.cce.util.shape_to_list(min_val.shape) + + if not ema: + ema_decay = 0.0 + + # CalMinMax + if channel_axis == 0: + axis = [1, 2, 3, 4] + else: + axis = [0, 2, 3] + + x_min = te.lang.cce.reduce_min(x, axis=axis) + x_max = te.lang.cce.reduce_max(x, axis=axis) + x_min = te.lang.cce.broadcast(x_min, shape_min) + x_max = te.lang.cce.broadcast(x_max, shape_min) + min_val = te.lang.cce.vadd(te.lang.cce.vmuls( + min_val, 
ema_decay), te.lang.cce.vmuls(x_min, (1 - ema_decay))) + max_val = te.lang.cce.vadd(te.lang.cce.vmuls( + max_val, ema_decay), te.lang.cce.vmuls(x_max, (1 - ema_decay))) + min_val = te.lang.cce.vmins(min_val, 0) + max_val = te.lang.cce.vmaxs(max_val, 0) + + return [min_val, max_val] + + +@util.check_input_type(dict, dict, dict, dict, dict, bool, float, int, str) +def minmax_update_perchannel(x, min_val, max_val, min_up, max_up, + ema, ema_decay, channel_axis, + kernel_name="minmax_update_perchannel"): + """MinMaxUpdatePerChannel op""" + x_shape = x.get("ori_shape") + x_format = x.get("format") + x_dtype = x.get("dtype") + min_shape = min_val.get("ori_shape") + min_dtype = min_val.get("dtype") + max_shape = max_val.get("ori_shape") + max_dtype = max_val.get("dtype") + + util.check_kernel_name(kernel_name) + util.check_shape_rule(x_shape) + util.check_shape_rule(min_shape, 1, 1, x_shape[channel_axis]) + util.check_shape_rule(max_shape, 1, 1, x_shape[channel_axis]) + util.check_tensor_shape_size(x_shape) + util.check_tensor_shape_size(min_shape) + util.check_tensor_shape_size(max_shape) + + check_list = ["float32", "float16"] + x_dtype = x_dtype.lower() + min_dtype = min_dtype.lower() + max_dtype = max_dtype.lower() + util.check_dtype_rule(x_dtype, check_list) + util.check_dtype_rule(min_dtype, check_list) + util.check_dtype_rule(max_dtype, check_list) + + if channel_axis == 0: + shape_c = min_val.get("ori_shape") + else: + shape_c = [min_val.get("shape")[1], min_val.get("shape")[-1]] + input_data = tvm.placeholder(x.get("shape"), name="x", dtype=x_dtype) + min_data = tvm.placeholder(shape_c, name="min_val", dtype=x_dtype) + max_data = tvm.placeholder(shape_c, name="max_val", dtype=x_dtype) + res_list = minmax_update_perchannel_compute(input_data, min_data, max_data, + ema, ema_decay, channel_axis) + + with tvm.target.cce(): + sch = generic.auto_schedule(res_list) + + tensor_list = [input_data, min_data, max_data] + list(res_list) + config = {"print_ir": False, + 
"name": kernel_name, + "tensor_list": tensor_list} + + te.lang.cce.cce_build_code(sch, config) diff --git a/mindspore/ops/_op_impl/_custom_op/fake_quant_with_min_max_update.py b/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py similarity index 60% rename from mindspore/ops/_op_impl/_custom_op/fake_quant_with_min_max_update.py rename to mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py index e5c932aa0f..4d2096d55b 100644 --- a/mindspore/ops/_op_impl/_custom_op/fake_quant_with_min_max_update.py +++ b/mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================ -"""FakeQuantWithMinMaxUpdate op""" +"""MinMaxUpdatePerLayer op""" from functools import reduce as functools_reduce import te.lang.cce from te import tvm @@ -22,21 +22,15 @@ from topi import generic from topi.cce import util from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType - -fake_quant_update5d_op_info = TBERegOp("FakeQuantWithMinMaxUpdate") \ +minmax_update_perlayer_op_info = TBERegOp("MinMaxUpdatePerLayer") \ .fusion_type("OPAQUE") \ .async_flag(False) \ - .binfile_name("fake_quant_with_min_max_update5d.so") \ + .binfile_name("minmax_update_perlayer.so") \ .compute_cost(10) \ - .kernel_name("fake_quant_with_min_max_update") \ + .kernel_name("minmax_update_perlayer") \ .partial_flag(True) \ .attr("ema", "optional", "bool", "all") \ .attr("ema_decay", "optional", "float", "all") \ - .attr("symmetric", "optional", "bool", "all") \ - .attr("narrow_range", "optional", "bool", "all") \ - .attr("training", "optional", "bool", "all") \ - .attr("num_bits", "optional", "int", "all") \ - .attr("quant_delay", "optional", "int", "all") \ .input(0, "x", None, "required", None) \ .input(1, "min", None, "required", None) \ .input(2, "max", None, "required", None) \ @@ -47,42 +41,42 @@ fake_quant_update5d_op_info = 
TBERegOp("FakeQuantWithMinMaxUpdate") \ .get_op_info() -@op_info_register(fake_quant_update5d_op_info) -def _fake_quant_update5d_tbe(): - """_FakeQuantWithMinMaxUpdate5D TBE register""" +@op_info_register(minmax_update_perlayer_op_info) +def _minmax_update_perlayer_tbe(): + """MinMaxUpdatePerLayer TBE register""" return -@fusion_manager.register("fake_quant_with_min_max_update") -def fake_quant_with_min_max_update_compute(x, min_val, max_val, ema, ema_decay, quant_min, quant_max, training, - kernel_name="fake_quant_update"): - """FakeQuantWithMinMaxUpdate compute""" +@fusion_manager.register("minmax_update_perlayer") +def minmax_update_perlayer_compute(x, min_val, max_val, ema, ema_decay): + """MinMaxUpdatePerLayer compute""" shape = te.lang.cce.util.shape_to_list(x.shape) shape_min = te.lang.cce.util.shape_to_list(min_val.shape) min_val = te.lang.cce.broadcast(min_val, shape_min, x.dtype) max_val = te.lang.cce.broadcast(max_val, shape_min, x.dtype) if not ema: ema_decay = 0.0 - if training: - # CalMinMax - axis = tuple(range(len(shape))) - x_min = te.lang.cce.reduce_min(x, axis=axis) - x_max = te.lang.cce.reduce_max(x, axis=axis) - x_min = te.lang.cce.broadcast(x_min, shape_min) - x_max = te.lang.cce.broadcast(x_max, shape_min) - min_val = te.lang.cce.vadd(te.lang.cce.vmuls(min_val, ema_decay), te.lang.cce.vmuls(x_min, (1 - ema_decay))) - max_val = te.lang.cce.vadd(te.lang.cce.vmuls(max_val, ema_decay), te.lang.cce.vmuls(x_max, (1 - ema_decay))) - min_val = te.lang.cce.vmins(min_val, 0) - max_val = te.lang.cce.vmaxs(max_val, 0) + + # CalMinMax + axis = tuple(range(len(shape))) + x_min = te.lang.cce.reduce_min(x, axis=axis) + x_max = te.lang.cce.reduce_max(x, axis=axis) + x_min = te.lang.cce.broadcast(x_min, shape_min) + x_max = te.lang.cce.broadcast(x_max, shape_min) + min_val = te.lang.cce.vadd(te.lang.cce.vmuls( + min_val, ema_decay), te.lang.cce.vmuls(x_min, (1 - ema_decay))) + max_val = te.lang.cce.vadd(te.lang.cce.vmuls( + max_val, ema_decay), 
te.lang.cce.vmuls(x_max, (1 - ema_decay))) + min_val = te.lang.cce.vmins(min_val, 0) + max_val = te.lang.cce.vmaxs(max_val, 0) return [min_val, max_val] -@util.check_input_type(dict, dict, dict, dict, dict, bool, float, bool, bool, bool, int, int, str) -def fake_quant_with_min_max_update(x, min_val, max_val, min_up, max_up, - ema, ema_decay, symmetric, narrow_range, training, num_bits, quant_delay, - kernel_name="fake_quant_update"): - """FakeQuantWithMinMax op""" +@util.check_input_type(dict, dict, dict, dict, dict, bool, float, str) +def minmax_update_perlayer(x, min_val, max_val, min_up, max_up, + ema, ema_decay, kernel_name="minmax_update_perlayer"): + """MinMaxUpdatePerLayer op""" input_shape = x.get("shape") input_dtype = x.get("dtype") min_shape = min_val.get("ori_shape") @@ -111,20 +105,10 @@ def fake_quant_with_min_max_update(x, min_val, max_val, min_up, max_up, input_shape = (functools_reduce(lambda x, y: x * y, input_shape[:]),) shape_min, _, _ = util.produce_shapes(min_shape, input_shape) - if symmetric: - quant_min = 0 - 2 ** (num_bits - 1) - quant_max = 2 ** (num_bits - 1) - 1 - else: - quant_min = 0 - quant_max = 2 ** num_bits - 1 - if narrow_range: - quant_min = quant_min + 1 - input_data = tvm.placeholder(input_shape, name="x", dtype=x_dtype) min_data = tvm.placeholder(shape_min, name="min_data", dtype=min_dtype) max_data = tvm.placeholder(shape_min, name="max_data", dtype=max_dtype) - res_list = fake_quant_with_min_max_update_compute(input_data, min_data, max_data, - ema, ema_decay, quant_min, quant_max, training, kernel_name) + res_list = minmax_update_perlayer_compute(input_data, min_data, max_data, ema, ema_decay) with tvm.target.cce(): sch = generic.auto_schedule(res_list) diff --git a/mindspore/ops/_op_impl/aicpu/__init__.py b/mindspore/ops/_op_impl/aicpu/__init__.py index f514ac183e..c83a6ec46e 100644 --- a/mindspore/ops/_op_impl/aicpu/__init__.py +++ b/mindspore/ops/_op_impl/aicpu/__init__.py @@ -25,11 +25,12 @@ from .flatten import 
_flatten_aicpu from .squeeze import _squeeze_aicpu from .expand_dims import _expand_dims_aicpu from .random_choice_with_mask import _random_choice_with_mask_aicpu +from .pack import _pack_aicpu +from .normal import _normal_aicpu from .ctcloss import _ctcloss_aicpu -from .rnnt_loss import _rnnt_loss_aicpu -from .random_categorical import _random_categorical_aicpu from .reverse_sequence import _reverse_sequence_aicpu -from .pack import _pack_aicpu from .crop_and_resize import _crop_and_resize_aicpu +from .rnnt_loss import _rnnt_loss_aicpu +from .random_categorical import _random_categorical_aicpu from .cast import _cast_aicpu from .mirror_pad import _mirror_pad_aicpu diff --git a/mindspore/ops/_op_impl/aicpu/normal.py b/mindspore/ops/_op_impl/aicpu/normal.py new file mode 100644 index 0000000000..fdb96e362f --- /dev/null +++ b/mindspore/ops/_op_impl/aicpu/normal.py @@ -0,0 +1,33 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Normal op""" +from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType + +normal_op_info = AiCPURegOp("Normal") \ + .fusion_type("OPAQUE") \ + .input(0, "shape", "required") \ + .input(1, "mean", "required") \ + .input(2, "stddev", "required") \ + .output(0, "y", "required") \ + .attr("seed", "int") \ + .dtype_format(DataType.I32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW) \ + .get_op_info() + +@op_info_register(normal_op_info) +def _normal_aicpu(): + """Normal AiCPU register""" + return diff --git a/mindspore/ops/_op_impl/aicpu/topk.py b/mindspore/ops/_op_impl/aicpu/topk.py index a68ae3557d..80cf1c5203 100644 --- a/mindspore/ops/_op_impl/aicpu/topk.py +++ b/mindspore/ops/_op_impl/aicpu/topk.py @@ -24,6 +24,7 @@ top_k_op_info = AiCPURegOp("TopK") \ .output(0, "values", "required") \ .output(1, "indices", "required") \ .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default, DataType.I32_Default) \ + .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default, DataType.I32_Default) \ .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/akg/__init__.py b/mindspore/ops/_op_impl/akg/__init__.py index e69de29bb2..f38b99f5e4 100644 --- a/mindspore/ops/_op_impl/akg/__init__.py +++ b/mindspore/ops/_op_impl/akg/__init__.py @@ -0,0 +1,88 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""autodiff ops""" +from .abs import _abs_akg +from .add_n import _add_n_akg +from .add import _add_akg +from .apply_momentum import _apply_momentum_akg +from .assign import _assign_akg +from .inplace_assign import _inplace_assign_akg +from .assign_add import _assign_add_akg +from .bias_add_grad import _bias_add_grad_akg +from .bias_add import _bias_add_akg +from .cast import _cast_akg +from .clear_zero import _clear_zero_akg +from .conv_bn1 import _conv_bn1_akg +from .conv2d_backprop_filter import _conv2d_backprop_filter_akg +from .conv2d_backprop_input import _conv2d_backprop_input_akg +from .conv2d import _conv2d_akg +from .div import _div_akg +from .equal_count import _equal_count_akg +from .exp import _exp_akg +from .five2four import _five2four_akg +from .four2five import _four2five_akg +from .fused_batch_norm_grad import _fused_batch_norm_grad_akg +from .fused_batch_norm_infer import _fused_batch_norm_infer_akg +from .fused_batch_norm import _fused_batch_norm_akg +from .fused_bn1_grad import _bn1_grad_akg +from .fused_bn1 import _fused_bn1_akg +from .fused_bn2_grad import _bn2_grad_akg +from .fused_bn2 import _fused_bn2_akg +from .fused_bn3_grad import _bn3_grad_akg +from .fused_bn3 import _fused_bn3_akg +from .gather_v2 import _gather_v2_akg +from .less import _less_akg +from .log import _log_akg +from .matmul import _matmul_akg +from .max_pool_grad_with_argmax import _max_pool_grad_with_argmax_akg +from .max_pool_with_argmax import _max_pool_with_argmax_akg +from .max 
import _max_akg +from .maximum import _maximum_akg +from .mean_grad import _mean_grad_akg +from .mean import _mean_akg +from .minimum import _minimum_akg +from .mul import _mul_akg +from .neg import _neg_akg +from .one_hot import _one_hot_akg +from .pow import _power_akg +from .real_div import _real_div_akg +from .reciprocal import _reciprocal_akg +from .reduce_max import _reduce_max_akg +from .reduce_mean import _reduce_mean_akg +from .reduce_sum import _reduce_sum_akg +from .relu_grad import _relu_grad_akg +from .relu import _relu_akg +from .reshape import _reshape_akg +from .round import _round_akg +from .rsqrt import _rsqrt_akg +from .select import _select_akg +from .softmax import _softmax_akg +from .sparse_softmax_cross_entropy_with_logits import _sparse_softmax_cross_entropy_with_logits_akg +from .sqrt import _sqrt_akg +from .strided_slice import _strided_slice_akg +from .sub import _sub_akg +from .sum import _sum_akg +from .tile import _tile_akg +from .zeros_like import _zeros_like_akg +from .argmax import _argmax_akg +from .floordiv import _floor_div_akg +from .equal import _equal_akg +from .greater_equal import _greater_equal_akg +from .less_equal import _less_equal_akg +from .expand_dims import _expand_dims_akg +from .greater import _greater_akg +from .equiv_format import _equiv_format_akg +from . import gpu diff --git a/mindspore/ops/_op_impl/akg/abs.py b/mindspore/ops/_op_impl/akg/abs.py new file mode 100644 index 0000000000..8c08f405da --- /dev/null +++ b/mindspore/ops/_op_impl/akg/abs.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Abs op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Abs", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _abs_akg(): + """Abs AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/add.py b/mindspore/ops/_op_impl/akg/add.py new file mode 100644 index 0000000000..60544ea1c7 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/add.py @@ -0,0 +1,72 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""TensorAdd op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "TensorAdd", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32", + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", + "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32", + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", + "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32", + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", + "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _add_akg(): + """TensorAdd AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/add_n.py b/mindspore/ops/_op_impl/akg/add_n.py new file mode 100644 index 0000000000..53320f752e --- /dev/null +++ b/mindspore/ops/_op_impl/akg/add_n.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except 
in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""AddN op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "AddN", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16","float32","float16","float32", "float16", "float32", + "float16","float32" + ], + "format": [ + "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0", "FracZ", "FracZ", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "dynamic", + "name": "inputs" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16","float32","float16","float32", "float16", "float32", + "float16","float32" + ], + "format": [ + "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0", "FracZ", "FracZ", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _add_n_akg(): + """AddN AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/apply_momentum.py b/mindspore/ops/_op_impl/akg/apply_momentum.py new file mode 100644 index 0000000000..7160571882 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/apply_momentum.py @@ -0,0 +1,103 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ApplyMomentum op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ApplyMomentum", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "use_nesterov", + "param_type": "optional", + "type": "bool" + }, + { + "name": "gradient_scale", + "param_type": "optional", + "type": "float" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float32","float32","float32" + ], + "format": [ + "DefaultFormat","NC1HWC0","FracZ" + ], + "name": "variable" + }, + { + "index": 1, + "dtype": [ + "float32","float32","float32" + ], + "format": [ + "DefaultFormat","NC1HWC0","FracZ" + ], + "name": "accumulation" + }, + { + "index": 2, + "dtype": [ + "float32","float32","float32" + ], + "format": [ + "DefaultFormat","DefaultFormat","DefaultFormat" + ], + "name": "learning_rate" + }, + { + "index": 3, + "dtype": [ + "float32","float32","float32" + ], + "format": [ + "DefaultFormat","NC1HWC0","FracZ" + ], + "name": "gradient" + }, + { + "index": 4, + "dtype": [ + "float32","float32","float32" + ], + "format": [ + "DefaultFormat","DefaultFormat","DefaultFormat" + ], + "name": "momentum" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32","float32","float32" + ], + "format": [ + "DefaultFormat","NC1HWC0","FracZ" + ], + "name": "output" + } + ] +}""") +def _apply_momentum_akg(): + """ApplyMomentum AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/argmax.py 
b/mindspore/ops/_op_impl/akg/argmax.py new file mode 100644 index 0000000000..b04862cbeb --- /dev/null +++ b/mindspore/ops/_op_impl/akg/argmax.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Argmax op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Argmax", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "axis", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "int32", "int32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _argmax_akg(): + """Argmax AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/assign.py b/mindspore/ops/_op_impl/akg/assign.py new file mode 100644 index 0000000000..e7c5a082bd --- /dev/null +++ b/mindspore/ops/_op_impl/akg/assign.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Assign op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Assign", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "ref" + }, + { + "index": 1, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "value" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "output" + } + ] +}""") +def _assign_akg(): + """Assign AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/assign_add.py b/mindspore/ops/_op_impl/akg/assign_add.py new file mode 100644 index 0000000000..7d0d345764 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/assign_add.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# 
you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""AssignAdd op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "AssignAdd", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "ref" + }, + { + "index": 1, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "value" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _assign_add_akg(): + """AssignAdd AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/bias_add.py b/mindspore/ops/_op_impl/akg/bias_add.py new file mode 100644 index 0000000000..74f2bf7bcf --- /dev/null +++ b/mindspore/ops/_op_impl/akg/bias_add.py @@ -0,0 +1,68 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""BiasAdd op""" + +from mindspore.ops.op_info_register import op_info_register + +@op_info_register("""{ + "op_name": "BiasAdd", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "data_format", + "param_type": "optional", + "type": "listStr" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16","float32","float16","float32","float16","float32" + ], + "format": [ + "NHWC","NHWC","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16","float32","float16","float32","float16","float32" + ], + "format": [ + "NHWC","NHWC","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" + ], + "name": "b" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16","float32","float16","float32","float16","float32" + ], + "format": [ + "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _bias_add_akg(): + """BiasAddGrad AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/bias_add_grad.py b/mindspore/ops/_op_impl/akg/bias_add_grad.py new file mode 100644 index 0000000000..7726af6692 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/bias_add_grad.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""BiasAddGrad op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "BiasAddGrad", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "data_format", + "param_type": "optional", + "type": "listStr" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16","float32","float16","float32","float16","float32" + ], + "format": [ + "NHWC","NHWC","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" + ], + "name": "dout" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16","float32","float16","float32","float16","float32" + ], + "format": [ + "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _bias_add_grad_akg(): + """BiasAddGrad AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/cast.py b/mindspore/ops/_op_impl/akg/cast.py new file mode 100644 index 0000000000..a78d4d87e4 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/cast.py @@ -0,0 +1,74 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Cast op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Cast", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "dst_type", + "param_type": "required", + "type": "str" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "bool", "bool", + "float16", "float32", "int32", "int32", + "bool", + "float16", "float32", "bool", "bool", + "float16", "float32", "bool", "bool" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", + "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", + "DefaultFormat", + "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32", "float16", "int32", "float16", + "int32", "int32", "float16", "float32", + "float32", + "float32", "float16", "int32", "float32", + "float32", "float16", "int32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", + "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", + "DefaultFormat", + "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _cast_akg(): + """Cast AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/clear_zero.py 
b/mindspore/ops/_op_impl/akg/clear_zero.py new file mode 100644 index 0000000000..38bf35044f --- /dev/null +++ b/mindspore/ops/_op_impl/akg/clear_zero.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ClearZero op""" + +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ClearZero", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "pad_mod", + "param_type": "optional", + "type": "string" + }, + { + "name": "window", + "param_type": "optional", + "type": "int" + }, + { + "name": "pad", + "param_type": "optional", + "type": "int" + }, + { + "name": "stride", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + ] +}""") +def _clear_zero_akg(): + """MaxPoolGradWithArgmax AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/conv2d.py b/mindspore/ops/_op_impl/akg/conv2d.py new file mode 100644 index 0000000000..709aca7001 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/conv2d.py @@ -0,0 +1,88 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Conv2D op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Conv2D", + "imply_type": "AutoDiff", + "fusion_type": "CONVLUTION", + "attr": [ + { + "name": "x_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "w_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "pad_list", + "param_type": "required", + "type": "listInt" + }, + { + "name": "stride", + "param_type": "optional", + "type": "int" + }, + { + "name": "dilation", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16" + ], + "format": [ + "FracZ" + ], + "name": "w" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _conv2d_akg(): + """Conv2D AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/conv2d_backprop_filter.py b/mindspore/ops/_op_impl/akg/conv2d_backprop_filter.py new file mode 100644 index 0000000000..1e4e4f1a1e --- /dev/null +++ b/mindspore/ops/_op_impl/akg/conv2d_backprop_filter.py @@ -0,0 +1,88 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file 
except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Conv2DBackpropFilter op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Conv2DBackpropFilter", + "imply_type": "AutoDiff", + "fusion_type": "CONVLUTION", + "attr": [ + { + "name": "input_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "filter_sizes", + "param_type": "required", + "type": "listInt" + }, + { + "name": "stride", + "param_type": "optional", + "type": "int" + }, + { + "name": "pad_list", + "param_type": "required", + "type": "listInt" + }, + { + "name": "dilation", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "out_backprop" + }, + { + "index": 1, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "input" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "FracZ" + ], + "name": "output" + } + ] +}""") +def _conv2d_backprop_filter_akg(): + """Conv2DBackpropFilter AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/conv2d_backprop_input.py b/mindspore/ops/_op_impl/akg/conv2d_backprop_input.py new file mode 100644 index 0000000000..52c7f2e7b3 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/conv2d_backprop_input.py @@ -0,0 +1,88 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not 
use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Conv2DBackpropInput op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Conv2DBackpropInput", + "imply_type": "AutoDiff", + "fusion_type": "CONVLUTION", + "attr": [ + { + "name": "input_sizes", + "param_type": "required", + "type": "listInt" + }, + { + "name": "filter_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "stride", + "param_type": "optional", + "type": "int" + }, + { + "name": "pad_list", + "param_type": "required", + "type": "listInt" + }, + { + "name": "dilation", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "out_backprop" + }, + { + "index": 1, + "dtype": [ + "float16" + ], + "format": [ + "FracZ" + ], + "name": "filter" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _conv2d_backprop_input_akg(): + """Conv2DBackpropInput AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/conv_bn1.py b/mindspore/ops/_op_impl/akg/conv_bn1.py new file mode 100644 index 0000000000..118c94e6fc --- /dev/null +++ b/mindspore/ops/_op_impl/akg/conv_bn1.py @@ -0,0 +1,108 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ConvBN1 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ConvBN1", + "imply_type": "AutoDiff", + "fusion_type": "CONVLUTION", + "attr": [ + { + "name": "x_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "w_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "pad_list", + "param_type": "required", + "type": "listInt" + }, + { + "name": "stride", + "param_type": "optional", + "type": "int" + }, + { + "name": "dilation", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16" + ], + "format": [ + "FracZ" + ], + "name": "w" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "conv_res_16" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "var_part" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "mean" + } + ] +}""") +def _conv_bn1_akg(): + """ConvBN1 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/div.py b/mindspore/ops/_op_impl/akg/div.py new file mode 100644 index 0000000000..56cdcca868 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/div.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Div op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Div", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _div_akg(): + """Div AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/equal.py b/mindspore/ops/_op_impl/akg/equal.py new file mode 100644 index 0000000000..35874c62bb --- /dev/null +++ b/mindspore/ops/_op_impl/akg/equal.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Equal op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Equal", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "bool", "bool", "bool", "bool", "bool", "bool" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _equal_akg(): + """Equal AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/equal_count.py b/mindspore/ops/_op_impl/akg/equal_count.py new file mode 100644 index 0000000000..9c575db7b3 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/equal_count.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""EqualCount op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "EqualCount", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32" + ], + "format": [ + "DefaultFormat" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "int32" + ], + "format": [ + "DefaultFormat" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32" + ], + "format": [ + "DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _equal_count_akg(): + """EqualCount AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/equiv_format.py b/mindspore/ops/_op_impl/akg/equiv_format.py new file mode 100644 index 0000000000..111451b15c --- /dev/null +++ b/mindspore/ops/_op_impl/akg/equiv_format.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""EquivFormat op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "EquivFormat", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "FRACTAL_NZ", "FRACTAL_NZ", "DefaultFormat", "DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _equiv_format_akg(): + """EquivFormat AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/exp.py b/mindspore/ops/_op_impl/akg/exp.py new file mode 100644 index 0000000000..273b3348a4 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/exp.py @@ -0,0 +1,59 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Exp op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Exp", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _exp_akg(): + """Exp AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/expand_dims.py b/mindspore/ops/_op_impl/akg/expand_dims.py new file mode 100644 index 0000000000..9e1b18153a --- /dev/null +++ b/mindspore/ops/_op_impl/akg/expand_dims.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""ExpandDims op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ExpandDims", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "y" + } + ] +}""") +def _expand_dims_akg(): + """ExpandDims AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/five2four.py b/mindspore/ops/_op_impl/akg/five2four.py new file mode 100644 index 0000000000..1dac2c3628 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/five2four.py @@ -0,0 +1,68 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Five2Four op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Five2Four", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "shape4d", + "param_type": "required", + "type": "listInt" + }, + { + "name": "dstType", + "param_type": "required", + "type": "str" + }, + { + "name": "output_format", + "param_type": "required", + "type": "str" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16","float16","float16","float32","float16","float32" + ], + "format": [ + "NC1HWC0","NC1HWC0","NC1HWC0","NC1HWC0","NC1HWC0","NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16","float16","float32","float32","float32","float32" + ], + "format": [ + "DefaultFormat","NHWC","DefaultFormat","DefaultFormat","NHWC","NHWC" + ], + "name": "output" + } + ] +}""") +def _five2four_akg(): + """Five2Four AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/floordiv.py b/mindspore/ops/_op_impl/akg/floordiv.py new file mode 100644 index 0000000000..99e577b4be --- /dev/null +++ b/mindspore/ops/_op_impl/akg/floordiv.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""FloorDiv op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FloorDiv", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "int32", "int32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _floor_div_akg(): + """FloorDiv AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/four2five.py b/mindspore/ops/_op_impl/akg/four2five.py new file mode 100644 index 0000000000..01b6f85715 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/four2five.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Four2Five op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Four2Five", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "data_format", + "param_type": "optional", + "type": "listStr" + }, + { + "name": "dst_type", + "param_type": "required", + "type": "str" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float32", "float16","float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NHWC", "NHWC", "NHWC" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float16", "float32", "float16", "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _four2five_akg(): + """Four2Five AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_batch_norm.py b/mindspore/ops/_op_impl/akg/fused_batch_norm.py new file mode 100644 index 0000000000..5ce9839328 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_batch_norm.py @@ -0,0 +1,149 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""FusedBatchNorm op""" + +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FusedBatchNorm", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "momentum", + "param_type": "optional", + "type": "float" + }, + { + "name": "epsilon", + "param_type": "optional", + "type": "float" + }, + { + "name": "data_format", + "param_type": "optional", + "type": "listStr" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "scale" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "b" + }, + { + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "mean" + }, + { + "index": 4, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "variance" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "y" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "running_mean" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "running_variance" + }, + { + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "save_mean" + }, + { + "index": 4, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "save_inv_variance" + } + ] +}""") +def _fused_batch_norm_akg(): + """FusedBatchNorm AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_batch_norm_grad.py b/mindspore/ops/_op_impl/akg/fused_batch_norm_grad.py new file mode 100644 index 0000000000..9191548f73 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_batch_norm_grad.py @@ -0,0 +1,119 @@ +# Copyright 2020 Huawei Technologies 
Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""FusedBatchNormGrad op""" + +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FusedBatchNormGrad", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "data_format", + "param_type": "optional", + "type": "listStr" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "dy" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "x" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "scale" + }, + { + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "save_mean" + }, + { + "index": 4, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "save_inv_variance" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "dx" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "bn_scale" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "bn_bias" + } + ] +}""") +def _fused_batch_norm_grad_akg(): + """BiasAddGrad AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_batch_norm_infer.py 
b/mindspore/ops/_op_impl/akg/fused_batch_norm_infer.py new file mode 100644 index 0000000000..1e7743fa8f --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_batch_norm_infer.py @@ -0,0 +1,109 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""FusedBatchNormInfer op""" + +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FusedBatchNormInfer", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "momentum", + "param_type": "optional", + "type": "float" + }, + { + "name": "epsilon", + "param_type": "optional", + "type": "float" + }, + { + "name": "data_format", + "param_type": "optional", + "type": "listStr" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "scale" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "b" + }, + { + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "mean" + }, + { + "index": 4, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "variance" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "y" + } + ] +}""") +def 
_fused_batch_norm_infer_akg(): + """FusedBatchNormInfer AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_bn1.py b/mindspore/ops/_op_impl/akg/fused_bn1.py new file mode 100644 index 0000000000..fdaa673f25 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_bn1.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""FusedBN1 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FusedBN1", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "data" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + }, + { + "index": 1, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _fused_bn1_akg(): + """FusedBN1 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_bn1_grad.py b/mindspore/ops/_op_impl/akg/fused_bn1_grad.py new file mode 100644 index 0000000000..8de6796d6f --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_bn1_grad.py @@ -0,0 +1,93 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the 
Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""BNGrad1 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "BNGrad1", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "dy" + }, + { + "index": 1, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "data" + },{ + "index": 2, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "mean" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + }, + { + "index": 1, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + }, + { + "index": 2, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _bn1_grad_akg(): + """BNGrad1 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_bn2.py b/mindspore/ops/_op_impl/akg/fused_bn2.py new file mode 100644 index 0000000000..e26a5ad8a0 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_bn2.py @@ -0,0 +1,108 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""FusedBN2 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FusedBN2", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "momentum", + "param_type": "optional", + "type": "float" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "mean" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "var_part" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "running_mean" + }, + { + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "running_var" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _fused_bn2_akg(): + """FusedBN2 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_bn2_grad.py b/mindspore/ops/_op_impl/akg/fused_bn2_grad.py new file mode 100644 index 0000000000..e29a9177b6 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_bn2_grad.py @@ -0,0 +1,132 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""BNGrad1 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "BNGrad2", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "eps", + "param_type": "optional", + "type": "float" + }, + { + "name": "data_shape", + "param_type": "optional", + "type": "listInt" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "dgamma_red_hw" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "dbeta_red_hw" + },{ + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "variance" + }, + { + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "gamma" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + }, + { + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + }, + { + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + }, + { + "index": 4, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _bn2_grad_akg(): + 
"""BNGrad2 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_bn3.py b/mindspore/ops/_op_impl/akg/fused_bn3.py new file mode 100644 index 0000000000..74f3f652f3 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_bn3.py @@ -0,0 +1,95 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""FusedBN3 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "FusedBN3", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "eps", + "param_type": "optional", + "type": "float" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "data" + }, + { + "index": 1, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "mean" + },{ + "index": 2, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "variance" + },{ + "index": 3, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "gamma" + },{ + "index": 4, + "dtype": [ + "float32" + ], + "format": [ + "NC1HWC0" + ], + "name": "beta" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _fused_bn3_akg(): + """FusedBN3 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/fused_bn3_grad.py 
b/mindspore/ops/_op_impl/akg/fused_bn3_grad.py new file mode 100644 index 0000000000..5ffc57a68e --- /dev/null +++ b/mindspore/ops/_op_impl/akg/fused_bn3_grad.py @@ -0,0 +1,93 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""BNGrad3 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "BNGrad3", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "dy" + }, + { + "index": 1, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "rs" + },{ + "index": 2, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "dgamma_dx" + }, + { + "index": 3, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "dbeta_dx" + }, + { + "index": 4, + "dtype": [ + "float32", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "data_minus_mean" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _bn3_grad_akg(): + """BNGrad3 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/gather_v2.py 
b/mindspore/ops/_op_impl/akg/gather_v2.py new file mode 100644 index 0000000000..84ab7eb669 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/gather_v2.py @@ -0,0 +1,68 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""GatherV2 op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "GatherV2", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "axis", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "params" + }, + { + "index": 1, + "dtype": [ + "int32", "int32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "indices" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _gather_v2_akg(): + """GatherV2 AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/gpu/__init__.py b/mindspore/ops/_op_impl/akg/gpu/__init__.py index 08beb44340..7af6949104 100644 --- a/mindspore/ops/_op_impl/akg/gpu/__init__.py +++ b/mindspore/ops/_op_impl/akg/gpu/__init__.py @@ -32,3 +32,5 @@ from .logical_and 
import _logical_and_akg from .logical_not import _logical_not_akg from .logical_or import _logical_or_akg from .lessequal import _lessequal_akg +from .notequal import _notequal_akg +from .greater_equal import _greater_equal_akg diff --git a/mindspore/ops/_op_impl/akg/gpu/greater_equal.py b/mindspore/ops/_op_impl/akg/gpu/greater_equal.py new file mode 100644 index 0000000000..b000cbd0e3 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/gpu/greater_equal.py @@ -0,0 +1,32 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""GreaterEqual op""" +from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType + +greater_equal_op_info = AkgRegOp("GreaterEqual") \ + .fusion_type("OPAQUE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.BOOL_Default) \ + .get_op_info() + + +@op_info_register(greater_equal_op_info) +def _greater_equal_akg(): + """GreaterEqual register""" + return diff --git a/mindspore/ops/_op_impl/akg/gpu/lessequal.py b/mindspore/ops/_op_impl/akg/gpu/lessequal.py index a3e4d4dc35..a8babf7ae4 100644 --- a/mindspore/ops/_op_impl/akg/gpu/lessequal.py +++ b/mindspore/ops/_op_impl/akg/gpu/lessequal.py @@ -15,7 +15,7 @@ """LessEqual op""" from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType -equal_op_info = AkgRegOp("LessEqual") \ +lessequal_op_info = AkgRegOp("LessEqual") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ @@ -26,7 +26,7 @@ equal_op_info = AkgRegOp("LessEqual") \ .get_op_info() -@op_info_register(equal_op_info) +@op_info_register(lessequal_op_info) def _lessequal_akg(): """LessEqual register""" return diff --git a/mindspore/ops/_op_impl/akg/gpu/notequal.py b/mindspore/ops/_op_impl/akg/gpu/notequal.py new file mode 100644 index 0000000000..dc13449fc1 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/gpu/notequal.py @@ -0,0 +1,32 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""NotEqual op""" +from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType + +notequal_op_info = AkgRegOp("NotEqual") \ + .fusion_type("OPAQUE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.BOOL_Default) \ + .get_op_info() + + +@op_info_register(notequal_op_info) +def _notequal_akg(): + """NotEqual AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/greater.py b/mindspore/ops/_op_impl/akg/greater.py new file mode 100644 index 0000000000..941946163a --- /dev/null +++ b/mindspore/ops/_op_impl/akg/greater.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Greater op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Greater", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float16", "float32", "float32" + ], + "format": [ + "DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float16", "float32", "float32" + ], + "format": [ + "DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "bool", "bool", "bool", "bool" + ], + "format": [ + "DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _greater_akg(): + """Greater AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/greater_equal.py b/mindspore/ops/_op_impl/akg/greater_equal.py new file mode 100644 index 0000000000..11642baa86 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/greater_equal.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""GreaterEqual op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "GreaterEqual", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "bool", "bool", "bool", "bool", "bool", "bool" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _greater_equal_akg(): + """Equal AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/inplace_assign.py b/mindspore/ops/_op_impl/akg/inplace_assign.py new file mode 100644 index 0000000000..1cc40abe9b --- /dev/null +++ b/mindspore/ops/_op_impl/akg/inplace_assign.py @@ -0,0 +1,78 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""InplaceAssign op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "InplaceAssign", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "fake_output", + "param_type": "optional", + "type": "bool" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "y" + }, + { + "index": 2, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "z" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" + ], + "name": "output" + } + ] +}""") +def _inplace_assign_akg(): + """InplaceAssign AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/less.py b/mindspore/ops/_op_impl/akg/less.py new file mode 100644 index 0000000000..499ed2e8fc --- /dev/null +++ b/mindspore/ops/_op_impl/akg/less.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance 
with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Less op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Less", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float16" + ], + "format": [ + "DefaultFormat", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float16" + ], + "format": [ + "DefaultFormat", "NC1HWC0" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "bool", "bool" + ], + "format": [ + "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _less_akg(): + """Less AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/less_equal.py b/mindspore/ops/_op_impl/akg/less_equal.py new file mode 100644 index 0000000000..97fbdec090 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/less_equal.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""LessEqual op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "LessEqual", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "bool", "bool", "bool", "bool", "bool", "bool" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _less_equal_akg(): + """Equal AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/log.py b/mindspore/ops/_op_impl/akg/log.py new file mode 100644 index 0000000000..526538d17d --- /dev/null +++ b/mindspore/ops/_op_impl/akg/log.py @@ -0,0 +1,55 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Log op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Log", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _log_akg(): + """Log AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/matmul.py b/mindspore/ops/_op_impl/akg/matmul.py new file mode 100644 index 0000000000..084ba754fa --- /dev/null +++ b/mindspore/ops/_op_impl/akg/matmul.py @@ -0,0 +1,73 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""MatMul op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "MatMul", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "transpose_a", + "param_type": "optional", + "type": "bool" + }, + { + "name": "transpose_b", + "param_type": "optional", + "type": "bool" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat" + ], + "name": "x1" + }, + { + "index": 1, + "dtype": [ + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat" + ], + "name": "x2" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _matmul_akg(): + """MatMul AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/max.py b/mindspore/ops/_op_impl/akg/max.py new file mode 100644 index 0000000000..21fd4ef9c4 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/max.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Max op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Max", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "listInt" + }, + { + "name": "keep_dims", + "param_type": "required", + "type": "bool" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _max_akg(): + """Max AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/max_pool_grad_with_argmax.py b/mindspore/ops/_op_impl/akg/max_pool_grad_with_argmax.py new file mode 100644 index 0000000000..4adad3eb88 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/max_pool_grad_with_argmax.py @@ -0,0 +1,93 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""MaxPoolGradWithArgmax op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "MaxPoolGradWithArgmax", + "imply_type": "AutoDiff", + "fusion_type": "CONVLUTION", + "attr": [ + { + "name": "pad_mode", + "param_type": "optional", + "type": "str" + }, + { + "name": "window", + "param_type": "optional", + "type": "int" + }, + { + "name": "pad", + "param_type": "optional", + "type": "int" + }, + { + "name": "stride", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float16" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat" + ], + "name": "argmax" + }, + { + "index": 2, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "grad" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32" + ], + "format": [ + "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _max_pool_grad_with_argmax_akg(): + """MaxPoolGradWithArgmax AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/max_pool_with_argmax.py b/mindspore/ops/_op_impl/akg/max_pool_with_argmax.py new file mode 100644 index 0000000000..3ae36d4793 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/max_pool_with_argmax.py @@ -0,0 +1,83 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""MaxPoolWithArgmax op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "MaxPoolWithArgmax", + "imply_type": "AutoDiff", + "fusion_type": "CONVLUTION", + "attr": [ + { + "name": "pad_mode", + "param_type": "optional", + "type": "str" + }, + { + "name": "window", + "param_type": "optional", + "type": "int" + }, + { + "name": "pad", + "param_type": "optional", + "type": "int" + }, + { + "name": "stride", + "param_type": "optional", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16" + ], + "format": [ + "NC1HWC0" + ], + "name": "output" + }, + { + "index": 1, + "dtype": [ + "float16" + ], + "format": [ + "DefaultFormat" + ], + "name": "argmax" + } + ] +}""") +def _max_pool_with_argmax_akg(): + """MaxPoolWithArgmax AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/maximum.py b/mindspore/ops/_op_impl/akg/maximum.py new file mode 100644 index 0000000000..8d8de5270a --- /dev/null +++ b/mindspore/ops/_op_impl/akg/maximum.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Maximum op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Maximum", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _maximum_akg(): + """Maximum AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/mean.py b/mindspore/ops/_op_impl/akg/mean.py new file mode 100644 index 0000000000..0b49e76865 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/mean.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""SimpleMean op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "SimpleMean", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _mean_akg(): + """SimpleMean AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/mean_grad.py b/mindspore/ops/_op_impl/akg/mean_grad.py new file mode 100644 index 0000000000..3b8379d1f0 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/mean_grad.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""SimpleMeanGrad op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "SimpleMeanGrad", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "input_shape", + "param_type": "required", + "type": "listInt" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "HEAD" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _mean_grad_akg(): + """SimpleMeanGrad AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/minimum.py b/mindspore/ops/_op_impl/akg/minimum.py new file mode 100644 index 0000000000..759df2085f --- /dev/null +++ b/mindspore/ops/_op_impl/akg/minimum.py @@ -0,0 +1,70 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Minimum op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Minimum", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32", + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32", + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32", + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _minimum_akg(): + """Minimum AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/mul.py b/mindspore/ops/_op_impl/akg/mul.py new file mode 100644 index 0000000000..ab02c2d89e --- /dev/null +++ b/mindspore/ops/_op_impl/akg/mul.py @@ -0,0 +1,86 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Mul op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Mul", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "x_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "y_shape", + "param_type": "required", + "type": "listInt" + }, + { + "name": "data_format", + "param_type": "required", + "type": "listStr" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _mul_akg(): + """Mul AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/neg.py b/mindspore/ops/_op_impl/akg/neg.py new file mode 
100644 index 0000000000..bc00d60271 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/neg.py @@ -0,0 +1,59 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Neg op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Neg", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32", + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32", + "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _neg_akg(): + """Neg AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/one_hot.py b/mindspore/ops/_op_impl/akg/one_hot.py new file mode 100644 index 0000000000..c5034dbbd4 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/one_hot.py @@ -0,0 +1,83 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""OneHot op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "OneHot", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "depth", + "param_type": "required", + "type": "int" + }, + { + "name": "axis", + "param_type": "required", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "int32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "indices" + }, + { + "index": 1, + "dtype": [ + "int32", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "on_value" + }, + { + "index": 2, + "dtype": [ + "int32", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "off_value" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _one_hot_akg(): + """OneHot AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/pow.py b/mindspore/ops/_op_impl/akg/pow.py new file mode 100644 index 0000000000..d782968c05 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/pow.py @@ -0,0 +1,65 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the 
Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Pow op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Pow", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "param_type": "required", + "name": "power" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _power_akg(): + """Pow AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/real_div.py b/mindspore/ops/_op_impl/akg/real_div.py new file mode 100644 index 0000000000..9fa37a24e3 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/real_div.py @@ -0,0 +1,72 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this 
file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""RealDiv op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "RealDiv", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _real_div_akg(): + """RealDiv AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/reciprocal.py b/mindspore/ops/_op_impl/akg/reciprocal.py new file mode 100644 index 0000000000..9fd7cc40b4 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/reciprocal.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance 
with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Reciprocal op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Reciprocal", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _reciprocal_akg(): + """Reciprocal AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/reduce_max.py b/mindspore/ops/_op_impl/akg/reduce_max.py new file mode 100644 index 0000000000..b9db8ea83a --- /dev/null +++ b/mindspore/ops/_op_impl/akg/reduce_max.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ReduceMax op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ReduceMax", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "listInt" + }, + { + "name": "keep_dims", + "param_type": "required", + "type": "bool" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float16" + ], + "format": [ + "DefaultFormat", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float16" + ], + "format": [ + "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _reduce_max_akg(): + """ReduceMax AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/reduce_mean.py b/mindspore/ops/_op_impl/akg/reduce_mean.py new file mode 100644 index 0000000000..0a4ffdf221 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/reduce_mean.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""ReduceMean op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ReduceMean", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "listInt" + }, + { + "name": "keep_dims", + "param_type": "required", + "type": "bool" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _reduce_mean_akg(): + """ReduceMean AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/reduce_sum.py b/mindspore/ops/_op_impl/akg/reduce_sum.py new file mode 100644 index 0000000000..20d091ac76 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/reduce_sum.py @@ -0,0 +1,73 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""ReduceSum op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ReduceSum", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "listInt" + }, + { + "name": "keep_dims", + "param_type": "required", + "type": "bool" + }, + { + "name": "atomic_add", + "param_type": "optional", + "type": "str" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _reduce_sum_akg(): + """ReduceSum AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/relu.py b/mindspore/ops/_op_impl/akg/relu.py new file mode 100644 index 0000000000..b32725f885 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/relu.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""ReLU op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ReLU", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _relu_akg(): + """ReLU AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/relu_grad.py b/mindspore/ops/_op_impl/akg/relu_grad.py new file mode 100644 index 0000000000..c785b750fe --- /dev/null +++ b/mindspore/ops/_op_impl/akg/relu_grad.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""ReluGrad op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ReluGrad", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0" + ], + "name": "y_backprop" + }, + { + "index": 1, + "dtype": [ + "float16", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _relu_grad_akg(): + """ReluGrad AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/reshape.py b/mindspore/ops/_op_impl/akg/reshape.py new file mode 100644 index 0000000000..d200b66fa2 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/reshape.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Reshape op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Reshape", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "shape", + "param_type": "required", + "type": "listInt" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "tensor" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _reshape_akg(): + """Reshape AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/round.py b/mindspore/ops/_op_impl/akg/round.py new file mode 100644 index 0000000000..0625c3ceda --- /dev/null +++ b/mindspore/ops/_op_impl/akg/round.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Round op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Round", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _round_akg(): + """Round AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/rsqrt.py b/mindspore/ops/_op_impl/akg/rsqrt.py new file mode 100644 index 0000000000..9264864f91 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/rsqrt.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Rsqrt op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Rsqrt", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _rsqrt_akg(): + """Rsqrt AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/select.py b/mindspore/ops/_op_impl/akg/select.py new file mode 100644 index 0000000000..006c6a5444 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/select.py @@ -0,0 +1,76 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Select op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Select", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "bool", "bool", "bool", "bool", "bool", "bool" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "param_type": "required", + "name": "condition" + }, + { + "index": 1, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 2, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "int32", "float16", "int32", "float32", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _select_akg(): + """Select AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/softmax.py b/mindspore/ops/_op_impl/akg/softmax.py new file mode 100644 index 0000000000..a41c2aef36 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/softmax.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Softmax op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Softmax", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "listInt" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _softmax_akg(): + """Softmax AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/sparse_softmax_cross_entropy_with_logits.py b/mindspore/ops/_op_impl/akg/sparse_softmax_cross_entropy_with_logits.py new file mode 100644 index 0000000000..e9e828f312 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/sparse_softmax_cross_entropy_with_logits.py @@ -0,0 +1,73 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""SparseSoftmaxCrossEntropyWithLogits op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "SparseSoftmaxCrossEntropyWithLogits", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "is_grad", + "param_type": "optional", + "type": "bool" + }, + { + "name": "sens", + "param_type": "optional", + "type": "float" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "DefaultFormat" + ], + "name": "features" + }, + { + "index": 1, + "dtype": [ + "int32" + ], + "format": [ + "DefaultFormat" + ], + "name": "labels" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float32" + ], + "format": [ + "DefaultFormat" + ], + "name": "output" + } + ] +}""") +def _sparse_softmax_cross_entropy_with_logits_akg(): + """SparseSoftmaxCrossEntropyWithLogits AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/sqrt.py b/mindspore/ops/_op_impl/akg/sqrt.py new file mode 100644 index 0000000000..fcaa84b3d4 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/sqrt.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Sqrt op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Sqrt", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _sqrt_akg(): + """Sqrt AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/strided_slice.py b/mindspore/ops/_op_impl/akg/strided_slice.py new file mode 100644 index 0000000000..bdbd8dfc2f --- /dev/null +++ b/mindspore/ops/_op_impl/akg/strided_slice.py @@ -0,0 +1,93 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""StridedSlice op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "StridedSlice", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "begin", + "param_type": "required", + "type": "listInt" + }, + { + "name": "end", + "param_type": "required", + "type": "listInt" + }, + { + "name": "strides", + "param_type": "required", + "type": "listInt" + }, + { + "name": "begin_mask", + "param_type": "required", + "type": "int" + }, + { + "name": "end_mask", + "param_type": "required", + "type": "int" + }, + { + "name": "ellipsis_mask", + "param_type": "required", + "type": "int" + }, + { + "name": "new_axis_mask", + "param_type": "required", + "type": "int" + }, + { + "name": "shrink_axis_mask", + "param_type": "required", + "type": "int" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _strided_slice_akg(): + """StridedSlice AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/sub.py b/mindspore/ops/_op_impl/akg/sub.py new file mode 
100644 index 0000000000..846aa280bb --- /dev/null +++ b/mindspore/ops/_op_impl/akg/sub.py @@ -0,0 +1,72 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Sub op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Sub", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + }, + { + "index": 1, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "y" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "int32", "float16", "float32", "int32", "float16", "float32", + "int32", "float16", "float32", "int32", "float16", "float32" + ], + "format": [ + "DefaultFormat", 
"DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", + "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _sub_akg(): + """Sub AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/sum.py b/mindspore/ops/_op_impl/akg/sum.py new file mode 100644 index 0000000000..501b387b25 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/sum.py @@ -0,0 +1,68 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Sum op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Sum", + "imply_type": "AutoDiff", + "fusion_type": "COMMREDUCE", + "attr": [ + { + "name": "axis", + "param_type": "required", + "type": "listInt" + }, + { + "name": "keepdims", + "param_type": "required", + "type": "bool" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "param_type": "required", + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32", + "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", + "FRACTAL_NZ", "FRACTAL_NZ" + ], + "name": "output" + } + ] +}""") +def _sum_akg(): + """Sum AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/tile.py b/mindspore/ops/_op_impl/akg/tile.py new file mode 100644 index 0000000000..bd13978fe7 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/tile.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Tile op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "Tile", + "imply_type": "AutoDiff", + "fusion_type": "OPAQUE", + "attr": [ + { + "name": "multiples", + "param_type": "required", + "type": "listInt" + } + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "int32", "float16", "float32", "int32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _tile_akg(): + """Tile AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/akg/zeros_like.py b/mindspore/ops/_op_impl/akg/zeros_like.py new file mode 100644 index 0000000000..a02ece22d7 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/zeros_like.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""ZerosLike op""" +from mindspore.ops.op_info_register import op_info_register + + +@op_info_register("""{ + "op_name": "ZerosLike", + "imply_type": "AutoDiff", + "fusion_type": "ELEMWISE", + "attr": [ + + ], + "inputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "x" + } + ], + "outputs": [ + { + "index": 0, + "dtype": [ + "float16", "float32", "float16", "float32" + ], + "format": [ + "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" + ], + "name": "output" + } + ] +}""") +def _zeros_like_akg(): + """ZerosLike AutoDiff register""" + return diff --git a/mindspore/ops/_op_impl/tbe/__init__.py b/mindspore/ops/_op_impl/tbe/__init__.py index 3d1825b53e..631ec1bf44 100644 --- a/mindspore/ops/_op_impl/tbe/__init__.py +++ b/mindspore/ops/_op_impl/tbe/__init__.py @@ -15,6 +15,8 @@ """tbe ops""" from .abs import _abs_tbe +from .inplace_add import _inplace_add_tbe +from .inplace_sub import _inplace_sub_tbe from .abs_grad import _abs_grad_tbe from .acos import _acos_tbe from .acos_grad import _acos_grad_tbe @@ -24,9 +26,15 @@ from .adam_apply_one_with_decay import _adam_apply_one_with_decay_tbe from .add import _add_tbe from .apply_centered_rms_prop import _apply_centered_rms_prop_tbe from .add_n import _add_n_tbe +from .accumulate_n_v2 import _accumulate_n_v2_tbe from .apply_ftrl import _apply_ftrl_tbe from .apply_momentum import _apply_momentum_tbe from .apply_adam import _apply_adam_tbe +from .apply_ada_max import _apply_ada_max_tbe +from .apply_adadelta import _apply_adadelta_tbe +from .apply_adagrad import _apply_adagrad_tbe +from .apply_adagrad_v2 import _apply_adagrad_v2_tbe +from .approximate_equal import _approximate_equal_tbe from .adam_apply_one import _adam_apply_one_tbe from .assign import _assign_tbe from .assign_add import _assign_add_tbe @@ -83,6 
+91,7 @@ from .strided_slice_d import _strided_slice_d_tbe from .strided_slice_grad_d import _strided_slice_grad_d_tbe from .split_d import _split_d_tbe from .exp import _exp_tbe +from .expm1 import _expm1_tbe from .elu import _elu_tbe from .elu_grad import _elu_grad_tbe from .div import _div_tbe @@ -177,6 +186,7 @@ from .space_to_batch import _space_to_batch_tbe from .depth_to_space import _depth_to_space_tbe from .space_to_depth import _space_to_depth_tbe from .floor import _floor_tbe +from .ceil import _ceil_tbe from .log1p import _log1p_tbe from .resize_bilinear import _resize_bilinear_tbe from .resize_bilinear_grad import _resize_bilinear_grad_tbe @@ -193,6 +203,7 @@ from .sgd import _sgd_tbe from .lars_update import _lars_update_tbe from .arg_min import _arg_min_tbe from .bn_training_update_v2 import _bn_training_update_v2_tbe +from .bn_training_update_v3 import _bn_training_update_v3_tbe from .square_sum_all import _square_sum_all_tbe from .pack import _pack_tbe from .unpack import _unpack_tbe @@ -214,9 +225,9 @@ from .bessel_i0e import _bessel_i0e_tbe from .bessel_i1e import _bessel_i1e_tbe from .batch_to_space_nd import _batch_to_space_nd_tbe from .space_to_batch_nd import _space_to_batch_nd_tbe -from .bitwise_and import bitwise_and_op_info -from .bitwise_or import bitwise_or_op_info -from .bitwise_xor import bitwise_xor_op_info +from .bitwise_and import _bitwise_and_tbe +from .bitwise_or import _bitwise_or_tbe +from .bitwise_xor import _bitwise_xor_tbe from .reduce_all import _reduce_all_tbe from .sparse_apply_adagrad import _sparse_apply_adagrad_tbe from .unsorted_segment_min import _unsorted_segment_min_tbe @@ -224,10 +235,35 @@ from .asin import _asin_tbe from .asin_grad import _asin_grad_tbe from .asinh import _asinh_tbe from .asinh_grad import _asinh_grad_tbe +from .div_no_nan import _div_no_nan_tbe from .atan import _atan_tbe from .atan_grad import _atan_grad_tbe from .atanh import _atanh_tbe +from .cosh import _cosh_tbe +from .sinh import _sinh_tbe 
+from .inv import _inv_tbe +from .inv_grad import _inv_grad_tbe +from .invert import _invert_tbe from .basic_lstm_cell import _basic_lstm_cell_tbe from .basic_lstm_cell_c_state_grad import _basic_lstm_cell_c_state_grad_tbe from .basic_lstm_cell_weight_grad import _basic_lstm_cell_weight_grad_tbe from .basic_lstm_cell_input_grad import _basic_lstm_cell_input_grad_tbe +from .confusion_matrix import _confusion_matrix_tbe +from .broadcast_to import _broadcast_to_tbe +from .strided_read import _strided_read_tbe +from .strided_write import _strided_write_tbe +from .range import _range_tbe +from .fused_mul_add_n_l2loss import _fused_mul_add_n_l2loss_tbe +from .fused_mul_apply_momentum_extern import _fused_mul_apply_momentum_extern_tbe +from .lamb_next_right import _lamb_next_right_tbe +from .sparse_gather_v2 import _sparse_gather_v2_tbe +from .data_format_dim_map import _data_format_dim_map_tbe +from .histogram_fixed_width import _histogram_fixed_width_tbe +from .tensor_scatter_update import _tensor_scatter_update_tbe +from .inplace_update import _inplace_update_tbe +from .splitv import _split_v_tbe +from .in_top_k import _in_top_k_tbe +from .lin_space import _lin_space_tbe +from .matrix_diag import _matrix_diag_tbe +from .matrix_diag_part import _matrix_diag_part_tbe +from .matrix_set_diag import _matrix_set_diag_tbe diff --git a/mindspore/ops/_op_impl/tbe/abs.py b/mindspore/ops/_op_impl/tbe/abs.py index 30a75812bd..66c1d409fb 100644 --- a/mindspore/ops/_op_impl/tbe/abs.py +++ b/mindspore/ops/_op_impl/tbe/abs.py @@ -26,12 +26,9 @@ abs_op_info = TBERegOp("Abs") \ .op_pattern("formatAgnostic") \ .input(0, "x", None, "required", None) \ .output(0, "y", True, "required", "all") \ - .dtype_format(DataType.F16_Default, DataType.F16_Default) \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ - .dtype_format(DataType.F32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ - .dtype_format(DataType.I32_Default, DataType.I32_Default) \ - 
.dtype_format(DataType.I32_5HD, DataType.I32_5HD) \ + .dtype_format(DataType.F16_None, DataType.F16_None) \ + .dtype_format(DataType.F32_None, DataType.F32_None) \ + .dtype_format(DataType.I32_None, DataType.I32_None) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/abs_grad.py b/mindspore/ops/_op_impl/tbe/abs_grad.py index ba630f6570..3e7ac70d80 100644 --- a/mindspore/ops/_op_impl/tbe/abs_grad.py +++ b/mindspore/ops/_op_impl/tbe/abs_grad.py @@ -23,7 +23,6 @@ abs_grad_op_info = TBERegOp("AbsGrad") \ .compute_cost(10) \ .kernel_name("abs_grad") \ .partial_flag(True) \ - .op_pattern("formatAgnostic") \ .input(0, "y", None, "required", None) \ .input(1, "dy", None, "required", None) \ .output(0, "z", False, "required", "all") \ diff --git a/mindspore/ops/_op_impl/tbe/accumulate_n_v2.py b/mindspore/ops/_op_impl/tbe/accumulate_n_v2.py new file mode 100644 index 0000000000..fdd72a9494 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/accumulate_n_v2.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""AccumulateNV2 op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +accumulate_n_v2_op_info = TBERegOp("AccumulateNV2") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("accumulate_n_v2.so") \ + .compute_cost(10) \ + .kernel_name("accumulate_n_v2") \ + .partial_flag(True) \ + .attr("n", "required", "int", "all") \ + .input(0, "x", False, "dynamic", "all") \ + .output(0, "y", False, "required", "all") \ + .op_pattern("broadcast") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default) \ + .get_op_info() + + +@op_info_register(accumulate_n_v2_op_info) +def _accumulate_n_v2_tbe(): + """AccumulateNV2 TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/acos.py b/mindspore/ops/_op_impl/tbe/acos.py index 94dd8ba2bd..98516f4496 100644 --- a/mindspore/ops/_op_impl/tbe/acos.py +++ b/mindspore/ops/_op_impl/tbe/acos.py @@ -26,7 +26,9 @@ acos_op_info = TBERegOp("ACos") \ .op_pattern("formatAgnostic") \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/acosh.py b/mindspore/ops/_op_impl/tbe/acosh.py index 6be222f115..0bf8755bc0 100644 --- a/mindspore/ops/_op_impl/tbe/acosh.py +++ b/mindspore/ops/_op_impl/tbe/acosh.py @@ -26,7 +26,9 @@ acosh_op_info = TBERegOp("Acosh") \ .op_pattern("formatAgnostic") \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, 
"required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/add.py b/mindspore/ops/_op_impl/tbe/add.py index 63e1efb1c6..d3db3de0ad 100644 --- a/mindspore/ops/_op_impl/tbe/add.py +++ b/mindspore/ops/_op_impl/tbe/add.py @@ -26,6 +26,7 @@ add_op_info = TBERegOp("Add") \ .input(0, "x1", False, "required", "all") \ .input(1, "x2", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ .dtype_format(DataType.I32_5HD, DataType.I32_5HD, DataType.I32_5HD) \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ diff --git a/mindspore/ops/_op_impl/tbe/add_n.py b/mindspore/ops/_op_impl/tbe/add_n.py index 3e8a6c0016..1c42b4bb2d 100644 --- a/mindspore/ops/_op_impl/tbe/add_n.py +++ b/mindspore/ops/_op_impl/tbe/add_n.py @@ -26,17 +26,10 @@ add_n_op_info = TBERegOp("AddN") \ .attr("n", "required", "int", "all") \ .input(0, "x", False, "dynamic", "all") \ .output(0, "y", False, "required", "all") \ - .dtype_format(DataType.F16_Default, DataType.F16_Default) \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ - .dtype_format(DataType.F16_FracZ, DataType.F16_FracZ) \ - .dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ) \ - .dtype_format(DataType.F32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ - .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ) \ - .dtype_format(DataType.F32_FracNZ, DataType.F32_FracNZ) \ - .dtype_format(DataType.I32_Default, DataType.I32_Default) \ - .dtype_format(DataType.I32_5HD, DataType.I32_5HD) \ - .dtype_format(DataType.I32_FracZ, DataType.I32_FracZ) \ + .op_pattern("broadcast") \ + 
.dtype_format(DataType.F16_None, DataType.F16_None) \ + .dtype_format(DataType.F32_None, DataType.F32_None) \ + .dtype_format(DataType.I32_None, DataType.I32_None) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/apply_ada_max.py b/mindspore/ops/_op_impl/tbe/apply_ada_max.py new file mode 100644 index 0000000000..8394623bbf --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/apply_ada_max.py @@ -0,0 +1,68 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""ApplyAdaMaxD op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +apply_ada_max_d_op_info = TBERegOp("ApplyAdaMax") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("apply_ada_max_d.so") \ + .compute_cost(10) \ + .kernel_name("apply_ada_max_d") \ + .partial_flag(True) \ + .input(0, "var", False, "required", "all") \ + .input(1, "m", False, "required", "all") \ + .input(2, "v", False, "required", "all") \ + .input(3, "beta1_power", False, "required", "all") \ + .input(4, "lr", False, "required", "all") \ + .input(5, "beta1", False, "required", "all") \ + .input(6, "beta2", False, "required", "all") \ + .input(7, "epsilon", False, "required", "all") \ + .input(8, "grad", False, "required", "all") \ + .output(0, "var", False, "required", "all") \ + .output(1, "m", False, "required", "all") \ + .output(2, "v", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, + DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, + DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ) \ + .dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, + DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, 
DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ + .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ) \ + .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .get_op_info() + + +@op_info_register(apply_ada_max_d_op_info) +def _apply_ada_max_tbe(): + """ApplyAdaMaxD TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/apply_adadelta.py b/mindspore/ops/_op_impl/tbe/apply_adadelta.py new file mode 100644 index 0000000000..a5c76b62cc --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/apply_adadelta.py @@ -0,0 +1,66 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ApplyAdadeltaD op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +apply_adadelta_d_op_info = TBERegOp("ApplyAdadelta") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("apply_adadelta_d.so") \ + .compute_cost(10) \ + .kernel_name("apply_adadelta_d") \ + .partial_flag(True) \ + .input(0, "var", False, "required", "all") \ + .input(1, "accum", False, "required", "all") \ + .input(2, "accum_update", False, "required", "all") \ + .input(3, "lr", False, "required", "all") \ + .input(4, "rho", False, "required", "all") \ + .input(5, "epsilon", False, "required", "all") \ + .input(6, "grad", False, "required", "all") \ + .output(0, "var", False, "required", "all") \ + .output(1, "accum", False, "required", "all") \ + .output(2, "accum_update", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default, DataType.F16_5HD, DataType.F16_5HD, + DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default, DataType.F16_FracZ, DataType.F16_FracZ, + DataType.F16_FracZ, DataType.F16_FracZ) \ + .dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, + 
DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default, DataType.F32_5HD, DataType.F32_5HD, + DataType.F32_5HD, DataType.F32_5HD) \ + .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default, DataType.F32_FracZ, DataType.F32_FracZ, + DataType.F32_FracZ, DataType.F32_FracZ) \ + .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, + DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default) \ + .get_op_info() + + +@op_info_register(apply_adadelta_d_op_info) +def _apply_adadelta_tbe(): + """ApplyAdadeltaD TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/apply_adagrad.py b/mindspore/ops/_op_impl/tbe/apply_adagrad.py new file mode 100644 index 0000000000..6b9975a479 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/apply_adagrad.py @@ -0,0 +1,55 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ApplyAdagradD op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +apply_adagrad_d_op_info = TBERegOp("ApplyAdagrad") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("apply_adagrad_d.so") \ + .compute_cost(10) \ + .kernel_name("apply_adagrad_d") \ + .partial_flag(True) \ + .attr("update_slots", "optional", "bool", "true,false", "false") \ + .input(0, "var", False, "required", "all") \ + .input(1, "accum", False, "required", "all") \ + .input(2, "lr", False, "required", "all") \ + .input(3, "grad", False, "required", "all") \ + .output(0, "var", False, "required", "all") \ + .output(1, "accum", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_5HD, + DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_FracZ, + DataType.F16_FracZ, DataType.F16_FracZ) \ + .dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_C1HWNCoC0, + DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_5HD, + DataType.F32_5HD, DataType.F32_5HD) \ + .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, 
DataType.F32_FracZ, + DataType.F32_FracZ, DataType.F32_FracZ) \ + .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_C1HWNCoC0, + DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default) \ + .get_op_info() + + +@op_info_register(apply_adagrad_d_op_info) +def _apply_adagrad_tbe(): + """ApplyAdagradD TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/apply_adagrad_v2.py b/mindspore/ops/_op_impl/tbe/apply_adagrad_v2.py new file mode 100644 index 0000000000..fbaf51e643 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/apply_adagrad_v2.py @@ -0,0 +1,56 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""ApplyAdagradV2D op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +apply_adagrad_v2_d_op_info = TBERegOp("ApplyAdagradV2") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("apply_adagradv2_d.so") \ + .compute_cost(10) \ + .kernel_name("apply_adagradv2_d") \ + .partial_flag(True) \ + .attr("epsilon", "required", "float", "all") \ + .attr("update_slots", "optional", "bool", "true,false", "false") \ + .input(0, "var", False, "required", "all") \ + .input(1, "accum", False, "required", "all") \ + .input(2, "lr", False, "required", "all") \ + .input(3, "grad", False, "required", "all") \ + .output(0, "var", False, "required", "all") \ + .output(1, "accum", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_5HD, + DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_FracZ, + DataType.F16_FracZ, DataType.F16_FracZ) \ + .dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_C1HWNCoC0, + DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_5HD, + DataType.F32_5HD, DataType.F32_5HD) \ + .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_FracZ, + DataType.F32_FracZ, DataType.F32_FracZ) \ + .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_C1HWNCoC0, + DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_Default, 
DataType.F32_Default) \ + .get_op_info() + + +@op_info_register(apply_adagrad_v2_d_op_info) +def _apply_adagrad_v2_tbe(): + """ApplyAdagradV2D TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/apply_ftrl.py b/mindspore/ops/_op_impl/tbe/apply_ftrl.py index e37648191e..56c6bf3612 100644 --- a/mindspore/ops/_op_impl/tbe/apply_ftrl.py +++ b/mindspore/ops/_op_impl/tbe/apply_ftrl.py @@ -32,30 +32,32 @@ apply_ftrl_op_info = TBERegOp("ApplyFtrl") \ .input(6, "l2", False, "required", "all") \ .input(7, "lr_power", False, "required", "all") \ .output(0, "var", False, "required", "all") \ + .output(1, "accum", False, "required", "all") \ + .output(2, "linear", False, "required", "all") \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, - DataType.F16_5HD) \ + DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ .dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, - DataType.F16_FracZ) \ + DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ) \ .dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, - DataType.F16_C1HWNCoC0) \ + DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, - DataType.F16_Default) \ + DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_5HD) 
\ + DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_FracZ) \ + DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ) \ .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_C1HWNCoC0) \ + DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/apply_momentum.py b/mindspore/ops/_op_impl/tbe/apply_momentum.py index 42ce9d0e41..deb8f0d387 100644 --- a/mindspore/ops/_op_impl/tbe/apply_momentum.py +++ b/mindspore/ops/_op_impl/tbe/apply_momentum.py @@ -30,22 +30,23 @@ apply_momentum_op_info = TBERegOp("ApplyMomentum") \ .input(3, "grad", False, "required", "all") \ .input(4, "momentum", False, "required", "all") \ .output(0, "var", False, "required", "all") \ + .output(1, "accum", False, "required", "all") \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, - DataType.F16_Default, DataType.F16_Default) \ + DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_5HD, - DataType.F16_Default, DataType.F16_5HD) \ + DataType.F16_Default, DataType.F16_5HD, DataType.F16_5HD) \ .dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_C1HWNCoC0, - DataType.F16_Default, 
DataType.F16_C1HWNCoC0) \ + DataType.F16_Default, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \ .dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_FracZ, - DataType.F16_Default, DataType.F16_FracZ) \ + DataType.F16_Default, DataType.F16_FracZ, DataType.F16_FracZ) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_5HD, - DataType.F32_Default, DataType.F32_5HD) \ + DataType.F32_Default, DataType.F32_5HD, DataType.F32_5HD) \ .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_C1HWNCoC0, - DataType.F32_Default, DataType.F32_C1HWNCoC0) \ + DataType.F32_Default, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \ .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_FracZ, - DataType.F32_Default, DataType.F32_FracZ) \ + DataType.F32_Default, DataType.F32_FracZ, DataType.F32_FracZ) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/apply_proximal_adagrad.py b/mindspore/ops/_op_impl/tbe/apply_proximal_adagrad.py index 9099c6e24f..c9b8adf4f4 100644 --- a/mindspore/ops/_op_impl/tbe/apply_proximal_adagrad.py +++ b/mindspore/ops/_op_impl/tbe/apply_proximal_adagrad.py @@ -13,15 +13,15 @@ # limitations under the License. 
# ============================================================================ -"""ApplyProximalAdagrad op""" +"""ApplyProximalAdagradD op""" from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType -apply_proximal_adagrad_op_info = TBERegOp("ApplyProximalAdagrad") \ +apply_proximal_adagrad_d_op_info = TBERegOp("ApplyProximalAdagrad") \ .fusion_type("OPAQUE") \ .async_flag(False) \ - .binfile_name("apply_proximal_adagrad.so") \ + .binfile_name("apply_proximal_adagrad_d.so") \ .compute_cost(10) \ - .kernel_name("apply_proximal_adagrad") \ + .kernel_name("apply_proximal_adagrad_d") \ .partial_flag(True) \ .attr("use_locking", "optional", "bool", "true,false", "false") \ .input(0, "var", False, "required", "all") \ @@ -31,26 +31,27 @@ apply_proximal_adagrad_op_info = TBERegOp("ApplyProximalAdagrad") \ .input(4, "l2", False, "required", "all") \ .input(5, "grad", False, "required", "all") \ .output(0, "var", False, "required", "all") \ + .output(1, "accum", False, "required", "all") \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_Default, - DataType.F16_Default, DataType.F16_5HD, DataType.F16_5HD) \ + DataType.F16_Default, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ .dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_Default, - DataType.F16_Default, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \ + DataType.F16_Default, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, - DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_Default, - DataType.F16_Default, DataType.F16_FracZ, DataType.F16_FracZ) \ + DataType.F16_Default, 
DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_5HD, DataType.F32_5HD) \ + DataType.F32_Default, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \ + DataType.F32_Default, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_FracZ, DataType.F32_FracZ) \ + DataType.F32_Default, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ) \ .get_op_info() -@op_info_register(apply_proximal_adagrad_op_info) +@op_info_register(apply_proximal_adagrad_d_op_info) def _apply_proximal_adagrad(): - """ApplyProximalAdagrad TBE register""" + """ApplyProximalAdagradD TBE register""" return diff --git a/mindspore/ops/_op_impl/tbe/approximate_equal.py b/mindspore/ops/_op_impl/tbe/approximate_equal.py new file mode 100644 index 0000000000..62b8a0c16d --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/approximate_equal.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ApproximateEqual op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +approximate_equal_op_info = TBERegOp("ApproximateEqual") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("approximate_equal.so") \ + .compute_cost(10) \ + .kernel_name("approximate_equal") \ + .partial_flag(True) \ + .op_pattern("broadcast") \ + .attr("tolerance", "optional", "float", "all") \ + .input(0, "x1", False, "required", "all") \ + .input(1, "x2", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.BOOL_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.BOOL_5HD) \ + .get_op_info() + + +@op_info_register(approximate_equal_op_info) +def _approximate_equal_tbe(): + """ApproximateEqual TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/atan.py b/mindspore/ops/_op_impl/tbe/atan.py index 9562c573e3..293839eaf0 100644 --- a/mindspore/ops/_op_impl/tbe/atan.py +++ b/mindspore/ops/_op_impl/tbe/atan.py @@ -26,7 +26,9 @@ atan_op_info = TBERegOp("Atan") \ .op_pattern("formatAgnostic") \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, 
DataType.F16_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/atan2.py b/mindspore/ops/_op_impl/tbe/atan2.py index 30bea25d70..26ffdcb59a 100644 --- a/mindspore/ops/_op_impl/tbe/atan2.py +++ b/mindspore/ops/_op_impl/tbe/atan2.py @@ -27,7 +27,9 @@ atan2_op_info = TBERegOp("Atan2") \ .input(0, "x1", False, "required", "all") \ .input(1, "x2", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/atanh.py b/mindspore/ops/_op_impl/tbe/atanh.py index d88e0d6105..f60b01967c 100644 --- a/mindspore/ops/_op_impl/tbe/atanh.py +++ b/mindspore/ops/_op_impl/tbe/atanh.py @@ -26,7 +26,9 @@ atanh_op_info = TBERegOp("Atanh") \ .op_pattern("formatAgnostic") \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/atomic_addr_clean.py b/mindspore/ops/_op_impl/tbe/atomic_addr_clean.py index e707a1f26f..98662fed91 100644 --- a/mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +++ b/mindspore/ops/_op_impl/tbe/atomic_addr_clean.py @@ -23,7 +23,7 @@ atomic_addr_clean_op_info = TBERegOp("AtomicAddrClean") \ .compute_cost(10) \ .kernel_name("atomic_addr_clean") \ .partial_flag(True) \ - .attr("automic_add_mem_size", "required", "listInt", "all") \ + .attr("automic_add_mem_size", 
"required", "listUInt64", "all") \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/batch_matmul.py b/mindspore/ops/_op_impl/tbe/batch_matmul.py index 4efcf8031c..02f2dd5880 100644 --- a/mindspore/ops/_op_impl/tbe/batch_matmul.py +++ b/mindspore/ops/_op_impl/tbe/batch_matmul.py @@ -29,6 +29,7 @@ batch_matmul_op_info = TBERegOp("BatchMatMul") \ .input(1, "x2", False, "required", "all") \ .input(2, "bias", False, "optional", "all") \ .output(0, "y", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_Default, DataType.F16_FracNZ) \ diff --git a/mindspore/ops/_op_impl/tbe/bias_add.py b/mindspore/ops/_op_impl/tbe/bias_add.py index 24607af141..5ab1916299 100644 --- a/mindspore/ops/_op_impl/tbe/bias_add.py +++ b/mindspore/ops/_op_impl/tbe/bias_add.py @@ -27,6 +27,7 @@ bias_add_grad_op_info = TBERegOp("BiasAdd") \ .input(0, "x", False, "required", "all") \ .input(1, "bias", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ diff --git a/mindspore/ops/_op_impl/tbe/bias_add_grad.py b/mindspore/ops/_op_impl/tbe/bias_add_grad.py index 557dececb7..e59c197bce 100644 --- a/mindspore/ops/_op_impl/tbe/bias_add_grad.py +++ b/mindspore/ops/_op_impl/tbe/bias_add_grad.py @@ -26,6 +26,8 @@ bias_add_grad_op_info = TBERegOp("BiasAddGrad") \ .attr("data_format", "required", "str", "all") \ .input(0, "output_backprop", False, "required", "all") \ .output(0, "output", False, "required", "all") \ + 
.dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F16_FracNZ, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_FracNZ, DataType.F32_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/bn_training_reduce.py b/mindspore/ops/_op_impl/tbe/bn_training_reduce.py index e19d4b65ff..f33cba2110 100644 --- a/mindspore/ops/_op_impl/tbe/bn_training_reduce.py +++ b/mindspore/ops/_op_impl/tbe/bn_training_reduce.py @@ -26,6 +26,7 @@ bn_training_reduce_op_info = TBERegOp("BNTrainingReduce") \ .input(0, "x", False, "required", "all", reshape_type="NC") \ .output(0, "sum", False, "required", "all") \ .output(1, "square_sum", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.F16_5HD, DataType.F32_5HD, DataType.F32_5HD) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/bn_training_reduce_grad.py b/mindspore/ops/_op_impl/tbe/bn_training_reduce_grad.py index 66dc55ab10..89736a0097 100644 --- a/mindspore/ops/_op_impl/tbe/bn_training_reduce_grad.py +++ b/mindspore/ops/_op_impl/tbe/bn_training_reduce_grad.py @@ -32,6 +32,7 @@ bn_training_reduce_grad_op_info = TBERegOp("BNTrainingReduceGrad") \ .input(5, "batch_mean", False, "required", "all") \ .input(6, "batch_variance", False, "required", "all") \ .output(0, "y", False, "required", "all", reshape_type="NC") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.F16_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F16_5HD) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, diff --git a/mindspore/ops/_op_impl/tbe/bn_training_update_grad.py b/mindspore/ops/_op_impl/tbe/bn_training_update_grad.py index 5098923281..1aa822a3c1 100644 --- a/mindspore/ops/_op_impl/tbe/bn_training_update_grad.py +++ 
b/mindspore/ops/_op_impl/tbe/bn_training_update_grad.py @@ -30,6 +30,7 @@ bn_training_update_grad_op_info = TBERegOp("BNTrainingUpdateGrad") \ .input(3, "batch_variance", False, "required", "all") \ .output(0, "diff_scale", False, "required", "all") \ .output(1, "diff_offset", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, diff --git a/mindspore/ops/_op_impl/tbe/bn_training_update_v2.py b/mindspore/ops/_op_impl/tbe/bn_training_update_v2.py index 03a51664e8..a54d91a483 100644 --- a/mindspore/ops/_op_impl/tbe/bn_training_update_v2.py +++ b/mindspore/ops/_op_impl/tbe/bn_training_update_v2.py @@ -32,6 +32,7 @@ bn_training_update_v2_op_info = TBERegOp("BNTrainingUpdateV2") \ .output(0, "y", False, "required", "all", reshape_type="NC") \ .output(1, "batch_mean", False, "required", "all") \ .output(2, "batch_variance", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.F16_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F16_5HD, DataType.F32_5HD, DataType.F32_5HD) \ diff --git a/mindspore/ops/_op_impl/tbe/bn_training_update_v3.py b/mindspore/ops/_op_impl/tbe/bn_training_update_v3.py new file mode 100644 index 0000000000..6d69c6e4be --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/bn_training_update_v3.py @@ -0,0 +1,51 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""BNTrainingUpdateV3 op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +bn_training_update_v3_op_info = TBERegOp("BNTrainingUpdateV3") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("bn_training_update_v3.so") \ + .compute_cost(10) \ + .kernel_name("bn_training_update_v3") \ + .partial_flag(True) \ + .attr("epsilon", "required", "float", "all") \ + .input(0, "x", False, "required", "all", reshape_type="NC") \ + .input(1, "sum", False, "required", "all") \ + .input(2, "square_sum", False, "required", "all") \ + .input(3, "scale", False, "required", "all") \ + .input(4, "offset", False, "required", "all") \ + .output(0, "y", False, "required", "all", reshape_type="NC") \ + .output(1, "batch_mean", False, "required", "all") \ + .output(2, "batch_variance", False, "required", "all") \ + .output(3, "reserve_1", False, "required", "all") \ + .output(4, "reserve_2", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F32_5HD, DataType.F32_5HD, + DataType.F32_5HD, DataType.F32_5HD, DataType.F16_5HD, + DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, + DataType.F32_5HD) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, + DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, + DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, + DataType.F32_5HD) \ + .get_op_info() + + +@op_info_register(bn_training_update_v3_op_info) +def _bn_training_update_v3_tbe(): + """BNTrainingUpdateV3 
TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/broadcast_to.py b/mindspore/ops/_op_impl/tbe/broadcast_to.py new file mode 100644 index 0000000000..5d4b642017 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/broadcast_to.py @@ -0,0 +1,40 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""BroadcastTo op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +broadcast_to_op_info = TBERegOp("BroadcastTo") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("broadcast_to_d.so") \ + .compute_cost(10) \ + .kernel_name("broadcast_to_d") \ + .partial_flag(True) \ + .attr("shape", "required", "listInt", "all") \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.U8_Default, DataType.U16_Default) \ + .get_op_info() + + +@op_info_register(broadcast_to_op_info) +def _broadcast_to_tbe(): + """BroadcastTo TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/cast.py b/mindspore/ops/_op_impl/tbe/cast.py index 07e14139da..0a809e28a7 100644 --- 
a/mindspore/ops/_op_impl/tbe/cast.py +++ b/mindspore/ops/_op_impl/tbe/cast.py @@ -26,32 +26,27 @@ cast_op_info = TBERegOp("Cast") \ .attr("dst_type", "required", "int", "all") \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, "required", "all") \ - .dtype_format(DataType.BOOL_Default, DataType.F16_Default) \ - .dtype_format(DataType.BOOL_Default, DataType.U8_Default) \ - .dtype_format(DataType.BOOL_Default, DataType.F32_Default) \ - .dtype_format(DataType.BOOL_Default, DataType.I32_Default) \ - .dtype_format(DataType.I8_Default, DataType.F16_Default) \ - .dtype_format(DataType.I8_Default, DataType.F32_Default) \ - .dtype_format(DataType.I8_Default, DataType.I32_Default) \ - .dtype_format(DataType.U8_Default, DataType.F16_Default) \ - .dtype_format(DataType.U8_Default, DataType.F32_Default) \ - .dtype_format(DataType.U8_Default, DataType.I32_Default) \ - .dtype_format(DataType.I32_Default, DataType.BOOL_Default) \ - .dtype_format(DataType.I32_Default, DataType.F16_Default) \ - .dtype_format(DataType.I32_Default, DataType.F32_Default) \ - .dtype_format(DataType.I32_Default, DataType.I8_Default) \ - .dtype_format(DataType.I32_Default, DataType.U8_Default) \ - .dtype_format(DataType.F16_Default, DataType.U8_Default) \ - .dtype_format(DataType.F16_Default, DataType.F32_Default) \ - .dtype_format(DataType.F16_Default, DataType.I32_Default) \ - .dtype_format(DataType.F16_5HD, DataType.F32_5HD) \ - .dtype_format(DataType.F16_FracZ, DataType.F32_FracZ) \ - .dtype_format(DataType.F16_FracNZ, DataType.F32_FracNZ) \ - .dtype_format(DataType.F32_5HD, DataType.F16_5HD) \ - .dtype_format(DataType.F32_FracZ, DataType.F16_FracZ) \ - .dtype_format(DataType.F32_FracNZ, DataType.F16_FracNZ) \ - .dtype_format(DataType.F32_Default, DataType.F16_Default) \ - .dtype_format(DataType.F32_Default, DataType.I32_Default) \ + .op_pattern("formatAgnostic") \ + .dtype_format(DataType.BOOL_None, DataType.F16_None) \ + .dtype_format(DataType.BOOL_None, DataType.U8_None) \ + 
.dtype_format(DataType.BOOL_None, DataType.F32_None) \ + .dtype_format(DataType.BOOL_None, DataType.I32_None) \ + .dtype_format(DataType.I8_None, DataType.F16_None) \ + .dtype_format(DataType.I8_None, DataType.F32_None) \ + .dtype_format(DataType.I8_None, DataType.I32_None) \ + .dtype_format(DataType.U8_None, DataType.F16_None) \ + .dtype_format(DataType.U8_None, DataType.F32_None) \ + .dtype_format(DataType.U8_None, DataType.I32_None) \ + .dtype_format(DataType.I32_None, DataType.BOOL_None) \ + .dtype_format(DataType.I32_None, DataType.F16_None) \ + .dtype_format(DataType.I32_None, DataType.F32_None) \ + .dtype_format(DataType.I32_None, DataType.I8_None) \ + .dtype_format(DataType.I32_None, DataType.U8_None) \ + .dtype_format(DataType.F16_None, DataType.U8_None) \ + .dtype_format(DataType.F16_None, DataType.F32_None) \ + .dtype_format(DataType.F16_None, DataType.I32_None) \ + .dtype_format(DataType.F32_None, DataType.F16_None) \ + .dtype_format(DataType.F32_None, DataType.I32_None) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/ceil.py b/mindspore/ops/_op_impl/tbe/ceil.py new file mode 100644 index 0000000000..d9a127603f --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/ceil.py @@ -0,0 +1,36 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Ceil op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +ceil_op_info = TBERegOp("Ceil") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("ceil.so") \ + .compute_cost(10) \ + .kernel_name("ceil") \ + .partial_flag(True) \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .get_op_info() + + +@op_info_register(ceil_op_info) +def _ceil_tbe(): + """Ceil TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/concat.py b/mindspore/ops/_op_impl/tbe/concat.py index 56807b15fc..0bf636016f 100644 --- a/mindspore/ops/_op_impl/tbe/concat.py +++ b/mindspore/ops/_op_impl/tbe/concat.py @@ -26,6 +26,7 @@ concat_op_info = TBERegOp("Concat") \ .attr("axis", "required", "int", "all") \ .input(0, "input_values", False, "dynamic", "all") \ .output(0, "output_data", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \ .dtype_format(DataType.BOOL_5HD, DataType.BOOL_5HD) \ .dtype_format(DataType.I8_Default, DataType.I8_Default) \ diff --git a/mindspore/ops/_op_impl/tbe/confusion_matrix.py b/mindspore/ops/_op_impl/tbe/confusion_matrix.py new file mode 100644 index 0000000000..28dd17f23f --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/confusion_matrix.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ConfusionMatrix op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +confusion_matrix_op_info = TBERegOp("ConfusionMatrix") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("confusion_matrix.so") \ + .compute_cost(10) \ + .kernel_name("confusion_matrix") \ + .partial_flag(True) \ + .attr("num_classes", "required", "int", "all") \ + .attr("dtype", "required", "str", "all") \ + .input(0, "labels", False, "required", "all") \ + .input(1, "predictions", False, "required", "all") \ + .input(2, "weights", False, "optional", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.U8_Default, DataType.U8_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.I32_Default, DataType.I32_Default) \ + 
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.U8_Default, DataType.U8_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.U8_Default, DataType.U8_Default) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.U8_Default, DataType.U8_Default) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.U8_Default, DataType.U8_Default) \ + .get_op_info() + + +@op_info_register(confusion_matrix_op_info) +def _confusion_matrix_tbe(): + """ConfusionMatrix TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/conv2d.py b/mindspore/ops/_op_impl/tbe/conv2d.py 
index 425521901d..a2879d521a 100644 --- a/mindspore/ops/_op_impl/tbe/conv2d.py +++ b/mindspore/ops/_op_impl/tbe/conv2d.py @@ -23,6 +23,7 @@ conv2d_op_info = TBERegOp("Conv2D") \ .compute_cost(10) \ .kernel_name("conv2d") \ .partial_flag(True) \ + .op_pattern("dynamicFormat") \ .attr("stride", "required", "listInt", "all") \ .attr("pad_list", "required", "listInt", "all") \ .attr("dilation", "required", "listInt", "all") \ @@ -32,8 +33,7 @@ conv2d_op_info = TBERegOp("Conv2D") \ .input(2, "bias", False, "optional", "all") \ .input(3, "offset_w", False, "optional", "all") \ .output(0, "y", True, "required", "all") \ - .dtype_format(DataType.F16_5HD, DataType.F16_FracZ, DataType.F16_Default, DataType.I8_Default, - DataType.F16_5HD) \ + .dtype_format(DataType.F16_None, DataType.F16_None, DataType.F16_None, DataType.I8_None, DataType.F16_None) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/cos.py b/mindspore/ops/_op_impl/tbe/cos.py index ecb1062100..3acb0c2a7e 100644 --- a/mindspore/ops/_op_impl/tbe/cos.py +++ b/mindspore/ops/_op_impl/tbe/cos.py @@ -26,7 +26,9 @@ cos_op_info = TBERegOp("Cos") \ .op_pattern("formatAgnostic") \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/cosh.py b/mindspore/ops/_op_impl/tbe/cosh.py new file mode 100644 index 0000000000..75d48293e9 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/cosh.py @@ -0,0 +1,37 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Cosh op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +cosh_op_info = TBERegOp("Cosh") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("cosh.so") \ + .compute_cost(10) \ + .kernel_name("cosh") \ + .partial_flag(True) \ + .op_pattern("formatAgnostic") \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", True, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ + .get_op_info() + + +@op_info_register(cosh_op_info) +def _cosh_tbe(): + """Cosh TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/data_format_dim_map.py b/mindspore/ops/_op_impl/tbe/data_format_dim_map.py new file mode 100644 index 0000000000..0bbccd30b1 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/data_format_dim_map.py @@ -0,0 +1,38 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""DataFormatDimMap op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +data_format_dim_map_op_info = TBERegOp("DataFormatDimMap") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("data_format_dim_map.so") \ + .compute_cost(10) \ + .kernel_name("data_format_dim_map") \ + .partial_flag(True) \ + .attr("dst_format", "optional", "str", "all") \ + .attr("src_format", "optional", "str", "all") \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I32_5HD, DataType.I32_5HD) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default) \ + .get_op_info() + + +@op_info_register(data_format_dim_map_op_info) +def _data_format_dim_map_tbe(): + """DataFormatDimMap TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/div_no_nan.py b/mindspore/ops/_op_impl/tbe/div_no_nan.py new file mode 100644 index 0000000000..893b38042e --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/div_no_nan.py @@ -0,0 +1,45 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""DivNoNan op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +div_no_nan_op_info = TBERegOp("DivNoNan") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("div_no_nan.so") \ + .compute_cost(10) \ + .kernel_name("div_no_nan") \ + .partial_flag(True) \ + .input(0, "x1", False, "required", "all") \ + .input(1, "x2", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.I8_5HD, DataType.I8_5HD, DataType.I8_5HD) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.U8_Default) \ + .dtype_format(DataType.U8_5HD, DataType.U8_5HD, DataType.U8_5HD) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I32_5HD, DataType.I32_5HD, DataType.I32_5HD) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ + .get_op_info() + + +@op_info_register(div_no_nan_op_info) +def _div_no_nan_tbe(): + """DivNoNan TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/dropout_do_mask.py b/mindspore/ops/_op_impl/tbe/dropout_do_mask.py index 2bef489b96..a24e02f964 100644 --- a/mindspore/ops/_op_impl/tbe/dropout_do_mask.py +++ b/mindspore/ops/_op_impl/tbe/dropout_do_mask.py @@ -27,6 +27,7 @@ drop_out_do_mask_op_info = TBERegOp("DropoutDoMask") \ .input(1, "mask", False, "required", "all") \ .input(2, "keep_prob", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.F16_Default, DataType.U8_Default, DataType.F16_Default, 
DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.U8_Default, DataType.F32_Default, DataType.F32_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/elu.py b/mindspore/ops/_op_impl/tbe/elu.py index 9125d14727..e61e2851af 100644 --- a/mindspore/ops/_op_impl/tbe/elu.py +++ b/mindspore/ops/_op_impl/tbe/elu.py @@ -28,9 +28,7 @@ elu_op_info = TBERegOp("Elu") \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, "required", "all") \ .dtype_format(DataType.F16_Default, DataType.F16_Default) \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ .dtype_format(DataType.F32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/erf.py b/mindspore/ops/_op_impl/tbe/erf.py index 2247197c4e..4c4893d505 100644 --- a/mindspore/ops/_op_impl/tbe/erf.py +++ b/mindspore/ops/_op_impl/tbe/erf.py @@ -26,9 +26,7 @@ erf_op_info = TBERegOp("Erf") \ .op_pattern("formatAgnostic") \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, "required", "all") \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ .dtype_format(DataType.F16_Default, DataType.F16_Default) \ - .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ .dtype_format(DataType.F32_Default, DataType.F32_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/erfc.py b/mindspore/ops/_op_impl/tbe/erfc.py index 7e1b76649a..7b0eccf52e 100644 --- a/mindspore/ops/_op_impl/tbe/erfc.py +++ b/mindspore/ops/_op_impl/tbe/erfc.py @@ -26,9 +26,7 @@ erfc_op_info = TBERegOp("Erfc") \ .op_pattern("formatAgnostic") \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, "required", "all") \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ .dtype_format(DataType.F16_Default, DataType.F16_Default) \ - .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ .dtype_format(DataType.F32_Default, DataType.F32_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/expm1.py 
b/mindspore/ops/_op_impl/tbe/expm1.py new file mode 100644 index 0000000000..a126aca36f --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/expm1.py @@ -0,0 +1,37 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Expm1 op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +expm1_op_info = TBERegOp("Expm1") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("expm1.so") \ + .compute_cost(10) \ + .kernel_name("expm1") \ + .partial_flag(True) \ + .op_pattern("formatAgnostic") \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .get_op_info() + + +@op_info_register(expm1_op_info) +def _expm1_tbe(): + """Expm1 TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/fused_mul_add.py b/mindspore/ops/_op_impl/tbe/fused_mul_add.py index ad3c601e5d..fa104fb561 100644 --- a/mindspore/ops/_op_impl/tbe/fused_mul_add.py +++ b/mindspore/ops/_op_impl/tbe/fused_mul_add.py @@ -27,6 +27,7 @@ fused_mul_add_op_info = TBERegOp("FusedMulAdd") \ .input(1, "x2", False, "required", "all") \ .input(2, "x3", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .op_pattern("dynamicFormat") \ 
.dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ .dtype_format(DataType.I32_5HD, DataType.I32_5HD, DataType.I32_5HD, DataType.I32_5HD) \ .dtype_format(DataType.I32_FracZ, DataType.I32_FracZ, DataType.I32_FracZ, DataType.I32_FracZ) \ diff --git a/mindspore/ops/_op_impl/tbe/fused_mul_add_n_l2loss.py b/mindspore/ops/_op_impl/tbe/fused_mul_add_n_l2loss.py new file mode 100644 index 0000000000..e4f3f8be16 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/fused_mul_add_n_l2loss.py @@ -0,0 +1,53 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""FusedMulAddNL2loss op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +fused_mul_add_n_l2loss_op_info = TBERegOp("FusedMulAddNL2loss") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("fused_mul_addn_l2loss.so") \ + .compute_cost(10) \ + .kernel_name("fused_mul_addn_l2loss") \ + .partial_flag(True) \ + .input(0, "x1", False, "required", "all") \ + .input(1, "x2", False, "required", "all") \ + .input(2, "x3", False, "required", "all") \ + .output(0, "y1", False, "required", "all") \ + .output(1, "y2", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, + DataType.F16_5HD, DataType.F16_Default) \ + .dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, + DataType.F16_C1HWNCoC0, DataType.F16_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, + DataType.F16_FracZ, DataType.F16_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, + DataType.F32_5HD, DataType.F32_Default) \ + .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, + DataType.F32_C1HWNCoC0, DataType.F32_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, + DataType.F32_FracZ, DataType.F32_Default) \ + .get_op_info() + + +@op_info_register(fused_mul_add_n_l2loss_op_info) +def _fused_mul_add_n_l2loss_tbe(): + """FusedMulAddNL2loss TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum_extern.py b/mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum_extern.py new file 
mode 100644 index 0000000000..37b0deec12 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum_extern.py @@ -0,0 +1,67 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""FusedMulApplyMomentumExtern op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +fused_mul_apply_momentum_extern_op_info = TBERegOp("FusedMulApplyMomentumExtern") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("fused_mul_apply_momentum_extern.so") \ + .compute_cost(10) \ + .kernel_name("fused_mul_apply_momentum_extern") \ + .partial_flag(True) \ + .attr("use_nesterov", "optional", "bool", "true,false", "false") \ + .input(0, "var", False, "required", "all") \ + .input(1, "accum", False, "required", "all") \ + .input(2, "lr", False, "required", "all") \ + .input(3, "x1", False, "required", "all") \ + .input(4, "momentum", False, "required", "all") \ + .input(5, "x2", False, "required", "all") \ + .input(6, "var_copy", False, "required", "all") \ + .output(0, "var", False, "required", "all") \ + .output(1, "var_copy", False, "required", "all") \ + .output(2, "accum", False, "required", "all") \ + .dtype_format(DataType.F32_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_5HD, + DataType.F16_Default, DataType.F16_Default, DataType.F16_5HD, DataType.F32_5HD, + DataType.F16_5HD, 
DataType.F16_5HD) \ + .dtype_format(DataType.F32_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_C1HWNCoC0, + DataType.F16_Default, DataType.F16_Default, DataType.F16_C1HWNCoC0, DataType.F32_C1HWNCoC0, + DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \ + .dtype_format(DataType.F32_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F32_Default, + DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_FracZ, + DataType.F16_Default, DataType.F16_Default, DataType.F16_FracZ, DataType.F32_FracZ, + DataType.F16_FracZ, DataType.F16_FracZ) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_5HD, + DataType.F32_Default, DataType.F32_Default, DataType.F16_5HD, DataType.F32_5HD, + DataType.F16_5HD, DataType.F32_5HD) \ + .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_C1HWNCoC0, + DataType.F32_Default, DataType.F32_Default, DataType.F16_C1HWNCoC0, DataType.F32_C1HWNCoC0, + DataType.F16_C1HWNCoC0, DataType.F32_C1HWNCoC0) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default, DataType.F16_Default, DataType.F32_Default, + DataType.F16_Default, DataType.F32_Default) \ + .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_FracZ, + DataType.F32_Default, DataType.F32_Default, DataType.F16_FracZ, DataType.F32_FracZ, + DataType.F16_FracZ, DataType.F32_FracZ) \ + .get_op_info() + + +@op_info_register(fused_mul_apply_momentum_extern_op_info) +def _fused_mul_apply_momentum_extern_tbe(): + """FusedMulApplyMomentumExtern TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/histogram_fixed_width.py b/mindspore/ops/_op_impl/tbe/histogram_fixed_width.py new file 
mode 100644 index 0000000000..32195f1f3c --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/histogram_fixed_width.py @@ -0,0 +1,40 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""HistogramFixedWidth op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +histogram_fixed_width_op_info = TBERegOp("HistogramFixedWidth") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("histogram_fixed_width_d.so") \ + .compute_cost(10) \ + .kernel_name("histogram_fixed_width_d") \ + .partial_flag(True) \ + .attr("nbins", "required", "int", "all") \ + .attr("dtype", "optional", "str", "all") \ + .input(0, "x", False, "required", "all") \ + .input(1, "range", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.I32_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .get_op_info() + + +@op_info_register(histogram_fixed_width_op_info) +def _histogram_fixed_width_tbe(): + """HistogramFixedWidth TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/in_top_k.py b/mindspore/ops/_op_impl/tbe/in_top_k.py new file mode 100644 index 0000000000..46d7258e2a --- /dev/null +++ 
b/mindspore/ops/_op_impl/tbe/in_top_k.py @@ -0,0 +1,37 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""InTopK op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +in_top_k_op_info = TBERegOp("InTopK") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("in_top_k.so") \ + .compute_cost(10) \ + .kernel_name("in_top_k") \ + .partial_flag(True) \ + .attr("k", "required", "int", "all") \ + .input(0, "x1", False, "required", "all") \ + .input(1, "x2", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.BOOL_Default) \ + .get_op_info() + + +@op_info_register(in_top_k_op_info) +def _in_top_k_tbe(): + """InTopK TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/inplace_add.py b/mindspore/ops/_op_impl/tbe/inplace_add.py new file mode 100644 index 0000000000..9a14fc9a63 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/inplace_add.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""InplaceAdd op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +inplace_add_op_info = TBERegOp("InplaceAdd") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("inplace_add_d.so") \ + .compute_cost(10) \ + .kernel_name("inplace_add_d") \ + .partial_flag(True) \ + .attr("indices", "required", "listInt", "all") \ + .input(0, "x", False, "required", "all") \ + .input(1, "v", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .get_op_info() + + +@op_info_register(inplace_add_op_info) +def _inplace_add_tbe(): + """InplaceAdd TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/inplace_sub.py b/mindspore/ops/_op_impl/tbe/inplace_sub.py new file mode 100644 index 0000000000..07f59e05fc --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/inplace_sub.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""InplaceSub op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +inplace_sub_op_info = TBERegOp("InplaceSub") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("inplace_sub_d.so") \ + .compute_cost(10) \ + .kernel_name("inplace_sub_d") \ + .partial_flag(True) \ + .attr("indices", "required", "listInt", "all") \ + .input(0, "x", False, "required", "all") \ + .input(1, "v", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .get_op_info() + + +@op_info_register(inplace_sub_op_info) +def _inplace_sub_tbe(): + """InplaceSub TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/inplace_update.py b/mindspore/ops/_op_impl/tbe/inplace_update.py new file mode 100644 index 0000000000..b8c7454d77 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/inplace_update.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""InplaceUpdate op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +inplace_update_op_info = TBERegOp("InplaceUpdate") \ + .fusion_type("INPLACE") \ + .async_flag(False) \ + .binfile_name("inplace_update_d.so") \ + .compute_cost(10) \ + .kernel_name("inplace_update_d") \ + .partial_flag(True) \ + .attr("indices", "required", "listInt", "all") \ + .input(0, "x", False, "required", "all") \ + .input(1, "v", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .get_op_info() + + +@op_info_register(inplace_update_op_info) +def _inplace_update_tbe(): + """InplaceUpdate TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/inv.py b/mindspore/ops/_op_impl/tbe/inv.py new file mode 100644 index 0000000000..e2b749a5aa --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/inv.py @@ -0,0 +1,37 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Inv op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +inv_op_info = TBERegOp("Inv") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("inv.so") \ + .compute_cost(10) \ + .kernel_name("inv") \ + .partial_flag(True) \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .get_op_info() + + +@op_info_register(inv_op_info) +def _inv_tbe(): + """Inv TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/inv_grad.py b/mindspore/ops/_op_impl/tbe/inv_grad.py new file mode 100644 index 0000000000..70626b8808 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/inv_grad.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""InvGrad op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +inv_grad_op_info = TBERegOp("InvGrad") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("inv_grad.so") \ + .compute_cost(10) \ + .kernel_name("inv_grad") \ + .partial_flag(True) \ + .input(0, "x", False, "required", "all") \ + .input(1, "grad", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.I8_Default) \ + .get_op_info() + + +@op_info_register(inv_grad_op_info) +def _inv_grad_tbe(): + """InvGrad TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/invert.py b/mindspore/ops/_op_impl/tbe/invert.py new file mode 100644 index 0000000000..887eee45e7 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/invert.py @@ -0,0 +1,36 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Invert op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +invert_op_info = TBERegOp("Invert") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("invert.so") \ + .compute_cost(10) \ + .kernel_name("invert") \ + .partial_flag(True) \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I16_Default, DataType.I16_Default) \ + .dtype_format(DataType.U16_Default, DataType.U16_Default) \ + .get_op_info() + + +@op_info_register(invert_op_info) +def _invert_tbe(): + """Invert TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/l2_normalize_grad.py b/mindspore/ops/_op_impl/tbe/l2_normalize_grad.py index e164120c75..b6a099d286 100644 --- a/mindspore/ops/_op_impl/tbe/l2_normalize_grad.py +++ b/mindspore/ops/_op_impl/tbe/l2_normalize_grad.py @@ -27,7 +27,7 @@ l2_normalize_grad_op_info = TBERegOp("L2NormalizeGrad") \ .attr("epsilon", "required", "float", "all") \ .input(0, "x", False, "required", "all") \ .input(1, "y", False, "required", "all") \ - .input(2, "dy", False, "requried", "all") \ + .input(2, "dy", False, "required", "all") \ .output(0, "dx", True, "required", "all") \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ diff --git a/mindspore/ops/_op_impl/tbe/lamb_next_right.py b/mindspore/ops/_op_impl/tbe/lamb_next_right.py new file mode 100644 index 0000000000..716c5a88fb --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/lamb_next_right.py @@ -0,0 +1,44 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""LambNextRight op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +lamb_next_right_op_info = TBERegOp("LambNextRight") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("lamb_next_right.so") \ + .compute_cost(10) \ + .kernel_name("lamb_next_right") \ + .partial_flag(True) \ + .input(0, "input_square", False, "required", "all") \ + .input(1, "input_mul2", False, "required", "all") \ + .input(2, "mul2_x", False, "required", "all") \ + .input(3, "mul3_x", False, "required", "all") \ + .input(4, "truediv1_recip", False, "required", "all") \ + .input(5, "add2_y", False, "required", "all") \ + .output(0, "y1", False, "required", "all") \ + .output(1, "y2", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, + DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, + DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .get_op_info() + + +@op_info_register(lamb_next_right_op_info) +def _lamb_next_right_tbe(): + """LambNextRight TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/layer_norm.py b/mindspore/ops/_op_impl/tbe/layer_norm.py index c52be2d4ef..03ddd2dc6c 100644 --- a/mindspore/ops/_op_impl/tbe/layer_norm.py +++ 
b/mindspore/ops/_op_impl/tbe/layer_norm.py @@ -32,6 +32,7 @@ layer_norm_op_info = TBERegOp("LayerNorm") \ .output(0, "y", False, "required", "all") \ .output(1, "mean", False, "required", "all") \ .output(2, "variance", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, diff --git a/mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop.py b/mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop.py index ef254465bc..deca384032 100644 --- a/mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop.py +++ b/mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop.py @@ -30,6 +30,7 @@ layer_norm_beta_gamma_backprop_op_info = TBERegOp("LayerNormBetaGammaBackprop") .input(3, "mean", False, "required", "all") \ .output(0, "pd_gamma", False, "required", "all") \ .output(1, "pd_beta", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, diff --git a/mindspore/ops/_op_impl/tbe/layer_norm_x_backprop.py b/mindspore/ops/_op_impl/tbe/layer_norm_x_backprop.py index bbab66816d..1d4f1ef231 100644 --- a/mindspore/ops/_op_impl/tbe/layer_norm_x_backprop.py +++ b/mindspore/ops/_op_impl/tbe/layer_norm_x_backprop.py @@ -29,6 +29,7 @@ layer_norm_x_backprop_op_info = TBERegOp("LayerNormXBackprop") \ .input(3, "mean", False, "required", "all") \ .input(4, "gamma", False, "required", "all") \ .output(0, "pd_x", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, 
DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, diff --git a/mindspore/ops/_op_impl/tbe/lin_space.py b/mindspore/ops/_op_impl/tbe/lin_space.py new file mode 100644 index 0000000000..aed41e80d4 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/lin_space.py @@ -0,0 +1,40 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""LinSpace op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +lin_space_op_info = TBERegOp("LinSpace") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("lin_space.so") \ + .compute_cost(10) \ + .kernel_name("lin_space") \ + .partial_flag(True) \ + .op_pattern("broadcast") \ + .input(0, "assist", False, "required", "all") \ + .input(1, "start", False, "required", "all") \ + .input(2, "stop", False, "required", "all") \ + .input(3, "num", False, "required", "all") \ + .output(0, "output", False, "required", "all") \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.I32_Default, + DataType.F32_Default) \ + .get_op_info() + + +@op_info_register(lin_space_op_info) +def _lin_space_tbe(): + """LinSpace TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/matmul.py b/mindspore/ops/_op_impl/tbe/matmul.py index c29378f721..7784d5e222 100644 --- 
a/mindspore/ops/_op_impl/tbe/matmul.py +++ b/mindspore/ops/_op_impl/tbe/matmul.py @@ -17,22 +17,32 @@ from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType matmul_op_info = TBERegOp("MatMul") \ - .fusion_type("OPAQUE") \ + .fusion_type("DYNAMIC") \ .async_flag(False) \ .binfile_name("matmul.so") \ .compute_cost(10) \ .kernel_name("matmul") \ .partial_flag(True) \ - .attr("transpose_a", "required", "bool", "all") \ - .attr("transpose_b", "required", "bool", "all") \ + .attr("transpose_x1", "required", "bool", "all") \ + .attr("transpose_x2", "required", "bool", "all") \ + .attr("offset_x", "optional", "int", "all") \ .input(0, "x1", False, "required", "all") \ .input(1, "x2", False, "required", "all") \ - .input(2, "x3", False, "optional", "all") \ + .input(2, "bias", False, "optional", "all") \ + .input(3, "offset_w", False, "optional", "all") \ .output(0, "y", False, "required", "all") \ - .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ - .dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_Default, DataType.F16_FracNZ) \ - .dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F32_Default, DataType.F32_FracNZ) \ - .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default, DataType.I8_Default, + DataType.I32_Default) \ + .dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_Default, DataType.I8_Default, + DataType.F16_FracNZ) \ + .dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F32_Default, DataType.I8_Default, + DataType.F32_FracNZ) \ + .dtype_format(DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, DataType.I8_Default, + DataType.F32_NHWC) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.I8_Default, + DataType.F32_Default) \ + 
.dtype_format(DataType.I32_NHWC, DataType.I32_NHWC, DataType.I32_NHWC, DataType.I8_Default, + DataType.I32_NHWC) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/matrix_diag.py b/mindspore/ops/_op_impl/tbe/matrix_diag.py new file mode 100644 index 0000000000..9d080e34a2 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/matrix_diag.py @@ -0,0 +1,45 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""MatrixDiagD op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +matrix_diag_d_op_info = TBERegOp("MatrixDiag") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("matrix_diag_d.so") \ + .compute_cost(10) \ + .kernel_name("matrix_diag_d") \ + .partial_flag(True) \ + .input(0, "x", False, "required", "all") \ + .input(1, "assist", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_5HD, DataType.I32_5HD, DataType.I32_5HD) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + 
.dtype_format(DataType.I8_5HD, DataType.I8_5HD, DataType.I8_5HD) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.U8_5HD, DataType.U8_5HD, DataType.U8_5HD) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.U8_Default) \ + .get_op_info() + + +@op_info_register(matrix_diag_d_op_info) +def _matrix_diag_tbe(): + """MatrixDiagD TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/matrix_diag_part.py b/mindspore/ops/_op_impl/tbe/matrix_diag_part.py new file mode 100644 index 0000000000..1cb320bbce --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/matrix_diag_part.py @@ -0,0 +1,45 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""MatrixDiagPartD op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +matrix_diag_part_d_op_info = TBERegOp("MatrixDiagPart") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("matrix_diag_part_d.so") \ + .compute_cost(10) \ + .kernel_name("matrix_diag_part_d") \ + .partial_flag(True) \ + .input(0, "x", False, "required", "all") \ + .input(1, "assist", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_5HD, DataType.I32_5HD, DataType.I32_5HD) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I8_5HD, DataType.I8_5HD, DataType.I8_5HD) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.U8_5HD, DataType.U8_5HD, DataType.U8_5HD) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.U8_Default) \ + .get_op_info() + + +@op_info_register(matrix_diag_part_d_op_info) +def _matrix_diag_part_tbe(): + """MatrixDiagPartD TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/matrix_set_diag.py b/mindspore/ops/_op_impl/tbe/matrix_set_diag.py new file mode 100644 index 0000000000..db0b460084 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/matrix_set_diag.py @@ -0,0 +1,46 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""MatrixSetDiagD op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +matrix_diag_d_op_info = TBERegOp("MatrixSetDiag") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("matrix_set_diag_d.so") \ + .compute_cost(10) \ + .kernel_name("matrix_set_diag_d") \ + .partial_flag(True) \ + .input(0, "x", False, "required", "all") \ + .input(1, "diagonal", False, "required", "all") \ + .input(2, "assist", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_5HD, DataType.I32_5HD, DataType.I32_5HD, DataType.I32_5HD) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I8_5HD, DataType.I8_5HD, DataType.I8_5HD, DataType.I8_5HD) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.U8_5HD, DataType.U8_5HD, DataType.U8_5HD, DataType.U8_5HD) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.U8_Default, DataType.U8_Default) \ + 
.get_op_info() + + +@op_info_register(matrix_diag_d_op_info) +def _matrix_set_diag_tbe(): + """MatrixSetDiagD TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/mul.py b/mindspore/ops/_op_impl/tbe/mul.py index fa74c88de3..5433bf0b53 100644 --- a/mindspore/ops/_op_impl/tbe/mul.py +++ b/mindspore/ops/_op_impl/tbe/mul.py @@ -26,21 +26,8 @@ mul_op_info = TBERegOp("Mul") \ .input(0, "x", False, "required", "all") \ .input(1, "y", False, "required", "all") \ .output(0, "output", False, "required", "all") \ - .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ - .dtype_format(DataType.I32_5HD, DataType.I32_5HD, DataType.I32_5HD) \ - .dtype_format(DataType.I32_FracZ, DataType.I32_FracZ, DataType.I32_FracZ) \ - .dtype_format(DataType.I32_FracNZ, DataType.I32_FracNZ, DataType.I32_FracNZ) \ - .dtype_format(DataType.I32_C1HWNCoC0, DataType.I32_C1HWNCoC0, DataType.I32_C1HWNCoC0) \ - .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ - .dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ) \ - .dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ) \ - .dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \ - .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ - .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ) \ - .dtype_format(DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F32_FracNZ) \ - .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \ + .op_pattern("dynamicFormat") \ + .dtype_format(DataType.None_None, DataType.None_None, DataType.None_None) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/range.py b/mindspore/ops/_op_impl/tbe/range.py new file mode 100644 index 0000000000..257c087b40 
--- /dev/null +++ b/mindspore/ops/_op_impl/tbe/range.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Range op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +range_op_info = TBERegOp("Range") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("range_d.so") \ + .compute_cost(10) \ + .kernel_name("range_d") \ + .partial_flag(True) \ + .attr("start", "required", "float", "all") \ + .attr("limit", "required", "float", "all") \ + .attr("delta", "required", "float", "all") \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default) \ + .get_op_info() + + +@op_info_register(range_op_info) +def _range_tbe(): + """Range TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/real_div.py b/mindspore/ops/_op_impl/tbe/real_div.py index b39948971d..9c6d9e0b27 100644 --- a/mindspore/ops/_op_impl/tbe/real_div.py +++ b/mindspore/ops/_op_impl/tbe/real_div.py @@ -26,10 +26,9 @@ realdiv_op_info = TBERegOp("RealDiv") \ .input(0, "x", False, "required", "all") \ .input(1, "y", False, "required", "all") \ .output(0, "z", False, "required", "all") \ - .dtype_format(DataType.F16_Default, DataType.F16_Default, 
DataType.F16_Default) \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ - .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ + .op_pattern("broadcast") \ + .dtype_format(DataType.F16_None, DataType.F16_None, DataType.F16_None) \ + .dtype_format(DataType.F32_None, DataType.F32_None, DataType.F32_None) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/reciprocal.py b/mindspore/ops/_op_impl/tbe/reciprocal.py index dfa126384c..77f3bfac27 100644 --- a/mindspore/ops/_op_impl/tbe/reciprocal.py +++ b/mindspore/ops/_op_impl/tbe/reciprocal.py @@ -25,6 +25,7 @@ reciprocal_op_info = TBERegOp("Reciprocal") \ .partial_flag(True) \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ .dtype_format(DataType.F16_NHWC, DataType.F16_NHWC) \ diff --git a/mindspore/ops/_op_impl/tbe/reduce_mean.py b/mindspore/ops/_op_impl/tbe/reduce_mean.py index 67b96933a1..b01fd3bebd 100644 --- a/mindspore/ops/_op_impl/tbe/reduce_mean.py +++ b/mindspore/ops/_op_impl/tbe/reduce_mean.py @@ -27,11 +27,11 @@ reduce_mean_op_info = TBERegOp("ReduceMean") \ .attr("keep_dims", "optional", "bool", "all") \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, "required", "all") \ - .dtype_format(DataType.I8_Default, DataType.I8_Default) \ - .dtype_format(DataType.U8_Default, DataType.U8_Default) \ - .dtype_format(DataType.F16_Default, DataType.F16_Default) \ - .dtype_format(DataType.F32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .op_pattern("reduce") \ + .dtype_format(DataType.I8_None, DataType.I8_None) \ + .dtype_format(DataType.U8_None, DataType.U8_None) \ + .dtype_format(DataType.F16_None, DataType.F16_None) \ + 
.dtype_format(DataType.F32_None, DataType.F32_None) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/relu_grad_v2.py b/mindspore/ops/_op_impl/tbe/relu_grad_v2.py index 93d7dede62..e5f82c8b78 100644 --- a/mindspore/ops/_op_impl/tbe/relu_grad_v2.py +++ b/mindspore/ops/_op_impl/tbe/relu_grad_v2.py @@ -24,7 +24,7 @@ relu_grad_v2_op_info = TBERegOp("ReluGradV2") \ .kernel_name("relu_grad_v2") \ .partial_flag(True) \ .input(0, "gradients", False, "required", "all") \ - .input(1, "mask", False, "rerequired", "all") \ + .input(1, "mask", False, "required", "all") \ .output(0, "backprops", True, "required", "all") \ .dtype_format(DataType.F16_5HD, DataType.U8_Default, DataType.F16_5HD) \ .dtype_format(DataType.F32_5HD, DataType.U8_Default, DataType.F32_5HD) \ diff --git a/mindspore/ops/_op_impl/tbe/scatter_nd_update.py b/mindspore/ops/_op_impl/tbe/scatter_nd_update.py index df0996f26f..74fb7c9b72 100644 --- a/mindspore/ops/_op_impl/tbe/scatter_nd_update.py +++ b/mindspore/ops/_op_impl/tbe/scatter_nd_update.py @@ -31,7 +31,7 @@ scatter_nd_update_op_info = TBERegOp("ScatterNdUpdate") \ .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.I8_Default, DataType.I32_Default, DataType.I8_Default, DataType.I8_Default) \ - .dtype_format(DataType.U8_Default, DataType.I32_Default, DataType.U8_Default, DataType.U8_Default,) \ + .dtype_format(DataType.U8_Default, DataType.I32_Default, DataType.U8_Default, DataType.U8_Default) \ .dtype_format(DataType.BOOL_Default, DataType.I32_Default, DataType.BOOL_Default, DataType.BOOL_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/scatter_update.py b/mindspore/ops/_op_impl/tbe/scatter_update.py index 3c330fe435..244b8ab21f 100644 --- a/mindspore/ops/_op_impl/tbe/scatter_update.py +++ b/mindspore/ops/_op_impl/tbe/scatter_update.py @@ -31,7 +31,7 @@ 
scatter_update_op_info = TBERegOp("ScatterUpdate") \ .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.I8_Default, DataType.I32_Default, DataType.I8_Default, DataType.I8_Default) \ - .dtype_format(DataType.U8_Default, DataType.I32_Default, DataType.U8_Default, DataType.U8_Default,) \ + .dtype_format(DataType.U8_Default, DataType.I32_Default, DataType.U8_Default, DataType.U8_Default) \ .dtype_format(DataType.BOOL_Default, DataType.I32_Default, DataType.BOOL_Default, DataType.BOOL_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/select.py b/mindspore/ops/_op_impl/tbe/select.py index 4af4325312..e924f05021 100644 --- a/mindspore/ops/_op_impl/tbe/select.py +++ b/mindspore/ops/_op_impl/tbe/select.py @@ -27,6 +27,7 @@ select_op_info = TBERegOp("Select") \ .input(1, "x1", False, "required", "all") \ .input(2, "x2", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.BOOL_Default, DataType.I8_Default, DataType.I8_Default, DataType.I8_Default) \ .dtype_format(DataType.BOOL_Default, DataType.U8_Default, DataType.U8_Default, DataType.U8_Default) \ .dtype_format(DataType.BOOL_Default, DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ diff --git a/mindspore/ops/_op_impl/tbe/sign.py b/mindspore/ops/_op_impl/tbe/sign.py index 823715aa9f..99f7970316 100644 --- a/mindspore/ops/_op_impl/tbe/sign.py +++ b/mindspore/ops/_op_impl/tbe/sign.py @@ -27,11 +27,8 @@ sign_op_info = TBERegOp("Sign") \ .input(0, "x", None, "required", None) \ .output(0, "y", True, "required", "all") \ .dtype_format(DataType.F16_Default, DataType.F16_Default) \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ .dtype_format(DataType.F32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ 
.dtype_format(DataType.I32_Default, DataType.I32_Default) \ - .dtype_format(DataType.I32_5HD, DataType.I32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/sin.py b/mindspore/ops/_op_impl/tbe/sin.py index 187c0f0f32..f01f687926 100644 --- a/mindspore/ops/_op_impl/tbe/sin.py +++ b/mindspore/ops/_op_impl/tbe/sin.py @@ -26,7 +26,9 @@ sin_op_info = TBERegOp("Sin") \ .op_pattern("formatAgnostic") \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/sinh.py b/mindspore/ops/_op_impl/tbe/sinh.py new file mode 100644 index 0000000000..27eb66d274 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/sinh.py @@ -0,0 +1,37 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Sinh op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +sinh_op_info = TBERegOp("Sinh") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("sinh.so") \ + .compute_cost(10) \ + .kernel_name("sinh") \ + .partial_flag(True) \ + .op_pattern("formatAgnostic") \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", True, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ + .get_op_info() + + +@op_info_register(sinh_op_info) +def _sinh_tbe(): + """Sinh TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/softmax_grad_ext.py b/mindspore/ops/_op_impl/tbe/softmax_grad_ext.py index 51060d717b..d43183dcb7 100644 --- a/mindspore/ops/_op_impl/tbe/softmax_grad_ext.py +++ b/mindspore/ops/_op_impl/tbe/softmax_grad_ext.py @@ -24,12 +24,13 @@ softmax_grad_ext_op_info = TBERegOp("SoftmaxGradExt") \ .kernel_name("softmax_grad_ext") \ .partial_flag(True) \ .dynamic_format(True) \ - .attr("axes", "required", "listInt", "all") \ - .attr("keep_dims", "required", "bool", "all") \ + .attr("axis", "required", "listInt", "all") \ + .attr("keepdims", "required", "bool", "all") \ .input(0, "grad", False, "required", "all") \ .input(1, "x1", False, "required", "all") \ .input(2, "x2", False, "required", "all") \ .output(0, "y", True, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD, diff --git a/mindspore/ops/_op_impl/tbe/softplus.py b/mindspore/ops/_op_impl/tbe/softplus.py index d362cd06db..92261d91ef 100644 --- a/mindspore/ops/_op_impl/tbe/softplus.py +++ b/mindspore/ops/_op_impl/tbe/softplus.py @@ -27,9 +27,7 @@ softplus_op_info = TBERegOp("Softplus") \ .input(0, "x", False, "required", "all") \ 
.output(0, "y", False, "required", "all") \ .dtype_format(DataType.F16_Default, DataType.F16_Default) \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ .dtype_format(DataType.F32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/softplus_grad.py b/mindspore/ops/_op_impl/tbe/softplus_grad.py index 4bf7a82440..3dc0e7ee0c 100644 --- a/mindspore/ops/_op_impl/tbe/softplus_grad.py +++ b/mindspore/ops/_op_impl/tbe/softplus_grad.py @@ -28,9 +28,7 @@ softplus_grad_op_info = TBERegOp("SoftplusGrad") \ .input(1, "features", False, "required", "all") \ .output(0, "backprops", False, "required", "all") \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/sparse_apply_adagrad.py b/mindspore/ops/_op_impl/tbe/sparse_apply_adagrad.py index ca77a5eaed..c1083af9f6 100644 --- a/mindspore/ops/_op_impl/tbe/sparse_apply_adagrad.py +++ b/mindspore/ops/_op_impl/tbe/sparse_apply_adagrad.py @@ -13,15 +13,15 @@ # limitations under the License. 
# ============================================================================ -"""SparseApplyAdagrad op""" +"""SparseApplyAdagradD op""" from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType -sparse_apply_adagrad_op_info = TBERegOp("SparseApplyAdagrad") \ +sparse_apply_adagrad_d_op_info = TBERegOp("SparseApplyAdagrad") \ .fusion_type("OPAQUE") \ .async_flag(False) \ - .binfile_name("sparse_apply_adagrad.so") \ + .binfile_name("sparse_apply_adagrad_d.so") \ .compute_cost(10) \ - .kernel_name("sparse_apply_adagrad") \ + .kernel_name("sparse_apply_adagrad_d") \ .partial_flag(True) \ .attr("lr", "required", "float", "all") \ .attr("update_slots", "optional", "bool", "all") \ @@ -31,14 +31,17 @@ sparse_apply_adagrad_op_info = TBERegOp("SparseApplyAdagrad") \ .input(2, "grad", False, "required", "all") \ .input(3, "indices", False, "required", "all") \ .output(0, "var", False, "required", "all") \ - .dtype_format(DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, DataType.I32_NCHW, DataType.F32_NCHW) \ - .dtype_format(DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, DataType.I32_NHWC, DataType.F32_NHWC) \ + .output(1, "accum", False, "required", "all") \ + .dtype_format(DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, DataType.I32_NCHW, + DataType.F32_NCHW, DataType.F32_NCHW) \ + .dtype_format(DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, DataType.I32_NHWC, + DataType.F32_NHWC, DataType.F32_NHWC) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.I32_Default, - DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default) \ .get_op_info() -@op_info_register(sparse_apply_adagrad_op_info) +@op_info_register(sparse_apply_adagrad_d_op_info) def _sparse_apply_adagrad_tbe(): - """SparseApplyAdagrad TBE register""" + """SparseApplyAdagradD TBE register""" return diff --git a/mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad.py 
b/mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad.py index f665890c55..782be983fa 100644 --- a/mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad.py +++ b/mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad.py @@ -13,10 +13,10 @@ # limitations under the License. # ============================================================================ -"""SparseApplyProximalAdagrad op""" +"""SparseApplyProximalAdagradD op""" from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType -sparse_apply_proximal_adagrad_op_info = TBERegOp("SparseApplyProximalAdagrad") \ +sparse_apply_proximal_adagrad_d_op_info = TBERegOp("SparseApplyProximalAdagrad") \ .fusion_type("OPAQUE") \ .async_flag(False) \ .binfile_name("sparse_apply_proximal_adagrad.so") \ @@ -32,70 +32,101 @@ sparse_apply_proximal_adagrad_op_info = TBERegOp("SparseApplyProximalAdagrad") \ .input(5, "grad", False, "required", "all") \ .input(6, "indices", False, "required", "all") \ .output(0, "var", False, "required", "all") \ + .output(1, "accum", False, "required", "all") \ .dtype_format(DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, - DataType.F32_NCHW, DataType.F32_NCHW, DataType.I16_NCHW, DataType.F32_NCHW) \ + DataType.F32_NCHW, DataType.F32_NCHW, DataType.I16_NCHW, DataType.F32_NCHW, + DataType.F32_NCHW) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, - DataType.F32_5HD, DataType.F32_5HD, DataType.I16_5HD, DataType.F32_5HD) \ + DataType.F32_5HD, DataType.F32_5HD, DataType.I16_5HD, DataType.F32_5HD, + DataType.F32_5HD) \ .dtype_format(DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, - DataType.F32_NHWC, DataType.F32_NHWC, DataType.I16_NHWC, DataType.F32_NHWC) \ + DataType.F32_NHWC, DataType.F32_NHWC, DataType.I16_NHWC, DataType.F32_NHWC, + DataType.F32_NHWC) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, 
DataType.F32_Default, DataType.I16_Default, DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default, DataType.I16_Default, DataType.F32_Default, + DataType.F32_Default) \ .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, - DataType.F32_FracZ, DataType.F32_FracZ, DataType.I16_FracZ, DataType.F32_FracZ) \ + DataType.F32_FracZ, DataType.F32_FracZ, DataType.I16_FracZ, DataType.F32_FracZ, + DataType.F32_FracZ) \ .dtype_format(DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, - DataType.F32_NCHW, DataType.F32_NCHW, DataType.I32_NCHW, DataType.F32_NCHW) \ + DataType.F32_NCHW, DataType.F32_NCHW, DataType.I32_NCHW, DataType.F32_NCHW, + DataType.F32_NCHW) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, - DataType.F32_5HD, DataType.F32_5HD, DataType.I32_5HD, DataType.F32_5HD) \ + DataType.F32_5HD, DataType.F32_5HD, DataType.I32_5HD, DataType.F32_5HD, + DataType.F32_5HD) \ .dtype_format(DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, - DataType.F32_NHWC, DataType.F32_NHWC, DataType.I32_NHWC, DataType.F32_NHWC) \ + DataType.F32_NHWC, DataType.F32_NHWC, DataType.I32_NHWC, DataType.F32_NHWC, + DataType.F32_NHWC) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_Default, DataType.I32_Default, DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default, DataType.I32_Default, DataType.F32_Default, + DataType.F32_Default) \ .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, - DataType.F32_FracZ, DataType.F32_FracZ, DataType.I32_FracZ, DataType.F32_FracZ) \ + DataType.F32_FracZ, DataType.F32_FracZ, DataType.I32_FracZ, DataType.F32_FracZ, + DataType.F32_FracZ) \ .dtype_format(DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, - DataType.F32_NCHW, DataType.F32_NCHW, 
DataType.I64_NCHW, DataType.F32_NCHW) \ + DataType.F32_NCHW, DataType.F32_NCHW, DataType.I64_NCHW, DataType.F32_NCHW, + DataType.F32_NCHW) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, - DataType.F32_5HD, DataType.F32_5HD, DataType.I64_5HD, DataType.F32_5HD) \ + DataType.F32_5HD, DataType.F32_5HD, DataType.I64_5HD, DataType.F32_5HD, + DataType.F32_5HD) \ .dtype_format(DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, - DataType.F32_NHWC, DataType.F32_NHWC, DataType.I64_NHWC, DataType.F32_NHWC) \ + DataType.F32_NHWC, DataType.F32_NHWC, DataType.I64_NHWC, DataType.F32_NHWC, + DataType.F32_NHWC) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_Default, DataType.I64_Default, DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default, DataType.I64_Default, DataType.F32_Default, + DataType.F32_Default) \ .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, - DataType.F32_FracZ, DataType.F32_FracZ, DataType.I64_FracZ, DataType.F32_FracZ) \ + DataType.F32_FracZ, DataType.F32_FracZ, DataType.I64_FracZ, DataType.F32_FracZ, + DataType.F32_FracZ) \ .dtype_format(DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, - DataType.F32_NCHW, DataType.F32_NCHW, DataType.U16_NCHW, DataType.F32_NCHW) \ + DataType.F32_NCHW, DataType.F32_NCHW, DataType.U16_NCHW, DataType.F32_NCHW, + DataType.F32_NCHW) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, - DataType.F32_5HD, DataType.F32_5HD, DataType.U16_5HD, DataType.F32_5HD) \ + DataType.F32_5HD, DataType.F32_5HD, DataType.U16_5HD, DataType.F32_5HD, + DataType.F32_5HD) \ .dtype_format(DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, - DataType.F32_NHWC, DataType.F32_NHWC, DataType.U16_NHWC, DataType.F32_NHWC) \ + DataType.F32_NHWC, 
DataType.F32_NHWC, DataType.U16_NHWC, DataType.F32_NHWC, + DataType.F32_NHWC) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_Default, DataType.U16_Default, DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default, DataType.U16_Default, DataType.F32_Default, + DataType.F32_Default) \ .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, - DataType.F32_FracZ, DataType.F32_FracZ, DataType.U16_FracZ, DataType.F32_FracZ) \ + DataType.F32_FracZ, DataType.F32_FracZ, DataType.U16_FracZ, DataType.F32_FracZ, + DataType.F32_FracZ) \ .dtype_format(DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, - DataType.F32_NCHW, DataType.F32_NCHW, DataType.U32_NCHW, DataType.F32_NCHW) \ + DataType.F32_NCHW, DataType.F32_NCHW, DataType.U32_NCHW, DataType.F32_NCHW, + DataType.F32_NCHW) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, - DataType.F32_5HD, DataType.F32_5HD, DataType.U32_5HD, DataType.F32_5HD) \ + DataType.F32_5HD, DataType.F32_5HD, DataType.U32_5HD, DataType.F32_5HD, + DataType.F32_5HD) \ .dtype_format(DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, - DataType.F32_NHWC, DataType.F32_NHWC, DataType.U32_NHWC, DataType.F32_NHWC) \ + DataType.F32_NHWC, DataType.F32_NHWC, DataType.U32_NHWC, DataType.F32_NHWC, + DataType.F32_NHWC) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_Default, DataType.U32_Default, DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default, DataType.U32_Default, DataType.F32_Default, + DataType.F32_Default) \ .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, - DataType.F32_FracZ, DataType.F32_FracZ, DataType.U32_FracZ, DataType.F32_FracZ) \ + DataType.F32_FracZ, 
DataType.F32_FracZ, DataType.U32_FracZ, DataType.F32_FracZ, + DataType.F32_FracZ) \ .dtype_format(DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, DataType.F32_NCHW, - DataType.F32_NCHW, DataType.F32_NCHW, DataType.U64_NCHW, DataType.F32_NCHW) \ + DataType.F32_NCHW, DataType.F32_NCHW, DataType.U64_NCHW, DataType.F32_NCHW, + DataType.F32_NCHW) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, - DataType.F32_5HD, DataType.F32_5HD, DataType.U64_5HD, DataType.F32_5HD) \ + DataType.F32_5HD, DataType.F32_5HD, DataType.U64_5HD, DataType.F32_5HD, + DataType.F32_5HD) \ .dtype_format(DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, DataType.F32_NHWC, - DataType.F32_NHWC, DataType.F32_NHWC, DataType.U64_NHWC, DataType.F32_NHWC) \ + DataType.F32_NHWC, DataType.F32_NHWC, DataType.U64_NHWC, DataType.F32_NHWC, + DataType.F32_NHWC) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_Default, DataType.U64_Default, DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default, DataType.U64_Default, DataType.F32_Default, + DataType.F32_Default) \ .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, - DataType.F32_FracZ, DataType.F32_FracZ, DataType.U64_FracZ, DataType.F32_FracZ) \ + DataType.F32_FracZ, DataType.F32_FracZ, DataType.U64_FracZ, DataType.F32_FracZ, + DataType.F32_FracZ) \ .get_op_info() -@op_info_register(sparse_apply_proximal_adagrad_op_info) +@op_info_register(sparse_apply_proximal_adagrad_d_op_info) def _sparse_apply_proximal_adagrad(): - """SparseApplyProximalAdagrad TBE register""" + """SparseApplyProximalAdagradD TBE register""" return diff --git a/mindspore/ops/_op_impl/tbe/sparse_gather_v2.py b/mindspore/ops/_op_impl/tbe/sparse_gather_v2.py new file mode 100644 index 0000000000..b824836312 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/sparse_gather_v2.py @@ -0,0 +1,66 @@ +# 
Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""SparseGatherV2 op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +sparse_gather_v2_op_info = TBERegOp("SparseGatherV2") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("gather_v2_d.so") \ + .compute_cost(10) \ + .kernel_name("gather_v2_d") \ + .partial_flag(True) \ + .attr("axis", "optional", "int", "all") \ + .input(0, "x", False, "required", "all") \ + .input(1, "indices", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I8_Default, DataType.I32_Default, DataType.I8_Default) \ + .dtype_format(DataType.I8_Default, DataType.I64_Default, DataType.I8_Default) \ + .dtype_format(DataType.I8_5HD, DataType.I32_5HD, DataType.I8_5HD) \ + .dtype_format(DataType.I8_5HD, DataType.I64_5HD, DataType.I8_5HD) \ + .dtype_format(DataType.I8_FracZ, DataType.I32_FracZ, DataType.I8_FracZ) \ + .dtype_format(DataType.I8_FracZ, DataType.I64_FracZ, DataType.I8_FracZ) \ + .dtype_format(DataType.U8_Default, DataType.I32_Default, DataType.U8_Default) \ + .dtype_format(DataType.U8_Default, DataType.I64_Default, DataType.U8_Default) \ + .dtype_format(DataType.U8_5HD, DataType.I32_5HD, DataType.U8_5HD) \ + .dtype_format(DataType.U8_5HD, DataType.I64_5HD, DataType.U8_5HD) \ + .dtype_format(DataType.U8_FracZ, 
DataType.I32_FracZ, DataType.U8_FracZ) \ + .dtype_format(DataType.U8_FracZ, DataType.I64_FracZ, DataType.U8_FracZ) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I32_Default, DataType.I64_Default, DataType.I32_Default) \ + .dtype_format(DataType.I32_5HD, DataType.I32_5HD, DataType.I32_5HD) \ + .dtype_format(DataType.I32_5HD, DataType.I64_5HD, DataType.I32_5HD) \ + .dtype_format(DataType.I32_FracZ, DataType.I32_FracZ, DataType.I32_FracZ) \ + .dtype_format(DataType.I32_FracZ, DataType.I64_FracZ, DataType.I32_FracZ) \ + .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default) \ + .dtype_format(DataType.F16_Default, DataType.I64_Default, DataType.F16_Default) \ + .dtype_format(DataType.F16_5HD, DataType.I32_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F16_5HD, DataType.I64_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F16_FracZ, DataType.I32_FracZ, DataType.F16_FracZ) \ + .dtype_format(DataType.F16_FracZ, DataType.I64_FracZ, DataType.F16_FracZ) \ + .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F32_Default, DataType.I64_Default, DataType.F32_Default) \ + .dtype_format(DataType.F32_5HD, DataType.I32_5HD, DataType.F32_5HD) \ + .dtype_format(DataType.F32_5HD, DataType.I64_5HD, DataType.F32_5HD) \ + .dtype_format(DataType.F32_FracZ, DataType.I32_FracZ, DataType.F32_FracZ) \ + .dtype_format(DataType.F32_FracZ, DataType.I64_FracZ, DataType.F32_FracZ) \ + .get_op_info() + + +@op_info_register(sparse_gather_v2_op_info) +def _sparse_gather_v2_tbe(): + """SparseGatherV2 TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/split_d.py b/mindspore/ops/_op_impl/tbe/split_d.py index dcc8219fd4..d2faf31096 100644 --- a/mindspore/ops/_op_impl/tbe/split_d.py +++ b/mindspore/ops/_op_impl/tbe/split_d.py @@ -27,6 +27,7 @@ split_d_op_info = TBERegOp("Split") \ .attr("output_num", "required", "int", "all") \ .input(0, "value", 
False, "required", "all") \ .output(0, "output", False, "dynamic", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \ .dtype_format(DataType.BOOL_NHWC, DataType.BOOL_NHWC) \ .dtype_format(DataType.I8_Default, DataType.I8_Default) \ diff --git a/mindspore/ops/_op_impl/tbe/splitv.py b/mindspore/ops/_op_impl/tbe/splitv.py new file mode 100644 index 0000000000..29f65c7e87 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/splitv.py @@ -0,0 +1,60 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""SplitV op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +split_v_op_info = TBERegOp("SplitV") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("split_v_d.so") \ + .compute_cost(10) \ + .kernel_name("split_v_d") \ + .partial_flag(True) \ + .attr("size_splits", "required", "listInt", "all") \ + .attr("split_dim", "required", "int", "all") \ + .attr("num_split", "required", "int", "all") \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "dynamic", "all") \ + .op_pattern("dynamicFormat") \ + .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.BOOL_NHWC, DataType.BOOL_NHWC) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.I8_NHWC, DataType.I8_NHWC) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default) \ + .dtype_format(DataType.U8_NHWC, DataType.U8_NHWC) \ + .dtype_format(DataType.I16_Default, DataType.I16_Default) \ + .dtype_format(DataType.I16_NHWC, DataType.I16_NHWC) \ + .dtype_format(DataType.U16_Default, DataType.U16_Default) \ + .dtype_format(DataType.U16_NHWC, DataType.U16_NHWC) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I32_NHWC, DataType.I32_NHWC) \ + .dtype_format(DataType.U32_Default, DataType.U32_Default) \ + .dtype_format(DataType.U32_NHWC, DataType.U32_NHWC) \ + .dtype_format(DataType.I64_Default, DataType.I64_Default) \ + .dtype_format(DataType.I64_NHWC, DataType.I64_NHWC) \ + .dtype_format(DataType.U64_Default, DataType.U64_Default) \ + .dtype_format(DataType.U64_NHWC, DataType.U64_NHWC) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F16_NHWC, DataType.F16_NHWC) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F32_NHWC, DataType.F32_NHWC) \ + .get_op_info() + + 
+@op_info_register(split_v_op_info) +def _split_v_tbe(): + """SplitV TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/strided_read.py b/mindspore/ops/_op_impl/tbe/strided_read.py new file mode 100644 index 0000000000..1ebd29f8f2 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/strided_read.py @@ -0,0 +1,38 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""StridedRead op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +strided_read_op_info = TBERegOp("StridedRead") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("strided_read.so") \ + .compute_cost(10) \ + .kernel_name("strided_read") \ + .partial_flag(True) \ + .attr("axis", "required", "int", "all") \ + .attr("stride", "required", "int", "all") \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.I8_5HD, DataType.I8_5HD) \ + .get_op_info() + + +@op_info_register(strided_read_op_info) +def _strided_read_tbe(): + """StridedRead TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/strided_write.py b/mindspore/ops/_op_impl/tbe/strided_write.py new file mode 100644 index 0000000000..feda752b28 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/strided_write.py @@ -0,0 +1,38 @@ +# Copyright 2020 
Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""StridedWrite op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +strided_write_op_info = TBERegOp("StridedWrite") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("strided_write.so") \ + .compute_cost(10) \ + .kernel_name("strided_write") \ + .partial_flag(True) \ + .attr("axis", "required", "int", "all") \ + .attr("stride", "required", "int", "all") \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.I8_5HD, DataType.I8_5HD) \ + .get_op_info() + + +@op_info_register(strided_write_op_info) +def _strided_write_tbe(): + """StridedWrite TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/tensor_add.py b/mindspore/ops/_op_impl/tbe/tensor_add.py index 255c1b1278..a1f21bee77 100644 --- a/mindspore/ops/_op_impl/tbe/tensor_add.py +++ b/mindspore/ops/_op_impl/tbe/tensor_add.py @@ -26,6 +26,7 @@ tensor_add_op_info = TBERegOp("TensorAdd") \ .input(0, "x1", False, "required", "all") \ .input(1, "x2", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ .dtype_format(DataType.F16_Default, 
DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ diff --git a/mindspore/ops/_op_impl/tbe/tensor_scatter_update.py b/mindspore/ops/_op_impl/tbe/tensor_scatter_update.py new file mode 100644 index 0000000000..46d6b20357 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/tensor_scatter_update.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""TensorScatterUpdate op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +tensor_scatter_update_op_info = TBERegOp("TensorScatterUpdate") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("tensor_scatter_update.so") \ + .compute_cost(10) \ + .kernel_name("tensor_scatter_update") \ + .partial_flag(True) \ + .input(0, "x", False, "required", "all") \ + .input(1, "indices", False, "required", "all") \ + .input(2, "updates", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I8_Default, DataType.I32_Default, DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.U8_Default,
DataType.I32_Default, DataType.U8_Default, DataType.U8_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .get_op_info() + + +@op_info_register(tensor_scatter_update_op_info) +def _tensor_scatter_update_tbe(): + """TensorScatterUpdate TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/trans_data.py b/mindspore/ops/_op_impl/tbe/trans_data.py index f961491b37..c0cce302cd 100644 --- a/mindspore/ops/_op_impl/tbe/trans_data.py +++ b/mindspore/ops/_op_impl/tbe/trans_data.py @@ -23,41 +23,112 @@ trans_data_op_info = TBERegOp("TransData") \ .compute_cost(10) \ .kernel_name("trans_data") \ .partial_flag(True) \ - .attr("src_format", "required", "str", "DefaultFormat,NC1HWC0,FracZ,FRACTAL_NZ,HWCN,C1HWNCoC0")\ - .attr("dst_format", "required", "str", "DefaultFormat,NC1HWC0,FracZ,FRACTAL_NZ,HWCN,C1HWNCoC0")\ + .attr("src_format", "required", "str", "DefaultFormat, NC1HWC0, FracZ, FRACTAL_NZ, HWCN, C1HWNCoC0, NDHWC, NHWC") \ + .attr("dst_format", "required", "str", "DefaultFormat, NC1HWC0, FracZ, FRACTAL_NZ, HWCN, C1HWNCoC0, NDHWC, NHWC") \ .input(0, "src", False, "required", "all") \ .output(0, "dst", False, "required", "all") \ - .dtype_format(DataType.U16_Default, DataType.U16_5HD) \ - .dtype_format(DataType.U16_Default, DataType.U16_FracZ) \ - .dtype_format(DataType.U16_Default, DataType.U16_FracNZ) \ - .dtype_format(DataType.U16_FracZ, DataType.U16_Default) \ - .dtype_format(DataType.U16_FracZ, DataType.U16_HWCN) \ - .dtype_format(DataType.U16_FracNZ, DataType.U16_Default) \ - .dtype_format(DataType.U16_5HD, DataType.U16_Default) \ - .dtype_format(DataType.U16_HWCN, DataType.U16_FracZ) \ - .dtype_format(DataType.U16_HWCN, DataType.U16_C1HWNCoC0) \ - .dtype_format(DataType.U16_C1HWNCoC0, DataType.U16_HWCN) \ - .dtype_format(DataType.BOOL_Default, DataType.BOOL_5HD) \ - .dtype_format(DataType.F16_Default, DataType.F16_5HD) \ + .dtype_format(DataType.F32_NHWC, DataType.F32_5HD) \ + 
.dtype_format(DataType.F32_Default, DataType.F32_5HD) \ + .dtype_format(DataType.F32_5HD, DataType.F32_NHWC) \ + .dtype_format(DataType.F32_5HD, DataType.F32_Default) \ + .dtype_format(DataType.F32_FracZ, DataType.F32_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_FracZ) \ + .dtype_format(DataType.F32_HWCN, DataType.F32_FracZ) \ + .dtype_format(DataType.F32_FracZ, DataType.F32_HWCN) \ + .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_HWCN) \ + .dtype_format(DataType.F32_HWCN, DataType.F32_C1HWNCoC0) \ .dtype_format(DataType.F16_Default, DataType.F16_FracZ) \ - .dtype_format(DataType.F16_Default, DataType.F16_FracNZ) \ - .dtype_format(DataType.F16_FracZ, DataType.F16_Default) \ - .dtype_format(DataType.F16_FracZ, DataType.F16_HWCN) \ - .dtype_format(DataType.F16_FracNZ, DataType.F16_Default) \ + .dtype_format(DataType.F16_NHWC, DataType.F16_FracZ) \ + .dtype_format(DataType.F16_HWCN, DataType.F16_FracZ) \ + .dtype_format(DataType.F16_Default, DataType.F16_5HD) \ + .dtype_format(DataType.F16_NHWC, DataType.F16_5HD) \ + .dtype_format(DataType.F16_HWCN, DataType.F16_5HD) \ + .dtype_format(DataType.F16_5HD, DataType.F16_NHWC) \ .dtype_format(DataType.F16_5HD, DataType.F16_Default) \ + .dtype_format(DataType.F16_FracZ, DataType.F16_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_FracZ) \ .dtype_format(DataType.F16_HWCN, DataType.F16_FracZ) \ - .dtype_format(DataType.F16_HWCN, DataType.F16_C1HWNCoC0) \ + .dtype_format(DataType.F16_FracZ, DataType.F16_HWCN) \ .dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_HWCN) \ - .dtype_format(DataType.F32_Default, DataType.F32_5HD) \ - .dtype_format(DataType.F32_Default, DataType.F32_FracZ) \ + .dtype_format(DataType.F16_HWCN, DataType.F16_5HD) \ + .dtype_format(DataType.F16_Default, DataType.F16_FracNZ) \ .dtype_format(DataType.F32_Default, DataType.F32_FracNZ) \ - .dtype_format(DataType.F32_FracZ, DataType.F32_Default) \ - .dtype_format(DataType.F32_FracZ, DataType.F32_HWCN) \ + 
.dtype_format(DataType.F16_FracNZ, DataType.F16_Default) \ .dtype_format(DataType.F32_FracNZ, DataType.F32_Default) \ - .dtype_format(DataType.F32_5HD, DataType.F32_Default) \ - .dtype_format(DataType.F32_HWCN, DataType.F32_FracZ) \ - .dtype_format(DataType.F32_HWCN, DataType.F32_C1HWNCoC0) \ - .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_HWCN) \ + .dtype_format(DataType.BOOL_NHWC, DataType.BOOL_5HD) \ + .dtype_format(DataType.BOOL_Default, DataType.BOOL_5HD) \ + .dtype_format(DataType.BOOL_5HD, DataType.BOOL_NHWC) \ + .dtype_format(DataType.BOOL_5HD, DataType.BOOL_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_NHWC) \ + .dtype_format(DataType.F16_Default, DataType.F16_HWCN) \ + .dtype_format(DataType.F16_NHWC, DataType.F16_Default) \ + .dtype_format(DataType.F16_NHWC, DataType.F16_HWCN) \ + .dtype_format(DataType.F16_HWCN, DataType.F16_Default) \ + .dtype_format(DataType.F16_HWCN, DataType.F16_NHWC) \ + .dtype_format(DataType.F32_Default, DataType.F32_NHWC) \ + .dtype_format(DataType.F32_Default, DataType.F32_HWCN) \ + .dtype_format(DataType.F32_NHWC, DataType.F32_Default) \ + .dtype_format(DataType.F32_NHWC, DataType.F32_HWCN) \ + .dtype_format(DataType.F32_HWCN, DataType.F32_Default) \ + .dtype_format(DataType.F32_HWCN, DataType.F32_NHWC) \ + .dtype_format(DataType.I8_Default, DataType.I8_FracNZ) \ + .dtype_format(DataType.I8_Default, DataType.I8_FracZ) \ + .dtype_format(DataType.I8_Default, DataType.I8_NHWC) \ + .dtype_format(DataType.I8_Default, DataType.I8_HWCN) \ + .dtype_format(DataType.I8_NHWC, DataType.I8_Default) \ + .dtype_format(DataType.I8_NHWC, DataType.I8_HWCN) \ + .dtype_format(DataType.I8_HWCN, DataType.I8_Default) \ + .dtype_format(DataType.I8_HWCN, DataType.I8_NHWC) \ + .dtype_format(DataType.I16_Default, DataType.I16_NHWC) \ + .dtype_format(DataType.I16_Default, DataType.I16_HWCN) \ + .dtype_format(DataType.I16_NHWC, DataType.I16_Default) \ + .dtype_format(DataType.I16_NHWC, DataType.I16_HWCN) \ + 
.dtype_format(DataType.I16_HWCN, DataType.I16_Default) \ + .dtype_format(DataType.I16_HWCN, DataType.I16_NHWC) \ + .dtype_format(DataType.I32_Default, DataType.I32_NHWC) \ + .dtype_format(DataType.I32_Default, DataType.I32_HWCN) \ + .dtype_format(DataType.I32_NHWC, DataType.I32_Default) \ + .dtype_format(DataType.I32_NHWC, DataType.I32_HWCN) \ + .dtype_format(DataType.I32_HWCN, DataType.I32_Default) \ + .dtype_format(DataType.I32_HWCN, DataType.I32_NHWC) \ + .dtype_format(DataType.I64_Default, DataType.I64_NHWC) \ + .dtype_format(DataType.I64_Default, DataType.I64_HWCN) \ + .dtype_format(DataType.I64_NHWC, DataType.I64_Default) \ + .dtype_format(DataType.I64_NHWC, DataType.I64_HWCN) \ + .dtype_format(DataType.I64_HWCN, DataType.I64_Default) \ + .dtype_format(DataType.I64_HWCN, DataType.I64_NHWC) \ + .dtype_format(DataType.U8_Default, DataType.U8_NHWC) \ + .dtype_format(DataType.U8_Default, DataType.U8_HWCN) \ + .dtype_format(DataType.U8_NHWC, DataType.U8_Default) \ + .dtype_format(DataType.U8_NHWC, DataType.U8_HWCN) \ + .dtype_format(DataType.U8_HWCN, DataType.U8_Default) \ + .dtype_format(DataType.U8_HWCN, DataType.U8_NHWC) \ + .dtype_format(DataType.U16_Default, DataType.U16_NHWC) \ + .dtype_format(DataType.U16_Default, DataType.U16_HWCN) \ + .dtype_format(DataType.U16_NHWC, DataType.U16_Default) \ + .dtype_format(DataType.U16_NHWC, DataType.U16_HWCN) \ + .dtype_format(DataType.U16_HWCN, DataType.U16_Default) \ + .dtype_format(DataType.U16_HWCN, DataType.U16_NHWC) \ + .dtype_format(DataType.U32_Default, DataType.U32_NHWC) \ + .dtype_format(DataType.U32_Default, DataType.U32_HWCN) \ + .dtype_format(DataType.U32_NHWC, DataType.U32_Default) \ + .dtype_format(DataType.U32_NHWC, DataType.U32_HWCN) \ + .dtype_format(DataType.U32_HWCN, DataType.U32_Default) \ + .dtype_format(DataType.U32_HWCN, DataType.U32_NHWC) \ + .dtype_format(DataType.U64_Default, DataType.U64_NHWC) \ + .dtype_format(DataType.U64_Default, DataType.U64_HWCN) \ + .dtype_format(DataType.U64_NHWC, 
DataType.U64_Default) \ + .dtype_format(DataType.U64_NHWC, DataType.U64_HWCN) \ + .dtype_format(DataType.U64_HWCN, DataType.U64_Default) \ + .dtype_format(DataType.U64_HWCN, DataType.U64_NHWC) \ + .dtype_format(DataType.I32_FracNZ, DataType.I32_Default) \ + .dtype_format(DataType.F16_NDHWC, DataType.F16_5HD) \ + .dtype_format(DataType.F16_5HD, DataType.F16_NDHWC) \ + .dtype_format(DataType.I8_HWCN, DataType.I8_C1HWNCoC0) \ + .dtype_format(DataType.F16_HWCN, DataType.F16_FracZ) \ + .dtype_format(DataType.F16_FracZ, DataType.F16_HWCN) \ + .dtype_format(DataType.F16_HWCN, DataType.F16_FracNZ) \ + .dtype_format(DataType.F32_HWCN, DataType.F16_FracNZ) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py b/mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py index 5dc07dd59f..b1f81b72b0 100644 --- a/mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +++ b/mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py @@ -27,6 +27,7 @@ unsorted_segment_sum_op_info = TBERegOp("UnsortedSegmentSum") \ .input(0, "x", False, "required", "all") \ .input(1, "segment_ids", False, "required", "all") \ .output(0, "y", False, "required", "all") \ + .op_pattern("dynamicFormat") \ .dtype_format(DataType.I8_Default, DataType.I32_Default, DataType.I8_Default) \ .dtype_format(DataType.I8_5HD, DataType.I32_5HD, DataType.I8_5HD) \ .dtype_format(DataType.U8_Default, DataType.I32_Default, DataType.U8_Default) \ diff --git a/mindspore/ops/_op_impl/tbe/zeros_like.py b/mindspore/ops/_op_impl/tbe/zeros_like.py index 144b0c95cb..7e15a19996 100644 --- a/mindspore/ops/_op_impl/tbe/zeros_like.py +++ b/mindspore/ops/_op_impl/tbe/zeros_like.py @@ -25,18 +25,13 @@ zeros_like_op_info = TBERegOp("ZerosLike") \ .partial_flag(True) \ .input(0, "x", False, "required", "all") \ .output(0, "y", False, "required", "all") \ - .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \ - .dtype_format(DataType.BOOL_5HD, DataType.BOOL_5HD) \ - .dtype_format(DataType.I8_Default, 
DataType.I8_Default) \ - .dtype_format(DataType.I8_5HD, DataType.I8_5HD) \ - .dtype_format(DataType.U8_Default, DataType.U8_Default) \ - .dtype_format(DataType.U8_5HD, DataType.U8_5HD) \ - .dtype_format(DataType.I32_Default, DataType.I32_Default) \ - .dtype_format(DataType.I32_5HD, DataType.I32_5HD) \ - .dtype_format(DataType.F16_Default, DataType.F16_Default) \ - .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ - .dtype_format(DataType.F32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ + .op_pattern("formatAgnostic") \ + .dtype_format(DataType.BOOL_None, DataType.BOOL_None) \ + .dtype_format(DataType.I8_None, DataType.I8_None) \ + .dtype_format(DataType.U8_None, DataType.U8_None) \ + .dtype_format(DataType.I32_None, DataType.I32_None) \ + .dtype_format(DataType.F16_None, DataType.F16_None) \ + .dtype_format(DataType.F32_None, DataType.F32_None) \ .get_op_info() diff --git a/mindspore/ops/_selected_grad_ops.py b/mindspore/ops/_selected_grad_ops.py new file mode 100644 index 0000000000..5da1d53abf --- /dev/null +++ b/mindspore/ops/_selected_grad_ops.py @@ -0,0 +1,50 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +""" resolved grad ops """ +from mindspore.ops.op_selector import new_ops_selector + +op_selector = new_ops_selector( + "mindspore.ops.operations._grad_ops", "mindspore.nn.graph_kernels") + + +@op_selector +class MaximumGrad: + def __call__(self, *args): + pass + + +@op_selector +class MinimumGrad: + def __call__(self, *args): + pass + + +@op_selector +class AbsGrad: + def __call__(self, *args): + pass + + +@op_selector +class BiasAddGrad: + def __call__(self, *args): + pass + + +@op_selector +class TanhGrad: + def __call__(self, *args): + pass diff --git a/mindspore/ops/_selected_ops.py b/mindspore/ops/_selected_ops.py new file mode 100644 index 0000000000..5e125025c9 --- /dev/null +++ b/mindspore/ops/_selected_ops.py @@ -0,0 +1,108 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +""" resolve ops """ +from mindspore.ops.op_selector import new_ops_selector + +op_selector = new_ops_selector( + "mindspore.ops.operations", "mindspore.nn.graph_kernels") +opt_selector = new_ops_selector( + "mindspore.nn.optim", "mindspore.nn.graph_kernels") +nn_selector = new_ops_selector( + "mindspore.nn", "mindspore.nn.graph_kernels") + + +@nn_selector +class BatchNorm2d: + def __call__(self, *args): + pass + + +@op_selector +class ReLU: + def __call__(self, *args): + pass + + +@op_selector +class ReduceMean: + def __call__(self, *args): + pass + + +@op_selector +class BiasAdd: + def __call__(self, *args): + pass + + +@op_selector +class FusedBatchNorm: + def __call__(self, *args): + pass + + +@op_selector +class ApplyMomentum: + def __call__(self, *args): + pass + + +@op_selector +class SoftmaxCrossEntropyWithLogits: + def __call__(self, *args): + pass + + +@op_selector +class LogSoftmax: + def __call__(self, *args): + pass + + +@op_selector +class Tanh: + def __call__(self, *args): + pass + + +@op_selector +class Gelu: + def __call__(self, *args): + pass + + +@op_selector +class LayerNorm: + def __call__(self, *args): + pass + + +@op_selector +class Softmax: + def __call__(self, *args): + pass + + +@op_selector +class LambUpdateWithLR: + def __call__(self, *args): + pass + + +@op_selector +class LambNextMV: + def __call__(self, *args): + pass diff --git a/mindspore/ops/composite/__init__.py b/mindspore/ops/composite/__init__.py index e4c6e35d3a..a531503d94 100644 --- a/mindspore/ops/composite/__init__.py +++ b/mindspore/ops/composite/__init__.py @@ -20,7 +20,7 @@ Pre-defined combination of operators. 
""" -from .base import GradOperation, HyperMap, MultitypeFuncGraph, add_flags, \ +from .base import GradOperation, HyperMap, Map, MultitypeFuncGraph, add_flags, \ grad, grad_all, grad_all_with_sens, grad_by_list, grad_by_list_with_sens, grad_with_sens, \ core, env_get, tail, zip_operation from .clip_ops import clip_by_value diff --git a/mindspore/ops/composite/base.py b/mindspore/ops/composite/base.py index 4306e0c8cb..e283867684 100644 --- a/mindspore/ops/composite/base.py +++ b/mindspore/ops/composite/base.py @@ -18,15 +18,16 @@ """Basic composite operations.""" from functools import partial -from ..._c_expression import EnvInstance_, GradOperation_, HyperMap_, MultitypeFuncGraph_, Tail_, TensorSlice_, \ +from mindspore import context +from ..._c_expression import EnvInstance_, GradOperation_, HyperMap_, Map_, MultitypeFuncGraph_, Tail_, \ TupleAdd_, TupleSlice_, UnpackCall_, ZipOperation_, ListAppend_, TupleGetItemTensor_ from ...common import dtype as mstype -from ...common.api import ms_function +from ...common.api import ms_function, _pynative_exec, _wrap_func from .. import functional as F -from .. 
import operations as P from ...common.parameter import Parameter -__all__ = [EnvInstance_, TensorSlice_, TupleAdd_, TupleSlice_, UnpackCall_, TupleGetItemTensor_] + +__all__ = [EnvInstance_, TupleAdd_, TupleSlice_, UnpackCall_, TupleGetItemTensor_] def add_flags(fn, **flags): @@ -105,14 +106,35 @@ class GradOperation(GradOperation_): GradOperation_.__init__(self, name, get_all, get_by_list, sens_param) self.grad_fn = None self.fn = None + self.need_forward = False def __call__(self, fn, weights=None): grad_ = GradOperation('grad', self.get_all, self.get_by_list, self.sens_param) if self.grad_fn is None or self.fn != fn: if self.get_by_list: - @ms_function(obj=fn) - def after_grad(*args): - return grad_(fn, weights)(*args) + if context.get_context("mode") == context.GRAPH_MODE: + @ms_function(obj=fn) + def after_grad(*args): + return grad_(fn, weights)(*args) + else: + @_wrap_func + def after_grad(*args): + if fn.is_run and not fn.requires_grad: + raise ValueError("obj must set_grad.") + if not fn.is_run: + self.need_forward = True + print("already has forward run before grad by user") + if self.need_forward: + fn.set_grad() + if self.sens_param: + f_args = args[:-1] + fn(*f_args) + else: + fn(*args) + _pynative_exec.grad(grad_, fn, weights, *args) + out = _pynative_exec(*args) + _pynative_exec.clear() + return out else: @ms_function(obj=fn) def after_grad(*args): @@ -219,6 +241,45 @@ class HyperMap(HyperMap_): return func(*args_list) return tuple(map(hypermap, *args_list)) + +class Map(Map_): + """ + Map will apply the set operation on input sequences. + + Which will apply the operations of every elements of the sequence. + + Args: + ops (Union[MultitypeFuncGraph, None]): `ops` is the operation to apply. If `ops` is `None`, + the operations should be putted in the first input of the instance. + + Inputs: + - **args** (Tuple[sequence]) - If `ops` is not `None`, all the inputs should be the same length sequences, + and each row of the sequences. e.g. 
If args length is 2, and for `i` in length of each sequence + `(args[0][i], args[1][i])` will be the input of the operation. + + If `ops` is not `None`, the first input is the operation, and the other is inputs. + + Outputs: + sequence, the output will be same type and same length of sequence from input and the value of each element + is the result of operation apply each row of element. e.g. `operation(args[0][i], args[1][i])`. + """ + + def __init__(self, ops=None): + self.ops = ops + if ops: + Map_.__init__(self, ops) + else: + Map_.__init__(self) + + def __call__(self, *args): + func = self.ops + args_list = args + if self.ops is None: + func = args[0] + args_list = args[1:] + return tuple(map(func, *args_list)) + + class _ListAppend(ListAppend_): """ A metafuncgraph class that append one element to list. @@ -274,33 +335,4 @@ env_get = MultitypeFuncGraph("env_get") @env_get.register("EnvType", "Tensor") def _tensor_env_get(env, parameter): """Used to get env.""" - return F.env_getitem(env, F.ref_to_embed(parameter), F.zeros_like_tensor(parameter)) - - -_mp_cast_helper = MultitypeFuncGraph('mixed_precision_cast_helper') - - -@_mp_cast_helper.register("TypeType", "Number") -@core -def _mixed_precision_cast_helper_1(type_, x): - """if x is float cast to type.""" - # type_ is place holder - return x - - -@_mp_cast_helper.register("TypeType", "Tensor") -@core -def _mixed_precision_cast_helper_2(type_, x): - """if x is float cast to type.""" - if F.issubclass_(F.dtype(x), mstype.float_): - return P.Cast()(x, type_) - return x - -@_mp_cast_helper.register("TypeType", "Tuple") -@core -def _mixed_precision_cast_helper_3(type_, x): - """if x is a tuple""" - t = () - for item in x: - t = t + (_mp_cast_helper(type_, item),) - return t + return F.env_getitem(env, F.ref_to_embed(parameter), F.zeros_like(parameter)) diff --git a/mindspore/ops/composite/multitype_ops/_compile_utils.py b/mindspore/ops/composite/multitype_ops/_compile_utils.py index 8954470b76..906d74948a 100644 
--- a/mindspore/ops/composite/multitype_ops/_compile_utils.py +++ b/mindspore/ops/composite/multitype_ops/_compile_utils.py @@ -18,13 +18,15 @@ from . import _constexpr_utils as const_utils from ... import functional as F from ... import operations as P from ...composite import base +from ....common.tensor import Tensor from ....common import dtype as mstype +from ....common._register_for_tensor import tensor_operator_registry hyper_map = base.HyperMap() pack = P.Pack(axis=-1) -def broadcast(broadcast_shape, x): +def _broadcast(broadcast_shape, x): """Broadcast tensor to the required shape.""" if F.shape(x) == broadcast_shape: return x @@ -34,13 +36,13 @@ def broadcast(broadcast_shape, x): return x -def transform_indexing_tensor(broadcast_shape, final_shape, new_shape, x): +def _transform_indexing_tensor(broadcast_shape, final_shape, new_shape, x): """Transform indexing tensor to the required.""" - x = broadcast(broadcast_shape, x) - return broadcast(final_shape, F.reshape(x, new_shape)) + x = _broadcast(broadcast_shape, x) + return _broadcast(final_shape, F.reshape(x, new_shape)) -def generate_indices_from_tuple_of_tensor(data, tuple_index, op_name): +def _generate_indices_from_tuple_of_tensor(data, tuple_index, op_name): """Generate an indices tensor from a tuple of tensor.""" indices = None check_index_tensor_number = const_utils.check_number_of_index_tensor(F.shape(data), len(tuple_index), op_name) @@ -50,26 +52,31 @@ def generate_indices_from_tuple_of_tensor(data, tuple_index, op_name): if check_dtypes: shape_tuple = hyper_map(F.shape, tuple_index) broadcast_shape = const_utils.generate_broadcast_shape(shape_tuple, op_name) - broadcast_tensors = hyper_map(F.partial(broadcast, broadcast_shape), tuple_index) + broadcast_tensors = hyper_map(F.partial(_broadcast, broadcast_shape), tuple_index) indices = pack(broadcast_tensors) return indices -def generate_indices_from_tuple_of_mixed_tensors(data, tuple_index, op_name): +def 
_generate_indices_from_tuple_of_mixed_tensors(data, tuple_index, op_name): """Generate an indices tensor from a tuple that contains slice, int, ellipsis, tensor.""" indexes_types = hyper_map(F.typeof, tuple_index) int_positions = const_utils.get_pos_of_int_index(indexes_types) - for i in int_positions: - tuple_index = F.tuple_setitem(tuple_index, i, F.scalar_to_tensor(tuple_index[i], mstype.int32)) - indexes_types = hyper_map(F.typeof, tuple_index) + tuple_index_new = () + tuple_len = len(tuple_index) + for i in range(tuple_len): + if i in int_positions: + tuple_index_new = tuple_index_new + (F.scalar_to_tensor(tuple_index[i], mstype.int32),) + else: + tuple_index_new = tuple_index_new + (tuple_index[i],) + indexes_types = hyper_map(F.typeof, tuple_index_new) tensor_positions, slice_positions, ellipsis_position = \ const_utils.separate_mixed_tensors_index(indexes_types, op_name) tensor_indexes = [] slice_indexes = [] for i in tensor_positions: - tensor_indexes.append(tuple_index[i]) + tensor_indexes.append(tuple_index_new[i]) for j in slice_positions: - slice_indexes.append(tuple_index[j]) + slice_indexes.append(tuple_index_new[j]) data_shape = F.shape(data) tensor_indexes_shapes = hyper_map(F.shape, tensor_indexes) tensor_indexes_dtypes = hyper_map(F.dtype, tensor_indexes) @@ -83,14 +90,14 @@ def generate_indices_from_tuple_of_mixed_tensors(data, tuple_index, op_name): slice_number = 0 final_index_tensors = [] - tuple_index_size = len(tuple_index) + tuple_index_size = len(tuple_index_new) index_tensor_new_shape = const_utils.compute_new_shape(broadcast_shape, indexes_shapes_info) for i in range(tuple_index_size): if i in tensor_positions: - transform_tensor = transform_indexing_tensor(broadcast_shape, - final_shape, - index_tensor_new_shape, - tuple_index[i]) + transform_tensor = _transform_indexing_tensor(broadcast_shape, + final_shape, + index_tensor_new_shape, + tuple_index_new[i]) final_index_tensors.append(transform_tensor) if i in slice_positions: 
slice_tensor = const_utils.convert_slice_to_tensor(slice_number, @@ -112,7 +119,7 @@ def generate_indices_from_tuple_of_mixed_tensors(data, tuple_index, op_name): return indices -def generate_updates_from_scalar(data, indices, value, op_type): +def _generate_updates_from_scalar(data, indices, value, op_type): """Generate an updates tensor from a scalar.""" data_shape = F.shape(data) indices_shape = F.shape(indices) @@ -120,7 +127,7 @@ def generate_updates_from_scalar(data, indices, value, op_type): return const_utils.convert_scalar_to_tensor(data_shape, data_dtype, indices_shape, value, op_type) -def generate_updates_from_tuple(data, index, value, op_type): +def _generate_updates_from_tuple(data, index, value, op_type): """Generate an updates tensor from a tuple.""" value_types = hyper_map(F.typeof, value) data_dtype = F.dtype(data) @@ -130,14 +137,14 @@ def generate_updates_from_tuple(data, index, value, op_type): shapes_same = const_utils.check_shapes_same(value_shapes, const_utils.TENSOR_SETITEM) if shapes_same: value = F.pack(value) - return generate_updates_from_tensor(data, index, value, op_type) + return _generate_updates_from_tensor(data, index, value, op_type) data_shape = F.shape(data) index_shape = F.shape(index) return const_utils.convert_tuple_of_scalar_to_tensor(data_shape, data_dtype, index_shape, value, op_type) -def generate_updates_from_tensor(data, index, value, op_type): +def _generate_updates_from_tensor(data, index, value, op_type): """Generate an updates tensor from a tensor.""" data_shape = F.shape(data) index_shape = F.shape(index) @@ -150,5 +157,410 @@ def generate_updates_from_tensor(data, index, value, op_type): updates_shape = const_utils.generate_updates_shape(data_shape, index_shape, op_type) need_broadcast = const_utils.check_two_shapes_need_broadcast(updates_shape, value_shape) if need_broadcast: - return broadcast(updates_shape, value) + return _broadcast(updates_shape, value) return value + + +def _tensor_getitem(self, index): + 
"""Handle tensor getitem""" + if isinstance(index, Tensor): + return tensor_index_by_tensor(self, index) + if isinstance(index, tuple): + return tensor_index_by_tuple(self, index) + if isinstance(index, int): + return _tensor_index_by_integer(self, index) + if isinstance(index, slice): + return tensor_index_by_slice(self, index) + if isinstance(index, bool): + return _tensor_index_by_bool(self, index) + if index is None: + return F.expand_dims(self, 0) + if index is ...: + return self + raise IndexError(f"Only support integers, slices(`:`), ellipsis(`...`), None, bool and tensor with int32, " + f"got {index} with type {type(index)}.") + + +tensor_operator_registry.register("__getitem__", _tensor_getitem) + + +def _tensor_getitem_by_tuple_of_tensor(data, tuple_index): + """Tensor getitem by a tuple of tensor.""" + indices = _generate_indices_from_tuple_of_tensor(data, + tuple_index, + const_utils.TENSOR_GETITEM) + result = F.gather_nd(data, indices) + return result + + +def _tensor_getitem_by_tuple_of_mixed_tensors(data, tuple_index): + """Tensor getitem by a tuple of mixed tensor.""" + indices = _generate_indices_from_tuple_of_mixed_tensors(data, + tuple_index, + const_utils.TENSOR_GETITEM) + result = F.gather_nd(data, indices) + return result + + +def tensor_index_by_slice(data, slice_index): + """Tensor getitem by a single slice""" + shape = F.shape(data) + if not shape: + const_utils.raise_index_error("When tensor is indexed by a slice, the dimension of the tensor cannot be 0.") + begin_strides, end_strides, step_strides = const_utils.get_stride_info_from_slice(shape, slice_index) + return F.strided_slice(data, begin_strides, end_strides, step_strides) + + +def _tensor_index_by_integer(data, number): + """Tensor getitem by a single integer number""" + shape = F.shape(data) + if not shape: + const_utils.raise_index_error("When tensor is indexed by an integer, the dimension of the tensor cannot be 0.") + begin_strides, end_strides, step_strides = 
const_utils.get_stride_info_from_integer(shape, number) + shrink_axis_mask = 1 + return P.StridedSlice(0, 0, 0, 0, shrink_axis_mask)(data, begin_strides, end_strides, step_strides) + + +def _tensor_index_by_bool(data, bool_value): + """Tensor getitem by a single bool value""" + if bool_value: + return F.expand_dims(data, 0) + return const_utils.raise_index_error("When tensor is indexed by a bool object, the value only support 'True'.") + + +def tensor_index_by_number(data, number): + """Tensor getitem by a Number which may be integer/float/bool value""" + number_type = const_utils.check_number_index_type(number) + if number_type == const_utils.BOOL_: + return _tensor_index_by_bool(data, number) + if number_type == const_utils.INT_: + return _tensor_index_by_integer(data, number) + return const_utils.raise_index_error("Only support integers, slices(`:`), ellipsis(`...`), None and bool.") + + +def tensor_index_by_tensor(data, tensor_index): + """Tensor getitem by a single tensor""" + dtype_valid = const_utils.check_index_tensor_dtype(F.dtype(tensor_index), + const_utils.TENSOR_GETITEM) + if dtype_valid: + return F.gather(data, tensor_index, 0) + return const_utils.raise_index_error("For 'tensor getitem', " + "the index tensor data type only support mstype.int32.") + + +def _tensor_index_by_tuple_slice(data, t): + """Tensor getitem by a tuple of slice""" + shape = F.shape(data) + if len(t) > len(shape): + const_utils.raise_index_error("When tensor is indexed by a tuple, " + "the length of the tuple cannot be greater than the dimension of the tensor.") + begin_strides, end_strides, step_strides, shrink_axis_mask = \ + const_utils.get_stride_info_from_tuple(shape, t) + return P.StridedSlice(0, 0, 0, 0, shrink_axis_mask)(data, begin_strides, end_strides, step_strides) + + +def tensor_index_by_tuple(data, tuple_index): + """Tensor getitem by tuple of various types""" + indexes_types = hyper_map(F.typeof, tuple_index) + index_elements_type = 
const_utils.tuple_index_elements_type(indexes_types, const_utils.TENSOR_GETITEM) + if index_elements_type == const_utils.NO_TENSOR: + return _tensor_index_by_tuple_slice(data, tuple_index) + if index_elements_type == const_utils.ALL_TENSOR: + return _tensor_getitem_by_tuple_of_tensor(data, tuple_index) + return _tensor_getitem_by_tuple_of_mixed_tensors(data, tuple_index) + + +def _tensor_setitem(self, index, value): + """Handle tensor getitem""" + if isinstance(index, Tensor): + if isinstance(value, (int, float, bool)): + return tensor_setitem_by_tensor_with_number(self, index, value) + if isinstance(value, Tensor): + return tensor_setitem_by_tensor_with_tensor(self, index, value) + if isinstance(value, tuple): + return tensor_setitem_by_tensor_with_tuple(self, index, value) + if isinstance(index, tuple): + if isinstance(value, (int, float, bool)): + return tensor_setitem_by_tuple_with_number(self, index, value) + if isinstance(value, Tensor): + return tensor_setitem_by_tuple_with_tensor(self, index, value) + if isinstance(value, tuple): + return tensor_setitem_by_tuple_with_tuple(self, index, value) + if isinstance(index, int): + if isinstance(value, (int, float, bool)): + return tensor_setitem_by_number_with_number(self, index, value) + if isinstance(value, Tensor): + return tensor_setitem_by_number_with_tensor(self, index, value) + if isinstance(index, slice): + if isinstance(value, (int, float, bool)): + return tensor_setitem_by_slice_with_number(self, index, value) + if isinstance(value, Tensor): + return tensor_setitem_by_slice_with_tensor(self, index, value) + if isinstance(index, bool): + return _tensor_index_by_bool(self, index) + if index is ...: + if isinstance(value, (int, float, bool)): + return tensor_setitem_by_ellipsis_with_number(self, index, value) + if isinstance(value, Tensor): + return tensor_setitem_by_ellipsis_with_tensor(self, index, value) + raise IndexError("Tensor setitem index only support integers, slices(`:`), ellipsis(`...`), None, 
bool\ + and tensor with int32, got {} with type{}".format(index, type(index))) + + +tensor_operator_registry.register("__setitem__", _tensor_setitem) + + +def _tensor_setitem_by_int_tensor_with_tensor(data, index, value): + """Set a tensor item by a int tensor with a tensor.""" + updates = _generate_updates_from_tensor(data, index, value, + const_utils.SET_ITEM_BY_ONE_TENSOR) + index = F.expand_dims(index, -1) + return P.TensorScatterUpdate()(data, index, updates) + + +def _tensor_setitem_by_bool_tensor_with_tensor(data, index, value): + """Set a tensor item by a bool tensor with a tensor.""" + index_shape = F.shape(index) + data_shape = F.shape(data) + data_shape = const_utils.check_equal(data_shape, index_shape, + "The tensor(shape={}) and tensor index(shape={}) should be the same shape.") + size = F.size(value) + size = const_utils.check_equal(1, size, + "When assign value is a tensor, its size should be {}, but current size is {}.") + dtype = F.dtype(data) + u_cast = F.cast(value, dtype) + one_data = F.ones_like(data) + u = F.tensor_mul(one_data, u_cast) + result = F.select(index, u, data) + return result + + +def tensor_setitem_by_tensor_with_tensor(data, index, value_tensor): + """setitem by tensor index(dtype is int or bool) with tensor as value""" + index_dtype = F.dtype(index) + tensor_dtype = const_utils.get_index_tensor_dtype(index_dtype) + if tensor_dtype == const_utils.INT_: + return _tensor_setitem_by_int_tensor_with_tensor(data, index, value_tensor) + return _tensor_setitem_by_bool_tensor_with_tensor(data, index, value_tensor) + + +def _tensor_setitem_by_bool_tensor_with_scalar(data, index, value): + """Set a tensor item by a bool tensor with a scalar.""" + index_shape = F.shape(index) + shape = F.shape(data) + shape = const_utils.check_equal( + shape, index_shape, "The tensor(shape={}) and tensor index(shape={}) should be the same shape.") + dtype = F.dtype(data) + u = F.fill(dtype, shape, value) + return F.select(index, u, data) + + +def 
_tensor_setitem_by_int_tensor_with_scalar(data, index, value): + """Set a tensor item by a int tensor with a scalar.""" + updates = _generate_updates_from_scalar(data, index, value, + const_utils.SET_ITEM_BY_ONE_TENSOR) + index = F.expand_dims(index, -1) + return P.TensorScatterUpdate()(data, index, updates) + + +def tensor_setitem_by_tensor_with_number(data, index, value): + index_dtype = F.dtype(index) + tensor_dtype = const_utils.get_index_tensor_dtype(index_dtype) + if tensor_dtype == const_utils.BOOL_: + return _tensor_setitem_by_bool_tensor_with_scalar(data, index, value) + if tensor_dtype == const_utils.INT_: + return _tensor_setitem_by_int_tensor_with_scalar(data, index, value) + return const_utils.raise_index_error("For tensor setitem, indexing tensor dtype only supports bool/int") + + +def tensor_setitem_by_tensor_with_tuple(data, index, value): + """Assigns the tensor by tensor with tuple value.""" + index_dtype = F.dtype(index) + check_dtype = const_utils.check_index_tensor_dtype(index_dtype, const_utils.TENSOR_SETITEM) + result = None + if check_dtype: + result = _tensor_setitem_by_tensor_with_tuple(data, index, value) + return result + + +def _tensor_indices_number(data, data_shape, index, indices, value): + """Assigns a scalar value to the tensor.""" + data_size = F.size(data) + data_dtype = F.dtype(data) + indices_size = F.size(indices) + indices_size = const_utils.check_indices(indices_size, index) + update = F.fill(mstype.int32, (indices_size,), 1) + condition_1d = F.scatter_nd(indices, update, (data_size,)) + condition = F.reshape(condition_1d, data_shape) + condition = F.cast(condition, mstype.bool_) + value_fill = F.fill(data_dtype, (indices_size,), value) + value_1d = F.scatter_nd(indices, value_fill, (data_size,)) + u = F.reshape(value_1d, data_shape) + return F.select(condition, u, data) + + +def _tensor_setitem_by_tensor_with_tuple(data, index, value): + """Set a tensor item by a tensor with a tuple.""" + updates = 
_generate_updates_from_tuple(data, index, value, + const_utils.SET_ITEM_BY_ONE_TENSOR) + index = F.expand_dims(index, -1) + result = P.TensorScatterUpdate()(data, index, updates) + return result + + +def tensor_setitem_by_slice_with_number(data, input_slice, value): + """Givens a scalar assign to tensor by slice""" + check_result = const_utils.check_tensor_setitem_index(input_slice) + result = None + if check_result: + data_shape = F.shape(data) + indices = const_utils.slice2indices(input_slice, data_shape) + is_tuple_int = const_utils.tuple_element_is_int(input_slice) + if is_tuple_int: + indices = const_utils.integer_to_indices(input_slice, data_shape) + result = _tensor_indices_number(data, data_shape, input_slice, indices, value) + return result + + +def tensor_setitem_by_tuple_with_number(data, tuple_index, value): + """Assigns the tensor by tuple with number value.""" + indexes_types = hyper_map(F.typeof, tuple_index) + index_elements_type = const_utils.tuple_index_elements_type(indexes_types, const_utils.TENSOR_SETITEM) + + if index_elements_type == const_utils.NO_TENSOR: + return tensor_setitem_by_slice_with_number(data, tuple_index, value) + if index_elements_type == const_utils.ALL_TENSOR: + indices = _generate_indices_from_tuple_of_tensor(data, + tuple_index, + const_utils.TENSOR_SETITEM) + else: + indices = _generate_indices_from_tuple_of_mixed_tensors(data, + tuple_index, + const_utils.TENSOR_SETITEM) + updates = _generate_updates_from_scalar(data, + indices, + value, + const_utils.SET_ITEM_BY_TUPLE_OF_TENSOR) + return P.TensorScatterUpdate()(data, indices, updates) + + +def _tensor_indices_tensor(data, data_shape, index, indices, value): + """Assigns a tensor value to the tensor.""" + data_size = F.size(data) + data_dtype = F.dtype(data) + indices_size = F.size(indices) + indices_size = const_utils.check_indices(indices_size, index) + update = F.fill(mstype.int32, (indices_size,), 1) + condition_1d = F.scatter_nd(indices, update, (data_size,)) + 
condition = F.reshape(condition_1d, data_shape) + condition = F.cast(condition, mstype.bool_) + value_fill = None + value_size = F.size(value) + + value_size = const_utils.check_indices_value_size(indices_size, value_size) + if value_size == 1: + value_fill = F.fill(data_dtype, (indices_size,), 1) + value = F.cast(value, data_dtype) + value_fill = F.tensor_mul(value_fill, value) + elif value_size > 1: + value_fill = F.reshape(value, (indices_size,)) + value_1d = F.scatter_nd(indices, value_fill, (data_size,)) + u = F.reshape(value_1d, data_shape) + return F.select(condition, u, data) + + +def tensor_setitem_by_slice_with_tensor(data, input_slice, value): + """Assigns a tensor value to the tensor by slice.""" + result = None + check_result = const_utils.check_tensor_setitem_index(input_slice) + if check_result: + data_shape = F.shape(data) + indices = const_utils.slice2indices(input_slice, data_shape) + is_tuple_int = const_utils.tuple_element_is_int(input_slice) + if is_tuple_int: + indices = const_utils.integer_to_indices(input_slice, data_shape) + result = _tensor_indices_tensor(data, data_shape, input_slice, indices, value) + return result + + +def tensor_setitem_by_tuple_with_tensor(data, tuple_index, value): + """Assigns the tensor by tuple with tensor value.""" + indexes_types = hyper_map(F.typeof, tuple_index) + index_elements_type = const_utils.tuple_index_elements_type(indexes_types, const_utils.TENSOR_SETITEM) + + if index_elements_type == const_utils.NO_TENSOR: + return tensor_setitem_by_slice_with_tensor(data, tuple_index, value) + if index_elements_type == const_utils.ALL_TENSOR: + indices = _generate_indices_from_tuple_of_tensor(data, + tuple_index, + const_utils.TENSOR_SETITEM) + else: + indices = _generate_indices_from_tuple_of_mixed_tensors(data, + tuple_index, + const_utils.TENSOR_SETITEM) + updates = _generate_updates_from_tensor(data, + indices, + value, + const_utils.SET_ITEM_BY_TUPLE_OF_TENSOR) + return P.TensorScatterUpdate()(data, indices, 
updates) + + +def tensor_setitem_by_tuple_with_tuple(data, tuple_index, value): + """Assigns the tensor by tuple with tuple of value.""" + indexes_types = hyper_map(F.typeof, tuple_index) + index_elements_type = const_utils.tuple_index_elements_type(indexes_types, const_utils.TENSOR_SETITEM) + + if index_elements_type == const_utils.ALL_TENSOR: + indices = _generate_indices_from_tuple_of_tensor(data, + tuple_index, + const_utils.TENSOR_SETITEM) + else: + indices = _generate_indices_from_tuple_of_mixed_tensors(data, + tuple_index, + const_utils.TENSOR_SETITEM) + updates = _generate_updates_from_tuple(data, + indices, + value, + const_utils.SET_ITEM_BY_TUPLE_OF_TENSOR) + return P.TensorScatterUpdate()(data, indices, updates) + + +def tensor_setitem_by_number_with_number(data, index, value): + """Assigns the tensor by number with number value.""" + data_shape = F.shape(data) + indices = const_utils.integer_to_indices(index, data_shape) + return _tensor_indices_number(data, data_shape, index, indices, value) + + +def tensor_setitem_by_number_with_tensor(data, index, value): + """Assigns the tensor by number with tensor value.""" + data_shape = F.shape(data) + indices = const_utils.integer_to_indices(index, data_shape) + return _tensor_indices_tensor(data, data_shape, index, indices, value) + + +def tensor_setitem_by_ellipsis_with_number(data, index, value): + """Assigns the tensor by ellipsis with number value.""" + data_shape = F.shape(data) + data_dtype = F.dtype(data) + return F.fill(data_dtype, data_shape, value) + + +def tensor_setitem_by_ellipsis_with_tensor(data, index, value): + """Assigns the tensor by ellipsis with tensor value.""" + result = None + data_shape = F.shape(data) + data_dtype = F.dtype(data) + data_size = F.size(data) + value_shape = F.shape(value) + value_size = F.size(value) + check_result = const_utils.check_ellipsis_shape_size(data_shape, value_shape, data_size, value_size) + if check_result: + if data_size == value_size: + result = 
F.reshape(value, data_shape) + result = F.cast(result, data_dtype) + elif value_size == 1: + param1 = F.fill(data_dtype, data_shape, 1) + param2 = F.cast(value, data_dtype) + result = F.tensor_mul(param1, param2) + return result diff --git a/mindspore/ops/composite/multitype_ops/_constexpr_utils.py b/mindspore/ops/composite/multitype_ops/_constexpr_utils.py index e4d42aed03..02756ffe56 100644 --- a/mindspore/ops/composite/multitype_ops/_constexpr_utils.py +++ b/mindspore/ops/composite/multitype_ops/_constexpr_utils.py @@ -20,7 +20,6 @@ import numpy as np from ...primitive import constexpr from .... import log as logger -from ...._extends.utils import Slice, Ellipsis_ from ....common import dtype as mstype from ....common.tensor import Tensor from ....ops import _utils as op_utils @@ -41,6 +40,11 @@ SET_ITEM_BY_ONE_TENSOR = 0 SET_ITEM_BY_TUPLE_OF_TENSOR = 1 +@constexpr +def raise_index_error(msg): + raise IndexError(msg) + + @constexpr def check_equal(param1, param2, msg="{},{}"): """Checks whether the two parameters are equal or not.""" @@ -54,7 +58,8 @@ def check_ellipsis_shape_size(data_shape, value_shape, data_size, value_size): """Checks the shape and size of the sensor and value.""" if data_shape == value_shape or data_size == value_size or value_size == 1: return True - raise ValueError("The value(shape={}), can not assign to tensor(shape={}).".format(value_shape, data_shape)) + raise ValueError("The value(shape={}), can not assign to tensor(shape={}).".format( + value_shape, data_shape)) @constexpr @@ -63,16 +68,18 @@ def check_tensor_setitem_index(index, element_type=None): if index is None: raise IndexError("Tensor's index cannot be None.") # eg. Tensor[Slice] = u - if isinstance(index, Slice): + if isinstance(index, slice): return True # eg. Tensor[tuple] = u if isinstance(index, tuple): if not index: raise IndexError("Tensor's index cannot be empty.") - # eg. 
Tensor[tuple(Slice...)] = u - if isinstance(index[0], (Slice, Ellipsis_, int)): - return True - raise IndexError("Index of type '{}' is not supported yet.".format(type(index[0]))) + # eg. Tensor[tuple(Slice,...)] = u + for item in index: + if not isinstance(item, (slice, type(...), int)): + raise IndexError( + "Index of type '{}' is not supported yet.".format(type(item))) + return True # eg. Tensor[Tensor[dtype=bool]] = u if isinstance(index, mstype.tensor_type): if element_type is None or element_type != mstype.bool_: @@ -81,7 +88,8 @@ def check_tensor_setitem_index(index, element_type=None): "{} type is not supported yet.".format(element_type)) return True - raise IndexError("Index of type '{}' is not supported yet.".format(type(index))) + raise IndexError( + "Index of type '{}' is not supported yet.".format(type(index))) @constexpr @@ -116,12 +124,12 @@ def slice_expand(input_slices, shape): index = 0 slices = None # Slice or tuple(Slice...) - if isinstance(input_slices, Slice): + if isinstance(input_slices, slice): slices = (input_slices,) - elif isinstance(input_slices, (tuple, list)) and input_slices and isinstance(input_slices[0], (Slice, Ellipsis_)): + elif isinstance(input_slices, (tuple, list)) and input_slices and isinstance(input_slices[0], (slice, type(...))): is_have_ellipsis = False for _, element in enumerate(input_slices): - if isinstance(element, Ellipsis_): + if isinstance(element, type(...)): is_have_ellipsis = True break if is_have_ellipsis: @@ -130,10 +138,9 @@ def slice_expand(input_slices, shape): slices = input_slices else: raise IndexError("Tensor's index type is not supported yet.") - for s in slices: start = 0 if (s.start is None) else s.start - stop = shape[index] if (s.end is None) else s.end + stop = shape[index] if (s.stop is None) else s.stop step = 1 if (s.step is None) else s.step begin.append(start) end.append(stop) @@ -151,11 +158,11 @@ def ellipsis2slice(input_, shape): """Converts ellipsis to slice.""" input_slice = input_ 
result = [] - if isinstance(input_, Ellipsis_): + if isinstance(input_, type(...)): input_slice = (input_,) ell_count = 0 for _, element in enumerate(input_slice): - if not isinstance(element, Ellipsis_): + if not isinstance(element, type(...)): result.append(element) continue ell_count += 1 @@ -163,7 +170,7 @@ def ellipsis2slice(input_, shape): raise IndexError("There cannot be more than one ellisis (...) in the index of the tensor, " "but it is currently {}".format(input_slice)) for _ in range(len(shape) - len(input_slice) + 1): - result.append(Slice(None, None, None)) + result.append(slice(None, None, None)) return tuple(result) @@ -196,7 +203,8 @@ def slice2indices(input_slices, shape): def check_indices(indices_size, index): """Checks indices whether is empty.""" if indices_size < 1: - raise IndexError("The tensor's index is unreasonable. index:{}".format(index)) + raise IndexError( + "The tensor's index is unreasonable. index:{}".format(index)) return indices_size @@ -230,7 +238,7 @@ def tuple_element_is_slice(indexs): raise IndexError("Tensor's index cannot be empty.") if isinstance(indexs, tuple): for _, ele in enumerate(indexs): - if not isinstance(ele, Slice): + if not isinstance(ele, slice): return False return True return False @@ -285,7 +293,8 @@ def check_value_elements(data_dtype, types): return ALL_TENSOR if scalars_number == len(types): return ALL_SCALAR - raise TypeError(f"For '{TENSOR_SETITEM}', the value does not support scalar and tensor mixing, but got {types}.") + raise TypeError( + f"For '{TENSOR_SETITEM}', the value does not support scalar and tensor mixing, but got {types}.") @constexpr @@ -295,7 +304,8 @@ def get_index_tensor_dtype(dtype): return INT_ if dtype == mstype.bool_: return BOOL_ - raise IndexError(f"For '{TENSOR_SETITEM}', the index tensor data type '{dtype}' is not supported.") + raise IndexError( + f"For '{TENSOR_SETITEM}', the index tensor data type '{dtype}' is not supported.") @constexpr @@ -313,7 +323,8 @@ def 
check_index_tensor_dtype(dtype, op_name): """Check a tensor data type.""" if dtype == mstype.int32: return True - raise IndexError(f"For '{op_name}', the index tensor data type should be mstype.int32, but got {dtype}.") + raise IndexError( + f"For '{op_name}', the index tensor data type should be mstype.int32, but got {dtype}.") @constexpr @@ -332,7 +343,8 @@ def generate_broadcast_shape(shapes, op_name): for i, shape in enumerate(shapes): logger.debug(f"Broadcasts the {i}th tensor, the shape is {shape}.") try: - broadcast_shape = op_utils.get_broadcast_shape(broadcast_shape, shape, op_name) + broadcast_shape = op_utils.get_broadcast_shape( + broadcast_shape, shape, op_name) except ValueError as ex: raise IndexError(ex) return tuple(broadcast_shape) @@ -398,7 +410,8 @@ def convert_ellipsis_to_tensors(slice_number, if isinstance(ele, tuple): shape.extend([1] * len(ele)) if array is None: - raise ValueError(f"For '{op_name}', generate tensors from ellipsis failed.") + raise ValueError( + f"For '{op_name}', generate tensors from ellipsis failed.") array = np.reshape(array, shape) reps = compute_multiples(shape, final_shape) tensor = Tensor(np.tile(array, reps)) @@ -428,7 +441,8 @@ def convert_slice_to_tensor(slice_number, final_shape, indexes_shapes_info, op_n else: shape.append(1) if array is None: - raise ValueError(f"For '{op_name}', generate tensor from 'slice' failed.") + raise ValueError( + f"For '{op_name}', generate tensor from 'slice' failed.") array = np.reshape(array, shape) reps = compute_multiples(shape, final_shape) tensor = Tensor(np.tile(array, reps)) @@ -523,14 +537,15 @@ def generate_index_info_from_tuple_of_mixed_tensors(data_shape, tensor_count += 1 elif isinstance(ele_type, mstype.slice_type): slice_obj = slice(slice_indexes[slice_count].start, - slice_indexes[slice_count].end, + slice_indexes[slice_count].stop, slice_indexes[slice_count].step) # Use list to represent slicing result. 
indexes_info[pos] = list(range(data_shape[pos]))[slice_obj] slice_count += 1 elif isinstance(ele_type, mstype.ellipsis_type): if ellipsis_num != 0: - raise IndexError(f"For '{op_name}', the index could only contain one ellipsis.") + raise IndexError( + f"For '{op_name}', the index could only contain one ellipsis.") ellipsis_occupied_dims = data_rank - indexes_size + 1 for j in range(pos, pos + ellipsis_occupied_dims): # Use list to represent slicing result. @@ -540,7 +555,8 @@ def generate_index_info_from_tuple_of_mixed_tensors(data_shape, raise IndexError(f"For '{op_name}', the index elements only support " f"'Tensor', 'int', 'Slice', 'Ellipsis', but got {ele_type}.") broadcast_shape, final_shape, indexes_shapes_info = \ - _derive_result_shape_info_from_tuple_of_mixed_tensors(indexes_info, index_tensors_info, op_name) + _derive_result_shape_info_from_tuple_of_mixed_tensors( + indexes_info, index_tensors_info, op_name) return broadcast_shape, final_shape, indexes_shapes_info, ellipsis_occupied_dims @@ -556,10 +572,12 @@ def _derive_result_shape_info_from_tuple_of_mixed_tensors(indexes_info, index_te """Derive the resulting shape information from the a tuple index of mixed tensors.""" index_tensor_info_key = list(index_tensors_info.keys()) index_tensor_info_value = list(index_tensors_info.values()) - broadcast_shape = generate_broadcast_shape(index_tensor_info_value, op_name) + broadcast_shape = generate_broadcast_shape( + index_tensor_info_value, op_name) final_shape = [] indexes_shapes_info = [] - mixed_tensors_continuous = _judge_tuple_of_mixed_tensors_continuous(index_tensor_info_key) + mixed_tensors_continuous = _judge_tuple_of_mixed_tensors_continuous( + index_tensor_info_key) if mixed_tensors_continuous: tensor_shape_dealt = False for ele in indexes_info.values(): @@ -638,3 +656,98 @@ def get_np_eps(input_dtype): nptype = mstype.dtype_to_nptype(input_dtype) eps = np.finfo(nptype).eps return float(eps) + + +@constexpr +def check_number_index_type(number): + 
"""Check if it is int or bool number""" + if isinstance(number, bool): + return BOOL_ + if isinstance(number, int): + return INT_ + raise IndexError("Only support integers, slices(`:`), ellipsis(`...`), None and bool, got {0} type is {1} " + .format(number, type(number))) + + +@constexpr +def get_stride_info_from_slice(data_shape, slice_index): + """Get stride info from a python slice""" + begin, end, step = get_slice_stride(data_shape[0], slice_index) + begin_strides = [begin] + end_strides = [end] + step_strides = [step] + for end in data_shape[1:]: + begin_strides.append(0) + end_strides.append(end) + step_strides.append(1) + return tuple(begin_strides), tuple(end_strides), tuple(step_strides) + + +@constexpr +def get_stride_info_from_integer(data_shape, number): + """Get stride info from a integer""" + begin_strides = [number] + end_strides = [number+1] + step_strides = [1] + for end in data_shape[1:]: + begin_strides.append(0) + end_strides.append(end) + step_strides.append(1) + return tuple(begin_strides), tuple(end_strides), tuple(step_strides) + + +def get_slice_stride(dim_size, index_slice): + """Get slice stride info""" + step = 1 if index_slice.step is None else index_slice.step + start_default = 0 + stop_default = dim_size + if step < 0: + start_default = -1 + stop_default = -(dim_size+1) + start = start_default if index_slice.start is None else index_slice.start + stop = stop_default if index_slice.stop is None else index_slice.stop + return start, stop, step + + +@constexpr +def get_stride_info_from_tuple(data_shape, index_tuple): + """Get stride info from a tuple""" + begin_strides = [] + end_strides = [] + step_strides = [] + index_size = len(index_tuple) + data_shape_size = len(data_shape) + shrink_axis = 0 + index_count = 0 + ellipsis_count = 0 + for idx, item in enumerate(index_tuple): + if isinstance(item, slice): + start, stop, step = get_slice_stride(data_shape[idx], item) + begin_strides.append(start) + end_strides.append(stop) + 
step_strides.append(step) + index_count = index_count + 1 + elif isinstance(item, int): + begin_strides.append(item) + end_strides.append(item + 1) + step_strides.append(1) + shrink_axis = shrink_axis + (1 << index_count) + index_count = index_count + 1 + elif item is ...: + ellipsis_count = ellipsis_count + 1 + if ellipsis_count > 1: + raise IndexError("An index can have only one ellipsis (...)") + ellipsis_range_size = data_shape_size - (index_size - 1) + begin_strides.extend([0] * (ellipsis_range_size)) + end_strides.extend( + [i for i in data_shape[index_count: index_count + (ellipsis_range_size)]]) + step_strides.extend([1] * (ellipsis_range_size)) + index_count = index_count + ellipsis_range_size + else: + raise IndexError("Not supported index data type, got ", + item, " type is ", type(item)) + for item in range(index_count, data_shape_size): + begin_strides.append(0) + end_strides.append(data_shape[item]) + step_strides.append(1) + return tuple(begin_strides), tuple(end_strides), tuple(step_strides), shrink_axis diff --git a/mindspore/ops/composite/multitype_ops/div_impl.py b/mindspore/ops/composite/multitype_ops/div_impl.py index c37fcb9c36..85a4e035c0 100644 --- a/mindspore/ops/composite/multitype_ops/div_impl.py +++ b/mindspore/ops/composite/multitype_ops/div_impl.py @@ -47,8 +47,8 @@ def _div_tensor(x, y): Two tensors divide by element. Args: - x (Tensor): x - y (Tensor): The dtype is same as x. + x (Tensor): The first input tensor. + y (Tensor): The second input tensor. Returns: Tensor, has the same dtype as x. 
diff --git a/mindspore/ops/composite/multitype_ops/floordiv_impl.py b/mindspore/ops/composite/multitype_ops/floordiv_impl.py index c1a47f881f..8e9e941309 100644 --- a/mindspore/ops/composite/multitype_ops/floordiv_impl.py +++ b/mindspore/ops/composite/multitype_ops/floordiv_impl.py @@ -34,7 +34,7 @@ def _floordiv_scalar(x, y): @floordiv.register("Tensor", "Tensor") def _floordiv_tensor(x, y): - """Returns x // y where x and y are all tensors and have save dtype.""" + """Returns x // y where x and y are all tensors.""" return F.tensor_floordiv(x, y) diff --git a/mindspore/ops/composite/multitype_ops/getitem_impl.py b/mindspore/ops/composite/multitype_ops/getitem_impl.py index 1295aba87e..ffd5ea4d62 100644 --- a/mindspore/ops/composite/multitype_ops/getitem_impl.py +++ b/mindspore/ops/composite/multitype_ops/getitem_impl.py @@ -15,7 +15,6 @@ """Implementation for getitem.""" from . import _compile_utils as compile_utils -from . import _constexpr_utils as const_utils from .. import base from ... import functional as F @@ -50,29 +49,6 @@ _tuple_slice = _TupleSlice('tuple_slice') """_tuple_slice is an metafuncgraph object which will slice a tuple.""" -class _TensorSlice(base.TensorSlice_): - """ - Slices a tensor. - - Inputs: - data (Tensor): A tensor to be sliced. - s (slice): The index to slice tuple data. - - Outputs: - Tensor, consists of some elements of data. - """ - - def __init__(self, name): - base.TensorSlice_.__init__(self, name) - - def __call__(self, *args): - pass - - -_tensor_slice = _TensorSlice('tensor_slice') -"""_tensor_slice is an metafuncgraph object which will slice a tensor.""" - - class _TupleGetItemTensor(base.TupleGetItemTensor_): """ Getting item of tuple by tensor index. @@ -182,13 +158,13 @@ def _tensor_getitem_by_number(data, number_index): Outputs: Tensor, element type is as same as the element type of data. 
""" - return _tensor_slice(data, number_index) + return compile_utils.tensor_index_by_number(data, number_index) @getitem.register("Tensor", "None") def _tensor_getitem_by_none(data, index): """ - Getting item of tensor by None. + For none indexing , expand data with one dim. Inputs: data (Tensor): A tensor. @@ -197,7 +173,7 @@ def _tensor_getitem_by_none(data, index): Outputs: Tensor, element type is as same as the element type of data. """ - return _tensor_slice(data, index) + return F.expand_dims(data, 0) @getitem.register("Tensor", "Slice") @@ -212,13 +188,13 @@ def _tensor_getitem_by_slice(data, slice_index): Outputs: Tensor, element type is same as the element type of data. """ - return _tensor_slice(data, slice_index) + return compile_utils.tensor_index_by_slice(data, slice_index) @getitem.register("Tensor", "Tensor") def _tensor_getitem_by_tensor(data, tensor_index): """ - Getting item of tensor by slice. + Getting item of tensor by tensor indice. Inputs: data (Tensor): A tensor. @@ -227,18 +203,13 @@ def _tensor_getitem_by_tensor(data, tensor_index): Outputs: Tensor, element type is same as the element type of data. """ - check_dtypes = const_utils.check_index_tensor_dtype(F.dtype(tensor_index), - const_utils.TENSOR_GETITEM) - result = None - if check_dtypes: - result = F.gather(data, tensor_index, 0) - return result + return compile_utils.tensor_index_by_tensor(data, tensor_index) @getitem.register("Tensor", "Tuple") def _tensor_getitem_by_tuple(data, tuple_index): """ - Getting item of tensor by slice tuple. + Getting item of tensor by tuple. Inputs: data (Tensor): A tensor. @@ -247,13 +218,7 @@ def _tensor_getitem_by_tuple(data, tuple_index): Outputs: Tensor, element type is same as the element type of data. 
""" - indexes_types = compile_utils.hyper_map(F.typeof, tuple_index) - index_elements_type = const_utils.tuple_index_elements_type(indexes_types, const_utils.TENSOR_GETITEM) - if index_elements_type == const_utils.NO_TENSOR: - return _tensor_slice(data, tuple_index) - if index_elements_type == const_utils.ALL_TENSOR: - return _tensor_getitem_by_tuple_of_tensor(data, tuple_index) - return _tensor_getitem_by_tuple_of_mixed_tensors(data, tuple_index) + return compile_utils.tensor_index_by_tuple(data, tuple_index) @getitem.register("Tensor", "Ellipsis") @@ -268,22 +233,4 @@ def _tensor_getitem_by_ellipsis(data, ellipsis_index): Outputs: Tensor, same as data. """ - return _tensor_slice(data, ellipsis_index) - - -def _tensor_getitem_by_tuple_of_tensor(data, tuple_index): - """Tensor getitem by a tuple of tensor.""" - indices = compile_utils.generate_indices_from_tuple_of_tensor(data, - tuple_index, - const_utils.TENSOR_GETITEM) - result = F.gather_nd(data, indices) - return result - - -def _tensor_getitem_by_tuple_of_mixed_tensors(data, tuple_index): - """Tensor getitem by a tuple of mixed tensor.""" - indices = compile_utils.generate_indices_from_tuple_of_mixed_tensors(data, - tuple_index, - const_utils.TENSOR_GETITEM) - result = F.gather_nd(data, indices) - return result + return data diff --git a/mindspore/ops/composite/multitype_ops/greater_equal_impl.py b/mindspore/ops/composite/multitype_ops/greater_equal_impl.py index 2073abb762..93f1acbc54 100644 --- a/mindspore/ops/composite/multitype_ops/greater_equal_impl.py +++ b/mindspore/ops/composite/multitype_ops/greater_equal_impl.py @@ -25,7 +25,7 @@ greater_equal = base.MultitypeFuncGraph("greater_equal") @greater_equal.register("Number", "Number") def _greater_equal_scala(x, y): """ - Determine whether x is greater equal than y + Determine whether x is greater equal than y. Args: x(Number): Number. 
diff --git a/mindspore/ops/composite/multitype_ops/greater_impl.py b/mindspore/ops/composite/multitype_ops/greater_impl.py index 7bbf53da49..2f3a2dbb83 100644 --- a/mindspore/ops/composite/multitype_ops/greater_impl.py +++ b/mindspore/ops/composite/multitype_ops/greater_impl.py @@ -23,7 +23,7 @@ greater = base.MultitypeFuncGraph("greater") @greater.register("Number", "Number") -def _greater_scala(x, y): +def _greater_scalar(x, y): """ Determine whether two numbers are greater. @@ -48,6 +48,6 @@ def _greater_tensor(x, y): y(Tensor): Tensor. Returns: - tensor, return operation of x and y by P.Greater + tensor, return operation of x and y by P.Greater. """ return F.tensor_gt(x, y) diff --git a/mindspore/ops/composite/multitype_ops/less_equal_impl.py b/mindspore/ops/composite/multitype_ops/less_equal_impl.py index dc1438da2c..5927c4b349 100644 --- a/mindspore/ops/composite/multitype_ops/less_equal_impl.py +++ b/mindspore/ops/composite/multitype_ops/less_equal_impl.py @@ -25,7 +25,7 @@ less_equal = base.MultitypeFuncGraph("less_equal") @less_equal.register("Number", "Number") def _less_equal_scala(x, y): """ - Determine whether x is less equal than y + Determine whether x is less equal than y. Args: x(Number): Number. @@ -41,7 +41,7 @@ def _less_equal_scala(x, y): @less_equal.register("Tensor", "Tensor") def _less_equal_tensor(x, y): """ - Determine whether tensor x is less equal than tensor y elementwise + Determine whether tensor x is less equal than tensor y elementwise. Args: x(Tensor): Tensor. 
diff --git a/mindspore/ops/composite/multitype_ops/logic_not_impl.py b/mindspore/ops/composite/multitype_ops/logic_not_impl.py index 35ae766433..6705145a64 100644 --- a/mindspore/ops/composite/multitype_ops/logic_not_impl.py +++ b/mindspore/ops/composite/multitype_ops/logic_not_impl.py @@ -25,13 +25,13 @@ logical_not = base.MultitypeFuncGraph("logical_not") @logical_not.register("Number") def _logical_not_scala(x): """ - Return logical not operation result of x + Return logical not operation result of x. Args: x(Number): Number. Returns: - bool, Return logical not operation result of x + bool, Return logical not operation result of x. """ return F.bool_not(x.__bool__()) @@ -39,10 +39,24 @@ def _logical_not_scala(x): @logical_not.register("Tensor") def _logical_not_tensor(x): """ - Return logical not operation result of x + Return logical not operation result of x. Args: x(Tensor): Tensor. Returns: - Tensor, Return logical not operation result of x + Tensor, Return logical not operation result of x. """ - return F.logical_not(x) + return F.logical_not(x) + + +@logical_not.register("Tuple") +def _logical_not_tuple(x): + """ + Return logical not operation result of a tuple object. + + Args: + x(Tuple): The input tuple. + + Returns: + bool, Return logical not operation result of x. + """ + return F.bool_not(x.__bool__()) diff --git a/mindspore/ops/composite/multitype_ops/logical_and_impl.py b/mindspore/ops/composite/multitype_ops/logical_and_impl.py index 324ce3a78d..79001f43e8 100644 --- a/mindspore/ops/composite/multitype_ops/logical_and_impl.py +++ b/mindspore/ops/composite/multitype_ops/logical_and_impl.py @@ -25,14 +25,14 @@ logical_and = base.MultitypeFuncGraph("logical_and") @logical_and.register("Number", "Number") def _logical_and_scala(x, y): """ - Return logical and operation result of x and y + Return logical and operation result of x and y. Args: x(Number): Number. y(Number): Number. 
Returns: - bool, Return logical and operation result of x and y + bool, Return logical and operation result of x and y. """ return F.bool_and(x.__bool__(), y.__bool__()) @@ -40,13 +40,13 @@ def _logical_and_scala(x, y): @logical_and.register("Tensor", "Tensor") def _logical_and_tensor(x, y): """ - Return logical and operation result of x and y + Return logical and operation result of x and y. Args: x(Tensor): Tensor. y(Tensor): Tensor. Returns: - Tensor, Return logical and operation result of x and y + Tensor, Return logical and operation result of x and y. """ return F.logical_and(x, y) diff --git a/mindspore/ops/composite/multitype_ops/logical_or_impl.py b/mindspore/ops/composite/multitype_ops/logical_or_impl.py index fd106f7685..6d070d5cbf 100644 --- a/mindspore/ops/composite/multitype_ops/logical_or_impl.py +++ b/mindspore/ops/composite/multitype_ops/logical_or_impl.py @@ -25,14 +25,14 @@ logical_or = base.MultitypeFuncGraph("logical_or") @logical_or.register("Number", "Number") def _logical_or_scala(x, y): """ - Return logical or operation result of x and y + Return logical or operation result of x and y. Args: x(Number): Number. y(Number): Number. Returns: - bool, Return logical or operation result of x and y + bool, Return logical or operation result of x and y. """ return F.bool_or(x.__bool__(), y.__bool__()) @@ -40,13 +40,13 @@ def _logical_or_scala(x, y): @logical_or.register("Tensor", "Tensor") def _logical_or_tensor(x, y): """ - Return logical operation or result of x and y + Return logical operation or result of x and y. Args: x(Tensor): Tensor. y(Tensor): Tensor. Returns: - Tensor, Return logical operation or result of x and y + Tensor, Return logical operation or result of x and y. 
""" - return F.logical_or(x, y) + return F.logical_or(x, y) diff --git a/mindspore/ops/composite/multitype_ops/mod_impl.py b/mindspore/ops/composite/multitype_ops/mod_impl.py index e9947677ac..4b6a13bbc8 100644 --- a/mindspore/ops/composite/multitype_ops/mod_impl.py +++ b/mindspore/ops/composite/multitype_ops/mod_impl.py @@ -34,7 +34,7 @@ def _mod_scalar(x, y): @mod.register("Tensor", "Tensor") def _mod_tensor(x, y): - """Returns x % y where x and y are all tensors and have save dtype.""" + """Returns x % y where x and y are all tensors.""" return F.tensor_mod(x, y) diff --git a/mindspore/ops/composite/multitype_ops/mul_impl.py b/mindspore/ops/composite/multitype_ops/mul_impl.py index ce9ec391af..b5535df135 100644 --- a/mindspore/ops/composite/multitype_ops/mul_impl.py +++ b/mindspore/ops/composite/multitype_ops/mul_impl.py @@ -40,7 +40,7 @@ def _mul_scalar(x, y): @mul.register("Tensor", "Tensor") def _mul_tensor(x, y): """ - Returns x * y by element-wise where x and y are all tensors and have same dtype. + Returns x * y by element-wise where x and y are all tensors. Outputs: Tensor, has the same dtype as x. diff --git a/mindspore/ops/composite/multitype_ops/setitem_impl.py b/mindspore/ops/composite/multitype_ops/setitem_impl.py index 53659c6205..38cf0141f0 100644 --- a/mindspore/ops/composite/multitype_ops/setitem_impl.py +++ b/mindspore/ops/composite/multitype_ops/setitem_impl.py @@ -16,10 +16,8 @@ """Implementation for setitem.""" from . import _compile_utils as compile_utils -from . import _constexpr_utils as const_utils from ... import functional as F from ...composite import base -from ....common import dtype as mstype setitem = base.MultitypeFuncGraph('setitem') @@ -139,11 +137,7 @@ def _tensor_setitem_by_tensor_with_tensor(data, index, value_tensor): Outputs: Tensor, element type and shape is same as data. 
""" - index_dtype = F.dtype(index) - tensor_dtype = const_utils.get_index_tensor_dtype(index_dtype) - if tensor_dtype == const_utils.INT_: - return _tensor_setitem_by_int_tensor_with_tensor(data, index, value_tensor) - return _tensor_setitem_by_bool_tensor_with_tensor(data, index, value_tensor) + return compile_utils.tensor_setitem_by_tensor_with_tensor(data, index, value_tensor) @setitem.register("Tensor", "Tensor", "Number") @@ -166,11 +160,7 @@ def _tensor_setitem_by_tensor_with_number(data, index, value): Outputs: Tensor, element type and shape is same as data. """ - index_dtype = F.dtype(index) - tensor_dtype = const_utils.get_index_tensor_dtype(index_dtype) - if tensor_dtype == const_utils.BOOL_: - return _tensor_setitem_by_bool_tensor_with_scalar(data, index, value) - return _tensor_setitem_by_int_tensor_with_scalar(data, index, value) + return compile_utils.tensor_setitem_by_tensor_with_number(data, index, value) @setitem.register("Tensor", "Tuple", "Number") @@ -191,24 +181,7 @@ def _tensor_setitem_by_tuple_with_number(data, tuple_index, value): Outputs: Tensor, element type and shape is same as data. 
""" - indexes_types = compile_utils.hyper_map(F.typeof, tuple_index) - index_elements_type = const_utils.tuple_index_elements_type(indexes_types, const_utils.TENSOR_SETITEM) - - if index_elements_type == const_utils.NO_TENSOR: - return _tensor_assgin_number(data, tuple_index, value) - if index_elements_type == const_utils.ALL_TENSOR: - indices = compile_utils.generate_indices_from_tuple_of_tensor(data, - tuple_index, - const_utils.TENSOR_SETITEM) - else: - indices = compile_utils.generate_indices_from_tuple_of_mixed_tensors(data, - tuple_index, - const_utils.TENSOR_SETITEM) - updates = compile_utils.generate_updates_from_scalar(data, - indices, - value, - const_utils.SET_ITEM_BY_TUPLE_OF_TENSOR) - return F.scatter_nd_update(data, indices, updates) + return compile_utils.tensor_setitem_by_tuple_with_number(data, tuple_index, value) @setitem.register("Tensor", "Tuple", "Tensor") @@ -229,24 +202,7 @@ def _tensor_setitem_by_tuple_with_tensor(data, tuple_index, value): Outputs: Tensor, element type and shape is same as data. 
""" - indexes_types = compile_utils.hyper_map(F.typeof, tuple_index) - index_elements_type = const_utils.tuple_index_elements_type(indexes_types, const_utils.TENSOR_SETITEM) - - if index_elements_type == const_utils.NO_TENSOR: - return _tensor_assgin_tensor(data, tuple_index, value) - if index_elements_type == const_utils.ALL_TENSOR: - indices = compile_utils.generate_indices_from_tuple_of_tensor(data, - tuple_index, - const_utils.TENSOR_SETITEM) - else: - indices = compile_utils.generate_indices_from_tuple_of_mixed_tensors(data, - tuple_index, - const_utils.TENSOR_SETITEM) - updates = compile_utils.generate_updates_from_tensor(data, - indices, - value, - const_utils.SET_ITEM_BY_TUPLE_OF_TENSOR) - return F.scatter_nd_update(data, indices, updates) + return compile_utils.tensor_setitem_by_tuple_with_tensor(data, tuple_index, value) @setitem.register("Tensor", "Tuple", "Tuple") @@ -268,22 +224,7 @@ def _tensor_setitem_by_tuple_with_tuple(data, tuple_index, value): Outputs: Tensor, element type and shape is same as data. """ - indexes_types = compile_utils.hyper_map(F.typeof, tuple_index) - index_elements_type = const_utils.tuple_index_elements_type(indexes_types, const_utils.TENSOR_SETITEM) - - if index_elements_type == const_utils.ALL_TENSOR: - indices = compile_utils.generate_indices_from_tuple_of_tensor(data, - tuple_index, - const_utils.TENSOR_SETITEM) - else: - indices = compile_utils.generate_indices_from_tuple_of_mixed_tensors(data, - tuple_index, - const_utils.TENSOR_SETITEM) - updates = compile_utils.generate_updates_from_tuple(data, - indices, - value, - const_utils.SET_ITEM_BY_TUPLE_OF_TENSOR) - return F.scatter_nd_update(data, indices, updates) + return compile_utils.tensor_setitem_by_tuple_with_tuple(data, tuple_index, value) @setitem.register("Tensor", "Tensor", "Tuple") @@ -299,12 +240,7 @@ def _tensor_setitem_by_tensor_v2(data, index, value): Outputs: Tensor, element type and shape is same as data. 
""" - index_dtype = F.dtype(index) - check_dtype = const_utils.check_index_tensor_dtype(index_dtype, const_utils.TENSOR_SETITEM) - result = None - if check_dtype: - result = _tensor_setitem_by_tensor_with_tuple(data, index, value) - return result + return compile_utils.tensor_setitem_by_tensor_with_tuple(data, index, value) @setitem.register("Tensor", "Slice", "Tensor") @@ -326,7 +262,7 @@ def _tensor_setitem_with_slice_v3(data, input_slice, value): Outputs: Tensor, element type and shape is same as data. """ - return _tensor_assgin_tensor(data, input_slice, value) + return compile_utils.tensor_setitem_by_slice_with_tensor(data, input_slice, value) @setitem.register("Tensor", "Slice", "Number") @@ -348,168 +284,28 @@ def _tensor_setitem_with_slice_v1(data, input_slice, value): Outputs: Tensor, element type and shape is same as data. """ - return _tensor_assgin_number(data, input_slice, value) - - -def _tensor_assgin_number(data, input_slice, value): - """Givens a scalar assign to tensor by slice""" - check_result = const_utils.check_tensor_setitem_index(input_slice) - result = None - if check_result: - data_shape = F.shape(data) - indices = const_utils.slice2indices(input_slice, data_shape) - is_tuple_int = const_utils.tuple_element_is_int(input_slice) - if is_tuple_int: - indices = const_utils.integer_to_indices(input_slice, data_shape) - result = _tensor_indices_number(data, data_shape, input_slice, indices, value) - return result + return compile_utils.tensor_setitem_by_slice_with_number(data, input_slice, value) @setitem.register("Tensor", "Number", "Number") def _tensor_setitem_with_int_v1(data, index, value): """Syntax: A[1] = 3""" - data_shape = F.shape(data) - indices = const_utils.integer_to_indices(index, data_shape) - return _tensor_indices_number(data, data_shape, index, indices, value) + return compile_utils.tensor_setitem_by_number_with_number(data, index, value) @setitem.register("Tensor", "Number", "Tensor") def _tensor_setitem_with_int_v2(data, 
index, value): """Syntax: A[1] = Tensor""" - data_shape = F.shape(data) - indices = const_utils.integer_to_indices(index, data_shape) - return _tensor_indices_tensor(data, data_shape, index, indices, value) + return compile_utils.tensor_setitem_by_number_with_tensor(data, index, value) @setitem.register("Tensor", "Ellipsis", "Number") def _tensor_setitem_with_ellipsis_v1(data, index, value): """Syntax: A[...] = number.""" - data_shape = F.shape(data) - data_dtype = F.dtype(data) - return F.fill(data_dtype, data_shape, value) + return compile_utils.tensor_setitem_by_ellipsis_with_number(data, index, value) @setitem.register("Tensor", "Ellipsis", "Tensor") def _tensor_setitem_with_ellipsis_v2(data, index, value): """Syntax: A[...] = Tensor.""" - result = None - data_shape = F.shape(data) - data_dtype = F.dtype(data) - data_size = F.size(data) - value_shape = F.shape(value) - value_size = F.size(value) - check_result = const_utils.check_ellipsis_shape_size(data_shape, value_shape, data_size, value_size) - if check_result: - if data_size == value_size: - result = F.reshape(value, data_shape) - result = F.cast(result, data_dtype) - elif value_size == 1: - param1 = F.fill(data_dtype, data_shape, 1) - param2 = F.cast(value, data_dtype) - result = F.tensor_mul(param1, param2) - return result - - -def _tensor_assgin_tensor(data, input_slice, value): - """Assigns a tensor value to the tensor by slice.""" - result = None - check_result = const_utils.check_tensor_setitem_index(input_slice) - if check_result: - data_shape = F.shape(data) - indices = const_utils.slice2indices(input_slice, data_shape) - is_tuple_int = const_utils.tuple_element_is_int(input_slice) - if is_tuple_int: - indices = const_utils.integer_to_indices(input_slice, data_shape) - result = _tensor_indices_tensor(data, data_shape, input_slice, indices, value) - return result - - -def _tensor_indices_tensor(data, data_shape, index, indices, value): - """Assigns a tensor value to the tensor.""" - data_size = 
F.size(data) - data_dtype = F.dtype(data) - indices_size = F.size(indices) - indices_size = const_utils.check_indices(indices_size, index) - update = F.fill(mstype.int32, (indices_size,), 1) - condition_1d = F.scatter_nd(indices, update, (data_size,)) - condition = F.reshape(condition_1d, data_shape) - condition = F.cast(condition, mstype.bool_) - value_fill = None - value_size = F.size(value) - - value_size = const_utils.check_indices_value_size(indices_size, value_size) - if value_size == 1: - value_fill = F.fill(data_dtype, (indices_size,), 1) - value = F.cast(value, data_dtype) - value_fill = F.tensor_mul(value_fill, value) - elif value_size > 1: - value_fill = F.reshape(value, (indices_size,)) - value_1d = F.scatter_nd(indices, value_fill, (data_size,)) - u = F.reshape(value_1d, data_shape) - return F.select(condition, u, data) - - -def _tensor_indices_number(data, data_shape, index, indices, value): - """Assigns a scalar value to the tensor.""" - data_size = F.size(data) - data_dtype = F.dtype(data) - indices_size = F.size(indices) - indices_size = const_utils.check_indices(indices_size, index) - update = F.fill(mstype.int32, (indices_size,), 1) - condition_1d = F.scatter_nd(indices, update, (data_size,)) - condition = F.reshape(condition_1d, data_shape) - condition = F.cast(condition, mstype.bool_) - value_fill = F.fill(data_dtype, (indices_size,), value) - value_1d = F.scatter_nd(indices, value_fill, (data_size,)) - u = F.reshape(value_1d, data_shape) - return F.select(condition, u, data) - - -def _tensor_setitem_by_tensor_with_tuple(data, index, value): - """Set a tensor item by a tensor with a tuple.""" - updates = compile_utils.generate_updates_from_tuple(data, index, value, - const_utils.SET_ITEM_BY_ONE_TENSOR) - result = F.scatter_update(data, index, updates) - return result - - -def _tensor_setitem_by_int_tensor_with_scalar(data, index, value): - """Set a tensor item by a int tensor with a scalar.""" - updates = 
compile_utils.generate_updates_from_scalar(data, index, value, - const_utils.SET_ITEM_BY_ONE_TENSOR) - return F.scatter_update(data, index, updates) - - -def _tensor_setitem_by_bool_tensor_with_scalar(data, index, value): - """Set a tensor item by a bool tensor with a scalar.""" - index_shape = F.shape(index) - shape = F.shape(data) - shape = const_utils.check_equal( - shape, index_shape, "The tensor(shape={}) and tensor index(shape={}) should be the same shape.") - dtype = F.dtype(data) - u = F.fill(dtype, shape, value) - return F.select(index, u, data) - - -def _tensor_setitem_by_int_tensor_with_tensor(data, index, value): - """Set a tensor item by a int tensor with a tensor.""" - updates = compile_utils.generate_updates_from_tensor(data, index, value, - const_utils.SET_ITEM_BY_ONE_TENSOR) - return F.scatter_update(data, index, updates) - - -def _tensor_setitem_by_bool_tensor_with_tensor(data, index, value): - """Set a tensor item by a bool tensor with a tensor.""" - index_shape = F.shape(index) - data_shape = F.shape(data) - data_shape = const_utils.check_equal(data_shape, index_shape, - "The tensor(shape={}) and tensor index(shape={}) should be the same shape.") - size = F.size(value) - size = const_utils.check_equal(1, size, - "When assign value is a tensor, its size should be {}, but current size is {}.") - dtype = F.dtype(data) - u_cast = F.cast(value, dtype) - one_data = F.ones_like(data) - u = F.tensor_mul(one_data, u_cast) - result = F.select(index, u, data) - return result + return compile_utils.tensor_setitem_by_ellipsis_with_tensor(data, index, value) diff --git a/mindspore/ops/composite/multitype_ops/sub_impl.py b/mindspore/ops/composite/multitype_ops/sub_impl.py index 431a58b991..864b8678d4 100644 --- a/mindspore/ops/composite/multitype_ops/sub_impl.py +++ b/mindspore/ops/composite/multitype_ops/sub_impl.py @@ -34,7 +34,7 @@ def _sub_scalar(x, y): @sub.register("Tensor", "Tensor") def _sub_tensor(x, y): - """Returns x - y where x and y are all 
tensors and have save dtype.""" + """Returns x - y where x and y are all tensors.""" return F.tensor_sub(x, y) diff --git a/mindspore/ops/composite/multitype_ops/zeros_like_impl.py b/mindspore/ops/composite/multitype_ops/zeros_like_impl.py index 1308bfd62a..9732d84fdc 100644 --- a/mindspore/ops/composite/multitype_ops/zeros_like_impl.py +++ b/mindspore/ops/composite/multitype_ops/zeros_like_impl.py @@ -57,7 +57,7 @@ def _zeros_like_func(x): @zeros_like_leaf.register("Tensor") def _zeros_like_tensor(x): """Returns a tensor with the same shape and dtype as x and all elements ars 1.""" - return F.zeros_like_tensor(x) + return F.zeros_like(x) @zeros_like_leaf.register("TypeType") diff --git a/mindspore/ops/functional.py b/mindspore/ops/functional.py index 6559d9b2ab..5637274bfb 100644 --- a/mindspore/ops/functional.py +++ b/mindspore/ops/functional.py @@ -21,11 +21,13 @@ from mindspore.common._register_for_tensor import tensor_operator_registry from .primitive import Primitive from . import operations as P from .operations import _grad_ops +from .._extends import builtin_operations as BP typeof = Primitive('typeof') hastype = Primitive('hastype') cast = P.Cast() dtype = P.DType() +isconstant = Primitive('is_constant') issubclass_ = P.IsSubClass() @@ -76,6 +78,9 @@ gather_nd = P.GatherNd() scatter_update = P.ScatterUpdate() scatter_nd_update = P.ScatterNdUpdate() pack = P.Pack() +partial = P.Partial() +# depend: mount a node to another node +depend = P.Depend() tuple_setitem = Primitive('tuple_setitem') @@ -126,15 +131,13 @@ is_ = Primitive("is_") is_not = Primitive("is_not") in_dict = Primitive("in_dict") not_in_dict = Primitive("not_in_dict") +mixed_precision_cast = Primitive("mixed_precision_cast") broadcast_gradient_args = Primitive('BroadcastGradientArgs') dot = Primitive('dot') array_reduce = Primitive('array_reduce') -partial = Primitive('partial') -zeros_like_tensor = Primitive('zeros_like_tensor') +zeros_like = P.ZerosLike() identity = Primitive('identity') 
distribute = Primitive('distribute') -# depend: mount a node to another node -depend = Primitive('depend') embed = Primitive('embed') ref_to_embed = _grad_ops.RefToEmbed() env_setitem = Primitive('env_setitem') @@ -151,7 +154,17 @@ shape_mul = Primitive("shape_mul") stop_gradient = Primitive("stop_gradient") tensor_operator_registry.register('__add__', tensor_add) +tensor_operator_registry.register('__sub__', tensor_sub) tensor_operator_registry.register('__mul__', tensor_mul) -tensor_operator_registry.register('__div__', tensor_div) +tensor_operator_registry.register('__truediv__', tensor_div) #ms cannot support Tensor(True) compare tensor_operator_registry.register('__eq__', equal) +tensor_operator_registry.register('__ne__', not_equal) +tensor_operator_registry.register('__neg__', neg_tensor) +tensor_operator_registry.register('__lt__', tensor_lt) +tensor_operator_registry.register('__le__', tensor_le) +tensor_operator_registry.register('__gt__', tensor_gt) +tensor_operator_registry.register('__ge__', tensor_ge) +tensor_operator_registry.register('shape', shape) +#support GE backend for no compare operators +tensor_operator_registry.register('vm_compare', BP.vm_compare) diff --git a/mindspore/ops/op_info_register.py b/mindspore/ops/op_info_register.py index 3096e90250..a7a60b7181 100644 --- a/mindspore/ops/op_info_register.py +++ b/mindspore/ops/op_info_register.py @@ -97,6 +97,7 @@ class RegOp: """ if not isinstance(value, str): raise TypeError("%s value must be str" % str(value)) + return True def _is_int(self, value): """ @@ -110,6 +111,7 @@ class RegOp: """ if not isinstance(value, int): raise TypeError("%s value must be int" % str(value)) + return True def _is_bool(self, value): """ @@ -123,6 +125,7 @@ class RegOp: """ if not isinstance(value, bool): raise TypeError("%s value must be bool" % str(value)) + return True def _check_param(self, param_list, key_list, fn_list, kwargs): """ @@ -494,6 +497,7 @@ class DataType: The current list below maybe not 
completed. If necessary, please add it. """ + None_None = ("", "") BOOL_None = ("bool", "") BOOL_Default = ("bool", "DefaultFormat") BOOL_5HD = ("bool", "NC1HWC0") diff --git a/mindspore/ops/op_selector.py b/mindspore/ops/op_selector.py new file mode 100644 index 0000000000..bdd00ac7f1 --- /dev/null +++ b/mindspore/ops/op_selector.py @@ -0,0 +1,120 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +""" +A factory class that create op selector instance to config switch on a class, +which can be used to control the switch of op type: GraphKernel or Primitive. +""" +import importlib +import inspect +from mindspore import context + + +class _OpSelector: + """ + A helper class, which can be used to choose different type of operator. + + When an instance of this class is called, we return the right operator + according to the context['enable_graph_kernel'] and the name of the + parameter. returned operator will be a GraphKernel op ora Primitive op. 
+ + Args: + op (class): an empty class has an operator name as its class name + config_optype (str): operator type, which must be either 'GraphKernel' + or 'Primitive' + graph_kernel_pkg (str): real operator's package name + primitive_pkg (str): graph kernel operator's package name + + Examples: + >>> class A: pass + >>> selected_op = _OpSelector(A, "GraphKernel", + >>> "graph_kernel.ops.pkg", "primitive.ops.pkg") + >>> # selected_op() will call graph_kernel.ops.pkg.A() + """ + GRAPH_KERNEL = "GraphKernel" + PRIMITIVE = "Primitive" + DEFAULT_OP_TYPE = PRIMITIVE + KW_STR = "op_type" + + def __init__(self, op, config_optype, primitive_pkg, graph_kernel_pkg): + self.op_name = op.__name__ + self.config_optype = config_optype + self.graph_kernel_pkg = graph_kernel_pkg + self.primitive_pkg = primitive_pkg + + def __call__(self, *args, **kwargs): + _op_type = _OpSelector.DEFAULT_OP_TYPE + if context.get_context("enable_graph_kernel"): + if _OpSelector.KW_STR in kwargs: + _op_type = kwargs.get(_OpSelector.KW_STR) + kwargs.pop(_OpSelector.KW_STR, None) + elif self.config_optype is not None: + _op_type = self.config_optype + if _op_type == _OpSelector.GRAPH_KERNEL: + pkg = self.graph_kernel_pkg + else: + pkg = self.primitive_pkg + op = getattr(importlib.import_module(pkg, __package__), self.op_name) + return op(*args, **kwargs) + + +def new_ops_selector(primitive_pkg, graph_kernel_pkg): + """ + A factory method to return an op selector + + When the GraphKernel switch is on: + `context.get_context('enable_graph_kernel') == True`, we have 2 ways to control the op type: + (1). call the real op with an extra parameter `op_type='Primitive'` or `op_type='GraphKernel'` + (2). pass a parameter to the op selector, like `@op_selector('Primitive')` or + `@op_selector('GraphKernel')` + (3). default op type is PRIMITIVE + The order of the highest priority to lowest priority is (1), (2), (3) + If the GraphKernel switch is off, then op_type will always be PRIMITIVE. 
+ + Args: + primitive_pkg (str): primitive op's package name + graph_kernel_pkg (str): graph kernel op's package name + + Returns: + returns an op selector, which can control what operator should be actually called. + + Examples: + >>> op_selector = new_ops_selector("primitive_pkg.some.path", + >>> "graph_kernel_pkg.some.path") + >>> @op_selector + >>> class ReduceSum: pass + """ + + def op_selector(cls_or_optype): + + _primitive_pkg = primitive_pkg + _graph_kernel_pkg = graph_kernel_pkg + + def direct_op_type(): + darg = None + if cls_or_optype is None: + pass + elif not inspect.isclass(cls_or_optype): + darg = cls_or_optype + return darg + + if direct_op_type() is not None: + def deco_cls(_real_cls): + return _OpSelector(_real_cls, direct_op_type(), _primitive_pkg, _graph_kernel_pkg) + return deco_cls + + return _OpSelector(cls_or_optype, direct_op_type(), _primitive_pkg, _graph_kernel_pkg) + + return op_selector diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 47c14f592c..beed99f713 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -22,16 +22,16 @@ A collection of operators to build nerual networks or computing functions. 
from .image_ops import (CropAndResize) from .array_ops import (Argmax, Argmin, Cast, Concat, Pack, Unpack, Diag, DiagPart, DType, ExpandDims, Eye, - Fill, GatherNd, GatherV2, InvertPermutation, + Fill, GatherNd, GatherV2, SparseGatherV2, InvertPermutation, IsInstance, IsSubClass, ArgMaxWithValue, OnesLike, ZerosLike, - Rank, Reshape, ResizeNearestNeighbor, ArgMinWithValue, Range, + Rank, Reshape, ResizeNearestNeighbor, ArgMinWithValue, SameTypeShape, ScatterAdd, ScatterMax, ScatterUpdate, ScalarToArray, ScalarToTensor, ScatterNd, ScatterNdUpdate, Select, - Shape, Size, Slice, Split, EmbeddingLookup, - Squeeze, StridedSlice, Tile, + Shape, Size, Slice, Split, + Squeeze, StridedSlice, Tile, TensorScatterUpdate, Transpose, TruncatedNormal, TupleToArray, UnsortedSegmentMin, UnsortedSegmentSum, SpaceToDepth, DepthToSpace, SpaceToBatch, BatchToSpace, - SpaceToBatchND, BatchToSpaceND, ReverseSequence) + SpaceToBatchND, BatchToSpaceND, BroadcastTo, InplaceUpdate, ReverseSequence) from .comm_ops import (AllGather, AllReduce, _AlltoAll, ReduceScatter, Broadcast, _MirrorOperator, ReduceOp, _VirtualDataset, _VirtualDiv, _GetTensorSlice, @@ -41,27 +41,29 @@ from .debug_ops import (ImageSummary, InsertGradientOf, HookBackward, ScalarSumm from .control_ops import ControlDepend, GeSwitch, Merge from .inner_ops import ScalarCast -from .math_ops import (Abs, ACos, Asin, Asinh, AddN, AssignAdd, AssignSub, Atan2, BatchMatMul, BitwiseAnd, BitwiseOr, BitwiseXor, +from .math_ops import (Abs, ACos, Asin, Asinh, AddN, AccumulateNV2, AssignAdd, AssignSub, Atan2, BatchMatMul, BitwiseAnd, BitwiseOr, + BitwiseXor, Inv, Invert, ApproximateEqual, InplaceAdd, InplaceSub, ReduceMax, ReduceMin, ReduceMean, ReduceSum, ReduceAll, ReduceProd, CumProd, - Cos, Div, Equal, EqualCount, Exp, Erf, Erfc, Floor, FloorDiv, FloorMod, Acosh, - Greater, GreaterEqual, Less, LessEqual, Log, Log1p, LogicalAnd, + Cos, Div, DivNoNan, Equal, EqualCount, Exp, Expm1, Erf, Erfc, Floor, FloorDiv, FloorMod, Ceil, + Acosh, 
Greater, GreaterEqual, Less, LessEqual, Log, Log1p, LogicalAnd, LogicalNot, LogicalOr, MatMul, Maximum, Minimum, Mul, Neg, NMSWithMask, NotEqual, NPUAllocFloatStatus, NPUClearFloatStatus, NPUGetFloatStatus, Pow, RealDiv, IsNan, IsInf, IsFinite, FloatStatus, - Reciprocal, CumSum, + Reciprocal, CumSum, HistogramFixedWidth, Sin, Sqrt, Rsqrt, BesselI0e, BesselI1e, - Square, Sub, TensorAdd, Sign, Round, SquareSumAll, Atan, Atanh) -from .random_ops import (RandomChoiceWithMask, RandomCategorical) -from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm, + Square, Sub, TensorAdd, Sign, Round, SquareSumAll, Atan, Atanh, Cosh, Sinh, Eps) + +from .random_ops import (RandomChoiceWithMask, Normal, RandomCategorical) +from .nn_ops import (LSTM, SGD, Adam, SparseApplyAdam, SparseApplyLazyAdam, ApplyMomentum, BatchNorm, BiasAdd, Conv2D, DepthwiseConv2dNative, DropoutDoMask, DropoutGrad, Dropout, - DropoutGenMask, Flatten, FusedBatchNorm, + DropoutGenMask, Flatten, FusedBatchNorm, BNTrainingReduce, BNTrainingUpdate, Gelu, Elu, GetNext, L2Normalize, LayerNorm, L2Loss, CTCLoss, LogSoftmax, - MaxPool, + MaxPool, DataFormatDimMap, AvgPool, Conv2DBackpropInput, ConfusionMulGrad, MaxPoolWithArgmax, OneHot, Pad, MirrorPad, PReLU, ReLU, ReLU6, ReLUV2, HSwish, HSigmoid, ResizeBilinear, Sigmoid, @@ -72,19 +74,24 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm, SparseSoftmaxCrossEntropyWithLogits, Tanh, TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl, SparseApplyFtrl, ApplyProximalAdagrad, SparseApplyProximalAdagrad, - ApplyRMSProp, ApplyCenteredRMSProp, BasicLSTMCell) -from .other_ops import Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, CheckValid, MakeRefKey, CheckBprop + ApplyAdaMax, ApplyAdadelta, ApplyAdagrad, ApplyAdagradV2, + ApplyRMSProp, ApplyCenteredRMSProp, BasicLSTMCell, InTopK) +from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, + CheckValid, MakeRefKey, Partial, Depend, CheckBprop) from . 
import _quant_ops from ._quant_ops import * from .thor_ops import * __all__ = [ + 'ReverseSequence', + 'CropAndResize', 'TensorAdd', 'Argmax', 'Argmin', 'ArgMaxWithValue', 'ArgMinWithValue', 'AddN', + 'AccumulateNV2', 'Sub', 'CumSum', 'MatMul', @@ -92,6 +99,7 @@ __all__ = [ 'Mul', 'Pow', 'Exp', + 'Expm1', 'Rsqrt', 'Sqrt', 'Square', @@ -99,10 +107,14 @@ __all__ = [ 'Flatten', 'MaxPoolWithArgmax', 'FusedBatchNorm', + 'BNTrainingReduce', + 'BNTrainingUpdate', 'BatchNorm', 'MaxPool', 'TopK', 'Adam', + 'SparseApplyAdam', + 'SparseApplyLazyAdam', 'Softplus', 'Softmax', 'LogSoftmax', @@ -121,6 +133,7 @@ __all__ = [ 'Transpose', 'OneHot', 'GatherV2', + 'SparseGatherV2', 'Concat', 'Pack', 'Unpack', @@ -132,14 +145,15 @@ __all__ = [ 'StridedSlice', 'ReduceSum', 'ReduceMean', - 'Range', 'LayerNorm', - 'EmbeddingLookup', 'Rank', 'Less', 'LessEqual', 'RealDiv', 'Div', + 'DivNoNan', + 'Inv', + 'Invert', 'TruncatedNormal', 'Fill', 'OnesLike', @@ -148,7 +162,6 @@ __all__ = [ 'Split', 'ReLU', 'ReLU6', - 'ReLUV2', 'Elu', 'Erf', 'Erfc', @@ -157,6 +170,7 @@ __all__ = [ 'HSigmoid', 'Tanh', 'RandomChoiceWithMask', + 'Normal', 'RandomCategorical', 'ResizeBilinear', 'ScalarSummary', @@ -173,6 +187,8 @@ __all__ = [ 'DropoutGrad', 'Dropout', 'Neg', + 'InplaceAdd', + 'InplaceSub', 'Slice', 'DType', 'NPUAllocFloatStatus', @@ -204,15 +220,19 @@ __all__ = [ 'ScatterNd', 'ScatterMax', 'ResizeNearestNeighbor', + 'HistogramFixedWidth', 'Pad', 'MirrorPad', 'GatherNd', + 'TensorScatterUpdate', 'ScatterUpdate', 'ScatterNdUpdate', 'Floor', 'NMSWithMask', 'IOU', 'MakeRefKey', + 'Partial', + 'Depend', 'AvgPool', # Back Primitive 'Equal', @@ -245,10 +265,12 @@ __all__ = [ 'SigmoidCrossEntropyWithLogits', 'FloorDiv', 'FloorMod', + 'Ceil', 'Acosh', 'Asinh', "PReLU", "Cos", + "Cosh", "ACos", "Diag", "DiagPart", @@ -257,6 +279,7 @@ __all__ = [ 'AssignAdd', 'AssignSub', "Sin", + "Sinh", "Asin", "LSTM", "Abs", @@ -268,11 +291,16 @@ __all__ = [ "Sign", "LARSUpdate", "Round", + "Eps", "ApplyFtrl", 
"SpaceToBatch", "SparseApplyFtrl", "ApplyProximalAdagrad", "SparseApplyProximalAdagrad", + "ApplyAdaMax", + "ApplyAdadelta", + "ApplyAdagrad", + "ApplyAdagradV2", "BatchToSpace", "Atan2", "ApplyRMSProp", @@ -289,8 +317,12 @@ __all__ = [ "Atan", "Atanh", "BasicLSTMCell", + "BroadcastTo", + "DataFormatDimMap", + "ApproximateEqual", + "InplaceUpdate", + "InTopK", "CropAndResize" ] -__all__.extend(_quant_ops.__all__) __all__.sort() diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py index 008f5f0edb..c3f97b9f33 100644 --- a/mindspore/ops/operations/_grad_ops.py +++ b/mindspore/ops/operations/_grad_ops.py @@ -21,6 +21,7 @@ from ..primitive import Primitive, PrimitiveWithInfer, prim_attr_register from ..._checkparam import Validator as validator, Rel from .._utils import get_concat_offset from ...common import dtype as mstype +from .. import functional as F class AbsGrad(PrimitiveWithInfer): @@ -404,6 +405,33 @@ class FusedBatchNormGrad(Primitive): def __call__(self, dy, x, scale, save_mean, save_inv_variance): raise NotImplementedError +class BNTrainingReduceGrad(PrimitiveWithInfer): + """Gradients of FusedBatchNorm operation.""" + + @prim_attr_register + def __init__(self, epsilon=0.0001): + _inputs = ['grads', 'x', 'diff_scale', 'diff_offset', 'scale', 'batch_mean', 'batch_variance'] + self.init_prim_io_names(inputs=_inputs, outputs=['y']) + + def infer_shape(self, grads, x, diff_scale, diff_offset, scale, batch_mean, batch_variance): + return grads + + def infer_dtype(self, grads, x, diff_scale, diff_offset, scale, batch_mean, batch_variance): + return grads + +class BNTrainingUpdateGrad(PrimitiveWithInfer): + """Gradients of FusedBatchNorm operation.""" + + @prim_attr_register + def __init__(self, epsilon=0.0001): + self.init_prim_io_names(inputs=['grads', 'x', 'batch_mean', 'batch_variance'], + outputs=['diff_scale', 'diff_offset']) + + def infer_shape(self, grads, x, batch_mean, batch_variance): + return (batch_mean, 
batch_variance) + + def infer_dtype(self, grads, x, batch_mean, batch_variance): + return (batch_mean, batch_variance) class GeluGrad(PrimitiveWithInfer): """Gradients of Gelu operation.""" @@ -1065,6 +1093,18 @@ class StridedSliceGrad(PrimitiveWithInfer): self.init_prim_io_names(inputs=['dy', 'shapex', 'begin', 'end', 'strides'], outputs=['output']) def __infer__(self, dy, shapex, begin, end, strides): + args = {"dy": dy['dtype']} + validator.check_tensor_type_same(args, mstype.number_type, self.name) + + for idx, item in enumerate(shapex['value']): + validator.check_value_type("shapex[%d]" % idx, item, [int], self.name) + for idx, item in enumerate(begin['value']): + validator.check_value_type("begin[%d]" % idx, item, [int], self.name) + for idx, item in enumerate(end['value']): + validator.check_value_type("end[%d]" % idx, item, [int], self.name) + for idx, item in enumerate(strides['value']): + validator.check_value_type("strides[%d]" % idx, item, [int], self.name) + return {'shape': shapex['value'], 'dtype': dy['dtype'], 'value': None} @@ -1121,6 +1161,37 @@ class MirrorPadGrad(PrimitiveWithInfer): 'value': None} +class EmbeddingLookupCommGrad(PrimitiveWithInfer): + """ + Perform the gradient for the communication part of EmbeddingLookup operator. + + This works ONLY when 'reduce_scatter_flag' is True in 'EmbeddingLookup'. Roughly speaking, + this primitive is implemented by StridedSlice --> HostAllGather --> Concat. This primitive runs on host. + """ + @prim_attr_register + def __init__(self): + self.init_prim_io_names(inputs=['dy', 'split_num'], outputs=['output']) + self.add_prim_attr('primitive_target', 'CPU') + + def __infer__(self, dy, split_num): + """ + This primitive is implemented by three steps: + 1) Split the 'dy' along dimension 0 into 'split_num' parts. + 2) For each part, perform HostAllGather((0, 1, 2, 3, 4, 5, 6, 7)) on the host. + 3) After HostAllGather, there are still 'split_num' parts in each process. 
Then, perform Concat on them + along dimension 0. + + The output shape of this primitive: shape(output)[0] == shape(dy)[0] * 8 + """ + dy_shape = tuple(dy['shape']) + split_num_value = split_num['value'] + validator.check_value_type("split_num_value", split_num_value, [int], self.name) + dy_shape_all = F.tuple_setitem(dy_shape, 0, dy_shape[0] * 8) + return {'shape': dy_shape_all, + 'dtype': dy['dtype'], + 'value': None} + + class RefToEmbed(Primitive): r""" Make a key from Ref. @@ -1276,3 +1347,20 @@ class BasicLSTMCellInputGrad(PrimitiveWithInfer): validator.check_type_name("dgate", dgate_dtype, [mstype.float16, mstype.float32], self.name) validator.check_type_name("w", w_dtype, [mstype.float16, mstype.float32], self.name) return (dgate_dtype, dgate_dtype) + + +class InvGrad(PrimitiveWithInfer): + """Computes gradients for inv operation.""" + + @prim_attr_register + def __init__(self): + pass + + def infer_shape(self, x, grad): + validator.check("x_shape", x, "grad_shape", grad, Rel.EQ, self.name) + return x + + def infer_dtype(self, x, grad): + validator.check_type_name("dgate", x, [mstype.float16, mstype.float32, mstype.int32, mstype.int8], self.name) + validator.check_type_name("grad", grad, [mstype.float16, mstype.float32, mstype.int32, mstype.int8], self.name) + return x diff --git a/mindspore/ops/operations/_inner_ops.py b/mindspore/ops/operations/_inner_ops.py index 2f9970eb0c..49834fc168 100644 --- a/mindspore/ops/operations/_inner_ops.py +++ b/mindspore/ops/operations/_inner_ops.py @@ -15,9 +15,10 @@ """Inner operators.""" +from ..._checkparam import Rel from ..._checkparam import Validator as validator from ...common import dtype as mstype -from ..primitive import PrimitiveWithInfer, prim_attr_register +from ..primitive import PrimitiveWithInfer, prim_attr_register class ExtractImagePatches(PrimitiveWithInfer): @@ -98,6 +99,167 @@ class ExtractImagePatches(PrimitiveWithInfer): return input_x +class Range(PrimitiveWithInfer): + r""" + Creates a sequence 
of numbers. + Set `input_x` as :math:`x_i` for each element, `output` as follows: + + .. math:: + \text{output}(x_i) = x_i * \text{delta} + \text{start} + + Args: + start (float): If `limit` is `None`, the value acts as limit in the range and first entry + defaults to `0`. Otherwise, it acts as first entry in the range. + limit (float): Acts as upper limit of sequence. If `None`, defaults to the value of `start` + while set the first entry of the range to `0`. It can not be equal to `start`. + delta (float): Increment of the range. It can not be equal to zero. Default: 1.0. + + Inputs: + - **input_x** (Tensor) - The assistant data. A `1-D` tensor of type float32 or int32. + + Outputs: + Tensor, has the same shape and dtype as `input_x`. + + Examples: + >>> range = P.Range(1.0, 8.0, 2.0) + >>> x = Tensor(np.array([1, 2, 3, 2]), mindspore.int32) + >>> range(x) + [3, 5, 7, 5] + """ + + @prim_attr_register + def __init__(self, start, limit=None, delta=1.0): + self.init_prim_io_names(inputs=['x'], outputs=['y']) + self.delta = validator.check_value_type("delta", delta, [float], self.name) + validator.check_value_type("start", start, [float], self.name) + if limit is None: + self.start = 0.0 + self.limit = start + self.add_prim_attr("start", self.start) + self.add_prim_attr("limit", self.limit) + else: + validator.check_value_type("limit", limit, [float], self.name) + validator.check('start', self.start, 'limit', self.limit, Rel.NE, self.name) + if self.delta == 0.0: + raise ValueError("The input of `delta` can not be equal to zero.") + if self.delta > 0.0 and self.start > self.limit: + raise ValueError(f"Limit should be greater than start when delta:{self.delta} is more than zero, " + f"but got start:{self.start}, limit:{self.limit}") + if self.delta < 0.0 and self.start < self.limit: + raise ValueError(f"Start should be greater than limit when delta:{self.delta} is less than zero, " + f"but got start:{self.start}, limit:{self.limit}") + + def infer_shape(self, 
x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + validator.check_tensor_type_same({'x_dtype': x_dtype}, [mstype.float32, mstype.int32], self.name) + return x_dtype + + +class AscendQuant(PrimitiveWithInfer): + r""" + Returns the quantized value of input_x. + + If `sqrt_mode` is False: + + .. math:: + y = round(scale * x + offset) + + If `sqrt_mode` is True: + + .. math:: + y = round(scale * x * scale + offset) + + Note: + This operation only support Ascend 310 inference environment. + + Args: + scale (float) : Specifies the scaling ratio. + offset (float): Specifies the offset. + sqrt_mode (bool) : Specifies whether to perform square root on `scale`. Default: False. + round_mode (str): Specifies the way to round. Should be one of ["Round", "Floor", "Ceil", "Trunc"]. + Default: "Round". + + Inputs: + - **input_x** (Tensor) : Input tensor. Its data type should be mindspore.float16 or mindspore.float32. + + Outputs: + - Tensor: The quantized output tensor of type mindspore.int8. + + Examples: + >>> input_x = Tensor([100.0, 150.0], mstype.float32) + >>> quant = P.AscendQuant(80.0, 0.0, False, "Round") + >>> y = quant(input_x) + """ + + @prim_attr_register + def __init__(self, scale, offset, sqrt_mode=False, round_mode="Round"): + self.scale = validator.check_value_type("scale", scale, [float], self.name) + self.offset = validator.check_value_type("offset", offset, [float], self.name) + self.sqrt_mode = validator.check_value_type("sqrt_mode", sqrt_mode, [bool], self.name) + self.round_mode = validator.check_string("round_mode", round_mode, + ["Round", "Floor", "Ceil", "Trunc"], self.name) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_type): + validator.check_subclass("input_x", x_type, mstype.tensor, self.name) + validator.check_type_name("input_x", x_type, [mstype.float16, mstype.float32], self.name) + return mstype.int8 + + +class AscendDequant(PrimitiveWithInfer): + r""" + Returns the dequantized value of input_x. 
+ This operation will do ReLU to the dequantized value if `relu_flag` is True. + + If `sqrt_mode` is False: + + .. math:: + y = x * deq\_scale + + If `sqrt_mode` is True: + + .. math:: + y = x * deq\_scale * deq\_scale + + Note: + This operation only support Ascend 310 inference environment. + + Args: + sqrt_mode (bool) : Specifies whether to perform square root on `scale`. Default: False. + relu_flag (bool): Specifies whether to perform ReLU. Default: False. + + Inputs: + - **input_x** (Tensor) : Input tensor. Should be mindspore.int32. + - **deq_scale** (Tensor) : Specifies the scaling ratio. + Data type should be mindspore.float16 or mindspore.uint64 + + Outputs: + - Tensor: The quantized output tensor of type mindspore.float16. + + Examples: + >>> input_x = Tensor([100.0, 150.0], mstype.float32) + >>> dequant = P.AscendDequant(False, False) + >>> y = dequant(input_x) + """ + @prim_attr_register + def __init__(self, sqrt_mode=False, relu_flag=False): + self.sqrt_mode = validator.check_value_type("sqrt_mode", sqrt_mode, [bool], self.name) + self.relu_flag = validator.check_value_type("relu_flag", relu_flag, [bool], self.name) + + def infer_shape(self, x_shape, deq_scale_shape): + return x_shape + + def infer_dtype(self, x_type, deq_scale_type): + validator.check_subclass("x", x_type, mstype.tensor, self.name) + validator.check_type_name("x", x_type, [mstype.int32], self.name) + validator.check_type_name("deq_scale", deq_scale_type, [mstype.float16, mstype.uint64], self.name) + return mstype.float16 + + class EmbeddingLookup(PrimitiveWithInfer): """ Returns a slice of input tensor based on the specified indices. @@ -166,3 +328,183 @@ class EmbeddingLookup(PrimitiveWithInfer): 'dtype': params['dtype'], 'value': None} return out + + +class LinSpace(PrimitiveWithInfer): + r""" + Generates values in an interval. And return the corresponding interpolation accroding to assist. + + Inputs: + - **assist** (Tensor[float32]) - The assist value, With shape of 0-D or 1-D. 
+ - **start** (Tensor[float32]) - The start of interval, With shape of 0-D. + - **stop** (Tensor[float32]) - The end of interval, With shape of 0-D. + - **num** (Tensor[int32]) - ticks number in the interval, the ticks include start and stop value. + With shape of 0-D. + + Outputs: + Tensor, has the same shape as `assist`. + + Examples: + >>> linspace = P.LinSpace() + >>> assist = Tensor([5, 5.5], mindspore.float32) + >>> start = Tensor(1, mindspore.float32) + >>> stop = Tensor(10, mindspore.float32) + >>> num = Tensor(5, mindspore.int32) + >>> output = linspace(assist, start, stop, num) + [12.25, 13.375] + """ + + @prim_attr_register + def __init__(self): + pass + + def infer_shape(self, assist, start, stop, num): + return assist + + def infer_dtype(self, assist, start, stop, num): + args = {"num": num} + validator.check_tensor_type_same(args, (mstype.int32,), self.name) + args = {"assist": assist, "start": start, "stop": stop} + validator.check_tensor_type_same(args, (mstype.float32,), self.name) + return assist + + +class MatrixDiag(PrimitiveWithInfer): + """ + Returns a batched diagonal tensor with a given batched diagonal values. + + Inputs: + - **x** (Tensor) - A tensor which to be element-wise multi by `assist`. It can be of the following data types: + float32, float16, int32, int8, uint8. + - **assist** (Tensor) - A eye tensor of the same type as `x`. It's rank must greater than or equal to 2 and + it's last dimension must equal to the second to last dimension. + + Outputs: + Tensor, has the same type and shape as input `assist`. + + Examples: + >>> x = Tensor(np.array([1, -1]), mstype.float32) + >>> assist = Tensor(np.arange(-12, 0).reshape(3, 2, 2), mindspore.float32) + >>> matrix_diag = P.MatrixDiag() + >>> result = matrix_diag(x, assist) + [[[-12. 11.] + [-10. 9.]] + [[ -8. 7.] + [ -6. 5.]] + [[ -4. 3.] + [ -2. 
1.]]] + """ + + @prim_attr_register + def __init__(self): + """init MatrixDiag""" + + def infer_dtype(self, x_dtype, assist_dtype): + valid_type = [mstype.float16, mstype.float32, mstype.int32, mstype.int8, mstype.uint8] + args = {"x": x_dtype, "assist": assist_dtype} + validator.check_tensor_type_same(args, valid_type, self.name) + return x_dtype + + def infer_shape(self, x_shape, assist_shape): + validator.check_integer("assist rank", len(assist_shape), 2, Rel.GE, self.name) + validator.check('rank of x', len(x_shape)+1, + 'rank of assist', len(assist_shape), Rel.LE, self.name) + validator.check('assist\'s penultimate dimension', assist_shape[-2], 'assist\'s last dimension', + assist_shape[-1], Rel.EQ, self.name) + + r_end_dim = -len(x_shape) + r_idx = -1 + while r_idx >= r_end_dim: + if x_shape[r_idx] != 1: + validator.check("reverse x dim %d" % r_idx, x_shape[r_idx], "reverse assist dim %d" % + assist_shape[r_idx-1], assist_shape[r_idx-1], Rel.EQ, self.name) + r_idx = r_idx - 1 + + return assist_shape + + +class MatrixDiagPart(PrimitiveWithInfer): + r""" + Returns the batched diagonal part of a batched tensor. + + Inputs: + - **x** (Tensor) - The batched tensor. It can be of the following data types: + float32, float16, int32, int8, uint8. + - **assist** (Tensor) - A eye tensor of the same type as `x`. With shape same as `x`. + + Outputs: + Tensor, data type same as input `x`. The shape should be x.shape[:-2] + [min(x.shape[-2:])]. 
+ + Examples: + >>> x = Tensor([[[-1, 0], [0, 1]], [-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) + >>> assist = Tensor(np.arange(-12, 0).reshape(3, 2, 2), mindspore.float32) + >>> matrix_diag_part = P.MatrixDiagPart() + >>> result = matrix_diag_part(x, assist) + [[12., -9.], [8., -5.], [4., -1.]] + """ + + @prim_attr_register + def __init__(self): + """init MatrixDiagPart""" + + def infer_dtype(self, x_dtype, assist_dtype): + valid_type = [mstype.float16, mstype.float32, mstype.int32, mstype.int8, mstype.uint8] + args = {"x": x_dtype, "assist": assist_dtype} + validator.check_tensor_type_same(args, valid_type, self.name) + return x_dtype + + def infer_shape(self, x_shape, assist_shape): + validator.check_integer("x rank", len(x_shape), 2, Rel.GE, self.name) + validator.check("x shape", x_shape, "assist shape", assist_shape, Rel.EQ, self.name) + + if assist_shape[-2] < assist_shape[-1]: + out_shape = assist_shape[:-1] + else: + out_shape = assist_shape[:-2] + assist_shape[-1:] + return out_shape + + +class MatrixSetDiag(PrimitiveWithInfer): + r""" + Modify the batched diagonal part of a batched tensor. + + Inputs: + - **x** (Tensor) - The batched tensor. It can be of the following data types: + float32, float16, int32, int8, uint8. + - **assist** (Tensor) - A eye tensor of the same type as `x`. With shape same as `x`. + - **diagonal** (Tensor) - The diagonal values. + + Outputs: + Tensor, data type same as input `x`. The shape same as `x`. 
+ + Examples: + >>> x = Tensor([[[-1, 0], [0, 1]], [-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) + >>> diagonal = Tensor([[-1., 2.], [-1., 1.], [-1., 1.]], mindspore.float32) + >>> matrix_set_diag = P.MatrixSetDiag() + >>> result = matrix_set_diag(x, diagonal) + [[[-1, 0], [0, 2]], [-1, 0], [0, 1]], [[-1, 0], [0, 1]]] + + """ + + @prim_attr_register + def __init__(self): + """init MatrixSetDiag""" + + def infer_dtype(self, x_dtype, diagonal_dtype, assist_dtype): + valid_type = [mstype.float16, mstype.float32, mstype.int32, mstype.int8, mstype.uint8] + args = {"x": x_dtype, "diagonal": diagonal_dtype, "assist": assist_dtype} + validator.check_tensor_type_same(args, valid_type, self.name) + return x_dtype + + def infer_shape(self, x_shape, diagonal_shape, assist_shape): + validator.check_integer("x rank", len(x_shape), 2, Rel.GE, self.name) + validator.check("x shape", x_shape, "assist shape", assist_shape, Rel.EQ, self.name) + + if x_shape[-2] < x_shape[-1]: + validator.check("x shape excluding the last dimension", x_shape[:-1], "diagnoal shape", + diagonal_shape, Rel.EQ, self.name) + else: + validator.check("x shape excluding the second to last dimension", x_shape[:-2]+x_shape[-1:], + "diagonal shape", diagonal_shape, Rel.EQ, self.name) + + return assist_shape diff --git a/mindspore/ops/operations/_quant_ops.py b/mindspore/ops/operations/_quant_ops.py index 705968be65..42c2406906 100644 --- a/mindspore/ops/operations/_quant_ops.py +++ b/mindspore/ops/operations/_quant_ops.py @@ -15,38 +15,161 @@ """Operators for quantization.""" +import mindspore.context as context from ..._checkparam import Validator as validator from ..._checkparam import Rel from ..primitive import PrimitiveWithInfer, prim_attr_register from ...common import dtype as mstype -__all__ = ["FakeQuantWithMinMax", - "FakeQuantWithMinMaxGrad", - "FakeQuantWithMinMaxPerChannel", - "FakeQuantWithMinMaxPerChannelGrad", +__all__ = ["MinMaxUpdatePerLayer", + "MinMaxUpdatePerChannel", + 
"FakeQuantPerLayer", + "FakeQuantPerLayerGrad", + "FakeQuantPerChannel", + "FakeQuantPerChannelGrad", "BatchNormFold", "BatchNormFoldGrad", "CorrectionMul", "CorrectionMulGrad", + "CorrectionMulGradReduce", "BatchNormFold2", "BatchNormFold2Grad", "BatchNormFoldD", - "BNTrainingReduce", + "BatchNormFoldGradD", "BatchNormFold2_D", - "FakeQuantWithMinMaxUpdate", + "BatchNormFold2GradD", + "BatchNormFold2GradReduce" ] -class FakeQuantWithMinMax(PrimitiveWithInfer): +class MinMaxUpdatePerLayer(PrimitiveWithInfer): + r""" + Update min and max per layer. + + Args: + ema (bool): Use EMA algorithm update value min and max. Default: False. + ema_decay (int) : EMA algorithm decay parameter. Default: 0.999. + + Inputs: + - **x** (Tensor) : float32 Tensor representing the shape of the output tensor. + - **min** (Tensor) : Value of the min range of the input data x. + - **max** (Tensor) : Value of the max range of the input data x. + + Outputs: + - Tensor: Simulate quantize tensor of x. + + Examples: + >>> input_tensor = Tensor(np.random.rand(3, 16, 5, 5), mstype.float32) + >>> min_tensor = Tensor(np.array([-6]), mstype.float32) + >>> max_tensor = Tensor(np.array([6]), mstype.float32) + >>> output_tensor = MinMaxUpdatePerLayer(num_bits=8)(input_tensor, min_tensor, max_tensor) + """ + support_quant_bit = [4, 7, 8] + + @prim_attr_register + def __init__(self, ema=False, ema_decay=0.999): + """init FakeQuantMinMaxPerLayerUpdate OP""" + if context.get_context('device_target') == "Ascend": + from mindspore.ops._op_impl._custom_op import minmax_update_perlayer + if ema and not ema_decay: + raise ValueError( + f"For '{self.name}' attr \'ema\' and \'ema_decay\' should set together.") + + self.ema = validator.check_value_type('ema', ema, (bool,), self.name) + self.ema_decay = validator.check_number_range( + 'ema_decay', ema_decay, 0, 1, Rel.INC_BOTH, self.name) + self.init_prim_io_names(inputs=['x', 'min', 'max'], + outputs=['min_up', 'max_up']) + + def infer_shape(self, x_shape, 
min_shape, max_shape): + validator.check_integer("x rank", len(x_shape), 1, Rel.GE, self.name) + validator.check("min shape", min_shape, "max shape", + max_shape, Rel.EQ, self.name) + validator.check_integer("min shape", len( + min_shape), 1, Rel.EQ, self.name) + return min_shape, max_shape + + def infer_dtype(self, x_type, min_type, max_type): + valid_types = (mstype.float16, mstype.float32) + validator.check_tensor_type_same({"x": x_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"min": min_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"max": max_type}, valid_types, self.name) + return min_type, max_type + + +class MinMaxUpdatePerChannel(PrimitiveWithInfer): + r""" + Update min and max per channel. + + Args: + ema (bool): Use EMA algorithm update value min and max. Default: False. + ema_decay (int) : EMA algorithm decay parameter. Default: 0.999. + channel_axis (int): Channel asis for per channel compute. Default: 1. + + Inputs: + - **x** (Tensor) : float32 Tensor representing the shape of the output tensor. + - **min** (Tensor) : Value of the min range of the input data x. + - **max** (Tensor) : Value of the max range of the input data x. + + Outputs: + - Tensor: Simulate quantize tensor of x. 
+ + Examples: + >>> x = Tensor(np.random.rand(3, 16, 5, 5), mstype.float32) + >>> min = Tensor(np.random.uniform(-1, 1, size=16), mstype.float32) + >>> max = Tensor(np.random.uniform(-1, 1, size=16), mstype.float32) + >>> output_tensor = MinMaxUpdatePerChannel(num_bits=8)(x, min, max) + """ + support_quant_bit = [4, 7, 8] + + @prim_attr_register + def __init__(self, ema=False, ema_decay=0.999, channel_axis=1): + """init FakeQuantPerChannelUpdate OP for Ascend""" + if context.get_context('device_target') == "Ascend": + from mindspore.ops._op_impl._custom_op import minmax_update_perchannel + if ema and not ema_decay: + raise ValueError( + f"For '{self.name}' attr \'ema\' and \'ema_decay\' should set together.") + + self.ema = validator.check_value_type('ema', ema, (bool,), self.name) + self.ema_decay = validator.check_number_range( + 'ema_decay', ema_decay, 0, 1, Rel.INC_BOTH, self.name) + self.channel_axis = validator.check_integer( + 'channel axis', channel_axis, 0, Rel.GE, self.name) + self.init_prim_io_names( + inputs=['x', 'min', 'max'], outputs=['min_up', 'max_up']) + + def infer_shape(self, x_shape, min_shape, max_shape): + validator.check_integer("x rank", len(x_shape), 1, Rel.GT, self.name) + validator.check("min shape", min_shape, "max shape", + max_shape, Rel.EQ, self.name) + validator.check_integer("min shape", len( + min_shape), 1, Rel.EQ, self.name) + return min_shape, max_shape + + def infer_dtype(self, x_type, min_type, max_type): + valid_types = (mstype.float16, mstype.float32) + validator.check_tensor_type_same( + {"x": x_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"min": min_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"max": max_type}, valid_types, self.name) + return min_type, max_type + + +class FakeQuantPerLayer(PrimitiveWithInfer): r""" Simulate the quantize and dequantize operations in training time. Args: - num_bits (int) : Number bits for aware quantilization. Default: 8. 
+ num_bits (int) : Number bits for quantization aware. Default: 8. ema (bool): Use EMA algorithm update value min and max. Default: False. ema_decay (int) : EMA algorithm decay parameter. Default: 0.999. quant_delay (int): Quantilization delay parameter. Before delay step in training time not update - simulate aware quantize funcion. After delay step in training time begin simulate the aware + simulate quantization aware funcion. After delay step in training time begin simulate the aware quantize funcion. Default: 0. symmetric (bool): Quantization algorithm use symmetric or not. Default: False. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. @@ -64,82 +187,120 @@ class FakeQuantWithMinMax(PrimitiveWithInfer): >>> input_tensor = Tensor(np.random.rand(3, 16, 5, 5), mstype.float32) >>> min_tensor = Tensor(np.array([-6]), mstype.float32) >>> max_tensor = Tensor(np.array([6]), mstype.float32) - >>> output_tensor = P.FakeQuantWithMinMax(num_bits=8)(input_tensor, min_tensor, max_tensor) + >>> output_tensor = FakeQuantPerLayer(num_bits=8)(input_tensor, min_tensor, max_tensor) """ support_quant_bit = [4, 7, 8] @prim_attr_register - def __init__(self, num_bits=8, ema=False, ema_decay=0.999, quant_delay=0, symmetric=False, narrow_range=False, + def __init__(self, + num_bits=8, + ema=False, + ema_decay=0.999, + quant_delay=0, + symmetric=False, + narrow_range=False, training=True): - """init FakeQuantWithMinMax OP""" + """init FakeQuantPerLayer OP""" + if context.get_context('device_target') == "Ascend": + from mindspore.ops._op_impl._custom_op import fake_quant_perlayer if num_bits not in self.support_quant_bit: - raise ValueError(f"For '{self.name}' attr \'num_bits\' is not support.") + raise ValueError( + f"For '{self.name}' attr \'num_bits\' is not support.") if ema and not ema_decay: - raise ValueError(f"For '{self.name}' attr \'ema\' and \'ema_decay\' should set together.") + raise ValueError( + f"For '{self.name}' attr \'ema\' and 
\'ema_decay\' should set together.") self.ema = validator.check_value_type('ema', ema, (bool,), self.name) - self.symmetric = validator.check_value_type('symmetric', symmetric, (bool,), self.name) - self.narrow_range = validator.check_value_type('narrow_range', narrow_range, (bool,), self.name) - self.training = validator.check_value_type('training', training, (bool,), self.name) - self.ema_decay = validator.check_number_range('ema_decay', ema_decay, 0, 1, Rel.INC_BOTH, self.name) - self.num_bits = validator.check_integer('num_bits', num_bits, 0, Rel.GT, self.name) - self.quant_delay = validator.check_value_type('quant_delay', quant_delay, (int,), self.name) + self.symmetric = validator.check_value_type( + 'symmetric', symmetric, (bool,), self.name) + self.narrow_range = validator.check_value_type( + 'narrow_range', narrow_range, (bool,), self.name) + self.training = validator.check_value_type( + 'training', training, (bool,), self.name) + self.ema_decay = validator.check_number_range( + 'ema_decay', ema_decay, 0, 1, Rel.INC_BOTH, self.name) + self.num_bits = validator.check_integer( + 'num_bits', num_bits, 0, Rel.GT, self.name) + self.quant_delay = validator.check_value_type( + 'quant_delay', quant_delay, (int,), self.name) self.init_prim_io_names(inputs=['x', 'min', 'max'], outputs=['out']) def infer_shape(self, x_shape, min_shape, max_shape): - validator.check_integer("x rank", len(x_shape), 1, Rel.GT, self.name) + validator.check_integer("x rank", len(x_shape), 1, Rel.GE, self.name) validator.check("min shape", min_shape, "max shape", max_shape, Rel.EQ, self.name) - validator.check_integer("min rank", len(min_shape), 1, Rel.EQ, self.name) + validator.check_integer("min shape", len(min_shape), 1, Rel.EQ, self.name) return x_shape def infer_dtype(self, x_type, min_type, max_type): valid_types = (mstype.float16, mstype.float32) validator.check_tensor_type_same({"x": x_type}, valid_types, self.name) - validator.check_tensor_type_same({"min": min_type}, valid_types, 
self.name) - validator.check_tensor_type_same({"max": max_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"min": min_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"max": max_type}, valid_types, self.name) return x_type -class FakeQuantWithMinMaxGrad(PrimitiveWithInfer): +class FakeQuantPerLayerGrad(PrimitiveWithInfer): r""" - Performs grad of FakeQuantWithMinMax operation. + Performs grad of FakeQuantPerLayerGrad operation. Examples: - >>> fake_min_max_grad = P.FakeQuantWithMinMaxGrad() + >>> fake_min_max_grad = FakeQuantPerLayerGrad() >>> dout = Tensor(np.array([[-2.3, 1.2], [5.7, 0.2]]), mindspore.float32) >>> input_x = Tensor(np.array([[18, -23], [0.2, 6]]), mindspore.float32) >>> _min = Tensor(np.array([-4]), mindspore.float32) >>> _max = Tensor(np.array([2]), mindspore.float32) >>> result = fake_min_max_grad(dout, input_x, _min, _max) """ - support_quant_bit = [4, 8] + support_quant_bit = [4, 7, 8] @prim_attr_register - def __init__(self, num_bits=8, quant_delay=0): + def __init__(self, + num_bits=8, + quant_delay=0, + symmetric=False, + narrow_range=False): + if context.get_context('device_target') == "Ascend": + from mindspore.ops._op_impl._custom_op import fake_quant_perlayer_grad if num_bits not in self.support_quant_bit: - raise ValueError(f"For '{self.name}' attr \'num_bits\' is not support.") - - self.quant_delay = validator.check_value_type('quant_delay', quant_delay, (int,), self.name) - self.num_bits = validator.check_integer('num_bits', num_bits, 0, Rel.GT, self.name) - self.init_prim_io_names(inputs=['dout', 'x', 'min', 'max'], outputs=['dx']) + raise ValueError( + f"For '{self.name}' attr \'num_bits\' is not support.") + + self.num_bits = validator.check_integer( + 'num_bits', num_bits, 0, Rel.GT, self.name) + self.quant_delay = validator.check_value_type( + 'quant_delay', quant_delay, (int,), self.name) + self.symmetric = validator.check_value_type( + 'symmetric', symmetric, (bool,), self.name) + 
self.narrow_range = validator.check_value_type( + 'narrow_range', narrow_range, (bool,), self.name) + self.init_prim_io_names( + inputs=['dout', 'x', 'min', 'max'], outputs=['dx']) def infer_shape(self, dout_shape, x_shape, min_shape, max_shape): - validator.check("dout shape", dout_shape, "x shape", x_shape, Rel.EQ, self.name) - validator.check("min shape", min_shape, "max shape", max_shape, Rel.EQ, self.name) - validator.check_integer("min rank", len(min_shape), 1, Rel.EQ, self.name) + validator.check("dout shape", dout_shape, "x shape", + x_shape, Rel.EQ, self.name) + validator.check("min shape", min_shape, "max shape", + max_shape, Rel.EQ, self.name) + validator.check_integer("min shape", len( + min_shape), 1, Rel.EQ, self.name) return dout_shape def infer_dtype(self, dout_type, x_type, min_type, max_type): valid_types = (mstype.float16, mstype.float32) - validator.check_tensor_type_same({"dout": dout_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"dout": dout_type}, valid_types, self.name) validator.check_tensor_type_same({"x": x_type}, valid_types, self.name) - validator.check_tensor_type_same({"min": min_type}, valid_types, self.name) - validator.check_tensor_type_same({"max": max_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"min": min_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"max": max_type}, valid_types, self.name) return dout_type -class FakeQuantWithMinMaxPerChannel(PrimitiveWithInfer): +class FakeQuantPerChannel(PrimitiveWithInfer): r""" Simulate the quantize and dequantize operations in training time base on per channel. @@ -163,70 +324,110 @@ class FakeQuantWithMinMaxPerChannel(PrimitiveWithInfer): - Tensor, has the same type as input. 
Examples: - >>> fake_quant = P.FakeQuantWithMinMaxPerChannel() + >>> fake_quant = FakeQuantPerChannel() >>> input_x = Tensor(np.array([3, 4, 5, -2, -3, -1]).reshape(3, 2), mindspore.float32) >>> _min = Tensor(np.linspace(-2, 2, 12).reshape(3, 2, 2), mindspore.float32) >>> _max = Tensor(np.linspace(8, 12, 12).reshape(3, 2, 2), mindspore.float32) >>> result = fake_quant(input_x, _min, _max) """ - support_quant_bit = [4, 8] - channel_axis = 0 + support_quant_bit = [4, 7, 8] @prim_attr_register - def __init__(self, num_bits=8, ema=False, ema_decay=0.999, quant_delay=0, symmetric=False, narrow_range=False, - training=True): - """init FakeQuantWithMinMaxPerChannel OP""" + def __init__(self, + num_bits=8, + ema=False, + ema_decay=0.999, + quant_delay=0, + symmetric=False, + narrow_range=False, + training=True, + channel_axis=1): + """init FakeQuantPerChannel OP""" + if context.get_context('device_target') == "Ascend": + from mindspore.ops._op_impl._custom_op import fake_quant_perchannel if num_bits not in self.support_quant_bit: - raise ValueError(f"For '{self.name}' Attr \'num_bits\' is not support.") + raise ValueError( + f"For '{self.name}' Attr \'num_bits\' is not support.") if ema and not ema_decay: - raise ValueError(f"For '{self.name}' attr \'ema\' and \'ema_decay\' should set together.") + raise ValueError( + f"For '{self.name}' attr \'ema\' and \'ema_decay\' should set together.") self.ema = validator.check_value_type('ema', ema, (bool,), self.name) - self.symmetric = validator.check_value_type('symmetric', symmetric, (bool,), self.name) - self.narrow_range = validator.check_value_type('narrow_range', narrow_range, (bool,), self.name) - self.training = validator.check_value_type('training', training, (bool,), self.name) - self.ema_decay = validator.check_number_range('ema_decay', ema_decay, 0, 1, Rel.INC_BOTH, self.name) - self.num_bits = validator.check_integer('num_bits', num_bits, 0, Rel.GT, self.name) - self.quant_delay = 
validator.check_value_type('quant_delay', quant_delay, (int,), self.name) + self.symmetric = validator.check_value_type( + 'symmetric', symmetric, (bool,), self.name) + self.narrow_range = validator.check_value_type( + 'narrow_range', narrow_range, (bool,), self.name) + self.training = validator.check_value_type( + 'training', training, (bool,), self.name) + self.ema_decay = validator.check_number_range( + 'ema_decay', ema_decay, 0, 1, Rel.INC_BOTH, self.name) + self.num_bits = validator.check_integer( + 'num_bits', num_bits, 0, Rel.GT, self.name) + self.quant_delay = validator.check_value_type( + 'quant_delay', quant_delay, (int,), self.name) + self.channel_axis = validator.check_integer( + 'channel_axis', channel_axis, 0, Rel.GE, self.name) self.init_prim_io_names(inputs=['x', 'min', 'max'], outputs=['out']) def infer_shape(self, x_shape, min_shape, max_shape): - validator.check_integer("x rank", len(x_shape), 1, Rel.GT, self.name) - validator.check_integer("min shape[0]", min_shape[0], x_shape[self.channel_axis], Rel.EQ, self.name) - validator.check_integer("max shape[0]", max_shape[0], x_shape[self.channel_axis], Rel.EQ, self.name) + validator.check_integer("x rank", len(x_shape), 1, Rel.GE, self.name) + validator.check("min shape", min_shape, "max shape", max_shape, Rel.EQ, self.name) + validator.check_integer( + "min shape", min_shape[0], x_shape[self.channel_axis], Rel.EQ, self.name) + validator.check_integer( + "max shape", max_shape[0], x_shape[self.channel_axis], Rel.EQ, self.name) return x_shape def infer_dtype(self, x_type, min_type, max_type): valid_types = (mstype.float16, mstype.float32) validator.check_tensor_type_same({"x": x_type}, valid_types, self.name) - validator.check_tensor_type_same({"min": min_type}, valid_types, self.name) - validator.check_tensor_type_same({"max": max_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"min": min_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"max": max_type}, 
valid_types, self.name) return x_type -class FakeQuantWithMinMaxPerChannelGrad(PrimitiveWithInfer): +class FakeQuantPerChannelGrad(PrimitiveWithInfer): r""" - Performs grad of FakeQuantWithMinMaxPerChannel operation. + Performs grad of FakeQuantPerChannelGrad operation. Examples: - >>> fqmmpc_grad = P.FakeQuantWithMinMaxPerChannelGrad() + >>> fqmmpc_grad = FakeQuantPerChannelGrad() >>> input_x = Tensor(np.random.randint(-4, 4, (2, 3, 4)), mindspore.float32) >>> dout = Tensor(np.random.randint(-2, 2, (2, 3, 4)), mindspore.float32) >>> _min = Tensor(np.random.randint(-8, 2, (2, 3, 4)), mindspore.float32) >>> _max = Tensor(np.random.randint(-2, 8, (2, 3, 4)), mindspore.float32) >>> result = fqmmpc_grad(dout, input_x, _min, _max) """ - support_quant_bit = [4, 8] + support_quant_bit = [4, 7, 8] @prim_attr_register - def __init__(self, num_bits=8, quant_delay=0): - """init FakeQuantWithMinMaxPerChannel Fill""" + def __init__(self, + num_bits=8, + quant_delay=0, + symmetric=False, + narrow_range=False, + channel_axis=1): + """init FakeQuantPerChannelGrad Fill""" + if context.get_context('device_target') == "Ascend": + from mindspore.ops._op_impl._custom_op import fake_quant_perchannel_grad if num_bits not in self.support_quant_bit: - raise ValueError(f"For '{self.name}' attr \'num_bits\' is not support.") - - self.quant_delay = validator.check_value_type('quant_delay', quant_delay, (int,), self.name) - self.num_bits = validator.check_integer('num_bits', num_bits, 0, Rel.GT, self.name) - self.init_prim_io_names(inputs=['dout', 'x', 'min', 'max'], outputs=['dx']) + raise ValueError( + f"For '{self.name}' attr \'num_bits\' is not support.") + + self.num_bits = validator.check_integer( + 'num_bits', num_bits, 0, Rel.GT, self.name) + self.quant_delay = validator.check_value_type( + 'quant_delay', quant_delay, (int,), self.name) + self.symmetric = validator.check_value_type( + 'symmetric', symmetric, (bool,), self.name) + self.narrow_range = validator.check_value_type( + 
'narrow_range', narrow_range, (bool,), self.name) + self.channel_axis = validator.check_integer( + 'channel axis', channel_axis, 0, Rel.GE, self.name) + self.init_prim_io_names( + inputs=['dout', 'x', 'min', 'max'], outputs=['dx']) def infer_shape(self, dout_shape, x_shape, min_shape, max_shape): validator.check("dout shape", dout_shape, "x shape", x_shape) @@ -235,10 +436,13 @@ class FakeQuantWithMinMaxPerChannelGrad(PrimitiveWithInfer): def infer_dtype(self, dout_type, x_type, min_type, max_type): valid_types = (mstype.float16, mstype.float32) - validator.check_tensor_type_same({"dout": dout_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"dout": dout_type}, valid_types, self.name) validator.check_tensor_type_same({"x": x_type}, valid_types, self.name) - validator.check_tensor_type_same({"min": min_type}, valid_types, self.name) - validator.check_tensor_type_same({"max": max_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"min": min_type}, valid_types, self.name) + validator.check_tensor_type_same( + {"max": max_type}, valid_types, self.name) return dout_type @@ -247,7 +451,7 @@ class BatchNormFold(PrimitiveWithInfer): Batch normalization folded. Args: - momentum (float): Momentum value should be [0, 1]. Default: 0.1. + momentum (float): Momentum value should be [0, 1]. Default: 0.9. epsilon (float): A small float number to avoid dividing by 0. 1e-5 if dtype in float32 else 1e-3. Default: 1e-5. is_training (bool): In training mode set True, else set False. Default: True. 
@@ -279,7 +483,7 @@ class BatchNormFold(PrimitiveWithInfer): channel_axis = 1 @prim_attr_register - def __init__(self, momentum=0.1, epsilon=1e-5, is_training=True, freeze_bn=0): + def __init__(self, momentum=0.9, epsilon=1e-5, is_training=True, freeze_bn=0): """init batch norm fold layer""" self.momentum = validator.check_number_range('momentum', momentum, 0, 1, Rel.INC_BOTH, self.name) self.epsilon = validator.check_float_positive('epsilon', epsilon, self.name) @@ -292,7 +496,7 @@ class BatchNormFold(PrimitiveWithInfer): def infer_shape(self, x_shape, mean_shape, variance_shape, global_step_shape): validator.check("mean shape", mean_shape, "gamma_shape", variance_shape, Rel.EQ, self.name) validator.check("mean_shape[0]", mean_shape[0], "input channel", x_shape[self.channel_axis], Rel.EQ, self.name) - validator.check_integer("global_step rank", len(global_step_shape), 1, Rel.EQ, self.name) + validator.check_integer("global step shape len", len(global_step_shape), 1, Rel.EQ, self.name) return mean_shape, mean_shape, mean_shape, mean_shape def infer_dtype(self, x_type, mean_type, variance_type, global_step_type): @@ -339,7 +543,7 @@ class BatchNormFoldGrad(PrimitiveWithInfer): "batch_std shape", batch_std_shape, Rel.EQ, self.name) validator.check("d_batch_mean_shape[0]", d_batch_mean_shape[0], "input channel", x_shape[self.channel_axis], Rel.EQ, self.name) - validator.check_integer("global_step rank", len(global_step_shape), 1, Rel.EQ, self.name) + validator.check_integer("global step shape len", len(global_step_shape), 1, Rel.EQ, self.name) return x_shape def infer_dtype(self, d_batch_mean_type, d_batch_std_type, x_type, batch_mean_type, batch_std_type, @@ -376,6 +580,8 @@ class CorrectionMul(PrimitiveWithInfer): @prim_attr_register def __init__(self, channel_axis=0): """init correction mul layer""" + if context.get_context('device_target') == "Ascend": + from mindspore.ops._op_impl._custom_op import correction_mul self.channel_axis = channel_axis 
self.init_prim_io_names(inputs=['x', 'batch_std', 'running_std'], outputs=['out']) @@ -408,9 +614,11 @@ class CorrectionMulGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self, channel_axis=0): """init correction mul layer""" + if context.get_context('device_target') == "Ascend": + from mindspore.ops._op_impl._custom_op import correction_mul_grad self.channel_axis = channel_axis self.init_prim_io_names(inputs=['dout', 'x', 'gamma', 'running_std'], - outputs=['dx', 'd_gamma']) + outputs=['dx', 'mul_dx']) def infer_shape(self, dout_shape, x_shape, gamma_shape, running_std_shape): validator.check("dout shape", dout_shape, "x_shape x", x_shape, Rel.EQ, self.name) @@ -418,12 +626,45 @@ class CorrectionMulGrad(PrimitiveWithInfer): Rel.EQ, self.name) validator.check("running_std_shape[0]", running_std_shape[0], "dout channel size", dout_shape[self.channel_axis], Rel.EQ, self.name) + if context.get_context('device_target') == "Ascend": + return x_shape, x_shape return x_shape, gamma_shape def infer_dtype(self, dout_type, x_type, gamma_type, running_std_type): args = {"dout": dout_type, "x": x_type, "gamma": gamma_type, "running_std": running_std_type} validator.check_tensor_type_same(args, (mstype.float16, mstype.float32), self.name) - return x_type, x_type + if context.get_context('device_target') == "Ascend": + return x_type, x_type + return x_type, gamma_type + + +class CorrectionMulGradReduce(PrimitiveWithInfer): + r""" + Performs grad reduce of CorrectionMul operation. 
+ + Examples: + >>> correction_mul_grad_rd = P.CorrectionMulGradReduce() + >>> dout = Tensor(np.array([1.5, -2.2, 0.7, -3, 1.6, 2.8]).reshape(2, 1, 1, 3), mindspore.float32) + >>> input_x = Tensor(np.random.randint(0, 256, (2, 1, 1, 3)), mindspore.float32) + >>> gamma = Tensor(np.array([0.2, -0.2, 2.5, -1.]).reshape(2, 1, 2), mindspore.float32) + >>> running_std = Tensor(np.array([1.2, 0.1, 0.7, 2.3]).reshape(2, 1, 2), mindspore.float32) + >>> result = correction_mul_grad_rd(dout, input_x, gamma, running_std) + """ + + @prim_attr_register + def __init__(self, channel_axis=0): + """init correction mul reduce layer""" + if context.get_context('device_target') == "Ascend": + from mindspore.ops._op_impl._custom_op import correction_mul_grad + self.channel_axis = channel_axis + self.init_prim_io_names(inputs=['mul_dx'], + outputs=['d_gamma']) + + def infer_shape(self, mul_dx_shape): + return [mul_dx_shape[self.channel_axis]] + + def infer_dtype(self, mul_dx_type): + return mul_dx_type class BatchNormFold2(PrimitiveWithInfer): @@ -477,7 +718,7 @@ class BatchNormFold2(PrimitiveWithInfer): validator.check("batch_std shape", batch_std_shape, "batch_mean shape", gamma_shape, Rel.EQ, self.name) validator.check("batch_std_shape[0]", batch_std_shape[0], "x_shape channel size", x_shape[self.channel_axis], Rel.EQ, self.name) - validator.check_integer("global_step rank", len(global_step_shape), 1, Rel.EQ, self.name) + validator.check_integer("global step shape len", len(global_step_shape), 1, Rel.EQ, self.name) return x_shape def infer_dtype(self, x_type, beta_type, gamma_type, batch_std_type, running_std_type, batch_mean_type, @@ -525,7 +766,7 @@ class BatchNormFold2Grad(PrimitiveWithInfer): validator.check("batch_std shape", batch_std_shape, "gamma shape", gamma_shape, Rel.EQ, self.name) validator.check("batch_std size", batch_std_shape[0], "dout channel size", dout_shape[self.channel_axis], Rel.EQ, self.name) - validator.check_integer("global_step rank", len(global_step_shape), 
1, Rel.EQ, self.name) + validator.check_integer("global step shape len", len(global_step_shape), 1, Rel.EQ, self.name) return gamma_shape, gamma_shape, gamma_shape, gamma_shape, x_shape def infer_dtype(self, dout_type, x_type, gamma_type, @@ -607,32 +848,6 @@ class BatchNormFoldGradD(PrimitiveWithInfer): return x_type -class BNTrainingReduce(PrimitiveWithInfer): - """ - reduce sum at axis [0, 2, 3]. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C)`. - - Outputs: - - **x_sum** (Tensor) - Tensor has the same shape as x. - - **x_square_sum** (Tensor) - Tensor has the same shape as x. - - """ - - @prim_attr_register - def __init__(self): - """init _BNTrainingReduce layer""" - self.init_prim_io_names(inputs=['x'], - outputs=['x_sum', 'x_square_sum']) - - def infer_shape(self, x_shape): - return [x_shape[1]], [x_shape[1]] - - def infer_dtype(self, x_type): - return x_type, x_type - - class BatchNormFold2_D(PrimitiveWithInfer): """ Scale the bias with a correction factor to the long term statistics @@ -735,70 +950,3 @@ class BatchNormFold2GradReduce(PrimitiveWithInfer): def infer_dtype(self, dout_type, x_type): validator.check("dout type", dout_type, "x type", x_type) return dout_type, dout_type - - -class FakeQuantWithMinMaxUpdate(PrimitiveWithInfer): - r""" - Simulate the quantize and dequantize operations in training time. - - Args: - num_bits (int) : Number bits for aware quantilization. Default: 8. - ema (bool): Use EMA algorithm update value min and max. Default: False. - ema_decay (int) : EMA algorithm decay parameter. Default: 0.999. - quant_delay (int): Quantilization delay parameter. Before delay step in training time not update - simulate aware quantize funcion. After delay step in training time begin simulate the aware - quantize funcion. Default: 0. - symmetric (bool): Quantization algorithm use symmetric or not. Default: False. - narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. 
- training (bool): Training the network or not. Default: True. - - Inputs: - - **x** (Tensor) : float32 Tensor representing the shape of the output tensor. - - **min** (Tensor) : Value of the min range of the input data x. - - **max** (Tensor) : Value of the max range of the input data x. - - Outputs: - - Tensor: Simulate quantize tensor of x. - - Examples: - >>> input_tensor = Tensor(np.random.rand(3, 16, 5, 5), mstype.float32) - >>> min_tensor = Tensor(np.array([-6]), mstype.float32) - >>> max_tensor = Tensor(np.array([6]), mstype.float32) - >>> output_tensor = P.FakeQuantWithMinMax(num_bits=8)(input_tensor, min_tensor, max_tensor) - """ - support_quant_bit = [4, 7, 8] - - @prim_attr_register - def __init__(self, num_bits=8, ema=False, ema_decay=0.999, quant_delay=0, symmetric=False, narrow_range=False, - training=True): - """init FakeQuantWithMinMax OP""" - from mindspore.ops._op_impl._custom_op import correction_mul, correction_mul_grad - from mindspore.ops._op_impl._custom_op import fake_quant_with_min_max, fake_quant_with_min_max_grad - from mindspore.ops._op_impl._custom_op import fake_quant_with_min_max_update - if num_bits not in self.support_quant_bit: - raise ValueError(f"For '{self.name}' attr \'num_bits\' is not support.") - if ema and not ema_decay: - raise ValueError(f"For '{self.name}' attr \'ema\' and \'ema_decay\' should set together.") - - self.ema = validator.check_value_type('ema', ema, (bool,), self.name) - self.symmetric = validator.check_value_type('symmetric', symmetric, (bool,), self.name) - self.narrow_range = validator.check_value_type('narrow_range', narrow_range, (bool,), self.name) - self.training = validator.check_value_type('training', training, (bool,), self.name) - self.ema_decay = validator.check_number_range('ema_decay', ema_decay, 0, 1, Rel.INC_BOTH, self.name) - self.num_bits = validator.check_integer('num_bits', num_bits, 0, Rel.GT, self.name) - self.quant_delay = validator.check_value_type('quant_delay', quant_delay, (int,), 
self.name) - self.init_prim_io_names(inputs=['x', 'min', 'max'], - outputs=['min_up', 'max_up']) - - def infer_shape(self, x_shape, min_shape, max_shape): - validator.check_integer("x rank", len(x_shape), 1, Rel.GT, self.name) - validator.check("min shape", min_shape, "max shape", max_shape, Rel.EQ, self.name) - validator.check_integer("min rank", len(min_shape), 1, Rel.EQ, self.name) - return min_shape, max_shape - - def infer_dtype(self, x_type, min_type, max_type): - valid_types = (mstype.float16, mstype.float32) - validator.check_tensor_type_same({"x": x_type}, valid_types, self.name) - validator.check_tensor_type_same({"min": min_type}, valid_types, self.name) - validator.check_tensor_type_same({"max": max_type}, valid_types, self.name) - return min_type, max_type diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index 79a92ed7c8..1bb39d1547 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -28,12 +28,14 @@ from ..._checkparam import Validator as validator from ..._checkparam import Rel from ...common import dtype as mstype from ...common.tensor import Tensor +from ...common.parameter import Parameter from ..operations.math_ops import _infer_shape_reduce from .._utils import get_concat_offset -from ..primitive import Primitive, PrimitiveWithInfer, prim_attr_register +from ..primitive import Primitive, PrimitiveWithInfer, prim_attr_register, _run_op from ..._c_expression import signature_rw as sig_rw from ..._c_expression import signature_kind as sig_kind from ..._c_expression import signature_dtype as sig_dtype +from ..._c_expression import typing def _check_infer_attr_reduce(axis, keep_dims, prim_name): validator.check_value_type('keep_dims', keep_dims, [bool], prim_name) @@ -81,12 +83,17 @@ class ExpandDims(PrimitiveWithInfer): axis_v = axis['value'] rank = len(x_shape) validator.check_int_range('axis', axis_v, -rank - 1, rank, Rel.INC_BOTH, self.name) + value = None + 
if x['value'] is not None: + value = x['value'].asnumpy() + value = np.expand_dims(value, axis_v) + value = Tensor(value) if axis_v < 0: axis_v = rank + 1 + axis_v x_shape.insert(axis_v, 1) out = {'shape': x_shape, 'dtype': x['dtype'], - 'value': None} + 'value': value} return out @@ -122,7 +129,8 @@ class SameTypeShape(PrimitiveWithInfer): Checks whether data type and shape of two tensors are the same. Raises: - ValueError: If not the same. + TypeError: If data type not the same. + ValueError: If shape of two tensors not the same. Inputs: - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. @@ -144,10 +152,10 @@ class SameTypeShape(PrimitiveWithInfer): def __call__(self, x, y): """run in PyNative mode""" - validator.check_subclass('x', x.dtype(), mstype.tensor, self.name) - validator.check_subclass('y', y.dtype(), mstype.tensor, self.name) - validator.check('x dtype', x.dtype(), 'y dtype', y.dtype(), Rel.EQ, self.name, TypeError) - validator.check('x shape', x.shape(), 'y shape', y.shape(), Rel.EQ, self.name) + validator.check_value_type('x', x, Tensor, self.name) + validator.check_value_type('y', y, Tensor, self.name) + validator.check('x dtype', x.dtype, 'y dtype', y.dtype, Rel.EQ, self.name, TypeError) + validator.check('x shape', x.shape, 'y shape', y.shape, Rel.EQ, self.name) return x def __infer__(self, x, y): @@ -184,6 +192,18 @@ class Cast(PrimitiveWithInfer): """init Cast""" self.init_prim_io_names(inputs=['x', 'dst_type'], outputs=['output']) + def check_elim(self, x, dtype): + if isinstance(x, (Tensor, numbers.Number, Parameter)): + if isinstance(x, Tensor) and x.dtype == dtype: + return (True, x) + if isinstance(x, numbers.Number): + return (True, Tensor(x, dtype=dtype)) + if isinstance(x, Parameter): + data = x.default_input + if data.dtype == dtype: + return (True, x) + return (False, None) + def __infer__(self, x, t): src_type = x['dtype'] dst_type = t['value'] @@ -490,7 +510,7 @@ class GatherV2(PrimitiveWithInfer): The 
original Tensor. - **input_indices** (Tensor) - The shape of tensor is :math:`(y_1, y_2, ..., y_S)`. Specifies the indices of elements of the original Tensor. Must be in the range - `[0, input_param.shape()[axis])`. + `[0, input_param.shape[axis])`. - **axis** (int) - Specifies the dimension index to gather indices. Outputs: @@ -525,99 +545,27 @@ class GatherV2(PrimitiveWithInfer): return out -class Range(PrimitiveWithInfer): - r""" - Creates a sequence of numbers. - Set `input_x` as :math:`x_i` for each element, `output` as follows: - - .. math:: - \text{output}(x_i) = x_i * \text{delta} + \text{start} - - Args: - start (float): If `limit` is `None`, the value acts as limit in the range and first entry - defaults to `0`. Otherwise, it acts as first entry in the range. - limit (float): Acts as upper limit of sequence. If `None`, defaults to the value of `start` - while set the first entry of the range to `0`. - delta (float): Increment of the range. Default: 1.0. - - Inputs: - - **input_x** (Tensor) - The assistant data. A `1-D` tensor of type float32 or int32. - - Outputs: - Tensor, has the same shape and dtype as `input_x`. 
- - Examples: - >>> range = P.Range(1.0, 8.0, 2.0) - >>> x = Tensor(np.array([1, 2, 3, 2]), mindspore.int32) - >>> range(x) - [3, 5, 7, 5] +class SparseGatherV2(GatherV2): """ - - @prim_attr_register - def __init__(self, start, limit=None, delta=1.0): - self.init_prim_io_names(inputs=['x'], outputs=['y']) - self.delta = validator.check_value_type("delta", delta, [float], self.name) - validator.check_value_type("start", start, [float], self.name) - if limit is None: - self.start = 0.0 - self.limit = start - self.add_prim_attr("start", self.start) - self.add_prim_attr("limit", self.limit) - else: - validator.check_value_type("limit", limit, [float], self.name) - - def infer_shape(self, x_shape): - return x_shape - - def infer_dtype(self, x_dtype): - validator.check_tensor_type_same({'x_dtype': x_dtype}, [mstype.float32, mstype.int32], self.name) - return x_dtype - - -class EmbeddingLookup(PrimitiveWithInfer): - """ - Returns a slice of input tensor based on the specified indices and axis. This Primitive has the similar - functionality as GatherV2, but has one more inputs: `offset`. - This primitive runs on the acipu devices. + Returns a slice of input tensor based on the specified indices and axis. Inputs: - - **params** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. - The Tensor slice, instead of the entire Tensor. - - **indices** (Tensor) - The shape of tensor is :math:`(y_1, y_2, ..., y_S)`. - Specifies the indices of elements of the original Tensor. Values can be out of range of `params`, - and the exceeding part will be filled with 0 in the output. - The indices to do lookup operation whose data type should be mindspore.int32 or mindspore.int64. - - **offset** (int) - Specifies the offset value of this `params` slice. Thus the real indices - are equal to `indices` minus `offset`. - + - **input_params** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. + The original Tensor. 
+ - **input_indices** (Tensor) - The shape of tensor is :math:`(y_1, y_2, ..., y_S)`. + Specifies the indices of elements of the original Tensor. Must be in the range + `[0, input_param.shape[axis])`. + - **axis** (int) - Specifies the dimension index to gather indices. Outputs: Tensor, the shape of tensor is :math:`(z_1, z_2, ..., z_N)`. Examples: - >>> params = Tensor(np.array([[8, 9], [10, 11], [12, 13], [14, 15]]), mindspore.float32) - >>> indices = Tensor(np.array([[5, 2], [8, 5]]), mindspore.int32) - >>> offset = 4 - >>> out = P.EmbeddingLookup()(params, indices, offset) - [[[10, 11], [0 ,0]], [[0, 0], [10, 11]]] + >>> input_params = Tensor(np.array([[1, 2, 7, 42], [3, 4, 54, 22], [2, 2, 55, 3]]), mindspore.float32) + >>> input_indices = Tensor(np.array([1, 2]), mindspore.int32) + >>> axis = 1 + >>> out = P.GatherV2()(input_params, input_indices, axis) """ - @prim_attr_register - def __init__(self): - """init index_select""" - self.init_prim_io_names(inputs=['params', 'indices', 'offset'], - outputs=['output']) - - def __infer__(self, params, indices, offset): - validator.check_subclass("params", params['dtype'], mstype.tensor, self.name) - valid_types = (mstype.int32, mstype.int64) - validator.check_tensor_type_same({"indices": indices['dtype']}, valid_types, self.name) - validator.check_subclass("offset", offset['dtype'], mstype.int_, self.name) - params_shp = params['shape'] - out_shape = indices['shape'] + params_shp[1:] - out = {'shape': out_shape, - 'dtype': params['dtype'], - 'value': None} - return out class Split(PrimitiveWithInfer): @@ -629,7 +577,7 @@ class Split(PrimitiveWithInfer): output_num (int): The number of output tensors. Default: 1. Raises: - ValueError: If axis is out of the range [-len(input_x.shape()), len(input_x.shape())), + ValueError: If axis is out of the range [-len(input_x.shape), len(input_x.shape)), or if the output_num is less than or equal to 0, or if the dimension which to split cannot be evenly divided by output_num. 
@@ -919,9 +867,16 @@ class TupleToArray(PrimitiveWithInfer): ret = np.array(x, np.int32) else: ret = np.array(x, np.float32) - return Tensor(ret) + def __call__(self, x): + args = list() + if isinstance(x, range): + args.append(tuple(x)) + else: + args.append(x) + return _run_op(self, self.name, args) + class ScalarToArray(PrimitiveWithInfer): """ @@ -1000,7 +955,7 @@ class InvertPermutation(PrimitiveWithInfer): - **input_x** (Union(tuple[int], Tensor[int])) - The input tuple is constructed by multiple integers, i.e., :math:`(y_1, y_2, ..., y_S)` representing the indices. The values must include 0. There can be no duplicate values or negative values. - If the input is Tensor, it must be 1-d and the dtype is int. + If the input is Tensor, it must be 1-d and the dtype is int. Only constant value is allowed. Outputs: @@ -1020,6 +975,8 @@ class InvertPermutation(PrimitiveWithInfer): def __infer__(self, x): x_shp = x['shape'] x_value = x['value'] + if x_value is None: + raise ValueError(f'For \'{self.name}\' the input value must be const.') validator.check_value_type("shape", x_shp, [tuple, list], self.name) if mstype.issubclass_(x['dtype'], mstype.tensor): validator.check('x dimension', len(x_shp), '', 1, Rel.EQ, self.name) @@ -1028,6 +985,12 @@ class InvertPermutation(PrimitiveWithInfer): z = [x_value[i] for i in range(len(x_value))] z.sort() + for i in range(1, len(z)): + if z[i-1] == z[i]: + raise ValueError(f"For {self.name}, {z[i]} is duplicated in the input.") + validator.check(f'value min', min(x_value), '', 0, Rel.EQ, self.name) + validator.check(f'value max', max(x_value), '', len(x_value)-1, Rel.EQ, self.name) + y = [None] * len(x_value) for i, value in enumerate(x_value): validator.check_value_type("input[%d]" % i, value, [int], self.name) @@ -1273,14 +1236,20 @@ class Tile(PrimitiveWithInfer): """init Tile""" self.init_prim_io_names(inputs=['x', 'multiples'], outputs=['output']) + def check_elim(self, base_tensor, multiplier): + if (not 
isinstance(base_tensor, Tensor)) or (not isinstance(multiplier, tuple)): + raise TypeError("Expecting (Tensor, tuple), got: ({}, {})".format(base_tensor, multiplier)) + if all(v == 1 for v in multiplier): + return (True, base_tensor) + return (False, None) + def __infer__(self, x, multiples): multiples_v = multiples['value'] x_shp = x['shape'] validator.check_value_type("shape", multiples_v, [tuple], self.name) for i, multiple in enumerate(multiples_v): validator.check_value_type("multiples[%d]" % i, multiple, [int], self.name) - valid_types = [mstype.int16, mstype.int32, mstype.bool_, mstype.float16, mstype.float32] - validator.check_tensor_type_same({'x': x['dtype']}, valid_types, self.name) + validator.check_value_type("x[\'dtype\']", x["dtype"], typing.TensorType, self.name) len_sub = len(multiples_v) - len(x_shp) multiples_w = None if len_sub == 0: @@ -1323,7 +1292,7 @@ class UnsortedSegmentSum(PrimitiveWithInfer): Tensor, the shape is :math:`(z, x_{N+1}, ..., x_R)`. Examples: - >>> input_x = Tensor([1, 2, 3, 4], mindspore.float) + >>> input_x = Tensor([1, 2, 3, 4], mindspore.float32) >>> segment_ids = Tensor([0, 0, 1, 2], mindspore.int32) >>> num_segments = 4 >>> P.UnsortedSegmentSum()(input_x, segment_ids, num_segments) @@ -1556,7 +1525,7 @@ class Unpack(PrimitiveWithInfer): A tuple of Tensors, the shape of each objects is same. Raises: - ValueError: If axis is out of the range [-len(input_x.shape()), len(input_x.shape())). + ValueError: If axis is out of the range [-len(input_x.shape), len(input_x.shape)). Examples: >>> unpack = P.Unpack() @@ -1697,6 +1666,7 @@ class Select(PrimitiveWithInfer): @prim_attr_register def __init__(self): """init""" + self.init_prim_io_names(inputs=['condition', 'x', 'y'], outputs=['output']) def infer_shape(self, cond_shape, x_shape, y_shape): if cond_shape != x_shape or x_shape != y_shape: @@ -1712,6 +1682,16 @@ class Select(PrimitiveWithInfer): raise TypeError('\'%s\' the x_type %s must be the same as y_type %s.' 
% (self.name, x_type, y_type)) return x_type + def infer_value(self, cond, x, y): + if cond is not None and x is not None and y is not None: + cond = cond.asnumpy() + x = x.asnumpy() + y = y.asnumpy() + out = np.where(cond, x, y) + return Tensor(out) + return None + + class StridedSlice(PrimitiveWithInfer): r""" @@ -1762,7 +1742,7 @@ class StridedSlice(PrimitiveWithInfer): >>> [[5, 5, 5], [6, 6, 6]]], mindspore.float32) >>> slice = P.StridedSlice() >>> output = slice(input_x, (1, 0, 0), (2, 1, 3), (1, 1, 1)) - >>> output.shape() + >>> output.shape (1, 1, 3) >>> output [[[3, 3, 3]]] @@ -1886,7 +1866,7 @@ class Diag(PrimitiveWithInfer): if x is None: return None # do constant-folding only when x rank is 1 - if len(x.shape()) != 1: + if len(x.shape) != 1: return None ret = np.diag(x.asnumpy()) return Tensor(ret) @@ -1938,7 +1918,7 @@ class DiagPart(PrimitiveWithInfer): if x is None: return None # do constant-folding only when x rank is 2 - if len(x.shape()) != 2: + if len(x.shape) != 2: return None ret = np.diag(x.asnumpy()) return Tensor(ret) @@ -1952,7 +1932,7 @@ class Eye(PrimitiveWithInfer): Inputs: - **n** (int) - Number of rows of returned tensor - **m** (int) - Number of columns of returned tensor - - **t** (mindspore.dtype) - Mindspore's dtype, The data type of the returned tensor. + - **t** (mindspore.dtype) - MindSpore's dtype, The data type of the returned tensor. Outputs: Tensor, a tensor with ones on the diagonal and zeros elsewhere. @@ -1983,7 +1963,7 @@ class ScatterNd(PrimitiveWithInfer): Creates an empty tensor, and set values by scattering the update tensor depending on indices. Inputs: - - **indices** (Tensor) - The index of scattering in the new tensor. + - **indices** (Tensor) - The index of scattering in the new tensor. With int32 data type. - **update** (Tensor) - The source Tensor to be scattered. - **shape** (tuple[int]) - Define the shape of the output tensor. Has the same type as indices. 
@@ -2006,7 +1986,7 @@ class ScatterNd(PrimitiveWithInfer): def __infer__(self, indices, update, shape): shp = shape['value'] validator.check_subclass("update_dtype", update['dtype'], mstype.tensor, self.name) - validator.check_tensor_type_same({"indices": indices['dtype']}, mstype.int_type, self.name) + validator.check_tensor_type_same({"indices": indices['dtype']}, [mstype.int32], self.name) validator.check_value_type("shape", shp, [tuple], self.name) for i, x in enumerate(shp): validator.check_integer("shape[%d]" % i, x, 0, Rel.GT, self.name) @@ -2099,6 +2079,47 @@ class GatherNd(PrimitiveWithInfer): return x_dtype +class TensorScatterUpdate(PrimitiveWithInfer): + """ + Update tensor value by using input indices and value. + + Using given values to update tensor value, along with the input indices. + + Inputs: + - **input_x** (Tensor) - The target tensor. + - **indices** (Tensor) - The index of input tensor whose data type is int32. + - **update** (Tensor) - The tensor to update the input tensor, has the same type as input, + and update.shape = indices.shape + input_x.shape[1:]. + + Outputs: + Tensor, has the same shape and type as `input_x`. 
+ + Examples: + >>> input_x = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32) + >>> indices = Tensor(np.array([[0, 0], [1, 1]]), mindspore.int32) + >>> update = Tensor(np.array([1.0, 2.2]), mindspore.float32) + >>> op = P.TensorScatterUpdate() + >>> output = op(input_x, indices, update) + """ + @prim_attr_register + def __init__(self): + """Init TensorScatterUpdate""" + self.init_prim_io_names(inputs=['x', 'indices', 'value'], outputs=['y']) + + def infer_shape(self, x_shape, indices_shape, value_shape): + validator.check('the dimension of x', len(x_shape), + 'the dimension of indices', indices_shape[-1], Rel.GE) + if indices_shape[:-1] + x_shape[indices_shape[-1]:] != value_shape: + raise ValueError("For 'TensorScatterUpdate', input value are not match with input indices.") + return x_shape + + def infer_dtype(self, x_dtype, indices_dtype, value_dtype): + validator.check_tensor_type_same({'indices': indices_dtype}, [mstype.int32], self.name) + args = {"x": x_dtype, "value": value_dtype} + validator.check_tensor_type_same(args, (mstype.bool_,) + mstype.number_type, self.name) + return x_dtype + + class ScatterUpdate(PrimitiveWithInfer): """ Update tensor value by using input indices and value. @@ -2110,7 +2131,7 @@ class ScatterUpdate(PrimitiveWithInfer): Inputs: - **input_x** (Parameter) - The target tensor, with data type of Parameter. - - **indices** (Tensor) - The index of input tensor. + - **indices** (Tensor) - The index of input tensor. With int32 data type. - **update** (Tensor) - The tensor to update the input tensor, has the same type as input, and update.shape = indices.shape + input_x.shape[1:]. @@ -2118,9 +2139,11 @@ class ScatterUpdate(PrimitiveWithInfer): Tensor, has the same shape and type as `input_x`. 
Examples: - >>> input_x = mindspore.Parameter(Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32)) + >>> np_x = np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]) + >>> input_x = mindspore.Parameter(Tensor(np_x, mindspore.float32), name="x") >>> indices = Tensor(np.array([[0, 0], [1, 1]]), mindspore.int32) - >>> update = Tensor(np.array([1.0, 2.2]), mindspore.float32) + >>> np_update = np.array([[[1.0, 2.2, 1.0], [2.0, 1.2, 1.0]], [[2.0, 2.2, 1.0], [3.0, 1.2, 1.0]]]) + >>> update = Tensor(np_update, mindspore.float32) >>> op = P.ScatterUpdate() >>> output = op(input_x, indices, update) """ @@ -2132,15 +2155,16 @@ class ScatterUpdate(PrimitiveWithInfer): @prim_attr_register def __init__(self, use_locking=True): """Init ScatterUpdate""" + validator.check_value_type('use_locking', use_locking, [bool], self.name) self.init_prim_io_names(inputs=['x', 'indices', 'value'], outputs=['y']) def infer_shape(self, x_shape, indices_shape, value_shape): if indices_shape + x_shape[1:] != value_shape: - raise ValueError('Input value are not match with input indices.') + raise ValueError("For 'ScatterUpdate', input value are not match with input indices.") return x_shape def infer_dtype(self, x_dtype, indices_dtype, value_dtype): - validator.check_tensor_type_same({'indices': indices_dtype}, mstype.int_type, self.name) + validator.check_tensor_type_same({'indices': indices_dtype}, [mstype.int32], self.name) args = {"x": x_dtype, "value": value_dtype} validator.check_tensor_type_same(args, (mstype.bool_,) + mstype.number_type, self.name) return x_dtype @@ -2157,14 +2181,15 @@ class ScatterNdUpdate(PrimitiveWithInfer): Inputs: - **input_x** (Parameter) - The target tensor, with data type of Parameter. - - **indices** (Tensor) - The index of input tensor. + - **indices** (Tensor) - The index of input tensor, with int32 data type. - **update** (Tensor) - The tensor to add to the input tensor, has the same type as input. 
Outputs: Tensor, has the same shape and type as `input_x`. Examples: - >>> input_x = mindspore.Parameter(Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32)) + >>> np_x = np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]) + >>> input_x = mindspore.Parameter(Tensor(np_x, mindspore.float32), name="x") >>> indices = Tensor(np.array([[0, 0], [1, 1]]), mindspore.int32) >>> update = Tensor(np.array([1.0, 2.2]), mindspore.float32) >>> op = P.ScatterNdUpdate() @@ -2178,17 +2203,18 @@ class ScatterNdUpdate(PrimitiveWithInfer): @prim_attr_register def __init__(self, use_locking=True): """Init ScatterNdUpdate""" + validator.check_value_type('use_locking', use_locking, [bool], self.name) self.init_prim_io_names(inputs=['x', 'indices', 'value'], outputs=['y']) def infer_shape(self, x_shape, indices_shape, value_shape): validator.check('the dimension of x', len(x_shape), 'the dimension of indices', indices_shape[-1], Rel.GE) if indices_shape[:-1] + x_shape[indices_shape[-1]:] != value_shape: - raise ValueError('Input value are not match with input indices.') + raise ValueError("For 'ScatterNdUpdate', input value are not match with input indices.") return x_shape def infer_dtype(self, x_dtype, indices_dtype, value_dtype): - validator.check_tensor_type_same({'indices': indices_dtype}, mstype.int_type, self.name) + validator.check_tensor_type_same({'indices': indices_dtype}, [mstype.int32], self.name) args = {"x": x_dtype, "value": value_dtype} validator.check_tensor_type_same(args, (mstype.bool_,) + mstype.number_type, self.name) return x_dtype @@ -2204,7 +2230,8 @@ class ScatterMax(PrimitiveWithInfer): """ Update the value of the input tensor through the max operation. - Using given values to update tensor value through the max operation, along with the input indices,. + Using given values to update tensor value through the max operation, along with the input indices. 
+ This operation outputs the `input_x` after the update is done, which makes it convenient to use the updated value. Args: use_locking (bool): Whether protect the assignment by a lock. Default: True. @@ -2216,7 +2243,7 @@ class ScatterMax(PrimitiveWithInfer): the data type is same as `input_x`, the shape is `indices_shape + x_shape[1:]`. Outputs: - Tensor, has the same shape and data type as `input_x`. + Parameter, the updated `input_x`. Examples: >>> input_x = Parameter(Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), mindspore.float32), name="input_x") @@ -2249,6 +2276,7 @@ class ScatterAdd(PrimitiveWithInfer): Update the value of the input tensor through the add operation. Using given values to update tensor value through the add operation, along with the input indices. + This operation outputs the `input_x` after the update is done, which makes it convenient to use the updated value. Args: use_locking (bool): Whether protect the assignment by a lock. Default: False. @@ -2260,7 +2288,7 @@ class ScatterAdd(PrimitiveWithInfer): the data type is same as `input_x`, the shape is `indices_shape + x_shape[1:]`. Outputs: - Tensor, has the same shape and data type as `input_x`. + Parameter, the updated `input_x`. Examples: >>> input_x = Parameter(Tensor(np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), mindspore.float32), name="x") @@ -2460,8 +2488,7 @@ class SpaceToBatch(PrimitiveWithInfer): validator.check_integer('rank of input_x', len(x_shape), 4, Rel.EQ, self.name) out_shape = copy.deepcopy(x_shape) for i in range(2): - padded = out_shape[i + 2] + self.paddings[i][0] + \ - self.paddings[i][1] + padded = out_shape[i + 2] + self.paddings[i][0] + self.paddings[i][1] if padded % self.block_size != 0: raise ValueError(f'For \'{self.name}\' padded[{i}] {padded} should be divisible by ' f'block_size {self.block_size}') @@ -2479,7 +2506,7 @@ class BatchToSpace(PrimitiveWithInfer): dimension and block_size with given amount to crop from dimension, respectively. 
Args: - block_size (int): The block size of dividing block with value >= 1. + block_size (int): The block size of dividing block with value >= 2. crops (list): The crop value for H and W dimension, containing 2 sub list, each containing 2 int value. All values must be >= 0. crops[i] specifies the crop values for spatial dimension i, which corresponds to input dimension i+2. It is required that input_shape[i+2]*block_size >= crops[i][0]+crops[i][1]. @@ -2513,7 +2540,7 @@ class BatchToSpace(PrimitiveWithInfer): def __init__(self, block_size, crops): """Init BatchToSpace""" validator.check_value_type('block_size', block_size, [int], self.name) - validator.check('block_size', block_size, '', 1, Rel.GE, self.name) + validator.check('block_size', block_size, '', 2, Rel.GE, self.name) self.block_size = block_size validator.check('crops shape', np.array(crops).shape, '', (2, 2)) for elem in itertools.chain(*crops): @@ -2592,6 +2619,8 @@ class SpaceToBatchND(PrimitiveWithInfer): for elem in block_shape: validator.check('block_shape element', elem, '', 1, Rel.GE, self.name) + validator.check_value_type('block_shape element', elem, [int], self.name) + self.block_shape = block_shape validator.check('paddings shape', np.array(paddings).shape, '', (block_rank, 2), Rel.EQ, self.name) @@ -2634,7 +2663,7 @@ class BatchToSpaceND(PrimitiveWithInfer): The length of block_shape is M correspoding to the number of spatial dimensions. crops (list): The crop value for H and W dimension, containing 2 sub list, each containing 2 int value. All values must be >= 0. crops[i] specifies the crop values for spatial dimension i, which corresponds to - input dimension i+2. It is required that input_shape[i+2]*block_size[i] >= crops[i][0]+crops[i][1]. + input dimension i+2. It is required that input_shape[i+2]*block_shape[i] > crops[i][0]+crops[i][1]. Inputs: - **input_x** (Tensor) - The input tensor. 
@@ -2670,6 +2699,8 @@ class BatchToSpaceND(PrimitiveWithInfer): for elem in block_shape: validator.check('block_shape element', elem, '', 1, Rel.GE, self.name) + validator.check_value_type('block_shape element', elem, [int], self.name) + self.block_shape = block_shape validator.check('crops shape', np.array(crops).shape, '', (block_rank, 2), Rel.EQ, self.name) @@ -2701,32 +2732,144 @@ class BatchToSpaceND(PrimitiveWithInfer): return out_shape +class BroadcastTo(PrimitiveWithInfer): + """ + Broadcasts input tensor to a given shape. + Input shape can be broadcast to target shape if for each dimension pair they are either equal or input is one. + When input shape is broadcast to target shape, it starts with the trailing dimensions. + + Args: + shape (tuple): The target shape to broadcast. + + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, with the given `shape` and the same data type as `input_x`. + + Examples: + >>> shape = (2, 3) + >>> input_x = Tensor(np.array([1, 2, 3]).astype(np.float32)) + >>> broadcast_to = P.BroadcastTo(shape) + >>> broadcast_to(input_x) + [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]] + """ + + @prim_attr_register + def __init__(self, shape): + """Init BroadcastTo""" + validator.check_value_type("shape", shape, (tuple), self.name) + validator.check("shape length", len(shape), "", 0, Rel.GT, self.name) + for i in shape: + validator.check_integer("shape element", i, 0, Rel.GT, self.name) + self.shape = shape + + def infer_shape(self, x_shape): + validator.check("input_x shape length", len(x_shape), "target shape", len(self.shape), Rel.LE, self.name) + + reversed_x_shape = tuple(reversed(x_shape)) + reversed_target = tuple(reversed(self.shape)) + for i, v in enumerate(reversed_x_shape): + if v not in (reversed_target[i], 1): + raise ValueError(f"Not supported shapes for broadcast, " + f"x_shape: {tuple(x_shape)}, target shape {self.shape}.") + return self.shape + + def infer_dtype(self, x_dtype): + 
validator.check_subclass("input_x", x_dtype, mstype.tensor, self.name) + return x_dtype + + +class InplaceUpdate(PrimitiveWithInfer): + r""" + Updates specified rows with values in `v`. + + Args: + indices (Union[int, tuple]): Indices into the left-most dimension of `x`. + + Inputs: + - **x** (Tensor) - A tensor which to be inplace updated. It can be of the following data types: + float32, float16, int32. + - **v** (Tensor) - A tensor of the same type as `x`. Same dimension size as `x` except + the first dimension, which must be the same as the size of `indices`. + + Outputs: + Tensor, with the same type and shape as the input `x`. + + Examples: + >>> x = Tensor(np.arange(24).reshape(3, 4, 2), mindspore.float32) + >>> v = Tensor(np.arange(-8, 8).reshape(2, 4, 2), mindspore.float32) + >>> inplace_update = P.InplaceUpdate((0, 2)) + >>> result = inplace_update(x, v) + [[[-8. -7.] + [-6. -5.] + [-4. -3.] + [-2. -1.]] + [[ 8. 9.] + [10. 11.] + [12. 13.] + [14. 15.]] + [[ 0. 1.] + [ 2. 3.] + [ 4. 5.] + [ 6. 
7.]]] + """ + @prim_attr_register + def __init__(self, indices): + """Init InplaceUpdate""" + self.init_prim_io_names(inputs=['x', 'indices', 'v'], outputs=['y']) + validator.check_value_type("indices", indices, [int, tuple], self.name) + if isinstance(indices, int): + self.add_prim_attr('indices', (indices,)) + for item in self.indices: + validator.check_value_type("item of indices", item, [int], self.name) + + def infer_dtype(self, x_dtype, v_dtype): + valid_type = [mstype.int32, mstype.float16, mstype.float32] + validator.check_tensor_type_same( + { + "x": x_dtype, + "v": v_dtype + }, valid_type, self.name) + + return x_dtype + + def infer_shape(self, x_shape, v_shape): + validator.check("x", len(x_shape), "v", len(v_shape), Rel.EQ, self.name) + + x_rank = len(x_shape) + for idx in range(x_rank)[1:]: + validator.check("x dim %d" % idx, x_shape[idx], 'v dim %d' % idx, v_shape[idx], Rel.EQ, self.name) + + validator.check("size of indices", len(self.indices), "v's first dimension", v_shape[0], + Rel.EQ, self.name) + + return x_shape + + class ReverseSequence(PrimitiveWithInfer): """ Reverses variable length slices. - Note: - If the specified axis is a negative number, the index is counted - backward from the end and starts at 1. - - Raises: - ValueError: If axis is not an integer or not in the valid range. Args: - seq_dim (int): The dimension which is partially reversed. Required. - batch_dim (int): The dimension along which reversal is performed. Default: 0 + seq_dim (int): The dimension along which reversal is performed. Required. + batch_dim (int): The input is sliced along this dimmension. Default: 0. Inputs: - - **x** (Tensor) - The input to reverse. - - **seq_lengths** (int) - Must be 1-D vector with types: int32, int64 + - **x** (Tensor) - The input to reverse, support all number types including bool. + - **seq_lengths** (Tensor) - Must be 1-D vector with types: int32, int64. Outputs: - Reversed tensor with the same shape and data type as x. 
+ Reversed tensor with the same shape and data type as input. Examples: >>> x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), mindspore.float32) >>> seq_lengths = Tensor(np.array([1, 2, 3])) >>> reverse_sequence = P.ReverseSequence(seq_dim=1) >>> output = reverse_sequence(x, seq_lengths) + [[1 2 3] + [5 4 6] + [9 8 7]] """ @prim_attr_register @@ -2748,5 +2891,6 @@ class ReverseSequence(PrimitiveWithInfer): return x def infer_dtype(self, x, seq_lengths): + validator.check_tensor_type_same({"x_dtype": x}, mstype.number_type + (mstype.bool_,), self.name) validator.check_tensor_type_same({"seq_lengths_dtype": seq_lengths}, [mstype.int32, mstype.int64], self.name) return x diff --git a/mindspore/ops/operations/comm_ops.py b/mindspore/ops/operations/comm_ops.py index 6e0c22f584..dc690b5f6e 100644 --- a/mindspore/ops/operations/comm_ops.py +++ b/mindspore/ops/operations/comm_ops.py @@ -68,7 +68,11 @@ class AllReduce(PrimitiveWithInfer): Examples: >>> from mindspore.communication import init + >>> from mindspore import Tensor + >>> from mindspore.ops.operations.comm_ops import ReduceOp + >>> import mindspore.nn as nn >>> import mindspore.ops.operations as P + >>> >>> init('nccl') >>> class Net(nn.Cell): >>> def __init__(self): @@ -131,8 +135,11 @@ class AllGather(PrimitiveWithInfer): then the shape of output is :math:`(N, x_1, x_2, ..., x_R)`. Examples: - >>> from mindspore.communication import init >>> import mindspore.ops.operations as P + >>> import mindspore.nn as nn + >>> from mindspore.communication import init + >>> from mindspore import Tensor + >>> >>> init('nccl') >>> class Net(nn.Cell): >>> def __init__(self): @@ -175,14 +182,16 @@ class HostAllGather(PrimitiveWithInfer): Note: Tensor must have the same shape and format in all processes participating in the collective. + HostAllGather is a host-side operator, it depends on OpenMPI and must use build option -M on + to enable it. 
Using mpirun command to run it: + mpirun -output-filename log -merge-stderr-to-stdout -np 3 python test_host_all_gather.py Args: group (Union[tuple[int],list[int]]): The rand_ids of communication group to work on. Raises: TypeError: If group is not a list nor tuple, or elements of group are not int. - ValueError: If the local rank id of the calling process not in group, - or rank_id from group not in [0, 7]. + ValueError: If group is not set, or rank_id from group not in [0, 7]. Inputs: - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. @@ -192,9 +201,14 @@ class HostAllGather(PrimitiveWithInfer): then the shape of output is :math:`(N, x_1, x_2, ..., x_R)`. Examples: - >>> from mindspore.communication import init + >>> import mindspore.nn as nn + >>> import mindspore.context as context >>> import mindspore.ops.operations as P - >>> init('nccl') + >>> from mindspore import Tensor + >>> + >>> context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + >>> context.set_mpi_config(enable_mpi=True) + >>> >>> class Net(nn.Cell): >>> def __init__(self): >>> super(Net, self).__init__() @@ -218,8 +232,6 @@ class HostAllGather(PrimitiveWithInfer): validator.check_int_range("rank_id", r, 0, 7, Rel.INC_BOTH, self.name) validator.check_value_type("rank_id", r, (int,), self.name) self.group_size = len(group) - self.rank = get_rank() - validator.check('rank', self.rank, 'group', self.group, Rel.IN, self.name) self.add_prim_attr('group', group) def infer_shape(self, x_shape): @@ -253,8 +265,12 @@ class ReduceScatter(PrimitiveWithInfer): ValueError: If the first dimension of input can not be divided by rank size. 
Examples: + >>> from mindspore import Tensor >>> from mindspore.communication import init + >>> from mindspore.ops.operations.comm_ops import ReduceOp + >>> import mindspore.nn as nn >>> import mindspore.ops.operations as P + >>> >>> init('nccl') >>> class Net(nn.Cell): >>> def __init__(self): @@ -264,7 +280,7 @@ class ReduceScatter(PrimitiveWithInfer): >>> def construct(self, x): >>> return self.reducescatter(x) >>> - >>> input_ = Tensor(np.ones([2, 8]).astype(np.float32)) + >>> input_ = Tensor(np.ones([8, 8]).astype(np.float32)) >>> net = Net() >>> output = net(input_) """ @@ -298,6 +314,9 @@ class HostReduceScatter(PrimitiveWithInfer): Note: Tensor must have the same shape and format in all processes participating in the collective. + HostReduceScatter is a host-side operator, it depends on OpenMPI and must use build option + -M on to enable it. Using mpirun command to run it: + mpirun -output-filename log -merge-stderr-to-stdout -np 3 python test_host_reduce_scatter.py Args: op (str): Specifies an operation used for element-wise reductions, @@ -307,13 +326,19 @@ class HostReduceScatter(PrimitiveWithInfer): Raises: TypeError: If op is not a string and group is not a list nor tuple, or elements of group are not int. - ValueError: If the first dimension of input can not be divided by rank size, - or group is not set, or rank_id not in [1, 7]. + ValueError: If the first dimension of input can not be divided by group size, + or group is not set, or rank_id not in [0, 7]. 
Examples: - >>> from mindspore.communication import init + >>> import mindspore.nn as nn + >>> import mindspore.context as context >>> import mindspore.ops.operations as P - >>> init('nccl') + >>> from mindspore import Tensor + >>> from mindspore.ops.operations.comm_ops import ReduceOp + >>> + >>> context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + >>> context.set_mpi_config(enable_mpi=True) + >>> >>> class Net(nn.Cell): >>> def __init__(self): >>> super(Net, self).__init__() @@ -322,7 +347,7 @@ class HostReduceScatter(PrimitiveWithInfer): >>> def construct(self, x): >>> return self.hostreducescatter(x) >>> - >>> input_ = Tensor(np.ones([2, 8]).astype(np.float32)) + >>> input_ = Tensor(np.ones([8, 8]).astype(np.float32)) >>> net = Net() >>> output = net(input_) """ @@ -377,8 +402,11 @@ class Broadcast(PrimitiveWithInfer): TypeError: If root_rank is not a integer or group is not a string. Examples: + >>> from mindspore import Tensor >>> from mindspore.communication import init + >>> import mindspore.nn as nn >>> import mindspore.ops.operations as P + >>> >>> init('nccl') >>> class Net(nn.Cell): >>> def __init__(self): diff --git a/mindspore/ops/operations/control_ops.py b/mindspore/ops/operations/control_ops.py index 2c804c483f..e7ac4572ce 100644 --- a/mindspore/ops/operations/control_ops.py +++ b/mindspore/ops/operations/control_ops.py @@ -144,7 +144,7 @@ class Merge(PrimitiveWithInfer): One and only one of the inputs should be selected as the output Inputs: - - **inputs** (Tuple) - The data to be merged. All tuple elements should have same data type. + - **inputs** (Union(Tuple, List)) - The data to be merged. All tuple elements should have same data type. Outputs: tuple. Output is tuple(`data`, `output_index`). The `data` has the same shape of `inputs` element. 
diff --git a/mindspore/ops/operations/debug_ops.py b/mindspore/ops/operations/debug_ops.py index f1b56b2850..c6b635a69f 100644 --- a/mindspore/ops/operations/debug_ops.py +++ b/mindspore/ops/operations/debug_ops.py @@ -191,7 +191,7 @@ class InsertGradientOf(PrimitiveWithInfer): f (Function): MindSpore's Function. Callback function. Inputs: - - **input_x** (Tensor) - The graph node to attach to. + - **input_x** (Any) - The graph node to attach to. Outputs: Tensor, returns `input_x` directly. `InsertGradientOf` does not affect the forward result. @@ -286,12 +286,6 @@ class HookBackward(PrimitiveWithInfer): self.register_hook(hook_fn) self.cell_id = cell_id - def __call__(self, *inputs): - """run in PyNative mode.""" - if len(inputs) == 1: - return inputs[0] - return inputs - def infer_shape(self, *inputs_shape): if len(inputs_shape) == 1: return inputs_shape[0] diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 9afdc50caa..08cd481582 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -15,6 +15,7 @@ """Operators for math.""" +import copy import numpy as np from ... import context from ..._c_expression import signature_rw as sig_rw @@ -119,18 +120,20 @@ class TensorAdd(_MathBinaryOp): Adds two input tensors element-wise. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. 
- - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or - a number. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. + Tensor, the shape is same as the shape after broadcasting, + and the data type is the one with high precision or high digits among the two inputs. Examples: >>> add = P.TensorAdd() @@ -140,6 +143,15 @@ class TensorAdd(_MathBinaryOp): [5,7,9] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = x + y + out = np.array(out, x.dtype) + return Tensor(out) + return None + class AssignAdd(PrimitiveWithInfer): """ @@ -198,14 +210,14 @@ class AssignSub(PrimitiveWithInfer): >>> def __init__(self): >>> super(Net, self).__init__() >>> self.AssignSub = P.AssignSub() - >>> self.variable = mindspore.Parameter(initializer(1, [1], mindspore.int64), name="global_step") + >>> self.variable = mindspore.Parameter(initializer(1, [1], mindspore.int32), name="global_step") >>> >>> def construct(self, x): >>> self.AssignSub(self.variable, x) >>> return self.variable >>> >>> net = Net() - >>> value = Tensor(np.ones([1]).astype(np.int64)*100) + >>> value = Tensor(np.ones([1]).astype(np.int32)*100) >>> net(value) """ @@ -253,15 +265,35 @@ class _Reduce(PrimitiveWithInfer): return output def do_infer(self, input_x, axis, valid_dtype=mstype.number_type): + """ return meta infos of input parameters """ axis_v = axis['value'] input_shp = input_x['shape'] args = {'input_x': input_x['dtype']} validator.check_tensor_type_same(args, valid_dtype, self.name) + if axis_v is None: + raise ValueError(f"For 
{self.name}, axis must be const.") input_shp = _infer_shape_reduce(input_shp, axis_v, self.keep_dims, self.name) + value = None + if input_x['value'] is not None: + prim_map = { + 'ReduceSum': np.sum, + 'ReduceMax': np.max, + 'ReduceMin': np.min, + } + np_reduce_func = prim_map.get(self.name, None) + + if np_reduce_func is not None: + value = input_x['value'].asnumpy() + if not axis_v: + axis_v = [i for i in range(len(input_x['shape']))] + axis_v = tuple(axis_v) + value = np_reduce_func(value, axis_v, keepdims=self.keep_dims) + value = np.array(value) + value = Tensor(value) return {'shape': input_shp, 'dtype': input_x['dtype'], - 'value': None} + 'value': value} def __infer__(self, input_x, axis): return self.do_infer(input_x, axis) @@ -330,6 +362,12 @@ class ReduceSum(_Reduce): >>> output = op(input_x, 1) """ + @prim_attr_register + def __init__(self, keep_dims=False): + """init ReduceSum""" + super(ReduceSum, self).__init__(keep_dims) + self.__setattr_flag__ = True + class ReduceAll(_Reduce): """ @@ -399,6 +437,12 @@ class ReduceMax(_Reduce): >>> output = op(input_x, 1) """ + @prim_attr_register + def __init__(self, keep_dims=False): + """ReduceMax""" + super(ReduceMax, self).__init__(keep_dims) + self.__setattr_flag__ = True + class ReduceMin(_Reduce): """ @@ -445,8 +489,9 @@ class ReduceProd(_Reduce): Default : False, don't keep these reduced dimensions. Inputs: - - **input_x** (Tensor[Number]) - The input tensor. - - **axis** (Union[int, tuple(int), list(int)]) - The dimensions to reduce. Default: (), reduce all dimensions. + - **input_x** (Tensor[Number]) - The input tensor. + - **axis** (Union[int, tuple(int), list(int)]) - The dimensions to reduce. Default: (), reduce all dimensions. + Only constant value is allowed. Outputs: Tensor, has the same dtype as the 'input_x'. @@ -474,8 +519,9 @@ class CumProd(PrimitiveWithInfer): reverse (bool): If True, reverse the result along axis. Default: False Inputs: - - **input_x** (Tensor[Number]) - The input tensor. 
- - **axis** (int) - The dimensions to compute the cumulative product. + - **input_x** (Tensor[Number]) - The input tensor. + - **axis** (int) - The dimensions to compute the cumulative product. + Only constant value is allowed. Outputs: Tensor, has the same shape and dtype as the 'input_x'. @@ -507,6 +553,10 @@ class CumProd(PrimitiveWithInfer): validator.check_subclass("axis", axis_type, mstype.int_, cls_name) return x_type + def infer_value(self, x, axis): + if axis is None: + raise ValueError(f"For {self.name}, axis must be const.") + class MatMul(PrimitiveWithInfer): """ @@ -574,6 +624,8 @@ class MatMul(PrimitiveWithInfer): def infer_dtype(self, x, y): args = {"x": x, "y": y} validator.check_tensor_type_same(args, mstype.float_type + mstype.int_type, self.name) + if x.element_type() == mstype.int8: + return mstype.tensor_type(mstype.int32) return x @@ -662,6 +714,8 @@ class CumSum(PrimitiveWithInfer): def __infer__(self, x, axis): cls_name = self.name x_shp = x['shape'] + if axis['value'] is None: + raise ValueError(f"For {self.name}, axis must be const.") validator.check_value_type('axis', axis['value'], [int], cls_name) valid_types = [mstype.uint8, mstype.int8, mstype.int32, mstype.float16, mstype.float32] validator.check_tensor_type_same({'x': x['dtype']}, valid_types, cls_name) @@ -703,6 +757,85 @@ class AddN(PrimitiveWithInfer): def __init__(self): self.init_prim_io_names(inputs=["inputs"], outputs=["sum"]) + def check_elim(self, inputs): + if len(inputs) != 1: + return (False, None) + if isinstance(inputs[0], Tensor): + return (True, inputs[0]) + raise TypeError("Expecting Tensor, got : {}".format(type(inputs[0]))) + + def infer_shape(self, inputs): + cls_name = self.name + validator.check_integer("inputs", len(inputs), 1, Rel.GE, cls_name) + self.add_prim_attr('n', len(inputs)) + shp0 = inputs[0] + for i, shp in enumerate(inputs): + validator.check(f"shape of inputs[{i}]", shp, 'shape of inputs[0]', shp0, Rel.EQ, cls_name) + return shp0 + + def 
infer_dtype(self, inputs): + cls_name = self.name + validator.check_value_type("inputs", inputs, [tuple, list], cls_name) + validator.check_integer("inputs", len(inputs), 1, Rel.GE, cls_name) + args = {} + for i, dtype in enumerate(inputs): + args[f"inputs[{i}]"] = dtype + validator.check_tensor_type_same(args, mstype.number_type + (mstype.bool_,), cls_name) + return inputs[0] + + def infer_value(self, inputs): + if inputs is None: + return None + + for x in inputs: + if x is None: + return None + + added = copy.deepcopy(inputs[0].asnumpy()) + for x in inputs[1:]: + added += x.asnumpy() + out = np.array(added, inputs[0].asnumpy().dtype) + return Tensor(out) + + +class AccumulateNV2(PrimitiveWithInfer): + """ + Computes accumulation of all input tensors element-wise. + + AccumulateNV2 is like AddN with a significant difference: AccumulateNV2 won't + wait for all of its inputs to be ready before beginning to sum. That is to say, + AccumulateNV2 will be able to save memory when inputs are ready at different + times since minimum temporary storage is proportional to the output size rather + than the inputs size. + + Inputs: + - **input_x** (Union(tuple[Tensor], list[Tensor])) - The input tuple or list + is made up of multiple tensors whose dtype is number to be added together. + + Outputs: + Tensor, has the same shape and dtype as each entry of the `input_x`. 
+ + Examples: + >>> class NetAccumulateNV2(nn.Cell): + >>> def __init__(self): + >>> super(NetAccumulateNV2, self).__init__() + >>> self.accumulateNV2 = P.AccumulateNV2() + >>> + >>> def construct(self, *z): + >>> return self.accumulateNV2(z) + >>> + >>> net = NetAccumulateNV2() + >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.float32) + >>> input_y = Tensor(np.array([4, 5, 6]), mindspore.float32) + >>> net(input_x, input_y, input_x, input_y) + Tensor([10., 14., 18.], shape=(3,), dtype=mindspore.float32) + """ + + @prim_attr_register + def __init__(self): + self.__setattr_flag__ = True + self.init_prim_io_names(inputs=["inputs"], outputs=["sum"]) + def infer_shape(self, inputs): cls_name = self.name validator.check_integer("inputs", len(inputs), 1, Rel.GE, cls_name) @@ -752,24 +885,151 @@ class Neg(PrimitiveWithInfer): validator.check_tensor_type_same({"input_x": input_x}, mstype.number_type, self.name) return input_x + def infer_value(self, input_x): + if input_x is not None: + input_x = input_x.asnumpy() + return Tensor(-input_x) + + return None + + +class InplaceAdd(PrimitiveWithInfer): + """ + Adds v into specified rows of x. Computes y = x; y[i,] += v. + + Args: + indices (Union[int, tuple]): Indices into the left-most dimension of x, and determines which rows of x + to add with v. It is a int or tuple, whose value is in [0, the first dimension size of x). + + Inputs: + - **input_x** (Tensor) - The first input is a tensor whose data type is number. + - **input_v** (Tensor) - The second input is a tensor who has the same dimension sizes as x except + the first dimension, which must be the same as indices's size. + + Outputs: + Tensor, has the same shape and dtype as input. + + Examples: + >>> indices = (0, 1) + >>> input_x = Tensor(np.array([[1, 2], [3, 4], [5, 6]]), mindspore.float32) + >>> input_v = Tensor(np.array([[0.5, 1.0], [1.0, 1.5]]), mindspore.float32) + >>> inplaceAdd = P.InplaceAdd(indices) + >>> inplaceAdd(input_x, input_v) + [[1.5 3.] + [4. 
5.5] + [5. 6.]] + """ + + @prim_attr_register + def __init__(self, indices): + """init InplaceAdd""" + self.init_prim_io_names(inputs=['x', 'v'], outputs=['y']) + self.indices = indices + + def infer_shape(self, x_shape, v_shape): + validator.check("x", len(x_shape), "v", len(v_shape), Rel.EQ, self.name) + if isinstance(self.indices, int): + validator.check("size of indices", 1, "v's first dimension", v_shape[0], + Rel.EQ, self.name) + if self.indices < 0 or self.indices >= x_shape[0]: + raise ValueError(f'The value of indices must be in [0, {x_shape[0]}), but got {self.indices}.') + else: + validator.check("size of indices", len(self.indices), "v's first dimension", v_shape[0], + Rel.EQ, self.name) + for i in self.indices: + if i < 0 or i >= x_shape[0]: + raise ValueError(f'The value of indices must be in [0, {x_shape[0]}), but got {i}.') + if len(x_shape) > 1: + validator.check("x's ith dimension", x_shape[1:], "v's ith dimension", v_shape[1:], + Rel.EQ, self.name) + return x_shape + + def infer_dtype(self, x_dtype, v_dtype): + args = {'x': x_dtype, 'v': v_dtype} + valid_type = [mstype.int32, mstype.float16, mstype.float32] + validator.check_tensor_type_same(args, valid_type, self.name) + validator.check_value_type('indices', self.indices, [tuple, int], self.name) + return x_dtype + + +class InplaceSub(PrimitiveWithInfer): + """ + Subtracts v into specified rows of x. Computes y = x; y[i, :] -= v; return y. + + Args: + indices (Union[int, tuple]): Indices into the left-most dimension of x, and determines which rows of x + to sub with v. It is a int or tuple, whose value is in [0, the first dimension size of x). + + Inputs: + - **input_x** (Tensor) - The first input is a tensor whose data type is number. + - **input_v** (Tensor) - The second input is a tensor who has the same dimension sizes as x except + the first dimension, which must be the same as indices's size. + + Outputs: + Tensor, has the same shape and dtype as input. 
+ + Examples: + >>> indices = (0, 1) + >>> input_x = Tensor(np.array([[1, 2], [3, 4], [5, 6]]), mindspore.float32) + >>> input_v = Tensor(np.array([[0.5, 1.0], [1.0, 1.5]]), mindspore.float32) + >>> inplaceSub = P.InplaceSub(indices) + >>> inplaceSub(input_x, input_v) + [[0.5 1.] + [2. 2.5] + [5. 6.]] + """ + + @prim_attr_register + def __init__(self, indices): + """init InplaceSub""" + self.init_prim_io_names(inputs=['x', 'v'], outputs=['y']) + self.indices = indices + + def infer_shape(self, x_shape, v_shape): + validator.check("x", len(x_shape), "v", len(v_shape), Rel.EQ, self.name) + if isinstance(self.indices, int): + validator.check("size of indices", 1, "v's first dimension", v_shape[0], + Rel.EQ, self.name) + if self.indices < 0 or self.indices >= x_shape[0]: + raise ValueError(f'The value of indices must be in [0, {x_shape[0]}), but got {self.indices}.') + else: + validator.check("size of indices", len(self.indices), "v's first dimension", v_shape[0], + Rel.EQ, self.name) + for i in self.indices: + if i < 0 or i >= x_shape[0]: + raise ValueError(f'The value of indices must be in [0, {x_shape[0]}), but got {i}.') + if len(x_shape) > 1: + validator.check("x's ith dimension", x_shape[1:], "v's ith dimension", v_shape[1:], + Rel.EQ, self.name) + return x_shape + + def infer_dtype(self, x_dtype, v_dtype): + args = {'x': x_dtype, 'v': v_dtype} + valid_type = [mstype.int32, mstype.float16, mstype.float32] + validator.check_tensor_type_same(args, valid_type, self.name) + validator.check_value_type('indices', self.indices, [tuple, int], self.name) + return x_dtype + class Sub(_MathBinaryOp): """ Subtracts the second input tensor from the first input tensor element-wise. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. 
- When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or - a number. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. + Tensor, the shape is same as the shape after broadcasting, + and the data type is the one with high precision or high digits among the two inputs. Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.int32) @@ -779,24 +1039,35 @@ class Sub(_MathBinaryOp): [-3, -3, -3] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = x - y + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Mul(_MathBinaryOp): """ Multiplies two tensors element-wise. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. 
+ When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or - a number. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. + Tensor, the shape is same as the shape after broadcasting, + and the data type is the one with high precision or high digits among the two inputs. Examples: >>> input_x = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32) @@ -835,6 +1106,7 @@ class Square(PrimitiveWithInfer): @prim_attr_register def __init__(self): """init Square""" + self.init_prim_io_names(inputs=['input_x'], outputs=['output']) def infer_shape(self, x_shape): return x_shape @@ -843,6 +1115,14 @@ class Square(PrimitiveWithInfer): validator.check_tensor_type_same({"x": x_type}, mstype.number_type, self.name) return x_type + def infer_value(self, x): + if x is not None: + x = x.asnumpy() + out = x * x + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Rsqrt(PrimitiveWithInfer): """ @@ -864,6 +1144,7 @@ class Rsqrt(PrimitiveWithInfer): @prim_attr_register def __init__(self): """init Rsqrt""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, x_shape): return x_shape @@ -872,6 +1153,14 @@ class Rsqrt(PrimitiveWithInfer): validator.check_tensor_type_same({"x": x_type}, mstype.number_type, self.name) return 
x_type + def infer_value(self, x): + if x is not None: + x = x.asnumpy() + out = 1.0 / np.sqrt(x) + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Sqrt(PrimitiveWithInfer): """ @@ -893,6 +1182,7 @@ class Sqrt(PrimitiveWithInfer): @prim_attr_register def __init__(self): """init Sqrt""" + self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, x_shape): return x_shape @@ -901,6 +1191,14 @@ class Sqrt(PrimitiveWithInfer): validator.check_tensor_type_same({"x": x_type}, mstype.number_type, self.name) return x_type + def infer_value(self, x): + if x is not None: + x = x.asnumpy() + out = np.sqrt(x) + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Reciprocal(PrimitiveWithInfer): """ @@ -931,33 +1229,34 @@ class Reciprocal(PrimitiveWithInfer): validator.check_subclass("x", x, mstype.tensor, self.name) return x + def infer_value(self, x): + if x is not None: + x = x.asnumpy() + out = 1.0 / x + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Pow(_MathBinaryOp): """ Computes a tensor to the power of the second input. - The first input must be a tensor, and the second input should be a tensor or a number. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be the same. - When the inputs are one tensor and one scalar, the scalar could not be a parameter, - only could be a constant, and the type of the scalar is the same as the data type of the tensor. - - Inputs: - - **input_x** (Union[Tensor]) - The first input is a tensor whose data type is number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or - a number. - - Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. - + The inputs must be two tensors or one tensor and one scalar. 
+ When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Tensor) - The input tensor. - - **input_y** (Union[Tensor, Number]) - The exponent part. If exponent is a tensor, its shape must be able to - broadcast to the shape of the `input_x`. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, has the same shape as the `input_x`. + Tensor, the shape is same as the shape after broadcasting, + and the data type is the one with high precision or high digits among the two inputs. Examples: >>> input_x = Tensor(np.array([1.0, 2.0, 4.0]), mindspore.float32) @@ -973,6 +1272,15 @@ class Pow(_MathBinaryOp): [1.0, 16.0, 64.0] """ + def infer_value(self, x, power): + if x is not None and power is not None: + x = x.asnumpy() + power = power.asnumpy() + out = np.power(x, power) + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Exp(PrimitiveWithInfer): """ @@ -1003,6 +1311,88 @@ class Exp(PrimitiveWithInfer): validator.check_subclass("x", x_type, mstype.tensor, self.name) return x_type + def infer_value(self, x): + if x is not None: + x = x.asnumpy() + out = np.exp(x) + out = np.array(out, x.dtype) + return Tensor(out) + return None + + +class Expm1(PrimitiveWithInfer): + """ + Returns exponential then minus 1 of a tensor element-wise. + + Inputs: + - **input_x** (Tensor) - The input tensor. + + Outputs: + Tensor, has the same shape as the `input_x`. 
+ + Examples: + >>> input_x = Tensor(np.array([0.0, 1.0, 2.0, 4.0]), mindspore.float32) + >>> expm1 = P.Expm1() + >>> expm1(input_x) + [ 0., 1.71828183, 6.3890561 , 53.59815003] + """ + + @prim_attr_register + def __init__(self): + """init Exp""" + self.init_prim_io_names(inputs=['x'], outputs=['y']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_type): + validator.check_subclass("x", x_type, mstype.tensor, self.name) + return x_type + + +class HistogramFixedWidth(PrimitiveWithInfer): + """ + Returns a rank 1 histogram counting the number of entries in values that fall into every bin. The bins are equal + width and determined by the arguments range and nbins. + + Args: + dtype (string): An optional attribute. Must be one of the following types: "int32", "int64". Default: "int32". + nbins (Tensor): Number of histogram bins, the type is int32. + + Inputs: + - **x** (Tensor) - Numeric Tensor. Must be one of the following types: int32, float32, float16. + - **range** (Tensor) - Must have the same type as x. Shape [2] Tensor of same dtype as x. + x <= range[0] will be mapped to hist[0], x >= range[1] will be mapped to hist[-1]. + + Outputs: + Tensor, the type is int32. 
+ + Examples: + >>> x = Tensor([-1.0, 0.0, 1.5, 2.0, 5.0, 15], mindspore.float16) + >>> range = Tensor([0.0, 5.0], mindspore.float16) + >>> hist = P.HistogramFixedWidth(5) + >>> hist(x, range) + [2 1 1 0 2] + """ + + @prim_attr_register + def __init__(self, nbins, dtype='int32'): + self.nbins = validator.check_value_type("nbins", nbins, [int], self.name) + valid_values = ['int32', 'int64'] + self.dtype = validator.check_string("dtype", dtype, valid_values, self.name) + self.init_prim_io_names(inputs=['x', 'range'], outputs=['y']) + + def infer_shape(self, x_shape, range_shape): + return (self.nbins,) + + def infer_dtype(self, x_dtype, range_dtype): + validator.check_subclass("x", x_dtype, mstype.tensor, self.name) + valid_types = (mstype.float16, mstype.float32, mstype.int32) + validator.check_tensor_type_same({"x": x_dtype}, valid_types, self.name) + validator.check_tensor_type_same({"range": range_dtype}, valid_types, self.name) + y_dtype = mstype.int32 + return y_dtype + class Log(PrimitiveWithInfer): """ @@ -1032,6 +1422,14 @@ class Log(PrimitiveWithInfer): validator.check_subclass("x", x, mstype.tensor, self.name) return x + def infer_value(self, x): + if x is not None: + x = x.asnumpy() + out = np.log(x) + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Log1p(PrimitiveWithInfer): """ @@ -1127,18 +1525,20 @@ class Minimum(_MathBinaryOp): Computes the element-wise minimum of input tensors. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. 
Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or - a number. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. + Tensor, the shape is same as the shape after broadcasting, + and the data type is the one with high precision or high digits among the two inputs. Examples: >>> input_x = Tensor(np.array([1.0, 5.0, 3.0]), mindspore.float32) @@ -1148,24 +1548,35 @@ class Minimum(_MathBinaryOp): [1.0, 2.0, 3.0] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = np.minimum(x, y) + out = np.array(out, x.dtype) + return Tensor(out) + return None + class Maximum(_MathBinaryOp): """ Computes the element-wise maximum of input tensors. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. 
- - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or - a number. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. + Tensor, the shape is same as the shape after broadcasting, + and the data type is the one with high precision or high digits among the two inputs. Examples: >>> input_x = Tensor(np.array([1.0, 5.0, 3.0]), mindspore.float32) @@ -1175,24 +1586,34 @@ class Maximum(_MathBinaryOp): [4.0, 5.0, 6.0] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = np.maximum(x, y) + out = np.array(out, x.dtype) + return Tensor(out) + return None class RealDiv(_MathBinaryOp): """ Divide the first input tensor by the second input tensor in floating-point type element-wise. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or - a number. 
+ - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. + Tensor, the shape is same as the shape after broadcasting, + and the data type is the one with high precision or high digits among the two inputs. Examples: >>> input_x = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32) @@ -1217,18 +1638,20 @@ class Div(_MathBinaryOp): Computes the quotient of dividing the first input tensor by the second input tensor element-wise. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or - a number. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. 
+ Tensor, the shape is same as the shape after broadcasting, + and the data type is the one with high precision or high digits among the two inputs. Raises: ValueError: When `input_x` and `input_y` are not the same dtype. @@ -1248,23 +1671,67 @@ class Div(_MathBinaryOp): return None +class DivNoNan(_MathBinaryOp): + """ + Computes a safe divide which returns 0 if the y is zero. + + The inputs must be two tensors or one tensor and one scalar. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. + + Inputs: + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. + + Outputs: + Tensor, the shape is same as the shape after broadcasting, + and the data type is the one with high precision or high digits among the two inputs. + + Raises: + ValueError: When `input_x` and `input_y` are not the same dtype. + + Examples: + >>> input_x = Tensor(np.array([-1.0, 0., 1.0, 5.0, 6.0]), mindspore.float32) + >>> input_y = Tensor(np.array([0., 0., 0., 2.0, 3.0]), mindspore.float32) + >>> div_no_nan = P.DivNoNan() + >>> div_no_nan(input_x, input_y) + [0., 0., 0., 2.5, 2.0] + """ + + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + with np.errstate(divide='ignore', invalid='ignore'): + out = np.true_divide(x, y) + out[~np.isfinite(out)] = 0 + return out + return None + + class FloorDiv(_MathBinaryOp): """ Divide the first input tensor by the second input tensor element-wise and rounds down to the closest integer. The inputs must be two tensors or one tensor and one scalar. 
- When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or - a number. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. + Tensor, the shape is same as the shape after broadcasting, + and the data type is the one with high precision or high digits among the two inputs. Examples: >>> input_x = Tensor(np.array([2, 4, -1]), mindspore.int32) @@ -1309,18 +1776,20 @@ class FloorMod(_MathBinaryOp): Compute element-wise remainder of division. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. 
+ When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as 'input_x' or - a number. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as 'input_x'. + Tensor, the shape is same as the shape after broadcasting, + and the data type is the one with high precision or high digits among the two inputs. Examples: >>> input_x = Tensor(np.array([2, 4, -1]), mindspore.int32) @@ -1331,6 +1800,35 @@ class FloorMod(_MathBinaryOp): """ +class Ceil(PrimitiveWithInfer): + """ + Round a tensor up to the closest integer element-wise. + + Inputs: + - **input_x** (Tensor) - The input tensor. Its element data type must be float. + + Outputs: + Tensor, has the same shape as `input_x`. + + Examples: + >>> input_x = Tensor(np.array([1.1, 2.5, -1.5]), mindspore.float32) + >>> ceil_op = P.Ceil() + >>> ceil_op(input_x) + [2.0, 3.0, -1.0] + """ + + @prim_attr_register + def __init__(self): + self.init_prim_io_names(inputs=['x'], outputs=['y']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + validator.check_tensor_type_same({"x": x_dtype}, mstype.float_type, self.name) + return x_dtype + + class Acosh(PrimitiveWithInfer): """ Compute inverse hyperbolic cosine of x element-wise. @@ -1359,6 +1857,35 @@ class Acosh(PrimitiveWithInfer): return x_dtype +class Cosh(PrimitiveWithInfer): + """ + Computes hyperbolic cosine of input element-wise. 
+ + Inputs: + - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. + + Outputs: + Tensor, has the same shape as `input_x`. + + Examples: + >>> cosh = P.Cosh() + >>> input_x = Tensor(np.array([0.24, 0.83, 0.31, 0.09]), mindspore.float32) + >>> output = cosh(input_x) + [1.0289385 1.364684 1.048436 1.4228927] + """ + + @prim_attr_register + def __init__(self): + """init Cosh""" + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + validator.check_tensor_type_same({'x': x_dtype}, mstype.number_type, self.name) + return x_dtype + + class Asinh(PrimitiveWithInfer): """ Compute inverse hyperbolic cosine of x element-wise. @@ -1376,7 +1903,6 @@ class Asinh(PrimitiveWithInfer): [-2.3212, 1.1976, 1.8184, 5.2983] """ - @prim_attr_register def __init__(self): """init Asinh""" @@ -1389,6 +1915,35 @@ class Asinh(PrimitiveWithInfer): return x_dtype +class Sinh(PrimitiveWithInfer): + """ + Computes hyperbolic sine of input element-wise. + + Inputs: + - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. + + Outputs: + Tensor, has the same shape as `input_x`. + + Examples: + >>> sinh = P.Sinh() + >>> input_x = Tensor(np.array([0.62, 0.28, 0.43, 0.62]), mindspore.float32) + >>> output = sinh(input_x) + [0.6604918 0.28367308 0.44337422 0.6604918] + """ + + @prim_attr_register + def __init__(self): + """init Sinh""" + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + validator.check_tensor_type_same({'x': x_dtype}, mstype.number_type, self.name) + return x_dtype + + class _LogicBinaryOp(_BinaryOp): """ Define logic binary operators. @@ -1409,19 +1964,17 @@ class Equal(_LogicBinaryOp): Computes the equivalence between two tensors element-wise. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. 
- When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number, bool]) - The first input is a tensor whose data type is number or bool, or - a number or a bool object. - - **input_y** (Union[Tensor, Number, bool]) - The second input tensor whose data type is same as 'input_x' or - a number or a bool object. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is bool. + Tensor, the shape is same as the shape after broadcasting,and the data type is bool. Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.float32) @@ -1440,6 +1993,44 @@ class Equal(_LogicBinaryOp): return _LogicBinaryOp.do_infer_dtype(x_dtype, y_dtype, mstype.number_type + (mstype.bool_,), self.name) +class ApproximateEqual(_LogicBinaryOp): + """ + Returns the truth value of abs(x1-x2) < tolerance element-wise. + + Args: + tolerance (float): The maximum deviation that two elements can be considered equal. Default: 1e-05. + + Inputs: + - **x1** (Tensor) - A tensor. Must be one of the following types: float32, float16. + - **x2** (Tensor) - A tensor of the same type and shape as 'x1'. + + Outputs: + Tensor, the shape is same as the shape of 'x1', and the data type is bool. 
+ + Examples: + >>> x1 = Tensor(np.array([1, 2, 3]), mindspore.float32) + >>> x2 = Tensor(np.array([2, 4, 6]), mindspore.float32) + >>> approximate_equal = P.ApproximateEqual(2.) + >>> result = approximate_equal(x1, x2) + [True True False] + """ + + @prim_attr_register + def __init__(self, tolerance=1e-05): + """Init ApproximateEqual""" + validator.check_value_type("tolerance", tolerance, [float], self.name) + + def infer_shape(self, x_shape, y_shape): + validator.check("x_shape", x_shape, "y_shape", y_shape, Rel.EQ, self.name) + return x_shape + + def infer_dtype(self, x_dtype, y_dtype): + args_dtype = {"x": x_dtype, "y": y_dtype} + valid_type = [mstype.float32, mstype.float16] + validator.check_tensor_type_same(args_dtype, valid_type, prim_name=self.name) + return mstype.tensor_type(mstype.bool_) + + class EqualCount(PrimitiveWithInfer): """ Computes the number of the same elements of two tensors. @@ -1482,19 +2073,17 @@ class NotEqual(_LogicBinaryOp): Computes the non-equivalence of two tensors element-wise. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number, bool]) - The first input is a tensor whose data type is number or bool, or - a number or a bool object. - - **input_y** (Union[Tensor, Number, bool]) - The second input tensor whose data type is same as `input_x` or - a number or a bool object. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. 
+ - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is bool. + Tensor, the shape is same as the shape after broadcasting,and the data type is bool. Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.float32) @@ -1518,18 +2107,19 @@ class Greater(_LogicBinaryOp): Computes the boolean value of :math:`x > y` element-wise. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as `input_x` or - a number. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is bool. + Tensor, the shape is same as the shape after broadcasting,and the data type is bool. 
Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.int32) @@ -1538,6 +2128,13 @@ class Greater(_LogicBinaryOp): >>> greater(input_x, input_y) [False, True, False] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = np.array(np.greater(x, y)) + return Tensor(out) + return None class GreaterEqual(_LogicBinaryOp): @@ -1545,18 +2142,19 @@ class GreaterEqual(_LogicBinaryOp): Computes the boolean value of :math:`x >= y` element-wise. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as `input_x` or - a number. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is bool. + Tensor, the shape is same as the shape after broadcasting,and the data type is bool. 
Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.int32) @@ -1565,6 +2163,13 @@ class GreaterEqual(_LogicBinaryOp): >>> greater_equal(input_x, input_y) [True, True, False] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = np.array(np.greater_equal(x, y)) + return Tensor(out) + return None class Less(_LogicBinaryOp): @@ -1572,18 +2177,19 @@ class Less(_LogicBinaryOp): Computes the boolean value of :math:`x < y` element-wise. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as `input_x` or - a number. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is bool. + Tensor, the shape is same as the shape after broadcasting,and the data type is bool. 
Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.int32) @@ -1592,6 +2198,13 @@ class Less(_LogicBinaryOp): >>> less(input_x, input_y) [False, False, True] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = np.array(np.less(x, y)) + return Tensor(out) + return None class LessEqual(_LogicBinaryOp): @@ -1599,18 +2212,19 @@ class LessEqual(_LogicBinaryOp): Computes the boolean value of :math:`x <= y` element-wise. The inputs must be two tensors or one tensor and one scalar. - When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be same. - When the inputs are one tensor and one scalar, the scalar cannot be a parameter, only can be a constant, - and the type of the scalar is the same as the data type of the tensor. + When the inputs are two tensors, + both dtypes cannot be bool, and the shapes of them could be broadcast. + When the inputs are one tensor and one scalar, + the scalar only could be a constant. Inputs: - - **input_x** (Union[Tensor, Number]) - The first input is a tensor whose data type is number or a number. - - **input_y** (Union[Tensor, Number]) - The second input is a tensor whose data type is same as `input_x` or - a number. + - **input_x** (Union[Tensor, Number, bool]) - The first input is a number or + a bool or a tensor whose data type is number or bool. + - **input_y** (Union[Tensor, Number, bool]) - The second input is a number or + a bool when the first input is a tensor or a tensor whose data type is number or bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is bool. + Tensor, the shape is same as the shape after broadcasting,and the data type is bool. 
Examples: >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.int32) @@ -1619,6 +2233,13 @@ class LessEqual(_LogicBinaryOp): >>> less_equal(input_x, input_y) [True, False, True] """ + def infer_value(self, x, y): + if x is not None and y is not None: + x = x.asnumpy() + y = y.asnumpy() + out = np.array(np.less_equal(x, y)) + return Tensor(out) + return None class LogicalNot(PrimitiveWithInfer): @@ -1655,15 +2276,16 @@ class LogicalAnd(_LogicBinaryOp): """ Computes the "logical AND" of two tensors element-wise. - The inputs must be two tensors or one tensor and one bool object. + The inputs must be two tensors or one tensor and one bool. When the inputs are two tensors, the shapes of them could be broadcast, and the data types of them should be bool. - When the inputs are one tensor and one bool object, the bool object cannot be a parameter, only can be a constant, + When the inputs are one tensor and one bool, the bool object only could be a constant, and the data type of the tensor should be bool. Inputs: - - **input_x** (Union[Tensor, bool]) - The first input is a tensor whose data type is bool or a bool object. - - **input_y** (Union[Tensor, bool]) - The second input is a tensor whose data type is bool or a bool object. + - **input_x** (Union[Tensor, bool]) - The first input is a bool or a tensor whose data type is bool. + - **input_y** (Union[Tensor, bool]) - The second input is a bool when the first input is a tensor or + a tensor whose data type is bool. Outputs: Tensor, the shape is same as the shape after broadcasting, and the data type is bool. @@ -1684,18 +2306,19 @@ class LogicalOr(_LogicBinaryOp): """ Computes the "logical OR" of two tensors element-wise. - The inputs must be two tensors or one tensor and one bool object. + The inputs must be two tensors or one tensor and one bool. When the inputs are two tensors, the shapes of them could be broadcast, and the data types of them should be bool. 
- When the inputs are one tensor and one bool object, the bool object cannot be a parameter, only can be a constant, + When the inputs are one tensor and one bool, the bool object only could be a constant, and the data type of the tensor should be bool. Inputs: - - **input_x** (Union[Tensor, bool]) - The first input is a tensor whose data type is bool or a bool object. - - **input_y** (Union[Tensor, bool]) - The second input is a tensor whose data type is bool or a bool object. + - **input_x** (Union[Tensor, bool]) - The first input is a bool or a tensor whose data type is bool. + - **input_y** (Union[Tensor, bool]) - The second input is a bool when the first input is a tensor or + a tensor whose data type is bool. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is bool. + Tensor, the shape is same as the shape after broadcasting,and the data type is bool. Examples: >>> input_x = Tensor(np.array([True, False, True]), mindspore.bool_) @@ -2097,8 +2720,8 @@ class NMSWithMask(PrimitiveWithInfer): def infer_shape(self, bboxes_shape): cls_name = self.name validator.check_integer("bboxes rank", len(bboxes_shape), 2, Rel.EQ, cls_name) - validator.check_integer("bboxes.shape()[0]", bboxes_shape[0], 0, Rel.GT, cls_name) - validator.check_integer("bboxes.shape()[1]", bboxes_shape[1], 5, Rel.EQ, cls_name) + validator.check_integer("bboxes.shape[0]", bboxes_shape[0], 0, Rel.GT, cls_name) + validator.check_integer("bboxes.shape[1]", bboxes_shape[1], 5, Rel.EQ, cls_name) num = bboxes_shape[0] return (bboxes_shape, (num,), (num,)) @@ -2127,6 +2750,7 @@ class Abs(PrimitiveWithInfer): @prim_attr_register def __init__(self): """init Abs""" + self.init_prim_io_names(inputs=['input_x'], outputs=['output']) def infer_shape(self, x_shape): return x_shape @@ -2138,7 +2762,7 @@ class Abs(PrimitiveWithInfer): def infer_value(self, x): if x is not None: x = x.asnumpy() - out = np.abs(x, dtype=x.dtype) + out = np.array(np.abs(x, dtype=x.dtype)) return 
Tensor(out) return None @@ -2197,7 +2821,8 @@ class Round(PrimitiveWithInfer): @prim_attr_register def __init__(self): - pass + """init Round""" + self.init_prim_io_names(inputs=['input_x'], outputs=['output']) def infer_shape(self, x_shape): return x_shape @@ -2279,7 +2904,7 @@ class Atan2(_MathBinaryOp): - **input_y** (Tensor) - The input tensor. Outputs: - Tensor, the shape is same as the shape after broadcasting, and the data type is same as `input_x`. + Tensor, the shape is same as the shape after broadcasting, and the data type is same as `input_x`. Examples: >>> input_x = Tensor(np.array([[0, 1]]), mindspore.float32) @@ -2289,7 +2914,6 @@ class Atan2(_MathBinaryOp): [[0. 0.7853982]] """ - class SquareSumAll(PrimitiveWithInfer): """ Returns square sum all of a tensor element-wise @@ -2315,6 +2939,7 @@ class SquareSumAll(PrimitiveWithInfer): @prim_attr_register def __init__(self): """init SquareSumAll""" + def infer_shape(self, x_shape, y_shape): validator.check("x1_shape", x_shape, "x2_shape", y_shape, Rel.EQ, self.name) return [], [] @@ -2441,3 +3066,101 @@ class BesselI1e(PrimitiveWithInfer): def infer_dtype(self, x): validator.check_tensor_type_same({'x': x}, mstype.number_type, self.name) return x + + +class Inv(PrimitiveWithInfer): + """ + Computes Inv(Reciprocal) of input tensor element-wise. + + Inputs: + - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. + Must be one of the following types: float16, float32, int32. + + Outputs: + Tensor, has the same shape and data type as `input_x`.
+ + Examples: + >>> inv = P.Inv() + >>> input_x = Tensor(np.array([0.25, 0.4, 0.31, 0.52]), mindspore.float32) + >>> output = inv(input_x) + [4., 2.5, 3.2258065, 1.923077] + """ + + @prim_attr_register + def __init__(self): + pass + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + validator.check_tensor_type_same({'x_dtype': x_dtype}, [mstype.float16, mstype.float32, + mstype.int32], self.name) + return x_dtype + + +class Invert(PrimitiveWithInfer): + """ + Flips all bits of input tensor element-wise. + + Inputs: + - **input_x** (Tensor[int16], Tensor[uint16]) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. + + Outputs: + Tensor, has the same shape as `input_x`. + + Examples: + >>> invert = P.Invert() + >>> input_x = Tensor(np.array([25, 4, 13, 9]), mindspore.int16) + >>> output = invert(input_x) + [-26, -5, -14, -10] + """ + + @prim_attr_register + def __init__(self): + pass + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + validator.check_tensor_type_same({'x_dtype': x_dtype}, [mstype.int16, mstype.uint16], self.name) + return x_dtype + + +class Eps(PrimitiveWithInfer): + """ + Creates a tensor filled with `input_x` dtype minimum val. + + Inputs: + - **input_x** (Tensor) - Input tensor. + + Outputs: + Tensor, has the same type and shape as `input_x`, but filled with `input_x` dtype minimum val. 
+ + Examples: + >>> out = P.Eps()(input_x) + """ + + @prim_attr_register + def __init__(self): + """init Eps""" + self.init_prim_io_names(inputs=['input_x'], outputs=['y']) + + def __infer__(self, input_x): + valid_types = [mstype.float16, mstype.float32] + validator.check_tensor_type_same({'input_x': input_x['dtype']}, valid_types, self.name) + + x_nptype = mstype.dtype_to_nptype(input_x['dtype'].element_type()) + if x_nptype == np.float16: + min_val = 2 ** (-14) + else: + min_val = 2 ** (-16) + + res = np.full(input_x['shape'], min_val, x_nptype) + out = { + 'value': Tensor(res), + 'shape': input_x['shape'], + 'dtype': input_x['dtype'], + } + return out diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index 027a9e9525..ce8536c001 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -78,7 +78,7 @@ class Flatten(PrimitiveWithInfer): >>> input_tensor = Tensor(np.ones(shape=[1, 2, 3, 4]), mindspore.float32) >>> flatten = P.Flatten() >>> output = flatten(input_tensor) - >>> assert output.shape() == (1, 24) + >>> assert output.shape == (1, 24) """ @prim_attr_register @@ -585,6 +585,50 @@ class FusedBatchNorm(Primitive): self.momentum = validator.check_number_range('momentum', momentum, 0, 1, Rel.INC_BOTH, self.name) +class BNTrainingReduce(PrimitiveWithInfer): + """ + reduce sum at axis [0, 2, 3]. + + Inputs: + - **x** (Tensor) - Tensor of shape :math:`(N, C)`. + + Outputs: + - **sum** (Tensor) - Tensor of shape :math:`(C,)`. + - **square_sum** (Tensor) - Tensor of shape :math:`(C,)`. 
+ + """ + + @prim_attr_register + def __init__(self): + self.init_prim_io_names(inputs=['x'], outputs=['sum', 'square_sum']) + + def infer_shape(self, x_shape): + validator.check_integer("x rank", len(x_shape), 4, Rel.EQ, self.name) + return ([x_shape[1]], [x_shape[1]]) + + def infer_dtype(self, x_type): + return (x_type, x_type) + + +class BNTrainingUpdate(PrimitiveWithInfer): + """ + primitive operator of bn_training_update's register and info descriptor + """ + @prim_attr_register + def __init__(self, isRef=True, epsilon=1e-5, factor=0.1): + self.init_prim_io_names(inputs=['x', 'sum', 'square_sum', 'scale', 'b', 'mean', 'variance'], + outputs=['y', 'running_mean', 'running_variance', 'save_mean', 'save_inv_variance']) + #self.isRef = validator.check_integer('isRef', isRef, [0, 1], Rel.IN) + self.epsilon = validator.check_number_range('epsilon', epsilon, 0, 1, Rel.INC_RIGHT, 'BNTrainingUpdate') + self.factor = validator.check_number_range('factor', factor, 0, 1, Rel.INC_BOTH, 'BNTrainingUpdate') + + def infer_shape(self, x, sum, square_sum, scale, b, mean, variance): + return (x, variance, variance, variance, variance) + + def infer_dtype(self, x, sum, square_sum, scale, b, mean, variance): + return (x, variance, variance, variance, variance) + + class BatchNorm(PrimitiveWithInfer): r""" Batch Normalization for input data and updated parameters. 
@@ -629,7 +673,7 @@ class BatchNorm(PrimitiveWithInfer): >>> mean = Tensor(np.ones([64]), mindspore.float32) >>> variance = Tensor(np.ones([64]), mindspore.float32) >>> batch_norm = P.BatchNorm() - >>> output = batch_norm(input_x, scale, bias, mean, variance + >>> output = batch_norm(input_x, scale, bias, mean, variance) """ @prim_attr_register @@ -756,7 +800,7 @@ class Conv2D(PrimitiveWithInfer): def infer_shape(self, x_shape, w_shape): validator.check_integer("weight rank", len(w_shape), 4, Rel.EQ, self.name) validator.check_integer("x rank", len(x_shape), 4, Rel.EQ, self.name) - validator.check("x_shape[1] / group", x_shape[1] // self.group, "w_shape[1]", w_shape[1], Rel.EQ, self.name) + validator.check(f"x_shape[1] / group", x_shape[1] // self.group, "w_shape[1]", w_shape[1], Rel.EQ, self.name) validator.check('out_channel', self.out_channel, 'w_shape[0]', w_shape[0], Rel.EQ, self.name) validator.check('kernel_size', self.kernel_size, 'w_shape[2:4]', tuple(w_shape[2:4]), Rel.EQ, self.name) @@ -786,9 +830,9 @@ class Conv2D(PrimitiveWithInfer): pad_top, pad_bottom, pad_left, pad_right = self.pad, self.pad, self.pad, self.pad h_out = 1 + (x_shape[2] + 2 * self.pad - kernel_size_h - (kernel_size_h - 1) * (dilation_h - 1)) \ - / stride_h + / stride_h w_out = 1 + (x_shape[3] + 2 * self.pad - kernel_size_w - (kernel_size_w - 1) * (dilation_w - 1)) \ - / stride_w + / stride_w h_out = math.floor(h_out) w_out = math.floor(w_out) @@ -802,6 +846,8 @@ class Conv2D(PrimitiveWithInfer): args = {'x': x_dtype, 'w': w_dtype} valid_types = [mstype.int8, mstype.int32, mstype.float16, mstype.float32] validator.check_tensor_type_same(args, valid_types, self.name) + if x_dtype.element_type() == mstype.int8: + return mstype.tensor_type(mstype.int32) return x_dtype @@ -840,7 +886,7 @@ class DepthwiseConv2dNative(PrimitiveWithInfer): >>> weight = Tensor(np.ones([1, 32, 3, 3]), mindspore.float32) >>> depthwise_conv2d = P.DepthwiseConv2dNative(channel_multiplier = 3, kernel_size = (3, 3)) 
>>> output = depthwise_conv2d(input, weight) - >>> assert output.shape() == (10, 96, 30, 30) + >>> assert output.shape == (10, 96, 30, 30) """ @prim_attr_register @@ -907,9 +953,9 @@ class DepthwiseConv2dNative(PrimitiveWithInfer): pad_top, pad_bottom, pad_left, pad_right = self.pad, self.pad, self.pad, self.pad h_out = 1 + (x_shape[2] + 2 * self.pad - kernel_size_h - (kernel_size_h - 1) * (dilation_h - 1)) \ - / stride_h + / stride_h w_out = 1 + (x_shape[3] + 2 * self.pad - kernel_size_w - (kernel_size_w - 1) * (dilation_w - 1)) \ - / stride_w + / stride_w h_out = math.floor(h_out) w_out = math.floor(w_out) @@ -1498,17 +1544,20 @@ class ApplyMomentum(PrimitiveWithInfer): ('accumulation', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), ('learning_rate', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, - sig_dtype.T), + sig_dtype.T1), ('gradient', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), - ('momentum', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T) + ('momentum', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T2) ) @prim_attr_register def __init__(self, use_nesterov=False, use_locking=False, gradient_scale=1.0): self.init_prim_io_names(inputs=['variable', 'accumulation', 'learning_rate', 'gradient', 'momentum'], outputs=['output']) + self.is_tbe = context.get_context("device_target") == "Ascend" def infer_shape(self, v_shape, a_shape, l_shape, g_shape, m_shape): + if self.is_tbe: + return v_shape, v_shape return v_shape def infer_dtype(self, v_dtype, a_dtype, l_dtype, g_dtype, m_dtype): @@ -1519,6 +1568,8 @@ class ApplyMomentum(PrimitiveWithInfer): validator.check_scalar_or_tensor_type_same({"l_dtype": l_dtype}, valid_types, self.name) validator.check_scalar_or_tensor_type_same({"g_dtype": g_dtype}, valid_types, self.name) 
validator.check_scalar_or_tensor_type_same({"m_dtype": m_dtype}, valid_types, self.name) + if self.is_tbe: + return g_dtype, g_dtype return g_dtype @@ -1608,6 +1659,44 @@ class L2Loss(PrimitiveWithInfer): return x_type +class DataFormatDimMap(PrimitiveWithInfer): + """ + Returns the dimension index in the destination data format given the one in the source data format. + + Args: + src_format (string): An optional value for source data format. Default: 'NHWC'. + dst_format (string): An optional value for destination data format. Default: 'NCHW'. + + Inputs: + - **input_x** (Tensor) - A Tensor with each element as a dimension index in source data format. + Must be in the range [-4, 4). It's type is int32. + + Outputs: + Tensor, has the same type as the `input_x`. + + Examples: + >>> x = Tensor([0, 1, 2, 3], mindspore.int32) + >>> dfdm = P.DataFormatDimMap() + >>> dfdm(x) + [0 3 1 2] + """ + + @prim_attr_register + def __init__(self, src_format='NHWC', dst_format='NCHW'): + valid_values = ['NHWC', 'NCHW'] + self.src_format = validator.check_string("src_format", src_format, valid_values, self.name) + self.dst_format = validator.check_string("dst_format", dst_format, valid_values, self.name) + self.init_prim_io_names(inputs=['input_x'], outputs=['output']) + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_type): + validator.check_subclass("x", x_type, mstype.tensor, self.name) + valid_types = [mstype.int32] + validator.check_tensor_type_same({"x": x_type}, valid_types, self.name) + return x_type + class RNNTLoss(PrimitiveWithInfer): """ Computes the RNNTLoss and its gradient with respect to the softmax outputs. @@ -1762,9 +1851,9 @@ class ApplyRMSProp(PrimitiveWithInfer): - **moment** (Tensor) - Delta of `var`, must have the same type as `var`. - **learning_rate** (Union[Number, Tensor]) - Learning rate. - **grad** (Tensor) - Gradients, must have the same type as `var`. - - **decay** (float) - Decay rate. 
- - **momentum** (float) - Momentum. - - **epsilon** (float) - Ridge term. + - **decay** (float) - Decay rate. Only constant value is allowed. + - **momentum** (float) - Momentum. Only constant value is allowed. + - **epsilon** (float) - Ridge term. Only constant value is allowed. Outputs: Tensor, parameters to be update. @@ -1814,6 +1903,10 @@ class ApplyRMSProp(PrimitiveWithInfer): return var_dtype, var_dtype, var_dtype return var_dtype + def infer_value(self, var, mean_square, moment, learning_rate, grad, decay, momentum, epsilon): + if decay is None or momentum is None or epsilon is None: + raise ValueError(f"For {self.name}, decay, momentum, epsilon must be const.") + class ApplyCenteredRMSProp(PrimitiveWithInfer): """ @@ -1862,18 +1955,23 @@ class ApplyCenteredRMSProp(PrimitiveWithInfer): Examples: >>> centered_rms_prop = P.ApplyCenteredRMSProp() - >>> input_x = Tensor(1., mindspore.float32) - >>> mean_grad = Tensor(2., mindspore.float32) - >>> mean_square = Tensor(1., mindspore.float32) - >>> moment = Tensor(2., mindspore.float32) - >>> grad = Tensor(1., mindspore.float32) + >>> input_x = Tensor(np.arange(-6, 6).astype(np.float32).reshape(2, 3, 2), mindspore.float32) + >>> mean_grad = Tensor(np.arange(12).astype(np.float32).reshape(2, 3, 2), mindspore.float32) + >>> mean_square = Tensor(np.arange(-8, 4).astype(np.float32).reshape(2, 3, 2), mindspore.float32) + >>> moment = Tensor(np.arange(12).astype(np.float32).reshape(2, 3, 2), mindspore.float32) + >>> grad = Tensor(np.arange(12).astype(np.float32).reshape(2, 3, 2), mindspore.float32) + >>> learning_rate = Tensor(0.9, mindspore.float32) >>> decay = 0.0 >>> momentum = 1e-10 - >>> epsilon = 0.001 + >>> epsilon = 0.05 >>> result = centered_rms_prop(input_x, mean_grad, mean_square, moment, grad, >>> learning_rate, decay, momentum, epsilon) - -27.460497 + [[[ -6.
-9.024922] + [-12.049845 -15.074766] + [-18.09969 -21.124613]] + [[-24.149532 -27.174456] + [-30.199379 -33.2243 ] + [-36.249226 -39.274143]]] """ @prim_attr_register @@ -1910,7 +2008,7 @@ class LayerNorm(Primitive): `Layer Normalization `_. .. math:: - y = \frac{x - mean]}{\sqrt{variance + \epsilon}} * \gamma + \beta + y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon. @@ -2059,7 +2157,7 @@ class DropoutDoMask(PrimitiveWithInfer): >>> dropout_do_mask = P.DropoutDoMask() >>> mask = dropout_gen_mask(shape, keep_prob) >>> output = dropout_do_mask(x, mask, keep_prob) - >>> assert output.shape() == (20, 16, 50) + >>> assert output.shape == (20, 16, 50) """ @prim_attr_register @@ -2113,10 +2211,10 @@ class ResizeBilinear(PrimitiveWithInfer): Tensor, resized image. Tensor of shape `(N_i, ..., N_n, new_height, new_width)` in `float32`. Examples: - >>> tensor = Tensor([[[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]]], mindspore.int32) + >>> tensor = Tensor([[[[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]]], mindspore.float32) >>> resize_bilinear = P.ResizeBilinear((5, 5)) >>> result = resize_bilinear(tensor) - >>> assert result.shape() == (5, 5) + >>> assert result.shape == (1, 1, 5, 5) """ @prim_attr_register @@ -2132,6 +2230,7 @@ class ResizeBilinear(PrimitiveWithInfer): return out_shape def infer_dtype(self, input_dtype): + validator.check_tensor_type_same({'input_dtype': input_dtype}, [mstype.float16, mstype.float32], self.name) return mstype.tensor_type(mstype.float32) @@ -2701,9 +2800,25 @@ class Adam(PrimitiveWithInfer): - **v** (Tensor) - The same shape and data type as `v`. Examples: - Please refer to the usage in nn.Adam. 
+ >>> import numpy as np + >>> import mindspore.nn as nn + >>> from mindspore import Tensor, Parameter + >>> from mindspore.ops import operations as P + >>> class Net(nn.Cell): + >>> def __init__(self): + >>> super(Net, self).__init__() + >>> self.apply_adam = P.Adam() + >>> self.var = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="var") + >>> self.m = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="m") + >>> self.v = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="v") + >>> def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): + >>> out = self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, + >>> epsilon, grad) + >>> return out + >>> net = Net() + >>> gradient = Tensor(np.random.rand(3, 3, 3).astype(np.float32)) + >>> result = net(0.9, 0.999, 0.001, 0.9, 0.999, 1e-8, gradient) """ - @prim_attr_register def __init__(self, use_locking=False, use_nesterov=False): validator.check_value_type("use_locking", use_locking, [bool], self.name) @@ -2727,6 +2842,274 @@ class Adam(PrimitiveWithInfer): return var_dtype, m_dtype, v_dtype +class SparseApplyAdam(PrimitiveWithInfer): + r""" + Merge the duplicate value of the gradient and then updates parameters by Adaptive Moment Estimation (Adam) + algorithm. This operator is used when the gradient is sparse. + + The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization `_. + + The updating formulas are as follows, + + .. 
math:: + \begin{array}{ll} \\ + m = \beta_1 * m + (1 - \beta_1) * g \\ + v = \beta_2 * v + (1 - \beta_2) * g * g \\ + l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ + w = w - l * \frac{m}{\sqrt{v} + \epsilon} + \end{array} + + :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents + `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, + :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent `beta1_power` and + `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `var`, :math:`\epsilon` represents + `epsilon`. + + Args: + use_locking (bool): Whether to enable a lock to protect updating variable tensors. + If True, updating of the var, m, and v tensors will be protected by a lock. + If False, the result is unpredictable. Default: False. + use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. + If True, updates the gradients using NAG. + If False, updates the gradients without using NAG. Default: False. + + Inputs: + - **var** (Parameter) - Parameters to be updated. With float32 data type. + - **m** (Parameter) - The 1st moment vector in the updating formula. Has the same type as `var`. With + float32 data type. + - **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients, + has the same type as `var`. With float32 data type. + - **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula. With float32 data type. + - **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula. With float32 data type. + - **lr** (Tensor) - :math:`l` in the updating formula. With float32 data type. + - **beta1** (Tensor) - The exponential decay rate for the 1st moment estimates. With float32 data type. + - **beta2** (Tensor) - The exponential decay rate for the 2nd moment estimates. With float32 data type. 
+ - **epsilon** (Tensor) - Term added to the denominator to improve numerical stability. With float32 data type. + - **gradient** (Tensor) - Gradient value. With float32 data type. + - **indices** (Tensor) - Gradient indices. With int32 data type. + + Outputs: + Tuple of 3 Tensor, the updated parameters. + + - **var** (Tensor) - The same shape and data type as `var`. + - **m** (Tensor) - The same shape and data type as `m`. + - **v** (Tensor) - The same shape and data type as `v`. + + Examples: + >>> import numpy as np + >>> import mindspore.nn as nn + >>> from mindspore import Tensor, Parameter + >>> from mindspore.ops import operations as P + >>> import mindspore.common.dtype as mstype + >>> class Net(nn.Cell): + >>> def __init__(self): + >>> super(Net, self).__init__() + >>> self.sparse_apply_adam = P.SparseApplyAdam() + >>> self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var") + >>> self.m = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="m") + >>> self.v = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="v") + >>> def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices): + >>> out = self.sparse_apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, + >>> epsilon, grad, indices) + >>> return out + >>> net = Net() + >>> beta1_power = Tensor(0.9, mstype.float32) + >>> beta2_power = Tensor(0.999, mstype.float32) + >>> lr = Tensor(0.001, mstype.float32) + >>> beta1 = Tensor(0.9, mstype.float32) + >>> beta2 = Tensor(0.999, mstype.float32) + >>> epsilon = Tensor(1e-8, mstype.float32) + >>> gradient = Tensor(np.random.rand(2, 1, 2), mstype.float32) + >>> indices = Tensor([0, 1], mstype.int32) + >>> result = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices) + """ + __mindspore_signature__ = ( + ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('m', sig_rw.RW_WRITE, 
sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('v', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('beta1_power', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('beta2_power', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('beta1', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('beta2', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('epsilon', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('indices', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T1) + ) + + @prim_attr_register + def __init__(self, use_locking=False, use_nesterov=False): + validator.check_value_type("use_locking", use_locking, [bool], self.name) + validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name) + self.init_prim_io_names(inputs=['var', 'm', 'v', 'beta1_power', 'beta2_power', 'lr', 'beta1', 'beta2', + 'epsilon', 'grad', 'indices'], + outputs=['var', 'm', 'v']) + + def infer_shape(self, var_shape, m_shape, v_shape, beta1_power_shape, beta2_power_shape, lr_shape, + beta1_shape, beta2_shape, epsilon_shape, grad_shape, indices_shape): + validator.check("var_shape", var_shape, "m_shape", m_shape, Rel.EQ, self.name) + validator.check("var_shape", var_shape, "v_shape", v_shape, Rel.EQ, self.name) + validator.check_integer("indices rank", len(indices_shape), 1, Rel.EQ, self.name) + validator.check('grad_shape[0]', grad_shape[0], 
'indices_shape[0]', indices_shape[0], Rel.EQ, self.name) + if len(var_shape) > 1 and grad_shape != indices_shape + var_shape[1:]: + raise ValueError(f"For '{self.name}', the shape of updates should be [] or " + f"grad_shape = indices_shape + var_shape[1:], but got var_shape: {var_shape}, " + f"indices_shape: {indices_shape}, grad_shape: {grad_shape}.") + return var_shape, m_shape, v_shape + + def infer_dtype(self, var_dtype, m_dtype, v_dtype, beta1_power_dtype, beta2_power_dtype, lr_dtype, + beta1_dtype, beta2_dtype, epsilon_dtype, grad_dtype, indices_dtype): + args = {"var": var_dtype, "m": m_dtype, "v": v_dtype, "grad": grad_dtype} + validator.check_tensor_type_same(args, mstype.number_type, self.name) + + args = {"beta1_power": beta1_power_dtype, "beta2_power": beta2_power_dtype, 'lr': lr_dtype, + "beta1": beta1_dtype, "beta2": beta2_dtype, "epsilon": epsilon_dtype} + validator.check_scalar_or_tensor_type_same(args, [mstype.float16, mstype.float32], self.name, True) + validator.check_tensor_type_same({"indices_dtype": indices_dtype}, [mstype.int32], self.name) + return var_dtype, m_dtype, v_dtype + + +class SparseApplyLazyAdam(PrimitiveWithInfer): + r""" + Merge the duplicate value of the gradient and then updates parameters by Adaptive Moment Estimation (Adam) + algorithm. This operator is used when the gradient is sparse. The behavior is not equivalent to the + original Adam algorithm, as only the current indices parameters will be updated. + + The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization `_. + + The updating formulas are as follows, + + .. 
math:: + \begin{array}{ll} \\ + m = \beta_1 * m + (1 - \beta_1) * g \\ + v = \beta_2 * v + (1 - \beta_2) * g * g \\ + l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ + w = w - l * \frac{m}{\sqrt{v} + \epsilon} + \end{array} + + :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector, :math:`g` represents + `gradient`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, + :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent `beta1_power` and + `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `var`, :math:`\epsilon` represents + `epsilon`. + + Args: + use_locking (bool): Whether to enable a lock to protect updating variable tensors. + If True, updating of the var, m, and v tensors will be protected by a lock. + If False, the result is unpredictable. Default: False. + use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. + If True, updates the gradients using NAG. + If False, updates the gradients without using NAG. Default: False. + + Inputs: + - **var** (Parameter) - Parameters to be updated. With float32 data type. + - **m** (Parameter) - The 1st moment vector in the updating formula. Has the same type as `var`. With + float32 data type. + - **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients, + has the same type as `var`. With float32 data type. + - **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula. With float32 data type. + - **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula. With float32 data type. + - **lr** (Tensor) - :math:`l` in the updating formula. With float32 data type. + - **beta1** (Tensor) - The exponential decay rate for the 1st moment estimates. With float32 data type. + - **beta2** (Tensor) - The exponential decay rate for the 2nd moment estimates. With float32 data type. 
+ - **epsilon** (Tensor) - Term added to the denominator to improve numerical stability. With float32 data type. + - **gradient** (Tensor) - Gradient value. With float32 data type. + - **indices** (Tensor) - Gradient indices. With int32 data type. + + Outputs: + Tuple of 3 Tensor, the updated parameters. + + - **var** (Tensor) - The same shape and data type as `var`. + - **m** (Tensor) - The same shape and data type as `m`. + - **v** (Tensor) - The same shape and data type as `v`. + + Examples: + >>> import numpy as np + >>> import mindspore.nn as nn + >>> from mindspore import Tensor, Parameter + >>> from mindspore.ops import operations as P + >>> import mindspore.common.dtype as mstype + >>> class Net(nn.Cell): + >>> def __init__(self): + >>> super(Net, self).__init__() + >>> self.sparse_apply_lazyadam = P.SparseApplyLazyAdam() + >>> self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var") + >>> self.m = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="m") + >>> self.v = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="v") + >>> def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices): + >>> out = self.sparse_apply_lazyadam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, + >>> beta2, epsilon, grad, indices) + >>> return out + >>> net = Net() + >>> beta1_power = Tensor(0.9, mstype.float32) + >>> beta2_power = Tensor(0.999, mstype.float32) + >>> lr = Tensor(0.001, mstype.float32) + >>> beta1 = Tensor(0.9, mstype.float32) + >>> beta2 = Tensor(0.999, mstype.float32) + >>> epsilon = Tensor(1e-8, mstype.float32) + >>> gradient = Tensor(np.random.rand(2, 1, 2), mstype.float32) + >>> indices = Tensor([0, 1], mstype.int32) + >>> result = net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices) + """ + __mindspore_signature__ = ( + ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('m', 
sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('v', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('beta1_power', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('beta2_power', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('beta1', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('beta2', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('epsilon', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('indices', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T1) + ) + + @prim_attr_register + def __init__(self, use_locking=False, use_nesterov=False): + validator.check_value_type("use_locking", use_locking, [bool], self.name) + validator.check_value_type("use_nesterov", use_nesterov, [bool], self.name) + self.init_prim_io_names(inputs=['var', 'm', 'v', 'beta1_power', 'beta2_power', 'lr', 'beta1', 'beta2', + 'epsilon', 'grad', 'indices'], + outputs=['var', 'm', 'v']) + + def infer_shape(self, var_shape, m_shape, v_shape, beta1_power_shape, beta2_power_shape, lr_shape, + beta1_shape, beta2_shape, epsilon_shape, grad_shape, indices_shape): + validator.check("var_shape", var_shape, "m_shape", m_shape, Rel.EQ, self.name) + validator.check("var_shape", var_shape, "v_shape", v_shape, Rel.EQ, self.name) + validator.check_integer("indices rank", len(indices_shape), 1, Rel.EQ, self.name) + validator.check('grad_shape[0]', grad_shape[0], 
'indices_shape[0]', indices_shape[0], Rel.EQ, self.name) + if len(var_shape) > 1 and grad_shape != indices_shape + var_shape[1:]: + raise ValueError(f"For '{self.name}', the shape of updates should be [] or " + f"grad_shape = indices_shape + var_shape[1:], but got var_shape: {var_shape}, " + f"indices_shape: {indices_shape}, grad_shape: {grad_shape}.") + return var_shape, m_shape, v_shape + + def infer_dtype(self, var_dtype, m_dtype, v_dtype, beta1_power_dtype, beta2_power_dtype, lr_dtype, + beta1_dtype, beta2_dtype, epsilon_dtype, grad_dtype, indices_dtype): + args = {"var": var_dtype, "m": m_dtype, "v": v_dtype, "grad": grad_dtype} + validator.check_tensor_type_same(args, mstype.number_type, self.name) + + args = {"beta1_power": beta1_power_dtype, "beta2_power": beta2_power_dtype, 'lr': lr_dtype, + "beta1": beta1_dtype, "beta2": beta2_dtype, "epsilon": epsilon_dtype} + validator.check_scalar_or_tensor_type_same(args, [mstype.float16, mstype.float32], self.name, True) + + validator.check_tensor_type_same({"indices_dtype": indices_dtype}, [mstype.int32], self.name) + return var_dtype, m_dtype, v_dtype + + class BinaryCrossEntropy(PrimitiveWithInfer): r""" Computes the Binary Cross Entropy between the target and the output. @@ -2807,6 +3190,283 @@ class BinaryCrossEntropy(PrimitiveWithInfer): return x_type +class ApplyAdaMax(PrimitiveWithInfer): + r""" + Update relevant entries according to the adamax scheme. + + The updating formulas are as follows, + + .. 
math:: + \begin{array}{ll} \\ + m_{t} = \beta_1 * m_{t-1} + (1 - \beta_1) * g \\ + v_{t} = \max(\beta_2 * v_{t-1}, \left| g \right|) \\ + var = var - \frac{l}{1 - \beta_1^t} * \frac{m_{t}}{v_{t} + \epsilon} + \end{array} + + :math:`t` represents updating step while, :math:`m` represents the 1st moment vector, :math:`m_{t-1}` + is the last moment of :math:`m_{t}`, :math:`v` represents the 2nd moment vector, :math:`v_{t-1}` + is the last moment of :math:`v_{t}`, :math:`l` represents scaling factor `lr`, + :math:`g` represents `grad`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, + :math:`beta_1^t` represent `beta1_power`, :math:`var` represents Variable to be updated, + :math:`\epsilon` represents `epsilon`. + + Inputs: + - **var** (Parameter) - Variable to be updated. + - **m** (Parameter) - The 1st moment vector in the updating formula. Has the same shape and type as `var`. + - **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients, + has the same shape and type as `var`. + - **beta1_power** (float) - :math:`beta_1^t` in the updating formula. + - **lr** (float) - Learning rate, :math:`l` in the updating formula. Has the same type as `var`. + - **beta1** (float) - The exponential decay rate for the 1st moment estimates. + - **beta2** (float) - The exponential decay rate for the 2nd moment estimates. + - **epsilon** (float) - A small value added for numerical stability. + - **grad** (Tensor) - A tensor for gradient. Has the same shape and type as `var`. + + Outputs: + Tuple of 3 Tensor, the updated parameters. + + - **var** (Tensor) - The same shape and data type as `var`. + - **m** (Tensor) - The same shape and data type as `m`. + - **v** (Tensor) - The same shape and data type as `v`.
+ + Examples: + >>> var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var") + >>> m = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="m") + >>> v = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="v") + >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32)) + >>> beta1_power = 0.9 + >>> lr = 0.001 + >>> beta1 = 0.9 + >>> beta2 = 0.99 + >>> epsilon = 1e-10 + >>> apply_ada_max = P.ApplyAdaMax() + >>> output = apply_ada_max(var, m, v, beta1_power, lr, beta1, beta2, epsilon, grad) + """ + + __mindspore_signature__ = ( + ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('m', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('v', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('beta1_power', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('beta1', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('beta2', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('epsilon', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T) + ) + + @prim_attr_register + def __init__(self): + """init ApplyAdaMax""" + + def infer_shape(self, var_shape, m_shape, v_shape, beta1_power_shape, lr_shape, + beta1_shape, beta2_shape, epsilon_shape, grad_shape): + validator.check("var_shape", var_shape, "m_shape", m_shape, Rel.EQ, self.name) + validator.check("var_shape", var_shape, "v_shape", v_shape, Rel.EQ, self.name) + validator.check("var_shape", var_shape, 
"grad_shape", grad_shape, Rel.EQ, self.name) + return var_shape, m_shape, v_shape + + def infer_dtype(self, var_dtype, m_dtype, v_dtype, beta1_power_dtype, lr_dtype, + beta1_dtype, beta2_dtype, epsilon_dtype, grad_dtype): + args = {"var": var_dtype, "m": m_dtype, "v": v_dtype, "grad": grad_dtype} + validator.check_tensor_type_same(args, mstype.number_type, self.name) + + scalar_args = {"beta1_power": beta1_power_dtype, 'lr': lr_dtype, "beta1": beta1_dtype, + "beta2": beta2_dtype, "epsilon": epsilon_dtype} + validator.check_scalar_or_tensor_type_same(scalar_args, [mstype.float16, mstype.float32], self.name, True) + return var_dtype, m_dtype, v_dtype + + +class ApplyAdadelta(PrimitiveWithInfer): + r""" + Update relevant entries according to the adadelta scheme. + + .. math:: + accum = \rho * accum + (1 - \rho) * grad^2 + .. math:: + \text{update} = \sqrt{\text{accum_update} + \epsilon} * \frac{grad}{\sqrt{accum + \epsilon}} + .. math:: + \text{accum_update} = \rho * \text{accum_update} + (1 - \rho) * update^2 + .. math:: + var -= lr * update + + Inputs: + - **var** (Parameter) - Weights to be updated. + - **accum** (Parameter) - Accum to be updated, has the same shape and type as `var`. + - **accum_update** (Parameter) - Accum_update to be updated, has the same shape and type as `var`. + - **lr** (float) - Learning rate, has the same type as `var`. + - **rho** (float) - Decay rate. + - **epsilon** (float) - A small value added for numerical stability. + - **grad** (Tensor) - Gradients, has the same shape and type as `var`. + + Outputs: + Tuple of 3 Tensor, the updated parameters. + + - **var** (Tensor) - The same shape and data type as `var`. + - **accum** (Tensor) - The same shape and data type as `accum`. + - **accum_update** (Tensor) - The same shape and data type as `accum_update`. 
+ + Examples: + >>> var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var") + >>> accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum") + >>> accum_update = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum_update") + >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32)) + >>> lr = 0.001 + >>> rho = 0.0 + >>> epsilon = 1e-6 + >>> apply_adadelta = P.ApplyAdadelta() + >>> output = apply_adadelta(var, accum, accum_update, lr, rho, epsilon, grad) + """ + + __mindspore_signature__ = ( + ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('accum_update', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, + sig_dtype.T), + ('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('rho', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('epsilon', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T) + ) + + @prim_attr_register + def __init__(self): + """init ApplyAdadelta""" + + def infer_shape(self, var_shape, accum_shape, accum_update_shape, lr_shape, rho_shape, + epsilon_shape, grad_shape): + validator.check("var_shape", var_shape, "accum_shape", accum_shape, Rel.EQ, self.name) + validator.check("var_shape", var_shape, "accum_update_shape", accum_update_shape, Rel.EQ, self.name) + validator.check("var_shape", var_shape, "grad_shape", grad_shape, Rel.EQ, self.name) + return var_shape, accum_shape, accum_update_shape + + def infer_dtype(self, var_dtype, accum_dtype, accum_update_dtype, lr_dtype, rho_shape, + epsilon_dtype, grad_dtype): + args = 
{"var": var_dtype, "accum": accum_dtype, "accum_update": accum_update_dtype, "grad": grad_dtype} + validator.check_tensor_type_same(args, mstype.number_type, self.name) + + scalar_args = {"lr": lr_dtype, "rho": rho_shape, "epsilon": epsilon_dtype} + validator.check_scalar_or_tensor_type_same(scalar_args, [mstype.float16, mstype.float32], self.name, True) + return var_dtype, accum_dtype, accum_update_dtype + + +class ApplyAdagrad(PrimitiveWithInfer): + r""" + Update relevant entries according to the adagrad scheme. + + .. math:: + accum += grad * grad + .. math:: + var -= lr * grad * \frac{1}{\sqrt{accum}} + + Args: + update_slots (bool): If `True`, `accum` will be updated. Default: True. + + Inputs: + - **var** (Parameter) - Variable to be updated. + - **accum** (Parameter) - Accum to be updated. The shape and dtype should be the same as `var`. + - **lr** (float): The learning rate value, has the same type as `var`. + - **grad** (Tensor) - A tensor for gradient. The shape and dtype should be the same as `var`. + + Outputs: + Tuple of 2 Tensor, the updated parameters. + + - **var** (Tensor) - The same shape and data type as `var`. + - **accum** (Tensor) - The same shape and data type as `accum`. 
+ + Examples: + >>> var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var") + >>> accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum") + >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32)) + >>> lr = 0.01 + >>> apply_adagrad = P.ApplyAdagrad() + >>> output = apply_adagrad(var, accum, lr, grad) + """ + + __mindspore_signature__ = ( + ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T) + ) + + @prim_attr_register + def __init__(self, update_slots=True): + validator.check_value_type("update_slots", update_slots, [bool], self.name) + + def infer_shape(self, var_shape, accum_shape, lr_shape, grad_shape): + validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name) + validator.check('var shape', var_shape, 'grad shape', grad_shape, Rel.EQ, self.name) + return var_shape, accum_shape + + def infer_dtype(self, var_dtype, accum_dtype, lr_dtype, grad_dtype): + args = {'var': var_dtype, 'accum': accum_dtype, 'grad': grad_dtype} + validator.check_tensor_type_same(args, mstype.number_type, self.name) + valid_types = [mstype.float16, mstype.float32] + validator.check_scalar_or_tensor_type_same({'lr': lr_dtype}, valid_types, self.name) + return var_dtype, accum_dtype + + +class ApplyAdagradV2(PrimitiveWithInfer): + r""" + Update relevant entries according to the adagradv2 scheme. + + .. math:: + accum += grad * grad + .. math:: + var -= lr * grad * \frac{1}{\sqrt{accum} + \epsilon} + + Args: + epsilon (float): A small value added for numerical stability. + update_slots (bool): If `True`, `accum` will be updated. 
Default: True. + + Inputs: + - **var** (Parameter) - Variable to be updated. + - **accum** (Parameter) - Accum to be updated. The shape and dtype should be the same as `var`. + - **lr** (float): The learning rate value, has the same type as `var`. + - **grad** (Tensor) - A tensor for gradient. The shape and dtype should be the same as `var`. + + Outputs: + Tuple of 2 Tensor, the updated parameters. + + - **var** (Tensor) - The same shape and data type as `var`. + - **accum** (Tensor) - The same shape and data type as `m`. + + Examples: + >>> var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var") + >>> accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum") + >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32)) + >>> lr = 0.01 + >>> apply_adagrad_v2 = P.ApplyAdagradV2(epsilon=1e-6) + >>> output = apply_adagrad_v2(var, accum, lr, grad) + """ + + __mindspore_signature__ = ( + ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T) + ) + + @prim_attr_register + def __init__(self, epsilon, update_slots=True): + validator.check_value_type("epsilon", epsilon, [float], self.name) + validator.check_value_type("update_slots", update_slots, [bool], self.name) + + def infer_shape(self, var_shape, accum_shape, lr_shape, grad_shape): + validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name) + validator.check('var shape', var_shape, 'grad shape', grad_shape, Rel.EQ, self.name) + return var_shape, accum_shape + + def infer_dtype(self, var_dtype, accum_dtype, lr_dtype, grad_dtype): + args = {'var': var_dtype, 'accum': 
accum_dtype, 'grad': grad_dtype} + validator.check_tensor_type_same(args, mstype.number_type, self.name) + valid_types = [mstype.float16, mstype.float32] + validator.check_scalar_or_tensor_type_same({'lr': lr_dtype}, valid_types, self.name) + return var_dtype, accum_dtype + + class SparseApplyAdagrad(PrimitiveWithInfer): r""" Update relevant entries according to the adagrad scheme. @@ -2818,11 +3478,12 @@ class SparseApplyAdagrad(PrimitiveWithInfer): Args: lr (float): Learning rate. + update_slots (bool): If `True`, `accum` will be updated. Default: True. use_locking (bool): If True, updating of the var and accum tensors will be protected. Default: False. Inputs: - - **var** (Tensor) - Variable to be updated. The type must be float32. - - **accum** (Tensor) - Accum to be updated. The shape must be the same as `var`'s shape, + - **var** (Parameter) - Variable to be updated. The type must be float32. + - **accum** (Parameter) - Accum to be updated. The shape must be the same as `var`'s shape, the type must be float32. - **grad** (Tensor) - Gradient. The shape must be the same as `var`'s shape except first dimension, the type must be float32. @@ -2830,21 +3491,45 @@ class SparseApplyAdagrad(PrimitiveWithInfer): The shape of `indices` must be the same as `grad` in first dimension, the type must be int32. Outputs: - Tensor, has the same shape and type as `var`. + Tuple of 2 Tensor, the updated parameters. + + - **var** (Tensor) - The same shape and data type as `var`. + - **accum** (Tensor) - The same shape and data type as `accum`. 
Examples: - >>> var = Tensor(np.random.random((3, 3)), mindspore.float32) - >>> accum = Tensor(np.random.random((3, 3)), mindspore.float32) - >>> grad = Tensor(np.random.random((3, 3)), mindspore.float32) - >>> indices = Tensor(np.ones((3,), np.int32)) - >>> sparse_apply_ada_grad = P.SparseApplyAdagrad(0.5) - >>> sparse_apply_ada_grad(var, accum, grad, indices) + >>> import numpy as np + >>> import mindspore.nn as nn + >>> from mindspore import Tensor, Parameter + >>> from mindspore.ops import operations as P + >>> import mindspore.common.dtype as mstype + >>> class Net(nn.Cell): + >>> def __init__(self): + >>> super(Net, self).__init__() + >>> self.sparse_apply_adagrad = P.SparseApplyAdagrad(lr=1e-8) + >>> self.var = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="var") + >>> self.accum = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="accum") + >>> def construct(self, grad, indices): + >>> out = self.sparse_apply_adagrad(self.var, self.accum, grad, indices) + >>> return out + >>> net = Net() + >>> grad = Tensor(np.random.rand(3, 3, 3).astype(np.float32)) + >>> indices = Tensor([0, 1, 2], mstype.int32) + >>> result = net(grad, indices) """ + __mindspore_signature__ = ( + ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('indices', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T1) + ) + @prim_attr_register - def __init__(self, lr, use_locking=False): - self.lr = validator.check_value_type("lr", lr, [float], self.name) - self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name) + def __init__(self, lr, update_slots=True, use_locking=False): + validator.check_value_type("lr", 
lr, [float], self.name) + validator.check_number_range("lr", lr, float("-inf"), float("inf"), Rel.INC_NEITHER, self.name) + validator.check_value_type("update_slots", update_slots, [bool], self.name) + validator.check_value_type("use_locking", use_locking, [bool], self.name) def infer_shape(self, var_shape, accum_shape, grad_shape, indices_shape): validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name) @@ -2853,13 +3538,13 @@ class SparseApplyAdagrad(PrimitiveWithInfer): validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], Rel.EQ, self.name) validator.check_integer("indices rank", len(indices_shape), 1, Rel.EQ, self.name) validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], Rel.EQ, self.name) - return var_shape + return var_shape, accum_shape def infer_dtype(self, var_type, accum_type, grad_type, indices_type): args = {'var': var_type, 'accum': accum_type, 'grad': grad_type} validator.check_tensor_type_same(args, (mstype.float32,), self.name) validator.check_tensor_type_same({'indices': indices_type}, [mstype.int32], self.name) - return var_type + return var_type, accum_type class ApplyProximalAdagrad(PrimitiveWithInfer): @@ -2869,38 +3554,61 @@ class ApplyProximalAdagrad(PrimitiveWithInfer): .. math:: accum += grad * grad .. math:: - prox_v = var - lr * grad * \frac{1}{\sqrt{accum}} + \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}} .. math:: - var = \frac{sign(prox_v)}{1 + lr * l2} * \max(\left| prox_v \right| - lr * l1, 0) + var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0) Args: use_locking (bool): If True, updating of the var and accum tensors will be protected. Default: False. Inputs: - - **var** (Tensor) - Variable to be updated. - - **accum** (Tensor) - Accum to be updated. The shape must be the same as `var`'s shape. - - **lr** (Union[Number, Tensor]): The learning rate value, must be positive. 
It should be - a scalar tensor or number. + - **var** (Parameter) - Variable to be updated. The data type should be float. + - **accum** (Parameter) - Accum to be updated. Must has the same shape and dtype as `var`. + - **lr** (Union[Number, Tensor]): The learning rate value. It should be a scalar tensor or number. + The data type should be float. - **l1** (Union[Number, Tensor]): l1 regularization strength, must be greater than or equal to zero. - It should be a scalar tensor or number. + It should be a scalar tensor or number. The data type should be float. - **l2** (Union[Number, Tensor]): l2 regularization strength, must be greater than or equal to zero. - It should be a scalar tensor or number. - - **grad** (Tensor) - Gradient. The shape must be the same as `var`'s shape. + It should be a scalar tensor or number. The data type should be float. + - **grad** (Tensor) - Gradient. Must has the same shape and dtype as `var`. Outputs: - Tensor, has the same shape and type as `var`. + Tuple of 2 Tensor, the updated parameters. + + - **var** (Tensor) - The same shape and data type as `var`. + - **accum** (Tensor) - The same shape and data type as `accum`. 
Examples: - >>> var = Tensor(np.random.random((3, 3)), mindspore.float32) - >>> accum = Tensor(np.random.random((3, 3)), mindspore.float32) - >>> grad = Tensor(np.random.random((3, 3)), mindspore.float32) - >>> lr = 0.01 - >>> l1 = 0.0 - >>> l2 = 0.0 - >>> apply_proximal_ada_grad = P.ApplyProximalAdagrad() - >>> output = apply_proximal_ada_grad(var, accum, lr, l1, l2, grad) + >>> import numpy as np + >>> import mindspore.nn as nn + >>> from mindspore import Tensor, Parameter + >>> from mindspore.ops import operations as P + >>> class Net(nn.Cell): + >>> def __init__(self): + >>> super(Net, self).__init__() + >>> self.apply_proximal_adagrad = P.ApplyProximalAdagrad() + >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var") + >>> self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum") + >>> self.lr = 0.01 + >>> self.l1 = 0.0 + >>> self.l2 = 0.0 + >>> def construct(self, grad): + >>> out = self.apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1, self.l2, grad) + >>> return out + >>> net = Net() + >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32)) + >>> output = net(grad) """ + __mindspore_signature__ = ( + ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('l1', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('l2', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T) + ) + @prim_attr_register def __init__(self, use_locking=False): self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad'], 
outputs=['output']) @@ -2909,7 +3617,7 @@ class ApplyProximalAdagrad(PrimitiveWithInfer): def infer_shape(self, var_shape, accum_shape, lr_shape, l1_shape, l2_shape, grad_shape): validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name) validator.check('var shape', var_shape, 'grad shape', grad_shape, Rel.EQ, self.name) - return var_shape + return var_shape, accum_shape def infer_dtype(self, var_dtype, accum_dtype, lr_dtype, l1_dtype, l2_dtype, grad_dtype): valid_types = [mstype.float16, mstype.float32] @@ -2917,7 +3625,7 @@ class ApplyProximalAdagrad(PrimitiveWithInfer): validator.check_tensor_type_same(args, valid_types, self.name) scalar_args = {"lr": lr_dtype, "l1": l1_dtype, "l2": l2_dtype} validator.check_scalar_or_tensor_type_same(scalar_args, valid_types, self.name) - return var_dtype + return var_dtype, accum_dtype class SparseApplyProximalAdagrad(PrimitiveWithInfer): @@ -2928,40 +3636,65 @@ class SparseApplyProximalAdagrad(PrimitiveWithInfer): .. math:: accum += grad * grad .. math:: - prox_v = var - lr * grad * \frac{1}{\sqrt{accum}} + \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}} .. math:: - var = \frac{sign(prox_v)}{1 + lr * l2} * \max(\left| prox_v \right| - lr * l1, 0) + var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0) Args: use_locking (bool): If True, updating of the var and accum tensors will be protected. Default: False. Inputs: - - **var** (Tensor) - Variable tensor to be updated. - - **accum** (Tensor) - Variable tensor to be updated. The shape must be the same as `var`'s shape. - - **lr** (Union[Number, Tensor]): The learning rate value, must be positive. It should be - a scalar tensor or number. + - **var** (Parameter) - Variable tensor to be updated. The data type must be float32. + - **accum** (Parameter) - Variable tensor to be updated. Has the same dtype as `var`. + - **lr** (Union[Number, Tensor]): The learning rate value. 
It should be a scalar tensor or number. + The data type must be float32. - **l1** (Union[Number, Tensor]): l1 regularization strength, must be greater than or equal to zero. - It should be a scalar tensor or number. + It should be a scalar tensor or number. The data type must be float32. - **l2** (Union[Number, Tensor]): l2 regularization strength, must be greater than or equal to zero. - It should be a scalar tensor or number. - - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient. + It should be a scalar tensor or number. The data type must be float32. + - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient. The data type must be float32. - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`. Outputs: - Tensor, has the same shape and type as `var`. + Tuple of 2 Tensor, the updated parameters. + + - **var** (Tensor) - The same shape and data type as `var`. + - **accum** (Tensor) - The same shape and data type as `accum`. 
Examples: - >>> var = Tensor(np.random.random((3, 3)), mindspore.float32) - >>> accum = Tensor(np.random.random((3, 3)), mindspore.float32) - >>> grad = Tensor(np.random.random((3, 3)), mindspore.float32) + >>> import numpy as np + >>> import mindspore.nn as nn + >>> from mindspore import Tensor, Parameter + >>> from mindspore.ops import operations as P + >>> class Net(nn.Cell): + >>> def __init__(self): + >>> super(Net, self).__init__() + >>> self.sparse_apply_proximal_adagrad = P.SparseApplyProximalAdagrad() + >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var") + >>> self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum") + >>> self.lr = 0.01 + >>> self.l1 = 0.0 + >>> self.l2 = 0.0 + >>> def construct(self, grad, indices): + >>> out = self.sparse_apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1, + self.l2, grad, indices) + >>> return out + >>> net = Net() + >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32)) >>> indices = Tensor(np.ones((3,), np.int32)) - >>> lr = 0.01 - >>> l1 = 0.0 - >>> l2 = 0.0 - >>> sparse_apply_proximal_ada_grad = P.SparseApplyProximalAdagrad() - >>> output = sparse_apply_proximal_ada_grad(var, accum, lr, l1, l2, grad, indices) + >>> output = net(grad, indices) """ + __mindspore_signature__ = ( + ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('l1', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('l2', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), + ('indices', 
sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T1) + ) + @prim_attr_register def __init__(self, use_locking=False): self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad', 'indices'], @@ -2969,7 +3702,8 @@ class SparseApplyProximalAdagrad(PrimitiveWithInfer): self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name) def infer_shape(self, var_shape, accum_shape, lr_shape, l1_shape, l2_shape, grad_shape, indices_shape): - return var_shape + validator.check_integer("indices rank", len(indices_shape), 1, Rel.EQ, self.name) + return var_shape, accum_shape def infer_dtype(self, var_dtype, accum_dtype, lr_dtype, l1_dtype, l2_dtype, grad_dtype, indices_dtype): args = {'var': var_dtype, 'accum': accum_dtype, 'grad': grad_dtype} @@ -2979,7 +3713,7 @@ class SparseApplyProximalAdagrad(PrimitiveWithInfer): valid_types = [mstype.int16, mstype.int32, mstype.int64, mstype.uint16, mstype.uint32, mstype.uint64] validator.check_tensor_type_same({'indices': indices_dtype}, valid_types, self.name) - return var_dtype + return var_dtype, accum_dtype class LARSUpdate(PrimitiveWithInfer): @@ -3119,11 +3853,14 @@ class ApplyFtrl(PrimitiveWithInfer): self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'lr', 'l1', 'l2', 'lr_power'], outputs=['output']) self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name) + self.is_tbe = context.get_context("device_target") == "Ascend" def infer_shape(self, var_shape, accum_shape, linear_shape, grad_shape, lr_shape, l1_shape, l2_shape, lr_power_shape): validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name) validator.check('var shape', var_shape, 'linear shape', linear_shape, Rel.EQ, self.name) + if self.is_tbe: + return var_shape, var_shape, var_shape return var_shape def infer_dtype(self, var_type, accum_type, linear_type, grad_type, lr_type, l1_type, l2_type, lr_power_type): 
@@ -3135,6 +3872,8 @@ class ApplyFtrl(PrimitiveWithInfer): validator.check_scalar_or_tensor_type_same({"l1": l1_type}, valid_types, self.name) validator.check_scalar_or_tensor_type_same({"l2": l2_type}, valid_types, self.name) validator.check_scalar_or_tensor_type_same({"lr_power": lr_power_type}, valid_types, self.name) + if self.is_tbe: + return var_type, var_type, var_type return var_type @@ -3174,17 +3913,17 @@ class SparseApplyFtrl(PrimitiveWithInfer): >>> def __init__(self): >>> super(SparseApplyFtrlNet, self).__init__() >>> self.sparse_apply_ftrl = P.SparseApplyFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5) - >>> self.var = Parameter(Tensor(np.random.random(3, 3).astype(np.float32)), name="var") - >>> self.accum = Parameter(Tensor(np.random.random(3, 3).astype(np.float32)), name="accum") - >>> self.linear = Parameter(Tensor(np.random.random(3, 3).astype(np.float32)), name="linear") + >>> self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var") + >>> self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum") + >>> self.linear = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="linear") >>> >>> def construct(self, grad, indices): - >>> out = self.apply_ftrl(self.var, self.accum, self.linear, grad, indices) + >>> out = self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices) >>> return out >>> >>> net = SparseApplyFtrlNet() - >>> grad = Tensor(np.random.random(3, 3).astype(np.float32)) - >>> indices = Tnsor(np.ones([3]), mindspore.float32) + >>> grad = Tensor(np.random.rand(3, 3).astype(np.float32)) + >>> indices = Tensor(np.ones([3]), mindspore.int32) >>> output = net(grad, indices) """ @@ -3194,9 +3933,9 @@ class SparseApplyFtrl(PrimitiveWithInfer): validator.check_value_type("l1", l1, [float], self.name) validator.check_value_type("l2", l2, [float], self.name) validator.check_value_type("lr_power", lr_power, [float], self.name) - self.lr = validator.check_number("lr", lr, 0.0, 
Rel.GT, self.name) - self.l1 = validator.check_number("l1", l1, 0.0, Rel.GE, self.name) - self.l2 = validator.check_number("l2", l2, 0.0, Rel.GE, self.name) + self.lr = validator.check_number_range("lr", lr, 0.0, float("inf"), Rel.INC_NEITHER, self.name) + self.l1 = validator.check_number_range("l1", l1, 0.0, float("inf"), Rel.INC_LEFT, self.name) + self.l2 = validator.check_number_range("l2", l2, 0.0, float("inf"), Rel.INC_LEFT, self.name) self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name) self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name) @@ -3227,8 +3966,8 @@ class ConfusionMulGrad(PrimitiveWithInfer): axis (Union[int, tuple[int], list[int]]): The dimensions to reduce. Default:(), reduce all dimensions. Only constant value is allowed. keep_dims (bool): - - If true, keep these reduced dimensions and the length is 1. - - If false, don't keep these dimensions. Default:False. + - If True, keep these reduced dimensions and the length is 1. + - If False, don't keep these dimensions. Default:False. Inputs: - **input_0** (Tensor) - The input Tensor. @@ -3282,7 +4021,8 @@ class Dropout(PrimitiveWithInfer): During training, randomly zeroes some of the elements of the input tensor with probability. Args: - drop_prob (float): probability of an element to be zeroed. Default: 0. + keep_prob (float): The keep rate, between 0 and 1, e.g. keep_prob = 0.9, + means dropping out 10% of input units. Inputs: - **shape** (tuple[int]) - The shape of target mask. @@ -3291,14 +4031,14 @@ class Dropout(PrimitiveWithInfer): Tensor, the value of generated mask for input shape. 
Examples: - >>> dropout = P.Dropout(drop_prob=0.5) + >>> dropout = P.Dropout(keep_prob=0.5) >>> in = Tensor((20, 16, 50, 50)) >>> out = dropout(in) """ @prim_attr_register - def __init__(self, drop_prob=0): - self.drop_prob = validator.check_number_range("drop_prob", drop_prob, 0, 1, Rel.INC_BOTH, self.name) + def __init__(self, keep_prob=0.5): + self.keep_prob = validator.check_number_range("keep_prob", keep_prob, 0, 1, Rel.INC_RIGHT, self.name) def infer_shape(self, x_shape): validator.check_integer("x_shape", len(x_shape), 1, Rel.GE, self.name) @@ -3317,7 +4057,8 @@ class DropoutGrad(PrimitiveWithInfer): of the input tensor with probability. Args: - drop_prob (float): probability of an element to be zeroed. Default: 0. + keep_prob (float): The keep rate, between 0 and 1, e.g. keep_prob = 0.9, + means dropping out 10% of input units. Inputs: - **shape** (tuple[int]) - The shape of target mask. @@ -3326,14 +4067,14 @@ class DropoutGrad(PrimitiveWithInfer): Tensor, the value of generated mask for input shape. 
Examples: - >>> dropout_grad = P.DropoutGrad(drop_prob=0.5) + >>> dropout_grad = P.DropoutGrad(keep_prob=0.5) >>> in = Tensor((20, 16, 50, 50)) >>> out = dropout_grad(in) """ @prim_attr_register - def __init__(self, drop_prob=0): - self.drop_prob = validator.check_number_range("drop_prob", drop_prob, 0, 1, Rel.INC_BOTH, self.name) + def __init__(self, keep_prob=0.5): + self.keep_prob = validator.check_number_range("keep_prob", keep_prob, 0, 1, Rel.INC_RIGHT, self.name) def infer_shape(self, dy_shape, mask_shape): return dy_shape @@ -3383,7 +4124,7 @@ class CTCLoss(PrimitiveWithInfer): """ @prim_attr_register - def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=False, + def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=True, ignore_longer_outputs_than_inputs=False): self.init_prim_io_names(inputs=["inputs", "labels_indices", "labels_values", "sequence_length"], outputs=["loss", "gradient"]) @@ -3460,12 +4201,12 @@ class BasicLSTMCell(PrimitiveWithInfer): Outputs: - **ct** (Tensor) - Forward :math:`c_t` cache at moment `t`. Tensor of shape (`batch_size`, `hidden_size`). - **ht** (Tensor) - Cell output. Tensor of shape (`batch_size`, `hidden_size`). - - **it** (Tensor) - Forward :math:`i_t` cache at moment `t`. Tensor of shape (`batch_size`, `4 x hidden_size`). - - **jt** (Tensor) - Forward :math:`j_t` cache at moment `t`. Tensor of shape (`batch_size`, `4 x hidden_size`). - - **ft** (Tensor) - Forward :math:`f_t` cache at moment `t`. Tensor of shape (`batch_size`, `4 x hidden_size`). - - **ot** (Tensor) - Forward :math:`o_t` cache at moment `t`. Tensor of shape (`batch_size`, `4 x hidden_size`). + - **it** (Tensor) - Forward :math:`i_t` cache at moment `t`. Tensor of shape (`batch_size`, `hidden_size`). + - **jt** (Tensor) - Forward :math:`j_t` cache at moment `t`. Tensor of shape (`batch_size`, `hidden_size`). + - **ft** (Tensor) - Forward :math:`f_t` cache at moment `t`. Tensor of shape (`batch_size`, `hidden_size`). 
+ - **ot** (Tensor) - Forward :math:`o_t` cache at moment `t`. Tensor of shape (`batch_size`, `hidden_size`). - **tanhct** (Tensor) - Forward :math:`tanh c_t` cache at moment `t`. - Tensor of shape (`batch_size`, `4 x hidden_size`). + Tensor of shape (`batch_size`, `hidden_size`). Examples: 'block': P.BasicLSTMCell(keep_prob=1.0, forget_bias=1.0, state_is_tuple=True, activation='tanh'), @@ -3482,7 +4223,7 @@ class BasicLSTMCell(PrimitiveWithInfer): """ @prim_attr_register - def __init__(self, keep_prob=1.0, forget_bias=1.0, state_is_tuple=True, activation="tanh"): + def __init__(self, keep_prob=1.0, forget_bias=1.0, state_is_tuple=True, activation='tanh'): self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name) self.keep_prob = validator.check_number_range("keep_prob", keep_prob, 0.0, 1.0, Rel.INC_BOTH, self.name) self.forget_bias = validator.check_value_type("forget_bias", forget_bias, [float], self.name) @@ -3501,7 +4242,7 @@ class BasicLSTMCell(PrimitiveWithInfer): validator.check_integer("b rank", len(b_shape), 4, Rel.EQ, self.name) validator.check("w_shape[0]", w_shape[0], "4*h_shape[1]", 4 * h_shape[1], Rel.EQ, self.name) validator.check("w_shape[1]", w_shape[1], "x_shape[1]+h_shape[1]", x_shape[1] + h_shape[1], Rel.EQ, self.name) - validator.check("b_shape[0]", b_shape[0], "4*h_shape[1]", 4*h_shape[1], Rel.EQ, self.name) + validator.check("b_shape[0]", b_shape[0], "4*h_shape[1]", 4 * h_shape[1], Rel.EQ, self.name) ct_shape = c_shape ht_shape = h_shape it_shape = h_shape @@ -3524,3 +4265,44 @@ class BasicLSTMCell(PrimitiveWithInfer): validator.check_type_name("w", w_dtype, [mstype.float16, mstype.float32], self.name) validator.check_type_name("b", b_dtype, [mstype.float16, mstype.float32], self.name) return (x_dtype, x_dtype, x_dtype, x_dtype, x_dtype, x_dtype, x_dtype) + + +class InTopK(PrimitiveWithInfer): + r""" + Says whether the targets are in the top `k` predictions. 
+ + Args: + k (int): Specifies the number of top elements to look at for computing precision. + + Inputs: + - **x1** (Tensor) - A 2D Tensor define the predictions of a batch of samples with float32 data type. + - **x2** (Tensor) - A 1D Tensor define the labels of a batch of samples with int32 data type. + + Outputs: + Tensor, which is 1 dimension of type bool and has same shape with `x2`. For label of sample `i` in `x2`, + if label in first `k` predictions for sample `i` in `x1`, then the value is True else False. + + Examples: + >>> x1 = Tensor(np.array([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]]), mindspore.float32) + >>> x2 = Tensor(np.array([1, 3]), mindspore.int32) + >>> in_top_k = P.InTopK(3) + >>> result = in_top_k(x1, x2) + [True False] + """ + @prim_attr_register + def __init__(self, k): + """Init InTopK""" + self.init_prim_io_names(inputs=['x1', 'x2', 'k'], outputs=['y']) + validator.check_value_type("k", k, [int], self.name) + + def infer_dtype(self, x1_dtype, x2_dtype): + validator.check_tensor_type_same({"x1": x1_dtype}, (mstype.float32,), self.name) + validator.check_tensor_type_same({"x2": x2_dtype}, (mstype.int32,), self.name) + + return mstype.tensor_type(mstype.bool_) + + def infer_shape(self, x1_shape, x2_shape): + validator.check("x1", len(x1_shape), "", 2, Rel.EQ, self.name) + validator.check("x2", len(x2_shape), "", 1, Rel.EQ, self.name) + validator.check("size of x2", x2_shape[0], "x1's first dimension", x1_shape[0], Rel.EQ, self.name) + return x2_shape diff --git a/mindspore/ops/operations/other_ops.py b/mindspore/ops/operations/other_ops.py index d73f53eb6a..74c6080ab4 100644 --- a/mindspore/ops/operations/other_ops.py +++ b/mindspore/ops/operations/other_ops.py @@ -14,6 +14,7 @@ # ============================================================================ """Other operators.""" +import functools from ..._c_expression import signature_rw as sig_rw from ..._c_expression import signature_kind as sig_kind from ..._c_expression import signature_dtype as
sig_dtype @@ -52,7 +53,7 @@ class Assign(PrimitiveWithInfer): ) @prim_attr_register def __init__(self): - pass + self.init_prim_io_names(inputs=['ref', 'value'], outputs=['output']) def infer_shape(self, variable, value): return variable @@ -227,20 +228,20 @@ class IOU(PrimitiveWithInfer): Inputs: - **anchor_boxes** (Tensor) - Anchor boxes, tensor of shape (N, 4). "N" indicates the number of anchor boxes, - and the value "4" refers to "x0", "x1", "y0", and "y1". + and the value "4" refers to "x0", "x1", "y0", and "y1". Data type must be float16. - **gt_boxes** (Tensor) - Ground truth boxes, tensor of shape (M, 4). "M" indicates the number of ground - truth boxes, and the value "4" refers to "x0", "x1", "y0", and "y1". + truth boxes, and the value "4" refers to "x0", "x1", "y0", and "y1". Data type must be float16. Outputs: - Tensor, the 'iou' values, tensor of shape (M, N). + Tensor, the 'iou' values, tensor of shape (M, N), with data type float16. Raises: KeyError: When `mode` is not 'iou' or 'iof'. Examples: >>> iou = P.IOU() - >>> anchor_boxes = Tensor(np.random.randint(1.0, 5.0, [3, 4]), mindspore.float32) - >>> gt_boxes = Tensor(np.random.randint(1.0, 5.0, [3, 4]), mindspore.float32) + >>> anchor_boxes = Tensor(np.random.randint(1.0, 5.0, [3, 4]), mindspore.float16) + >>> gt_boxes = Tensor(np.random.randint(1.0, 5.0, [3, 4]), mindspore.float16) >>> iou(anchor_boxes, gt_boxes) """ @@ -304,6 +305,46 @@ class MakeRefKey(Primitive): pass +class Partial(Primitive): + """ + Make a partial function instance, used for pynative mode. + + Inputs: + - **args** (Union[FunctionType, Tensor]) - The function and bind arguments. + + Outputs: + FunctionType, partial function binded with arguments. + """ + + @prim_attr_register + def __init__(self): + pass + + def __call__(self, *args): + func = args[0].__call__ + partial_func = functools.partial(func, *args[1:]) + return partial_func + +class Depend(Primitive): + """ + Depend is used for process side-effect operations. 
+ + Inputs: + - **value** (Tensor) - the real value to return for depend operator. + - **expr** (Expression) - the expression to execute with no outputs. + + Outputs: + Tensor, the value passed by last operator. + """ + + @prim_attr_register + def __init__(self): + pass + + def __call__(self, value, expr): + return value + + class CheckBprop(PrimitiveWithInfer): """ Checks whether data type and shape of corresponding element from tuple x and y are the same. @@ -332,6 +373,8 @@ class CheckBprop(PrimitiveWithInfer): def infer_shape(self, xshapes, yshapes): tips = f'Bprop of {self.prim_to_check}' + validator.check_value_type('grads', xshapes, (tuple,), tips) + validator.check_value_type('params', yshapes, (tuple,), tips) if len(xshapes) < len(yshapes): raise TypeError(f"{tips}, the size of output should be {len(yshapes)}," f" but got {len(xshapes)}.") @@ -348,6 +391,8 @@ class CheckBprop(PrimitiveWithInfer): def infer_dtype(self, xdtypes, ydtypes): tips = f'Bprop of {self.prim_to_check}' + validator.check_value_type('grads', xdtypes, (tuple,), tips) + validator.check_value_type('params', ydtypes, (tuple,), tips) if len(xdtypes) < len(ydtypes): raise TypeError(f"{tips}, the size of output should be {len(ydtypes)}," f" but got {len(xdtypes)}.") @@ -366,3 +411,50 @@ class CheckBprop(PrimitiveWithInfer): raise TypeError(f"{tips}, the dtype of {i}th output should be {ydtype}," f" but got {xdtype}.") return xdtypes + + +class ConfusionMatrix(PrimitiveWithInfer): + r""" + Calculate the confusion matrix from labels and predictions. + + Args: + num_classes (int): The num of classes. + dtype (str): Data type of confusion matrix. Default: 'int32'. + + Inputs: + - **labels** (Tensor) - real labels, tensor of 1-D. the dtype must be non-negative Integer. + - **predictions** (Tensor) - the labels from prediction, tensor of 1-D. + the shape same as `labels` and the dtype must be non-negative Integer. + - **weights** (Tensor) - tensor of 1-D. the shape same as `predictions`. 
+ + Outputs: + Tensor, the confusion matrix, with shape (`num_classes`, `num_classes`). + + Examples: + >>> confusion_matrix = P.ConfusionMatrix(4) + >>> labels = Tensor([0, 1, 1, 3], mindspore.int32) + >>> predictions = Tensor([1, 2, 1, 3], mindspore.int32) + >>> confusion_matrix(labels, predictions) + """ + + @prim_attr_register + def __init__(self, num_classes, dtype="int32"): + validator.check_value_type("num_classes", num_classes, [int], self.name) + validator.check_value_type("dtype", dtype, [str], self.name) + + def infer_shape(self, labels, predictions, weights=None): + validator.check('labels dimension', len(labels), '', 1, Rel.EQ, self.name) + validator.check('labels shape', labels, 'predictions shape', predictions, Rel.EQ, self.name) + if weights is not None: + validator.check('labels shape', labels, 'weights shape', weights, Rel.EQ, self.name) + ret = (self.num_classes, self.num_classes) + return ret + + def infer_dtype(self, labels, predictions, weights=None): + validator.check_subclass('labels', labels, mstype.tensor, self.name) + validator.check_subclass('predictions', predictions, mstype.tensor, self.name) + if weights is not None: + validator.check_subclass('weights', weights, mstype.tensor, self.name) + args = {"labels": labels, "predictions": predictions} + validator.check_tensor_type_same(args, (mstype.number_type), self.name) + return labels diff --git a/mindspore/ops/operations/random_ops.py b/mindspore/ops/operations/random_ops.py index 77201c25f9..cde7dd41e3 100644 --- a/mindspore/ops/operations/random_ops.py +++ b/mindspore/ops/operations/random_ops.py @@ -66,6 +66,49 @@ class RandomChoiceWithMask(PrimitiveWithInfer): return (mstype.int32, mstype.bool_) +class Normal(PrimitiveWithInfer): + """ + Generates random samples from a normal(Gaussian) distribution. + + Args: + seed (int): Random seed. Default: 0. + + Inputs: + - **shape** (tuple[int]) - The shape of output tensor. Only constant value is allowed. 
+ - **mean** (Tensor) - The mean of the distribution, with float32 data type. + - **stddev** (Tensor) - The standard deviation of the distribution, with float32 data type. + + Outputs: + Tensor, with the given shape from the specific distribution and float32 data type. + + Examples: + >>> normal = P.Normal() + >>> mean = Tensor(0., mstype.float32) + >>> stddev = Tensor(1., mstype.float32) + >>> out = normal((32, 3, 3), mean, stddev) + """ + + @prim_attr_register + def __init__(self, seed=0): + """Init Normal""" + validator.check_value_type("seed", seed, [int], self.name) + + def __infer__(self, shape, mean, stddev): + shape_value = shape["value"] + if shape_value is None: + raise ValueError(f"For {self.name}, shape must be const.") + validator.check_value_type("shape", shape_value, [tuple], self.name) + for i, shape_i in enumerate(shape_value): + validator.check_integer("shape[%d]" % i, shape_i, 0, Rel.GE, self.name) + + validator.check_tensor_type_same({"mean": mean["dtype"]}, [mstype.float32], self.name) + validator.check_tensor_type_same({"stddev": stddev["dtype"]}, [mstype.float32], self.name) + + out = {"shape": shape_value, + "dtype": mstype.float32, + "value": None} + return out + class RandomCategorical(PrimitiveWithInfer): """ Generates random samples from a given categorical distribution tensor. diff --git a/mindspore/ops/operations/thor_ops.py b/mindspore/ops/operations/thor_ops.py index f84b5d1ffd..d2de0190a6 100644 --- a/mindspore/ops/operations/thor_ops.py +++ b/mindspore/ops/operations/thor_ops.py @@ -13,10 +13,9 @@ # limitations under the License. 
# ============================================================================ """thor_ops""" -from mindspore.ops import prim_attr_register, PrimitiveWithInfer -from mindspore.ops.composite import multitype_ops as C +from ..primitive import prim_attr_register, PrimitiveWithInfer +from ...common import dtype as mstype -import mindspore as ms __all__ = ["CusBatchMatMul", "CusCholeskyTrsm", @@ -58,11 +57,6 @@ class CusBatchMatMul(PrimitiveWithInfer): """init CusBatchMatMul""" self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y']) from mindspore.ops._op_impl._custom_op.batch_matmul_impl import CusBatchMatMul - def get_bprop(self): - def bprop(x1, x2, out, dout): - return (C.zeros_like(x1), C.zeros_like(x2)) - - return bprop def infer_shape(self, data1_shape, data2_shape): return data1_shape @@ -97,11 +91,6 @@ class CusCholeskyTrsm(PrimitiveWithInfer): self.init_prim_io_names(inputs=['x1'], outputs=['y']) from mindspore.ops._op_impl._custom_op.cholesky_trsm_impl import CusCholeskyTrsm - def get_bprop(self): - def bprop(x, out, dout): - return (C.zeros_like(x),) - return bprop - def infer_shape(self, data1_shape): ll = [] m, _ = data1_shape @@ -138,11 +127,6 @@ class CusFusedAbsMax1(PrimitiveWithInfer): self.init_prim_io_names(inputs=['x1'], outputs=['y']) self.origin_shape = origin_shape from mindspore.ops._op_impl._custom_op.fused_abs_max1_impl import CusFusedAbsMax1 - def get_bprop(self): - def bprop(x, out, dout): - return (C.zeros_like(x),) - - return bprop def infer_shape(self, data1_shape): ll = [] @@ -182,11 +166,6 @@ class CusImg2Col(PrimitiveWithInfer): self.dilates = dilates self.mode = mode from mindspore.ops._op_impl._custom_op.img2col_impl import CusImg2Col - def get_bprop(self): - def bprop(x, out, dout): - return (C.zeros_like(x),) - - return bprop def infer_shape(self, data1_shape): bs, c, h, w = data1_shape @@ -229,17 +208,12 @@ class CusMatMulCubeDenseLeft(PrimitiveWithInfer): """init CusMatMulCubeDenseLeft""" self.init_prim_io_names(inputs=['x1', 
'x2'], outputs=['y']) from mindspore.ops._op_impl._custom_op.matmul_cube_dense_left_impl import CusMatMulCubeDenseLeft - def get_bprop(self): - def bprop(x1, x2, out, dout): - return (C.zeros_like(x1), C.zeros_like(x2)) - - return bprop def infer_shape(self, data1_shape, data2_shape): return data2_shape def infer_dtype(self, data1_dtype, data2_dtype): - return ms.common.dtype.tensor_type(getattr(ms, "float16")) + return mstype.float16 class CusMatMulCubeFraczRightMul(PrimitiveWithInfer): @@ -269,17 +243,12 @@ class CusMatMulCubeFraczRightMul(PrimitiveWithInfer): """init CusMatMulCubeFraczRightMul""" self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y']) from mindspore.ops._op_impl._custom_op.matmul_cube_fracz_right_mul_impl import CusMatMulCubeFraczRightMul - def get_bprop(self): - def bprop(x1, x2, x3, out, dout): - return (C.zeros_like(x1), C.zeros_like(x2), C.zeros_like(x3)) - - return bprop def infer_shape(self, data1_shape, data2_shape, data3_shape): return data1_shape def infer_dtype(self, data1_dtype, data2_dtype, data3_dtype): - return ms.common.dtype.tensor_type(getattr(ms, "float32")) + return mstype.float32 class CusMatMulCube(PrimitiveWithInfer): @@ -315,11 +284,6 @@ class CusMatMulCube(PrimitiveWithInfer): self.transpose_a = transpose_a self.transpose_b = transpose_b from mindspore.ops._op_impl._custom_op.matmul_cube_impl import CusMatMulCube - def get_bprop(self): - def bprop(x1, x2, out, dout): - return (C.zeros_like(x1), C.zeros_like(x2)) - - return bprop def infer_shape(self, data1_shape, data2_shape): # shape = [1, data1_shape[1], data2_shape[2], 16, 16] @@ -337,7 +301,7 @@ class CusMatMulCube(PrimitiveWithInfer): return shape def infer_dtype(self, data1_dtype, data2_dtype): - return ms.common.dtype.tensor_type(getattr(ms, "float32")) + return mstype.float32 class CusMatrixCombine(PrimitiveWithInfer): @@ -362,11 +326,6 @@ class CusMatrixCombine(PrimitiveWithInfer): """init CusMatrixCombine""" self.init_prim_io_names(inputs=['x'], 
outputs=['y']) from mindspore.ops._op_impl._custom_op.matrix_combine_impl import CusMatrixCombine - def get_bprop(self): - def bprop(x, out, dout): - return (C.zeros_like(x),) - - return bprop def infer_shape(self, data_shape): a, b, c = data_shape @@ -446,17 +405,12 @@ class CusMatMulCubeDenseRight(PrimitiveWithInfer): """init CusMatMulCubeDenseRight""" self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y']) from mindspore.ops._op_impl._custom_op.matmul_cube_dense_right_impl import CusMatMulCubeDenseRight - def get_bprop(self): - def bprop(x1, x2, x3, out, dout): - return (C.zeros_like(x1), C.zeros_like(x2), C.zeros_like(x3)) - - return bprop def infer_shape(self, data1_shape, data2_shape, data3_shape): return data1_shape def infer_dtype(self, data1_dtype, data2_dtype, data3_dtype): - return ms.common.dtype.tensor_type(getattr(ms, "float32")) + return mstype.float32 class CusMatMulCubeFraczLeftCast(PrimitiveWithInfer): @@ -486,14 +440,9 @@ class CusMatMulCubeFraczLeftCast(PrimitiveWithInfer): """init CusMatMulCubeFraczLeftCast""" self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y']) from mindspore.ops._op_impl._custom_op.matmul_cube_fracz_left_cast_impl import CusMatMulCubeFraczLeftCast - def get_bprop(self): - def bprop(x1, x2, out, dout): - return (C.zeros_like(x1), C.zeros_like(x2)) - - return bprop def infer_shape(self, data1_shape, data2_shape): return data2_shape def infer_dtype(self, data1_dtype, data2_dtype): - return ms.common.dtype.tensor_type(getattr(ms, "float16")) + return mstype.float16 diff --git a/mindspore/ops/primitive.py b/mindspore/ops/primitive.py index 95e148204b..7ceb687778 100644 --- a/mindspore/ops/primitive.py +++ b/mindspore/ops/primitive.py @@ -43,11 +43,12 @@ class Primitive(Primitive_): >>> # init a Primitive obj with attr1=1 and attr2=2 >>> add = Add(attr1=1, attr2=2) """ + _repr_ignore_list = ['input_names', 'output_names'] def __init__(self, name): self.name = name self.attrs = {} - self.init_attrs = {} + 
self.init_attrs = {"name": name} Primitive_.__init__(self, name, self) if hasattr(self.__class__, '__mindspore_signature__'): sig = self._fill_signature(self.__class__.__mindspore_signature__) @@ -140,9 +141,24 @@ class Primitive(Primitive_): return self.attrs[item] raise AttributeError(item) + def check_elim(self, *args): + """ + Check whether or not certain inputs should go into backend. Subclass in need should override this method. + + Args: + Same as arguments of current Primitive + + Returns: + A tuple of two elements, first element indicates whether or not we should filter out current arguments; + second element is the output in case where we should filter out the arguments. + """ + return (False, None) + def __call__(self, *args): - output = _run_op(self, self.name, args) - return output + should_elim, output = self.check_elim(*args) + if should_elim: + return output + return _run_op(self, self.name, args) def __getstate__(self): return self.__dict__ @@ -150,6 +166,16 @@ def __setstate__(self, d): self.__dict__.update(d) + def __deepcopy__(self, memo): + return type(self)(**self.init_attrs) + + def __repr__(self): + attr = ', '.join([f'{k}={self.attrs[k]}'for k in self.attrs if not k in Primitive._repr_ignore_list]) + info_str = f'Prim[{self.name}]' + if attr: + info_str += f'<{attr}>' + return info_str + def init_prim_io_names(self, inputs, outputs): """ Initializes inputs and outputs name of Tensor or attributes. @@ -170,8 +196,8 @@ class PrimitiveWithInfer(Primitive): There are four method can be overide to define the infer logic of the primitive: __infer__(), infer_shape(), infer_dtype(), and infer_value(). If __infer__() is defined in primitive, the __infer__() has highest priority - to be called. If __infer__() is not defined, infer_shape() and infer_dtype() can be defined to describle shape - and type infer logic. The infer_value() is used for constant propogation. + to be called.
If __infer__() is not defined, infer_shape() and infer_dtype() can be defined to describe shape + and type infer logic. The infer_value() is used for constant propagation. Args: name (str): Name for current Primitive. @@ -273,6 +299,7 @@ def prim_attr_register(fn): bound_args.apply_defaults() arguments = bound_args.arguments del arguments['self'] + del self.init_attrs['name'] for name in arguments: value = arguments[name] self.add_prim_attr(name, value) @@ -284,7 +311,8 @@ def constexpr(fn=None, get_instance=True, name=None): """ - Makes a PrimitiveWithInfer operator, which infer the value while compiling. + Makes a PrimitiveWithInfer operator, which infer the value while compiling. We can define a function + to compute between constant variable and used in construct. Args: fn (function): A `fn` use as the infer_value of the output operator. @@ -310,6 +338,7 @@ def __init__(self): op_name = name if name else fn.__name__ PrimitiveWithInfer.__init__(self, op_name) + self.const_value = True def infer_value(self, *args): return fn(*args) @@ -324,19 +353,7 @@ @_wrap_func def _run_op(obj, op_name, args): """Single op execution function supported by ge in PyNative mode.""" - op_mask = [0] * len(args) - op_inputs = [] - for i, arg in enumerate(args): - if hasattr(arg, '__parameter__'): - op_inputs.append(arg.default_input) - op_mask[i] = 1 - elif isinstance(arg, tuple): - convert = lambda x: x.default_input if hasattr(x, '__parameter__') else x - args_ = tuple(convert(x) for x in arg) - op_inputs.append(args_) - else: - op_inputs.append(arg) - output = real_run_op(obj, op_name, tuple(op_inputs), tuple(op_mask)) + output = real_run_op(obj, op_name, args) if not output: raise RuntimeError("Pynative run op %s failed!"
% op_name) if len(output) == 1: diff --git a/mindspore/parallel/_auto_parallel_context.py b/mindspore/parallel/_auto_parallel_context.py index 21ef1d59f2..74250f12e5 100644 --- a/mindspore/parallel/_auto_parallel_context.py +++ b/mindspore/parallel/_auto_parallel_context.py @@ -185,13 +185,20 @@ class _AutoParallelContext: self.check_context_handle() return self._context_handle.get_parallel_mode() - def set_strategy_search_mode(self, strategy_search_mode): + def set_strategy_search_mode(self, auto_parallel_search_mode): + """ + Set search mode of strategy. + + Args: + auto_parallel_search_mode (str): The search mode of strategy. + """ self.check_context_handle() - ret = self._context_handle.set_strategy_search_mode(strategy_search_mode) + ret = self._context_handle.set_strategy_search_mode(auto_parallel_search_mode) if ret is False: - raise ValueError("Strategy search mode does not support {}".format(strategy_search_mode)) + raise ValueError("Strategy search mode does not support {}".format(auto_parallel_search_mode)) def get_strategy_search_mode(self): + """Get search mode of strategy.""" self.check_context_handle() return self._context_handle.get_strategy_search_mode() @@ -225,6 +232,21 @@ class _AutoParallelContext: self.check_context_handle() return self._context_handle.get_strategy_ckpt_load_file() + def set_full_batch(self, full_batch): + """ + Set whether load full batch on each device. + + Args: + full_batch (bool): True if load full batch on each device. + """ + self.check_context_handle() + self._context_handle.set_full_batch(full_batch) + + def get_full_batch(self): + """Get whether load full batch on each device.""" + self.check_context_handle() + return self._context_handle.get_full_batch() + def set_strategy_ckpt_save_file(self, strategy_ckpt_save_file): """ Set strategy checkpoint save path. 
@@ -407,9 +429,11 @@ _set_auto_parallel_context_func_map = { "cast_before_mirror": auto_parallel_context().set_cast_before_mirror, "loss_repeated_mean": auto_parallel_context().set_loss_repeated_mean, "parallel_mode": auto_parallel_context().set_parallel_mode, + "auto_parallel_search_mode": auto_parallel_context().set_strategy_search_mode, "parameter_broadcast": auto_parallel_context().set_parameter_broadcast, "strategy_ckpt_load_file": auto_parallel_context().set_strategy_ckpt_load_file, - "strategy_ckpt_save_file": auto_parallel_context().set_strategy_ckpt_save_file} + "strategy_ckpt_save_file": auto_parallel_context().set_strategy_ckpt_save_file, + "full_batch": auto_parallel_context().set_full_batch} _get_auto_parallel_context_func_map = { @@ -419,14 +443,17 @@ _get_auto_parallel_context_func_map = { "cast_before_mirror": auto_parallel_context().get_cast_before_mirror, "loss_repeated_mean": auto_parallel_context().get_loss_repeated_mean, "parallel_mode": auto_parallel_context().get_parallel_mode, + "auto_parallel_search_mode": auto_parallel_context().get_strategy_search_mode, "parameter_broadcast": auto_parallel_context().get_parameter_broadcast, "strategy_ckpt_load_file": auto_parallel_context().get_strategy_ckpt_load_file, - "strategy_ckpt_save_file": auto_parallel_context().get_strategy_ckpt_save_file} + "strategy_ckpt_save_file": auto_parallel_context().get_strategy_ckpt_save_file, + "full_batch": auto_parallel_context().get_full_batch} @args_type_check(device_num=int, global_rank=int, mirror_mean=bool, cast_before_mirror=bool, - loss_repeated_mean=bool, parallel_mode=str, parameter_broadcast=bool, - strategy_ckpt_load_file=str, strategy_ckpt_save_file=str) + loss_repeated_mean=bool, parallel_mode=str, auto_parallel_search_mode=str, + parameter_broadcast=bool, strategy_ckpt_load_file=str, + strategy_ckpt_save_file=str, full_batch=bool) def _set_auto_parallel_context(**kwargs): """ Set auto parallel context. 
@@ -454,11 +481,18 @@ def _set_auto_parallel_context(**kwargs): setting parallel strategies. - auto_parallel: Achieving parallelism automatically. + auto_parallel_search_mode (str): There are two kinds of search modes, "recursive_programming" + and "dynamic_programming". Default: "dynamic_programming". + + - recursive_programming: Recursive programming search mode. + + - dynamic_programming: Dynamic programming search mode. parameter_broadcast (bool): Indicating whether to broadcast parameters before training. "stand_alone", "semi_auto_parallel" and "auto_parallel" do not support parameter broadcast. Default: False. strategy_ckpt_load_file (str): The path to load parallel strategy checkpoint. Default: '' strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: '' + full_batch (bool): Whether to load the whole batch on each device. Default: False. Raises: ValueError: If input key is not attribute in auto parallel context. diff --git a/mindspore/parallel/_tensor.py b/mindspore/parallel/_tensor.py index 073ad9809a..fca8b88920 100644 --- a/mindspore/parallel/_tensor.py +++ b/mindspore/parallel/_tensor.py @@ -168,21 +168,21 @@ def _chunk_tensor_by_strategy(np_tensor, strategy): raise ValueError("The length of np_tensor does not match the length of strategy!") return _chunk_tensor(np_tensor, strategy, len(strategy)) -def _get_seed(dev_mat, tensor_map): +def _get_slice_index(dev_mat, tensor_map): """ - Get the random seed for current slice. + Get the slice index for current slice. Args: dev_mat (list): The device matrix of devices. tensor_map (list): The split strategy of tensor. Returns: - Integer, the local random seed for this device. + Integer, the slice index for slice on this device. 
""" rank = get_rank() tensor_strategy = _get_tensor_strategy(dev_mat, tensor_map) - tensor_slice_seed = _get_tensor_slice_index(dev_mat, tensor_strategy, tensor_map, rank) - return tensor_slice_seed + tensor_slice_index = _get_tensor_slice_index(dev_mat, tensor_strategy, tensor_map, rank) + return tensor_slice_index def _load_tensor(tensor, dev_mat, tensor_map): """ diff --git a/mindspore/parallel/_utils.py b/mindspore/parallel/_utils.py index 3301c3c970..c5b4d57702 100644 --- a/mindspore/parallel/_utils.py +++ b/mindspore/parallel/_utils.py @@ -20,10 +20,26 @@ from mindspore.parallel._auto_parallel_context import auto_parallel_context def _get_parallel_mode(): + """Get parallel mode.""" return auto_parallel_context().get_parallel_mode() +def _get_full_batch(): + """Get whether to use full_batch.""" + return auto_parallel_context().get_full_batch() + + +def _need_to_full(): + """Check whether to convert input to full shape or tensor.""" + parallel_mode = _get_parallel_mode() + full_batch = _get_full_batch() + need = ((parallel_mode in ("semi_auto_parallel", "auto_parallel")) + and (not full_batch)) + return need + + def _get_mirror_mean(): + """Get if using mirror_mean.""" return auto_parallel_context().get_mirror_mean() diff --git a/mindspore/parallel/mpi/__init__.py b/mindspore/parallel/mpi/__init__.py new file mode 100644 index 0000000000..e30774307c --- /dev/null +++ b/mindspore/parallel/mpi/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ diff --git a/mindspore/parallel/mpi/_mpi_config.py b/mindspore/parallel/mpi/_mpi_config.py new file mode 100644 index 0000000000..e43305fb76 --- /dev/null +++ b/mindspore/parallel/mpi/_mpi_config.py @@ -0,0 +1,111 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +The MPI config, used to configure the MPI environment. +""" +import threading +from mindspore._c_expression import MpiConfig +from mindspore._checkparam import args_type_check + +class _MpiConfig: + """ + _MpiConfig is the config tool for controlling MPI + + Note: + Create a config through instantiating MpiConfig object is not recommended. + should use MpiConfig() to get the config since MpiConfig is singleton. 
+ """ + _instance = None + _instance_lock = threading.Lock() + + def __init__(self): + self._mpiconfig_handle = MpiConfig.get_instance() + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance_lock.acquire() + cls._instance = object.__new__(cls) + cls._instance_lock.release() + return cls._instance + + def __getattribute__(self, attr): + value = object.__getattribute__(self, attr) + if attr == "_mpiconfig_handle" and value is None: + raise ValueError("mpiconfig handle is none in MpiConfig!!!") + return value + + @property + def enable_mpi(self): + return self._mpiconfig_handle.get_enable_mpi() + + @enable_mpi.setter + def enable_mpi(self, enable_mpi): + self._mpiconfig_handle.set_enable_mpi(enable_mpi) + +_k_mpi_config = None +def _mpi_config(): + """ + Get the global mpi config, if mpi config is not created, create a new one. + + Returns: + _MpiConfig, the global mpi config. + """ + global _k_mpi_config + if _k_mpi_config is None: + _k_mpi_config = _MpiConfig() + return _k_mpi_config + +@args_type_check(enable_mpi=bool) +def _set_mpi_config(**kwargs): + """ + Sets mpi config for running environment. + + mpi config should be configured before running your program. If there is no configuration, + mpi module will be disabled by default. + + Note: + Attribute name is required for setting attributes. + + Args: + enable_mpi (bool): Whether to enable mpi. Default: False. + + Raises: + ValueError: If input key is not an attribute in mpi config. + + Examples: + >>> mpiconfig.set_mpi_config(enable_mpi=True) + """ + for key, value in kwargs.items(): + if not hasattr(_mpi_config(), key): + raise ValueError("Set mpi config keyword %s is not recognized!" % key) + setattr(_mpi_config(), key, value) + + +def _get_mpi_config(attr_key): + """ + Gets mpi config attribute value according to the input key. + + Args: + attr_key (str): The key of the attribute. + + Returns: + Object, The value of given attribute key.
+ + Raises: + ValueError: If input key is not an attribute in mpi config. + """ + if not hasattr(_mpi_config(), attr_key): + raise ValueError("Get mpi config keyword %s is not recognized!" % attr_key) + return getattr(_mpi_config(), attr_key) diff --git a/mindspore/train/_utils.py b/mindspore/train/_utils.py index 7bc07b126e..85fd6fa189 100644 --- a/mindspore/train/_utils.py +++ b/mindspore/train/_utils.py @@ -14,14 +14,17 @@ # ============================================================================ """Train utility.""" import os +from collections.abc import Iterable + import numpy as np + from mindspore.common.tensor import Tensor -from mindspore.common.dtype import dtype_to_nptype +from mindspore.common.dtype import dtype_to_nptype, pytype_to_dtype from mindspore.common import dtype as mstype from mindspore import log as logger from mindspore.common.api import _executor -from mindspore.common.dtype import pytype_to_dtype +from .lineage_pb2 import DatasetGraph, TrainLineage, EvaluationLineage, UserDefinedInfo def _convert_type(types): """ @@ -64,8 +67,6 @@ def _exec_datagraph(exec_dataset, dataset_size, phase='dataset'): input_indexs, phase=phase) - # engine dataset to write data to tdt queue - exec_dataset.send() return exec_dataset @@ -157,8 +158,8 @@ def _to_full_tensor(elem, device_num, global_rank, scaling_sens=None): data = Tensor(data) if not isinstance(data, Tensor): raise ValueError("elements in tensors must be Tensor") - shape_ = data.shape() - type_ = data.dtype() + shape_ = data.shape + type_ = data.dtype new_shape = () batchsize_per_device = 1 for i, item in enumerate(shape_): @@ -196,3 +197,56 @@ def _to_full_shapes(shapes, device_num): new_shape += (item,) new_shapes.append(new_shape) return new_shapes + + +def _check_to_numpy(plugin, tensor): + """Check the tensor and return a numpy.ndarray.""" + np_value = tensor.asnumpy() + if plugin == 'scalar': + if np_value.size == 1: + return np_value + raise ValueError('The tensor holds more than one value, 
but the scalar plugin expects one value.') + if plugin == 'image': + if np_value.ndim == 4: + return np_value + raise ValueError('The tensor seems not to hold a valid image.') + if plugin in ('tensor', 'histogram'): + if np_value.ndim > 0: + return np_value + raise ValueError('The tensor should not be empty.') + return np_value + + +def _check_lineage_value(plugin, value): + """Check the lineage value.""" + def raises(plugin, prototype): + raise TypeError(f'Plugin {repr(plugin)} expects a {prototype.__name__} value.') + + if plugin == 'dataset_graph' and not isinstance(value, DatasetGraph): + raises(plugin, DatasetGraph) + + if plugin == 'eval_lineage' and not isinstance(value, EvaluationLineage): + raises(plugin, EvaluationLineage) + + if plugin == 'train_lineage' and not isinstance(value, TrainLineage): + raises(plugin, TrainLineage) + + if plugin == 'custom_lineage_data' and not isinstance(value, UserDefinedInfo): + raises(plugin, UserDefinedInfo) + + +def check_value_type(arg_name, arg_value, valid_types): + """Checks whether a value is instance of some types.""" + valid_types = tuple(valid_types) if isinstance(valid_types, Iterable) else (valid_types,) + is_valid = True + + # bool is subclass of int, so for a bool value, we need to extra check + if isinstance(arg_value, int) and isinstance(arg_value, bool) and bool not in valid_types: + is_valid = False + + if not isinstance(arg_value, valid_types): + is_valid = False + + if not is_valid: + raise TypeError(f'For `{arg_name}` the type should be a valid type of {[t.__name__ for t in valid_types]}, ' + f'but got {type(arg_value).__name__}.') diff --git a/mindspore/train/amp.py b/mindspore/train/amp.py index da0626d6e8..a47b16d0e0 100644 --- a/mindspore/train/amp.py +++ b/mindspore/train/amp.py @@ -21,7 +21,6 @@ from .._checkparam import Rel from ..common import dtype as mstype from ..nn.wrap.cell_wrapper import _VirtualDatasetCell from ..ops import functional as F -from ..ops.composite.base import _mp_cast_helper
from ..parallel._utils import _get_parallel_mode from .loss_scale_manager import DynamicLossScaleManager, LossScaleManager from .parallel_utils import ParallelMode @@ -66,7 +65,11 @@ _config_level = { "O2": { "keep_batchnorm_fp32": True, "cast_model_type": mstype.float16, - "loss_scale_manager": DynamicLossScaleManager()}} + "loss_scale_manager": DynamicLossScaleManager()}, + "O3": { + "keep_batchnorm_fp32": False, + "cast_model_type": mstype.float16, + "loss_scale_manager": None}} def _check_kwargs(key_words): @@ -98,7 +101,7 @@ def _add_loss_network(network, loss_fn, cast_model_type): def construct(self, data, label): out = self._backbone(data) - label = _mp_cast_helper(mstype.float32, label) + label = F.mixed_precision_cast(mstype.float32, label) return self._loss_fn(F.cast(out, mstype.float32), label) validator.check_value_type('loss_fn', loss_fn, nn.Cell, None) @@ -118,11 +121,14 @@ def build_train_network(network, optimizer, loss_fn=None, level='O0', **kwargs): loss_fn (Union[None, Cell]): Definition of the loss_fn. If None, the `network` should have the loss inside. Default: None. optimizer (Optimizer): Optimizer to update the Parameter. - level (str): Supports [O0, O2]. Default: "O0". + level (str): Supports [O0, O2, O3]. Default: "O0". - O0: Do not change. - O2: Cast network to float16, keep batchnorm and `loss_fn` (if set) run in float32, using dynamic loss scale. + - O3: Cast network to float16, with additional property 'keep_batchnorm_fp32=False'. + + O2 is recommended on GPU, O3 is recommended on Ascend. cast_model_type (:class:`mindspore.dtype`): Supports `mstype.float16` or `mstype.float32`. If set to `mstype.float16`, use `float16` mode to train. If set, overwrite the level setting. 
@@ -132,7 +138,7 @@ def build_train_network(network, optimizer, loss_fn=None, level='O0', **kwargs): """ validator.check_value_type('network', network, nn.Cell, None) validator.check_value_type('optimizer', optimizer, nn.Optimizer, None) - validator.check('level', level, "", ['O0', 'O2'], Rel.IN, None) + validator.check('level', level, "", ['O0', 'O2', 'O3'], Rel.IN, None) _check_kwargs(kwargs) config = dict(_config_level[level], **kwargs) config = edict(config) diff --git a/mindspore/train/callback/__init__.py b/mindspore/train/callback/__init__.py new file mode 100644 index 0000000000..6ef171cc87 --- /dev/null +++ b/mindspore/train/callback/__init__.py @@ -0,0 +1,31 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Callback related classes and functions.""" + +from ._callback import Callback +from ._callback import CallbackManager as _CallbackManager +from ._callback import InternalCallbackParam as _InternalCallbackParam +from ._callback import RunContext +from ._callback import checkpoint_cb_for_save_op as _checkpoint_cb_for_save_op +from ._callback import set_cur_net as _set_cur_net +from ._checkpoint import CheckpointConfig +from ._checkpoint import CheckpointManager as _CheckpointManager +from ._checkpoint import ModelCheckpoint +from ._loss_monitor import LossMonitor +from ._time_monitor import TimeMonitor +from ._summary_collector import SummaryCollector + +__all__ = ["Callback", "LossMonitor", "TimeMonitor", "ModelCheckpoint", + "SummaryCollector", "CheckpointConfig", "RunContext"] diff --git a/mindspore/train/callback/_callback.py b/mindspore/train/callback/_callback.py new file mode 100644 index 0000000000..c75e099693 --- /dev/null +++ b/mindspore/train/callback/_callback.py @@ -0,0 +1,269 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Callback related classes and functions.""" + +from contextlib import ExitStack + +from mindspore import log as logger +from mindspore.train.serialization import _fill_param_into_net +from mindspore.train.summary.summary_record import _cache_summary_tensor_data + +_cur_net = None + +def set_cur_net(net): + """ + Set current net for which we are using to save checkpoint. + + Args: + net (Cell): train network + """ + global _cur_net + _cur_net = net + + +def checkpoint_cb_for_save_op(parameter_list): + """ + The checkpoint callback function for MindSpore. + + Will be executed by checkpoint save op. + + Args: + parameter_list (list): Format is like [{"name",name},{"data",value}] and value type is Tensor. + + Returns: + bool, true: means save checkpoint success. + """ + if _cur_net is None: + logger.warning("_cur_net is None. parameters are not updated.") + return False + + logger.info("update parameters in the net.") + _fill_param_into_net(_cur_net, parameter_list) + set_cur_net(None) + return True + + +def summary_cb_for_save_op(summary_list): + """ + The summary callback function for MindSpore. + + Will be executed by summary op. + + Args: + summary_list (list): Format is like [{"name": tag_name, "data": tensor},...] and value is Scalar/Tensor. + + Returns: + bool, true: means save summary success. + """ + ret = _cache_summary_tensor_data(summary_list) + return ret + + +class Callback: + """ + Abstract base class used to build a callback class. Callbacks are context managers + which will be entered and exited when passing into the Model. + You can leverage this mechanism to init and release resources automatically. + + Callback function will execution some operating to the current step or epoch. 
+ + Examples: + >>> class Print_info(Callback): + >>> def step_end(self, run_context): + >>> cb_params = run_context.original_args() + >>> print(cb_params.cur_epoch_num) + >>> print(cb_params.cur_step_num) + >>> + >>> print_cb = Print_info() + >>> model.train(epoch, dataset, callbacks=print_cb) + """ + + def __enter__(self): + """Return the enter target.""" + return self + + def __exit__(self, *err): + """Release resources here if have any.""" + + def begin(self, run_context): + """ + Called once before the network executing. + + Args: + run_context (RunContext): Include some information of the model. + """ + + def epoch_begin(self, run_context): + """ + Called before each epoch beginning. + + Args: + run_context (RunContext): Include some information of the model. + """ + + def epoch_end(self, run_context): + """ + Called after each epoch finished. + + Args: + run_context (RunContext): Include some information of the model. + """ + + def step_begin(self, run_context): + """ + Called before each epoch beginning. + + Args: + run_context (RunContext): Include some information of the model. + """ + + def step_end(self, run_context): + """ + Called after each step finished. + + Args: + run_context (RunContext): Include some information of the model. + """ + + def end(self, run_context): + """ + Called once after network training. + + Args: + run_context (RunContext): Include some information of the model. + """ + + +class CallbackManager(Callback): + """ + Sequential execution of callback functions. + + Execute Callback functions at certain points. + + Args: + callbacks (Optional[list[Callback], Callback]): None, callback, or callbacks list. 
+ """ + + def __init__(self, callbacks): + self._callbacks, self._stack = [], None + if isinstance(callbacks, Callback): + self._callbacks.append(callbacks) + elif isinstance(callbacks, list): + for cb in callbacks: + if not isinstance(cb, Callback): + raise TypeError("The 'callbacks' contains not-a-Callback item.") + self._callbacks.append(cb) + elif callbacks is not None: + raise TypeError("The 'callbacks' is not a Callback or a list of Callback.") + + def __enter__(self): + if self._stack is None: + callbacks, self._stack = [], ExitStack().__enter__() + for callback in self._callbacks: + target = self._stack.enter_context(callback) + if not isinstance(target, Callback): + logger.warning("Please return 'self' or a Callback as the enter target.") + callbacks.append(callback) + else: + callbacks.append(target) + self._callbacks = callbacks + return self + + def __exit__(self, *err): + return self._stack.__exit__(*err) + + def begin(self, run_context): + """Called once before network training.""" + for cb in self._callbacks: + cb.begin(run_context) + + def epoch_begin(self, run_context): + """Called before each epoch begin.""" + for cb in self._callbacks: + cb.epoch_begin(run_context) + + def epoch_end(self, run_context): + """Called after each epoch finished.""" + for cb in self._callbacks: + cb.epoch_end(run_context) + + def step_begin(self, run_context): + """Called before each epoch begin.""" + for cb in self._callbacks: + cb.step_begin(run_context) + + def step_end(self, run_context): + """Called after each step finished.""" + for cb in self._callbacks: + cb.step_end(run_context) + + def end(self, run_context): + """Called once after network training.""" + for cb in self._callbacks: + cb.end(run_context) + + +class InternalCallbackParam(dict): + """Internal callback object's parameters.""" + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + self[key] = value + + +class RunContext: + """ + Provides information about the 
model. + + Run call being made. Provides information about original request to model function. + callback objects can stop the loop by calling request_stop() of run_context. + + Args: + original_args (dict): Holding the related information of model etc. + """ + def __init__(self, original_args): + if not isinstance(original_args, dict): + raise TypeError("The arg of RunContext should be dict type.") + self._original_args = original_args + self._stop_requested = False + + def original_args(self): + """ + Get the _original_args object. + + Returns: + Dict, a object holding the original arguments of model. + """ + return self._original_args + + def request_stop(self): + """ + Sets stop requested during training. + + Callbacks can use this function to request stop of iterations. + model.train() checks whether this is called or not. + """ + self._stop_requested = True + + def get_stop_requested(self): + """ + Returns whether a stop is requested or not. + + Returns: + bool, if true, model.train() stops iterations. + """ + return self._stop_requested diff --git a/mindspore/train/callback.py b/mindspore/train/callback/_checkpoint.py similarity index 59% rename from mindspore/train/callback.py rename to mindspore/train/callback/_checkpoint.py index e691cfd837..d185377c83 100644 --- a/mindspore/train/callback.py +++ b/mindspore/train/callback/_checkpoint.py @@ -12,95 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ -"""Callback related classes and functions.""" +"""Checkpoint related classes and functions.""" import os -import stat import shutil +import stat import time -import numpy as np import mindspore.context as context -from mindspore.train.serialization import _exec_save_checkpoint, _fill_param_into_net, _save_graph -from mindspore.train._utils import _make_directory from mindspore import log as logger -from mindspore._checkparam import check_int_non_negative, check_bool -from mindspore.common.tensor import Tensor -from .summary.summary_record import _cache_summary_tensor_data - - -__all__ = ["Callback", "LossMonitor", "TimeMonitor", "ModelCheckpoint", "SummaryStep", "CheckpointConfig", "RunContext"] +from mindspore._checkparam import check_bool, check_int_non_negative +from mindspore.train._utils import _make_directory +from mindspore.train.serialization import _exec_save_checkpoint, _save_graph +from ._callback import Callback, set_cur_net _cur_dir = os.getcwd() -_cur_net = None _save_dir = _cur_dir -class _CheckpointManager: - """Manage checkpoint files according to train_config of checkpoint.""" - def __init__(self): - self._ckpoint_filelist = [] - - @property - def ckpoint_filelist(self): - """Get all the related checkpoint files managed here.""" - return self._ckpoint_filelist - - @property - def ckpoint_num(self): - """Get the number of the related checkpoint files managed here.""" - return len(self._ckpoint_filelist) - - def update_ckpoint_filelist(self, directory, prefix): - """Update the checkpoint file list.""" - self._ckpoint_filelist = [] - files = os.listdir(directory) - for filename in files: - if os.path.splitext(filename)[-1] == ".ckpt" and filename.startswith(prefix): - mid_name = filename[len(prefix):-5] - flag = True - for char in mid_name: - if char.isalpha(): - flag = False - if flag: - self._ckpoint_filelist.append(directory + '/' + filename) - - def 
remove_ckpoint_file(self, file_name): - """Remove the specified checkpoint file from this checkpoint manager and also from the directory.""" - try: - os.chmod(file_name, stat.S_IWRITE) - os.remove(file_name) - self._ckpoint_filelist.remove(file_name) - except OSError: - logger.warning("OSError, failed to remove the older ckpt file %s.", file_name) - except ValueError: - logger.warning("ValueError, failed to remove the older ckpt file %s.", file_name) - - def remove_oldest_ckpoint_file(self): - """Remove the oldest checkpoint file from this checkpoint manager and also from the directory.""" - ckpoint_files = sorted(self._ckpoint_filelist, key=os.path.getmtime) - self.remove_ckpoint_file(ckpoint_files[0]) - - def keep_one_ckpoint_per_minutes(self, minutes, cur_time): - """Only keep the latest one ckpt file per minutes, remove other files generated in [last_time, cur_time].""" - movs = [] - oldest_file = '' - oldest_time = cur_time - for ck_file in self._ckpoint_filelist: - modify_time = os.path.getmtime(ck_file) - if cur_time - modify_time < 60 * minutes: - movs.append(ck_file) - - if modify_time < oldest_time: - oldest_time = modify_time - oldest_file = ck_file - - for mv_file in movs: - if mv_file == oldest_file: - continue - self.remove_ckpoint_file(mv_file) - def _check_file_name_prefix(file_name_prefix): """ @@ -236,278 +166,6 @@ class CheckpointConfig: return checkpoint_policy -def _set_cur_net(net): - """ - Set current net for which we are using to save checkpoint. - - Args: - net (Cell): train network - """ - global _cur_net - _cur_net = net - - -def _checkpoint_cb_for_save_op(parameter_list): - """ - The checkpoint callback function for MindSpore. - - Will be executed by checkpoint save op. - - Args: - parameter_list (list): Format is like [{"name",name},{"data",value}] and value type is Tensor. - - Returns: - bool, true: means save checkpoint success. - """ - if _cur_net is None: - logger.warning("_cur_net is None. 
parameters are not updated.") - return False - - logger.info("update parameters in the net.") - _fill_param_into_net(_cur_net, parameter_list) - _set_cur_net(None) - return True - - -def _summary_cb_for_save_op(summary_list): - """ - The summary callback function for MindSpore. - - Will be executed by summary op. - - Args: - summary_list (list): Format is like [{"name": tag_name, "data": tensor},...] and value is Scalar/Tensor. - - Returns: - bool, true: means save summary success. - """ - ret = _cache_summary_tensor_data(summary_list) - return ret - - -def _build_callbacks(callbacks): - """ - Contain a list of callback. - - Args: - callbacks (list): Callback functions list, Support None, a single Callback object, or a list. - - Returns: - List, a list of callback functions. - """ - if callbacks: - if isinstance(callbacks, tuple): - raise TypeError("Callbacks cannot be a tuple. Please check it.") - if not isinstance(callbacks, list): - callbacks = [callbacks] - else: - callbacks = [] - - excute_callbacks = [] - for cb in callbacks: - if cb is None or not isinstance(cb, Callback): - raise TypeError("Callback must inheriting base class Callback. Some callback is Wrong. Please check it.") - excute_callbacks.append(cb) - - return _ListCallback(excute_callbacks) - - -class _ListCallback: - """ - Sequential execution of callback functions. - - Execute Callback functions at certain points. - - Args: - callbacks (list): Callback functions list. 
- """ - def __init__(self, callbacks): - super(_ListCallback, self).__init__() - self._callbacks = callbacks - - def begin(self, run_context): - """Called once before network training.""" - for cb in self._callbacks: - cb.begin(run_context) - - def epoch_begin(self, run_context): - """Called before each epoch begin.""" - for cb in self._callbacks: - cb.epoch_begin(run_context) - - def epoch_end(self, run_context): - """Called after each epoch finished.""" - for cb in self._callbacks: - cb.epoch_end(run_context) - - def step_begin(self, run_context): - """Called before each epoch begin.""" - for cb in self._callbacks: - cb.step_begin(run_context) - - def step_end(self, run_context): - """Called after each step finished.""" - for cb in self._callbacks: - cb.step_end(run_context) - - def end(self, run_context): - """Called once after network training.""" - for cb in self._callbacks: - cb.end(run_context) - - -class Callback: - """ - Abstract base class used to build a callback function. - - Callback function will execution some operating to the current step or epoch. - - Examples: - >>> class Print_info(Callback): - >>> def step_end(self, run_context): - >>> cb_params = run_context.original_args() - >>> print(cb_params.cur_epoch_num) - >>> print(cb_params.cur_step_num) - >>> - >>> print_cb = Print_info() - >>> model.train(epoch, dataset, callbacks=print_cb) - """ - def __init__(self): - pass - - def begin(self, run_context): - """ - Called once before the network executing. - - Args: - run_context (RunContext): Include some information of the model. - """ - - def epoch_begin(self, run_context): - """ - Called before each epoch beginning. - - Args: - run_context (RunContext): Include some information of the model. - """ - - def epoch_end(self, run_context): - """ - Called after each epoch finished. - - Args: - run_context (RunContext): Include some information of the model. - """ - - def step_begin(self, run_context): - """ - Called before each epoch beginning. 
- - Args: - run_context (RunContext): Include some information of the model. - """ - - def step_end(self, run_context): - """ - Called after each step finished. - - Args: - run_context (RunContext): Include some information of the model. - """ - - def end(self, run_context): - """ - Called once after network training. - - Args: - run_context (RunContext): Include some information of the model. - """ - - -class SummaryStep(Callback): - """ - The summary callback class. - - Args: - summary (Object): Summary recode object. - flush_step (int): Number of interval steps to execute. Default: 10. - """ - def __init__(self, summary, flush_step=10): - super(SummaryStep, self).__init__() - if not isinstance(flush_step, int) or isinstance(flush_step, bool) or flush_step <= 0: - raise ValueError("`flush_step` should be int and greater than 0") - self._summary = summary - self._flush_step = flush_step - - def step_end(self, run_context): - """ - Save summary. - - Args: - run_context (RunContext): Context of the train running. - """ - cb_params = run_context.original_args() - if cb_params.cur_step_num % self._flush_step == 0: - self._summary.record(cb_params.cur_step_num, cb_params.train_network) - - @property - def summary_file_name(self): - return self._summary.full_file_name - - -class _InternalCallbackParam(dict): - """Internal callback object's parameters.""" - - def __getattr__(self, key): - return self[key] - - def __setattr__(self, key, value): - self[key] = value - - -class RunContext: - """ - Provides information about the model. - - Run call being made. Provides information about original request to model function. - callback objects can stop the loop by calling request_stop() of run_context. - - Args: - original_args (dict): Holding the related information of model etc. 
- """ - def __init__(self, original_args): - if not isinstance(original_args, dict): - raise TypeError("The arg of RunContext should be dict type.") - self._original_args = original_args - self._stop_requested = False - - def original_args(self): - """ - Get the _original_args object. - - Returns: - Dict, a object holding the original arguments of model. - """ - return self._original_args - - def request_stop(self): - """ - Sets stop requested during training. - - Callbacks can use this function to request stop of iterations. - model.train() checks whether this is called or not. - """ - self._stop_requested = True - - def get_stop_requested(self): - """ - Returns whether a stop is requested or not. - - Returns: - bool, if true, model.train() stops iterations. - """ - return self._stop_requested - class ModelCheckpoint(Callback): """ @@ -551,7 +209,7 @@ class ModelCheckpoint(Callback): self._config = config # get existing checkpoint files - self._manager = _CheckpointManager() + self._manager = CheckpointManager() self._prefix = _chg_ckpt_file_name_if_same_exist(self._directory, self._prefix) self._graph_saved = False @@ -631,7 +289,7 @@ class ModelCheckpoint(Callback): self._last_triggered_step = cb_params.cur_step_num if context.get_context("enable_ge"): - _set_cur_net(cb_params.train_network) + set_cur_net(cb_params.train_network) cb_params.train_network.exec_checkpoint_graph() _exec_save_checkpoint(cb_params.train_network, gen_file, self._config.integrated_save) @@ -646,57 +304,66 @@ class ModelCheckpoint(Callback): return self._latest_ckpt_file_name -class LossMonitor(Callback): - """ - Monitor the loss in training. - - If the loss is NAN or INF, it will terminate training. - - Note: - If per_print_times is 0 do not print loss. - - Args: - per_print_times (int): Print loss every times. Default: 1. - - Raises: - ValueError: If print_step is not int or less than zero. 
- """ - def __init__(self, per_print_times=1): - super(LossMonitor, self).__init__() - if not isinstance(per_print_times, int) or per_print_times < 0: - raise ValueError("print_step must be int and >= 0.") - self._per_print_times = per_print_times - - def step_end(self, run_context): - cb_params = run_context.original_args() - loss = cb_params.net_outputs +class CheckpointManager: + """Manage checkpoint files according to train_config of checkpoint.""" + def __init__(self): + self._ckpoint_filelist = [] - if isinstance(loss, (tuple, list)): - if isinstance(loss[0], Tensor) and isinstance(loss[0].asnumpy(), np.ndarray): - loss = loss[0] + @property + def ckpoint_filelist(self): + """Get all the related checkpoint files managed here.""" + return self._ckpoint_filelist - if isinstance(loss, Tensor) and isinstance(loss.asnumpy(), np.ndarray): - loss = np.mean(loss.asnumpy()) + @property + def ckpoint_num(self): + """Get the number of the related checkpoint files managed here.""" + return len(self._ckpoint_filelist) - cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 + def update_ckpoint_filelist(self, directory, prefix): + """Update the checkpoint file list.""" + self._ckpoint_filelist = [] + files = os.listdir(directory) + for filename in files: + if os.path.splitext(filename)[-1] == ".ckpt" and filename.startswith(prefix): + mid_name = filename[len(prefix):-5] + flag = True + for char in mid_name: + if char.isalpha(): + flag = False + if flag: + self._ckpoint_filelist.append(directory + '/' + filename) - if isinstance(loss, float) and (np.isnan(loss) or np.isinf(loss)): - raise ValueError("epoch: {} step: {}. Invalid loss, terminating training." 
- .format(cb_params.cur_epoch_num, cur_step_in_epoch)) - if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0: - print("epoch: %s step: %s, loss is %s" % (cb_params.cur_epoch_num, cur_step_in_epoch, loss), flush=True) + def remove_ckpoint_file(self, file_name): + """Remove the specified checkpoint file from this checkpoint manager and also from the directory.""" + try: + os.chmod(file_name, stat.S_IWRITE) + os.remove(file_name) + self._ckpoint_filelist.remove(file_name) + except OSError: + logger.warning("OSError, failed to remove the older ckpt file %s.", file_name) + except ValueError: + logger.warning("ValueError, failed to remove the older ckpt file %s.", file_name) + def remove_oldest_ckpoint_file(self): + """Remove the oldest checkpoint file from this checkpoint manager and also from the directory.""" + ckpoint_files = sorted(self._ckpoint_filelist, key=os.path.getmtime) + self.remove_ckpoint_file(ckpoint_files[0]) -class TimeMonitor(Callback): - """Time Monitor.""" - def __init__(self, data_size): - super(TimeMonitor, self).__init__() - self.data_size = data_size + def keep_one_ckpoint_per_minutes(self, minutes, cur_time): + """Only keep the latest one ckpt file per minutes, remove other files generated in [last_time, cur_time].""" + movs = [] + oldest_file = '' + oldest_time = cur_time + for ck_file in self._ckpoint_filelist: + modify_time = os.path.getmtime(ck_file) + if cur_time - modify_time < 60 * minutes: + movs.append(ck_file) - def epoch_begin(self, run_context): - self.epoch_time = time.time() + if modify_time < oldest_time: + oldest_time = modify_time + oldest_file = ck_file - def epoch_end(self, run_context): - epoch_mseconds = (time.time() - self.epoch_time) * 1000 - per_step_mseconds = epoch_mseconds / self.data_size - print("epoch time: {0}, per step time: {1}".format(epoch_mseconds, per_step_mseconds), flush=True) + for mv_file in movs: + if mv_file == oldest_file: + continue + self.remove_ckpoint_file(mv_file) 
diff --git a/mindspore/train/callback/_dataset_graph.py b/mindspore/train/callback/_dataset_graph.py new file mode 100644 index 0000000000..e8c8dcb2ba --- /dev/null +++ b/mindspore/train/callback/_dataset_graph.py @@ -0,0 +1,128 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Define dataset graph related operations.""" +import json +from importlib import import_module + +from mindspore.train import lineage_pb2 + + +class DatasetGraph: + """Handle the data graph and packages it into binary data.""" + def package_dataset_graph(self, dataset): + """ + packages dataset graph into binary data + + Args: + dataset (MindData): refer to MindDataset + + Returns: + DatasetGraph, a object of lineage_pb2.DatasetGraph. + """ + dataset_package = import_module('mindspore.dataset') + dataset_dict = dataset_package.serialize(dataset) + json_str = json.dumps(dataset_dict, indent=2) + dataset_dict = json.loads(json_str) + dataset_graph_proto = lineage_pb2.DatasetGraph() + if "children" in dataset_dict: + children = dataset_dict.pop("children") + if children: + self._package_children(children=children, message=dataset_graph_proto) + self._package_current_dataset(operation=dataset_dict, message=dataset_graph_proto) + return dataset_graph_proto + + def _package_children(self, children, message): + """ + Package children in dataset operation. 
+ + Args: + children (list[dict]): Child operations. + message (DatasetGraph): Children proto message. + """ + for child in children: + if child: + child_graph_message = getattr(message, "children").add() + grandson = child.pop("children") + if grandson: + self._package_children(children=grandson, message=child_graph_message) + # package other parameters + self._package_current_dataset(operation=child, message=child_graph_message) + + def _package_current_dataset(self, operation, message): + """ + Package operation parameters in event message. + + Args: + operation (dict): Operation dict. + message (Operation): Operation proto message. + """ + for key, value in operation.items(): + if value and key == "operations": + for operator in value: + self._package_enhancement_operation( + operator, + message.operations.add() + ) + elif value and key == "sampler": + self._package_enhancement_operation( + value, + message.sampler + ) + else: + self._package_parameter(key, value, message.parameter) + + def _package_enhancement_operation(self, operation, message): + """ + Package enhancement operation in MapDataset. + + Args: + operation (dict): Enhancement operation. + message (Operation): Enhancement operation proto message. + """ + for key, value in operation.items(): + if isinstance(value, list): + if all(isinstance(ele, int) for ele in value): + message.size.extend(value) + else: + message.weights.extend(value) + else: + self._package_parameter(key, value, message.operationParam) + + @staticmethod + def _package_parameter(key, value, message): + """ + Package parameters in operation. + + Args: + key (str): Operation name. + value (Union[str, bool, int, float, list, None]): Operation args. + message (OperationParameter): Operation proto message. 
+ """ + if isinstance(value, str): + message.mapStr[key] = value + elif isinstance(value, bool): + message.mapBool[key] = value + elif isinstance(value, int): + message.mapInt[key] = value + elif isinstance(value, float): + message.mapDouble[key] = value + elif isinstance(value, list) and key != "operations": + if value: + replace_value_list = list(map(lambda x: "" if x is None else x, value)) + message.mapStrList[key].strValue.extend(replace_value_list) + elif value is None: + message.mapStr[key] = "None" + else: + raise ValueError(f"Parameter {key} is not supported in event package.") diff --git a/mindspore/train/callback/_loss_monitor.py b/mindspore/train/callback/_loss_monitor.py new file mode 100644 index 0000000000..3c1da218c2 --- /dev/null +++ b/mindspore/train/callback/_loss_monitor.py @@ -0,0 +1,93 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""LossMonitor Callback class.""" + +import time +import numpy as np +from mindspore.common.tensor import Tensor + +from ._callback import Callback + + +class LossMonitor(Callback): + """ + Monitor the loss in training. + + If the loss is NAN or INF, it will terminate training. + + Note: + If per_print_times is 0 do not print loss. + + Args: + per_print_times (int): Print loss every times. Default: 1. + lr_init (numpy array): train learning rate. Default: None. 
+ + Raises: + ValueError: If print_step is not int or less than zero. + + Examples: + >>> LossMonitor(100, lr_init=Tensor([0.05]*100).asnumpy()) + """ + + def __init__(self, per_print_times=1, lr_init=None): + super(LossMonitor, self).__init__() + if not isinstance(per_print_times, int) or per_print_times < 0: + raise ValueError("print_step must be int and >= 0.") + self._per_print_times = per_print_times + self.lr_init = lr_init + + def epoch_begin(self, run_context): + self.losses = [] + self.epoch_time = time.time() + + def epoch_end(self, run_context): + cb_params = run_context.original_args() + epoch_mseconds = (time.time() - self.epoch_time) * 1000 + per_step_mseconds = epoch_mseconds / cb_params.batch_num + print("Epoch time: {:5.3f}, per step time: {:5.3f}, " + "avg loss: {:5.3f}".format(epoch_mseconds, + per_step_mseconds, + np.mean(self.losses))) + print("*" * 60) + + def step_begin(self, run_context): + self.step_time = time.time() + + def step_end(self, run_context): + cb_params = run_context.original_args() + step_mseconds = (time.time() - self.step_time) * 1000 + step_loss = cb_params.net_outputs + + if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor): + step_loss = step_loss[0] + if isinstance(step_loss, Tensor): + step_loss = np.mean(step_loss.asnumpy()) + + self.losses.append(step_loss) + cur_step_in_epoch = int((cb_params.cur_step_num - 1) % cb_params.batch_num) + + if isinstance(step_loss, float) and (np.isnan(step_loss) or np.isinf(step_loss)): + raise ValueError("Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}]. 
" + "Invalid loss, terminating training.".format( + cb_params.cur_epoch_num - 1, cb_params.epoch_num, + cur_step_in_epoch, cb_params.batch_num)) + + if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0: + print("Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}], " + "loss: [{:5.4f}/{:5.4f}], time: [{:5.4f}]".format( + cb_params.cur_epoch_num - 1, cb_params.epoch_num, + cur_step_in_epoch, int(cb_params.batch_num), + step_loss, np.mean(self.losses), + step_mseconds), flush=True) diff --git a/mindspore/train/callback/_summary_collector.py b/mindspore/train/callback/_summary_collector.py new file mode 100644 index 0000000000..e2e4a9cc2d --- /dev/null +++ b/mindspore/train/callback/_summary_collector.py @@ -0,0 +1,786 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Summary collector callback.""" + +import os +import re +import json + +from importlib import import_module + +import numpy as np + +from mindspore import log as logger +from mindspore.common.tensor import Tensor +from mindspore.common.parameter import Parameter +from mindspore.train.summary.summary_record import SummaryRecord +from mindspore.train.summary.enum import PluginEnum, ModeEnum +from mindspore.train.callback import Callback, ModelCheckpoint +from mindspore.train import lineage_pb2 +from mindspore.train.callback._dataset_graph import DatasetGraph +from mindspore.nn.optim.optimizer import Optimizer +from mindspore.nn.loss.loss import _Loss +from mindspore.train._utils import check_value_type + + +class LineageMetadata: + """Initialize parameters used in model lineage management.""" + train_dataset_path = 'train_dataset_path' + valid_dataset_path = 'valid_dataset_path' + train_network = 'train_network' + loss_function = 'loss_function' + loss = 'loss' + optimizer = 'optimizer' + learning_rate = 'learning_rate' + epoch = 'epoch' + step_num = 'step_num' + parallel_mode = 'parallel_mode' + device_num = 'device_num' + batch_size = 'batch_size' + model_path = 'model_path' + model_ckpt = 'model_ckpt' + model_size = 'model_size' + metrics = 'metrics' + train_dataset_size = 'train_dataset_size' + valid_dataset_size = 'valid_dataset_size' + + +class SummaryCollector(Callback): + """ + SummaryCollector can help you to collect some common information. + + It can help you to collect loss, learning late, computational graph and so on. + SummaryCollector also persists data collected by the summary operator into a summary file. + + Note: + 1. Multiple SummaryCollector instances in callback list are not allowed. + 2. Not all information is collected at the training phase or at the eval phase. + 3. SummaryCollector always record the data collected by the summary operator. 
+ + Args: + summary_dir (str): The collected data will be persisted to this directory. + If the directory does not exist, it will be created automatically. + collect_freq (int): Set the frequency of data collection, it should be greater then zero, + and the unit is `step`. Default: 10. + It is important to note that if the data sink mode is used, the unit will become the `epoch`. + It is not recommended to collect data too frequently, which can affect performance. + collect_specified_data (Union[None, dict]): Perform custom operations on the collected data. Default: None. + By default, if set to None, all data is collected as the default behavior. + If you want to customize the data collected, you can do so with a dictionary. + Examples,you can set {'collect_metric': False} to control not collecting metrics. + The data that supports control is shown below. + + - collect_metric: Whether to collect training metrics, currently only loss is collected. + Optional: True/False. Default: True. + - collect_graph: Whether to collect computational graph, currently only + training computational graph is collected. Optional: True/False. Default: True. + - collect_train_lineage: Whether to collect lineage data for the training phase, + this field will be displayed on the lineage page of Mindinsight. Optional: True/False. Default: True. + - collect_eval_lineage: Whether to collect lineage data for the eval phase, + this field will be displayed on the lineage page of Mindinsight. Optional: True/False. Default: True. + - collect_input_data: Whether to collect dataset for each training. Currently only image data is supported. + Optional: True/False. Default: True. + - collect_dataset_graph: Whether to collect dataset graph for the training phase. + Optional: True/False. Default: True. + - histogram_regular: Collect weight and bias for parameter distribution page display in MindInsight. + This field allows regular strings to control which parameters to collect. 
+ Default: None, it means only the first five parameters are collected. + It is not recommended to collect too many parameters at once, as it can affect performance. + Note that if you collect too many parameters and run out of memory, the training will fail. + keep_default_action (bool): This field affects the collection behavior of the 'collect_specified_data' field. + Optional: True/False, Default: True. + True: means that after specified data is set, non-specified data is collected as the default behavior. + False: means that after specified data is set, only the specified data is collected, + and the others are not collected. + custom_lineage_data (Union[dict, None]): Allows you to customize the data and present it on the MingInsight + lineage page. In the custom data, the key type support str, and the value type support str/int/float. + Default: None, it means there is no custom data. + + Raises: + ValueError: If the parameter value is not expected. + TypeError: If the parameter type is not expected. + RuntimeError: If an error occurs during data collection. 
+ + Examples: + >>> # Simple usage: + >>> summary_collector = SummaryCollector(summary_dir='./summary_dir') + >>> model.train(epoch, dataset, callbacks=summary_collector) + >>> + >>> # Do not collect metric and collect the first layer parameter, others are collected by default + >>> specified={'collect_metric': False, 'histogram_regular': '^conv1.*'} + >>> summary_collector = SummaryCollector(summary_dir='./summary_dir', collect_specified_data=specified) + >>> model.train(epoch, dataset, callbacks=summary_collector) + >>> + >>> # Only collect metric, custom lineage data and record data that collected by the summary operator, + >>> # others are not collected + >>> specified = {'collect_metric':True, 'custom_lineage_data': {'version': 'resnet50_v1'}} + >>> summary_collector = SummaryCollector('./summary_dir', + >>> collect_specified_data=specified, + >>> keep_default_action=False) + >>> model.train(epoch, dataset, callbacks=summary_collector) + """ + + _DEFAULT_SPECIFIED_DATA = { + 'collect_metric': True, + 'collect_graph': True, + 'collect_train_lineage': True, + 'collect_eval_lineage': True, + 'collect_input_data': True, + 'collect_dataset_graph': True, + 'histogram_regular': None + } + + # _OPTIMIZER_FAILED means find optimizer failed, so we will not collect data about optimizer. 
+ _OPTIMIZER_FAILED = 'Failed' + + def __init__(self, summary_dir, collect_freq=10, collect_specified_data=None, + keep_default_action=True, custom_lineage_data=None): + super(SummaryCollector, self).__init__() + + self._summary_dir = self._process_summary_dir(summary_dir) + self._record = None + + self._check_collect_freq(collect_freq) + self._collect_freq = collect_freq + + self._check_action(keep_default_action) + + self._collect_specified_data = self._process_specified_data(collect_specified_data, keep_default_action) + logger.info(f"For `collect_specified_data` the value after processing is: {self._collect_specified_data}.") + + self._check_custom_lineage_data(custom_lineage_data) + self._custom_lineage_data = custom_lineage_data + + self._optimizer = None + self._has_saved_train_network = False + self._has_saved_custom_data = False + self._is_parse_loss_success = True + + def __enter__(self): + self._record = SummaryRecord(log_dir=self._summary_dir) + return self + + def __exit__(self, *err): + self._record.close() + + @staticmethod + def _process_summary_dir(summary_dir): + """Check the summary dir, and create a new directory if it not exists.""" + check_value_type('summary_dir', summary_dir, str) + summary_dir = summary_dir.strip() + if not summary_dir: + raise ValueError('For `summary_dir` the value should be a valid string of path, but got empty string.') + + summary_dir = os.path.realpath(summary_dir) + if not os.path.exists(summary_dir): + os.makedirs(summary_dir, exist_ok=True) + else: + if not os.path.isdir(summary_dir): + raise NotADirectoryError('For `summary_dir` it should be a directory path.') + + return summary_dir + + @staticmethod + def _check_collect_freq(freq): + """Check collect freq type and value.""" + check_value_type('collect_freq', freq, int) + if freq <= 0: + raise ValueError(f'For `collect_freq` the value should be greater than 0, but got `{freq}`.') + + @staticmethod + def _check_custom_lineage_data(custom_lineage_data): + """ + 
Check user custom lineage data. + + Args: + custom_lineage_data (dict): The user custom defined data. + + Raises: + TypeError: If the type of parameters is invalid. + """ + if custom_lineage_data is None: + return + + check_value_type('custom_lineage_data', custom_lineage_data, [dict, type(None)]) + for key, value in custom_lineage_data.items(): + check_value_type(f'custom_lineage_data -> {key}', key, str) + check_value_type(f'the value of custom_lineage_data -> {key}', value, (int, str, float)) + + @staticmethod + def _check_action(action): + """Check action type.""" + check_value_type('keep_default_action', action, bool) + + def _process_specified_data(self, specified_data, action): + """Check specified data type and value.""" + if specified_data is None: + if action: + return self._DEFAULT_SPECIFIED_DATA + return None + + check_value_type('collect_specified_data', specified_data, [dict, type(None)]) + + for param_name in specified_data: + check_value_type(param_name, param_name, [str]) + + unexpected_params = set(specified_data) - set(self._DEFAULT_SPECIFIED_DATA) + if unexpected_params: + raise ValueError(f'For `collect_specified_data` the keys {unexpected_params} are unsupported.') + + if 'histogram_regular' in specified_data: + check_value_type('histogram_regular', specified_data.get('histogram_regular'), (str, type(None))) + + bool_items = set(self._DEFAULT_SPECIFIED_DATA) - {'histogram_regular'} + for item in bool_items: + if item in specified_data: + check_value_type(item, specified_data.get(item), bool) + + if action: + result = dict(self._DEFAULT_SPECIFIED_DATA).update(specified_data) + else: + result = specified_data + return result + + def begin(self, run_context): + cb_params = run_context.original_args() + self._check_callbacks(cb_params) + + if cb_params.mode not in ModeEnum.to_list(): + raise ValueError('Only support `train` (model.train) and `eval` (model.eval) mode, ' + 'but got `{cb_params.mode}` mode.') + + self._record.set_mode(cb_params.mode) 
+ if cb_params.mode == ModeEnum.TRAIN.value: + # Note: if model.init is not executed then the computed graph will not be obtained here + # The purpose of recording the graph here was to collect_freq if it was set to a large size, + # but also want to see the graph as soon after compilation. + self._collect_graphs(cb_params) + + self._collect_dataset_graph(cb_params) + + if self._custom_lineage_data and not self._has_saved_custom_data: + packaged_custom_data = self._package_custom_lineage_data(self._custom_lineage_data) + self._record.add_value('custom_lineage_data', 'custom_lineage_data', packaged_custom_data) + self._has_saved_custom_data = True + + # There's nothing special about setting step to 0 here, just to satisfy the interface call + self._record.record(step=0) + + def step_end(self, run_context): + cb_params = run_context.original_args() + + if cb_params.mode == ModeEnum.TRAIN.value: + if cb_params.cur_step_num % self._collect_freq: + return + + if not self._has_saved_train_network: + self._collect_graphs(cb_params) + + self._collect_input_data(cb_params) + self._collect_metric(cb_params) + self._collect_histogram(cb_params) + + self._record.record(cb_params.cur_step_num) + + def end(self, run_context): + cb_params = run_context.original_args() + if cb_params.mode == ModeEnum.TRAIN.value: + self._collect_train_lineage(cb_params) + else: + self._collect_eval_lineage(cb_params) + + # There's nothing special about setting step to 0 here, just to satisfy the interface call + self._record.record(step=0) + + def _check_callbacks(self, cb_params): + """Check there if there are duplicate instances of SummaryCollector.""" + callbacks = cb_params.list_callback + + is_find = False + for callback in callbacks: + if type(callback).__name__ == self.__class__.__name__: + if not is_find: + is_find = True + continue + raise ValueError(f"There are more than one {self.__class__.__name__} instance in callback list," + f"but expected only one {self.__class__.__name__} 
instance.") + + @staticmethod + def _package_custom_lineage_data(custom_lineage_data): + """ + Package user-defined lineage data into binary data. + + Args: + custom_lineage_data (dict): User custom lineage data. + + Returns: + UserDefinedInfo, a object of lineage_pb2.UserDefinedInfo. + """ + user_defined_info = lineage_pb2.UserDefinedInfo() + for key, value in custom_lineage_data.items(): + if isinstance(value, int): + attr_name = "map_int32" + elif isinstance(value, float): + attr_name = "map_double" + else: + attr_name = "map_str" + + user_info = user_defined_info.user_info.add() + getattr(user_info, attr_name)[key] = value + + return user_defined_info + + def _collect_input_data(self, cb_params): + """Only support to collect image data.""" + if not self._collect_specified_data.get('collect_input_data'): + return + + input_data = getattr(cb_params, 'train_dataset_element', None) + if input_data is None: + self._collect_specified_data['collect_input_data'] = False + logger.info("There is not a `train_dataset_element` in cb_params.") + return + + if isinstance(input_data, (list, tuple)): + input_data = input_data[0] + try: + self._record.add_value(PluginEnum.IMAGE.value, 'input_data/auto', input_data) + except ValueError: + self._collect_specified_data['collect_input_data'] = False + return + + def _collect_dataset_graph(self, cb_params): + """Only collect train dataset graph.""" + if not self._collect_specified_data.get('collect_dataset_graph'): + return + + # After analysis, we think that the validated dataset graph and the training dataset graph + # should be consistent under normal scenarios, so only the training dataset graph is collected. 
+ if cb_params.mode == ModeEnum.TRAIN.value: + train_dataset = cb_params.train_dataset + dataset_graph = DatasetGraph() + graph_bytes = dataset_graph.package_dataset_graph(train_dataset) + self._record.add_value('dataset_graph', 'train_dataset', graph_bytes) + + def _collect_graphs(self, cb_params): + """Collect the graph of train network and eval network.""" + if not self._collect_specified_data.get('collect_graph'): + return + + network = cb_params.train_network if cb_params.mode == ModeEnum.TRAIN.value else cb_params.eval_network + graph_proto = network.get_func_graph_proto() + if graph_proto is None: + return + + self._has_saved_train_network = True + self._record.add_value(PluginEnum.GRAPH.value, 'train_network/auto', graph_proto) + + def _collect_metric(self, cb_params): + """Collect metric, currently only collection Loss is supported.""" + if not self._collect_specified_data.get('collect_metric'): + return + + loss = self._get_loss(cb_params) + if loss is None: + return + self._record.add_value(PluginEnum.SCALAR.value, 'loss/auto', loss) + + def _get_loss(self, cb_params): + """ + Get loss from the network output. + + Args: + cb_params (_InternalCallbackParam): Callback parameters. + + Returns: + Union[Tensor, None], if parse loss success, will return a Tensor value(shape is [1]), else return None. + """ + if not self._is_parse_loss_success: + # If parsing has failed before, avoid repeating it + return None + + output = cb_params.net_outputs + if output is None: + logger.warning("Can not find any output by this network.") + self._is_parse_loss_success = False + return None + + if isinstance(output, (int, float)): + loss = output + elif isinstance(output, (list, tuple)): + # If the output is a list, since the default network returns loss first, + # we assume that the first one is loss. 
+ loss = output[0] + elif isinstance(output, Tensor) and (not output.shape or output.shape == [1]): + loss_numpy = output.asnumpy() + loss = float(np.atleast_1d(loss_numpy)[0]) + else: + logger.warning("The output type could not be identified, so no loss was recorded in SummaryCollector.") + self._is_parse_loss_success = False + return None + + if not isinstance(loss, Tensor): + loss = Tensor(loss) + + return loss + + def _get_optimizer(self, cb_params): + """ + Get optimizer from the cb_params or parse from the network. + + Args: + cb_params (_InternalCallbackParam): Callback parameters. + + Returns: + Union[Optimizer, None], if parse optimizer success, will return a optimizer, else return None. + """ + if self._optimizer == self._OPTIMIZER_FAILED: + return None + + if self._optimizer is not None: + return self._optimizer + + optimizer = cb_params.optimizer + if optimizer is None: + network = cb_params.train_network if cb_params.mode == 'train' else cb_params.eval_work + optimizer = self._parse_optimizer_by_network(network) + + if optimizer is None or not isinstance(optimizer, Optimizer): + logger.warning("Can not find optimizer in network, or the optimizer does not inherit Mindpore's optimizer, " + "so we will not collect data about optimizer in SummaryCollector.") + optimizer = self._OPTIMIZER_FAILED + + return optimizer + + @staticmethod + def _parse_optimizer_by_network(network): + """Parse optimizer from network, if parse success will return a optimizer, else return None.""" + optimizer = None + for _, cell in network.cells_and_names(): + try: + optimizer = getattr(cell, 'optimizer') + except AttributeError: + continue + + if not isinstance(optimizer, Optimizer): + continue + + # Optimizer found successfully + break + + return optimizer + + def _collect_histogram(self, cb_params): + """Collect histogram data, contain the parameter weight and bias.""" + # Note: if there is not a key named `histogram_regular` in `self._collect_specified_data`, + # it means we 
will not collect histogram data. + if 'histogram_regular' not in self._collect_specified_data: + return + + self._optimizer = self._get_optimizer(cb_params) + if self._optimizer is None: + return + + parameters = self._optimizer.parameters + regular = self._collect_specified_data.get('histogram_regular') + if regular is not None: + for parameter in parameters: + if re.match(regular, parameter.name): + self._record.add_value(PluginEnum.HISTOGRAM.value, parameter.name+'/auto', parameter.data) + return + + # Note: If `histogram_regular` in `self._collect_specified_data` and the value is None, + # we will collect the first five parameters. + default_parameter_count = 5 + for parameter in parameters[:default_parameter_count]: + self._record.add_value(PluginEnum.HISTOGRAM.value, parameter.name+'/auto', parameter.data) + + @staticmethod + def _get_learning_rate(optimizer): + """ + parse the learning rate from optimizer. + + Args: + optimizer (Optimizer): A optimizer which inherit the MindSpore Optimizer class. + + Returns: + Union[Tensor, None], if parse learning rate success, will return a Tensor, else return None. 
+ """ + learning_rate = optimizer.learning_rate + if not isinstance(learning_rate, Parameter): + logger.info("The learning rate detected in the optimizer is not a Parameter type, so it is not recorded.") + return None + return learning_rate.data + + def _collect_train_lineage(self, cb_params): + """Collect train lineage data, the detail refer to lineage_pb2.TrainLineage.""" + if not self._collect_specified_data.get('collect_train_lineage'): + return + train_lineage = {} + loss = self._get_loss(cb_params) + if loss: + loss_numpy = loss.asnumpy() + loss = float(np.atleast_1d(loss_numpy)[0]) + train_lineage[LineageMetadata.loss] = loss + else: + train_lineage[LineageMetadata.loss] = None + + optimizer = self._get_optimizer(cb_params) + learning_rate = self._get_learning_rate(optimizer) + + if learning_rate is not None: + train_lineage[LineageMetadata.learning_rate] = list(np.atleast_1d(learning_rate.asnumpy()))[0] + else: + train_lineage[LineageMetadata.learning_rate] = None + train_lineage[LineageMetadata.optimizer] = type(optimizer).__name__ if optimizer else None + train_lineage[LineageMetadata.train_network] = self._get_backbone(cb_params.train_network) + + loss_fn = self._get_loss_fn(cb_params) + train_lineage[LineageMetadata.loss_function] = type(loss_fn).__name__ if loss_fn else None + + train_lineage[LineageMetadata.epoch] = cb_params.epoch_num + train_lineage[LineageMetadata.step_num] = cb_params.cur_step_num + train_lineage[LineageMetadata.parallel_mode] = cb_params.parallel_mode + train_lineage[LineageMetadata.device_num] = cb_params.device_number + train_lineage[LineageMetadata.batch_size] = cb_params.batch_num + + ckpt_file_path = self._get_ckpt_file_path(cb_params) + train_lineage[LineageMetadata.model_path] = json.dumps(dict(ckpt=ckpt_file_path)) + + model_size = os.path.getsize(ckpt_file_path) if ckpt_file_path else 0 + train_lineage[LineageMetadata.model_size] = model_size + + self._parse_dataset(cb_params, train_lineage) + + train_lineage_message = 
self._package_train_lineage_message(train_lineage) + + self._record.add_value(PluginEnum.TRAIN_LINEAGE.value, 'train_lineage', train_lineage_message) + + @staticmethod + def _package_train_lineage_message(train_lineage): + """ + Package train lineage data into binary data. + + Args: + train_lineage (dict): The train lineage dict, refer to the attribute of `_collect_train_lineage` method. + + Returns: + TrainLineage, a object of lineage_pb2.TrainLineage. + """ + lineage_message = lineage_pb2.TrainLineage() + + if train_lineage.get(LineageMetadata.train_network) is not None: + lineage_message.algorithm.network = train_lineage.get(LineageMetadata.train_network) + if train_lineage.get(LineageMetadata.loss) is not None: + lineage_message.algorithm.loss = train_lineage.get(LineageMetadata.loss) + + # Construct train_dataset message. + if train_lineage.get(LineageMetadata.train_dataset_path) is not None: + lineage_message.train_dataset.train_dataset_path = train_lineage.get(LineageMetadata.train_dataset_path) + if train_lineage.get(LineageMetadata.train_dataset_size) is not None: + lineage_message.train_dataset.train_dataset_size = train_lineage.get(LineageMetadata.train_dataset_size) + + # Construct model message + lineage_message.model.path = train_lineage.get(LineageMetadata.model_path) + lineage_message.model.size = train_lineage.get(LineageMetadata.model_size) + + # Construct hyper_parameters message. 
+ if train_lineage.get(LineageMetadata.learning_rate) is not None: + lineage_message.hyper_parameters.learning_rate = train_lineage.get(LineageMetadata.learning_rate) + if train_lineage.get(LineageMetadata.optimizer) is not None: + lineage_message.hyper_parameters.optimizer = train_lineage.get(LineageMetadata.optimizer) + if train_lineage.get(LineageMetadata.loss_function) is not None: + lineage_message.hyper_parameters.loss_function = train_lineage.get(LineageMetadata.loss_function) + if train_lineage.get(LineageMetadata.parallel_mode) is not None: + lineage_message.hyper_parameters.parallel_mode = train_lineage.get(LineageMetadata.parallel_mode) + + lineage_message.hyper_parameters.epoch = train_lineage.get(LineageMetadata.epoch) + lineage_message.hyper_parameters.device_num = train_lineage.get(LineageMetadata.device_num) + lineage_message.hyper_parameters.batch_size = train_lineage.get(LineageMetadata.batch_size) + + return lineage_message + + def _parse_dataset(self, cb_params, lineage_dict): + """ + Analyze Dataset to get the dataset path and dataset size. + + Args: + cb_params (_InternalCallbackParam): Callback parameters. + lineage_dict (dict): The lineage dict, refer to the attribute + of `_collect_train_lineage` method or `_collect_eval_lineage`. + + Returns: + dict, the lineage metadata. 
+ """ + dataset = cb_params.train_dataset if cb_params.mode == ModeEnum.TRAIN.value else cb_params.valid_dataset + + try: + dataset_path = self._get_dataset_path(dataset) + except IndexError: + dataset_path = None + + if dataset_path and os.path.isfile(dataset_path): + dataset_dir = os.path.dirname(dataset_path) + else: + dataset_dir = dataset_path + + batch_num = dataset.get_dataset_size() + batch_size = dataset.get_batch_size() + dataset_size = int(batch_num * batch_size) + + if cb_params.mode == ModeEnum.TRAIN.value: + lineage_dict[LineageMetadata.train_dataset_path] = dataset_dir + lineage_dict[LineageMetadata.train_dataset_size] = dataset_size + else: + lineage_dict[LineageMetadata.valid_dataset_path] = dataset_dir + lineage_dict[LineageMetadata.valid_dataset_size] = dataset_size + + return lineage_dict + + def _get_dataset_path(self, output_dataset): + """ + Get dataset path of MindDataset object. + + Args: + output_dataset (Union[Dataset, ImageFolderDatasetV2, MnistDataset, Cifar10Dataset, Cifar100Dataset, + VOCDataset, CelebADataset, MindDataset, ManifestDataset, TFRecordDataset, TextFileDataset]): + Refer to mindspore.dataset.Dataset. + + Returns: + str, dataset path. + + Raises: + IndexError: it means get dataset path failed. 
+ """ + dataset_package = import_module('mindspore.dataset') + dataset_dir_set = (dataset_package.ImageFolderDatasetV2, dataset_package.MnistDataset, + dataset_package.Cifar10Dataset, dataset_package.Cifar100Dataset, + dataset_package.VOCDataset, dataset_package.CelebADataset) + dataset_file_set = (dataset_package.MindDataset, dataset_package.ManifestDataset) + dataset_files_set = (dataset_package.TFRecordDataset, dataset_package.TextFileDataset) + + if isinstance(output_dataset, dataset_file_set): + return output_dataset.dataset_file + if isinstance(output_dataset, dataset_dir_set): + return output_dataset.dataset_dir + if isinstance(output_dataset, dataset_files_set): + return output_dataset.dataset_files[0] + return self._get_dataset_path(output_dataset.input[0]) + + @staticmethod + def _get_ckpt_file_path(cb_params): + """ + Get checkpoint file path from MindSpore callback list. + + Args: + cb_params (_InternalCallbackParam): Callback parameters. + + Returns: + Union[str, None], if parse success will checkpoint file absolute path, else return None. + """ + callbacks = cb_params.list_callback + ckpt_file_path = None + for callback in callbacks: + if isinstance(callback, ModelCheckpoint): + ckpt_file_path = callback.latest_ckpt_file_name + + if ckpt_file_path: + ckpt_file_path = os.path.realpath(ckpt_file_path) + + return ckpt_file_path + + @staticmethod + def _get_backbone(network): + """ + Get the name of backbone network. + + Args: + network (Cell): The train network. + + Returns: + Union[str, None], If parse success, will return the name of the backbone network, else return None. 
+ """ + backbone_name = None + backbone_key = '_backbone' + + for _, cell in network.cells_and_names(): + if hasattr(cell, backbone_key): + backbone_network = getattr(cell, backbone_key) + backbone_name = type(backbone_network).__name__ + + if backbone_name is None and network is not None: + backbone_name = type(network).__name__ + + return backbone_name + + @staticmethod + def _get_loss_fn(cb_params): + """ + Get loss function by cb_params and analyzing network. + + Args: + cb_params (_InternalCallbackParam): Callback parameters. + + Returns: + Union[Loss_fn, None], a Cell object, if parse failed, will return None. + """ + loss_fn = cb_params.loss_fn + if loss_fn is not None: + return loss_fn + + if cb_params.mode == ModeEnum.TRAIN.value: + network = cb_params.train_network + else: + network = cb_params.eval_network + + for _, cell in network.cells_and_names(): + if isinstance(cell, _Loss): + loss_fn = cell + break + return loss_fn + + def _collect_eval_lineage(self, cb_params): + """Collect eval lineage data, the detail refer to lineage_pb2.EvaluationLineage.""" + if not self._collect_specified_data.get('collect_eval_lineage'): + return + eval_lineage = dict() + + eval_lineage[LineageMetadata.metrics] = json.dumps(cb_params.metrics) + self._parse_dataset(cb_params, eval_lineage) + + eval_lineage_message = self._package_eval_lineage_message(eval_lineage) + self._record.add_value(PluginEnum.EVAL_LINEAGE.value, 'eval_lineage', eval_lineage_message) + + @staticmethod + def _package_eval_lineage_message(eval_lineage): + """ + Package eval lineage data into binary data. + + Args: + eval_lineage (dict): The eval lineage dict, refer to the attribute of `_collect_eval_lineage` method. + + Returns: + EvaluationLineage, a object of lineage_pb2.EvaluationLineage. 
+ """ + lineage_message = lineage_pb2.EvaluationLineage() + + if eval_lineage.get(LineageMetadata.metrics) is not None: + lineage_message.metric = eval_lineage.get(LineageMetadata.metrics) + if eval_lineage.get(LineageMetadata.valid_dataset_path) is not None: + lineage_message.valid_dataset.valid_dataset_path = eval_lineage.get(LineageMetadata.valid_dataset_path) + if eval_lineage.get(LineageMetadata.valid_dataset_size) is not None: + lineage_message.valid_dataset.valid_dataset_size = eval_lineage.get(LineageMetadata.valid_dataset_size) + + return lineage_message diff --git a/mindspore/train/callback/_time_monitor.py b/mindspore/train/callback/_time_monitor.py new file mode 100644 index 0000000000..9fbdf83aa8 --- /dev/null +++ b/mindspore/train/callback/_time_monitor.py @@ -0,0 +1,35 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""TimeMonitor Callback class.""" + +import time + +from ._callback import Callback + + +class TimeMonitor(Callback): + """Time Monitor.""" + + def __init__(self, data_size): + super(TimeMonitor, self).__init__() + self.data_size = data_size + + def epoch_begin(self, run_context): + self.epoch_time = time.time() + + def epoch_end(self, run_context): + epoch_mseconds = (time.time() - self.epoch_time) * 1000 + per_step_mseconds = epoch_mseconds / self.data_size + print("Epoch time: {:5.3f}, per step time: {:5.3f}".format(epoch_mseconds, per_step_mseconds), flush=True) diff --git a/mindspore/train/dataset_helper.py b/mindspore/train/dataset_helper.py index 52797b631c..14797e568b 100644 --- a/mindspore/train/dataset_helper.py +++ b/mindspore/train/dataset_helper.py @@ -13,13 +13,22 @@ # limitations under the License. # ============================================================================ """Dataset help for minddata dataset""" +import math + from mindspore._checkparam import check_bool from .. 
import context -from .parallel_utils import ParallelMode from ._utils import _exec_datagraph, _get_types_and_shapes, _to_tensor, \ _construct_tensor_list, _to_full_shapes, _to_full_tensor from ..nn.wrap import GetNextSingleOp -from ..parallel._utils import _get_device_num, _get_global_rank, _get_parallel_mode +from ..parallel._utils import _get_device_num, _get_global_rank, _need_to_full + + +def _send_data(dataset): + """Engine dataset to write data to tdt queue.""" + if not hasattr(dataset, '__has_sent__'): + exec_dataset = dataset.__TRANSFER_DATASET__ + exec_dataset.send() + dataset.__has_sent__ = True class DatasetHelper: @@ -74,13 +83,19 @@ class DatasetHelper: class _DatasetIter: """Base iter for dataset help""" def __init__(self, dataset): - self.loop_size = 1 + if not hasattr(dataset, '__loop_size__'): + self.loop_size = dataset.get_dataset_size() + else: + self.loop_size = dataset.__loop_size__ + if not hasattr(dataset, '__ME_INITED__'): - if not hasattr(dataset, '__loop_size__'): - self.loop_size = dataset.get_dataset_size() - else: - self.loop_size = dataset.__loop_size__ - dataset.__ME_INITED__ = _exec_datagraph(dataset, self.loop_size).queue_name + dataset.__TRANSFER_DATASET__ = _exec_datagraph(dataset, self.loop_size) + dataset.__ME_INITED__ = dataset.__TRANSFER_DATASET__.queue_name + + if not hasattr(dataset, '__no_send__'): + _send_data(dataset) + else: + _send_data(dataset) self.ind = 0 self.dataset = dataset @@ -104,10 +119,10 @@ class _DatasetIter: loop_count = 1 if hasattr(dataset, '__loop_size__'): loop_size = dataset.__loop_size__ - if dataset.get_dataset_size() % loop_size != 0: + if loop_size <= dataset.get_dataset_size() and dataset.get_dataset_size() % loop_size != 0: raise ValueError(f'Dataset size {dataset.get_dataset_size()} and ' f'loop_size {loop_size} are not matched.') - loop_count = int(dataset.get_dataset_size() / loop_size) + loop_count = math.ceil(dataset.get_dataset_size() / loop_size) return loop_count @@ -116,10 +131,10 @@ 
class _DatasetIterMSLoopSink(_DatasetIter): def __init__(self, dataset): super(_DatasetIterMSLoopSink, self).__init__(dataset) self.loop_count = self.get_loop_count(dataset) - # for self._parallel_mode equal to semi_auto_parallel or auto_parallel, use a complete tensor to - # compile, and slice tensor to run. The batch dimension of tensors for compile is device_number - # times the batch dimension of tensors for run. Now only support LoopSink. - if _get_parallel_mode() in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL): + # for self._parallel_mode equal to semi_auto_parallel or auto_parallel, and not using full_batch, + # use a complete tensor to compile, and slice tensor to run. The batch dimension of tensors for + # compile is device_number times the batch dimension of tensors for run. Now only support LoopSink. + if _need_to_full(): device_num = _get_device_num() self.dataset_shapes = _to_full_shapes(self.dataset_shapes, device_num) @@ -144,10 +159,8 @@ class _DatasetIterGE(_DatasetIter): def __init__(self, dataset): super(_DatasetIterGE, self).__init__(dataset) self.loop_count = self.get_loop_count(dataset) - parallel_mode = _get_parallel_mode() - self.need_to_full = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL) batch_expand_num = 1 - if self.need_to_full: + if _need_to_full(): batch_expand_num = _get_device_num() tensor_list_run = _construct_tensor_list(self.dataset_types, self.dataset_shapes, batch_expand_num) @@ -168,9 +181,6 @@ class _DatasetIterFeed: self.loop_count = dataset.get_dataset_size() self.ind = 0 - parallel_mode = context.get_auto_parallel_context("parallel_mode") - self.need_to_full = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL) - def __iter__(self): if self.repeat_ind % self.repeat_count == 0: self.iter = self.dataset.__iter__() @@ -184,6 +194,6 @@ class _DatasetIterFeed: raise StopIteration() self.ind += 1 data = self.iter.__next__() - if self.need_to_full: + if 
_need_to_full(): return _to_full_tensor(data, self.device_num, self.global_rank) return _to_tensor(data) diff --git a/mindspore/train/model.py b/mindspore/train/model.py index d2b5d4f5d8..79bd6bc90b 100755 --- a/mindspore/train/model.py +++ b/mindspore/train/model.py @@ -13,13 +13,15 @@ # limitations under the License. # ============================================================================ """Model.""" +from collections.abc import Iterable + import numpy as np from mindspore import log as logger from ..common.tensor import Tensor from ..nn.metrics import get_metrics from .._checkparam import check_input_data, check_output_data, check_int_positive, check_bool -from .callback import _InternalCallbackParam, RunContext, _build_callbacks +from .callback import _InternalCallbackParam, RunContext, _CallbackManager from .. import context from ..parallel._utils import _get_parallel_mode, _get_device_num, _get_global_rank, \ _get_parameter_broadcast, _device_number_check, _parameter_broadcast_check @@ -54,10 +56,13 @@ class Model: value would be passed to `Loss` metric, predict value and label would be passed to other metric. Default: None. amp_level (str): Option for argument `level` in `mindspore.amp.build_train_network`, level for mixed - precision training. Supports [O0, O2]. Default: "O0". + precision training. Supports [O0, O2, O3]. Default: "O0". - O0: Do not change. - O2: Cast network to float16, keep batchnorm run in float32, using dynamic loss scale. + - O3: Cast network to float16, with additional property 'keep_batchnorm_fp32=False'. + + O2 is recommended on GPU, O3 is recommended on Ascend. loss_scale_manager (Union[None, LossScaleManager]): If None, not scale the loss, or else scale the loss by LossScaleManager. If it is set, overwrite the level setting. It's a eyword argument. 
@@ -111,7 +116,7 @@ class Model: self._build_predict_network() def _process_amp_args(self, kwargs): - if self._amp_level == "O0": + if self._amp_level in ["O0", "O3"]: self._keep_bn_fp32 = False if 'keep_batchnorm_fp32' in kwargs: self._keep_bn_fp32 = kwargs['keep_batchnorm_fp32'] @@ -169,6 +174,8 @@ class Model: self._eval_indexes = [0, 1, 2] if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL): + if self._optimizer: + self._eval_network = _VirtualDatasetCell(self._eval_network) self._eval_network.set_auto_parallel() def _build_predict_network(self): @@ -281,7 +288,7 @@ class Model: if self._parameter_broadcast: self._train_network.set_broadcast_flag() - + train_dataset.__no_send__ = True train_dataset_helper, train_network = self._exec_preprocess(self._train_network, is_train=True, phase='train', @@ -298,6 +305,7 @@ class Model: self._eval_network.set_train(False) self._eval_network.phase = 'eval' + valid_dataset.__no_send__ = True valid_dataset_helper, eval_network = self._exec_preprocess(self._eval_network, is_train=False, phase='eval', @@ -330,8 +338,6 @@ class Model: if self._parameter_broadcast: self._train_network.set_broadcast_flag() - # build callback list - list_callback = _build_callbacks(callbacks) cb_params = _InternalCallbackParam() cb_params.train_network = self._train_network cb_params.epoch_num = epoch @@ -342,17 +348,30 @@ class Model: cb_params.parallel_mode = self._parallel_mode cb_params.device_number = self._device_number cb_params.train_dataset = train_dataset - cb_params.list_callback = list_callback + cb_params.list_callback = self._transform_callbacks(callbacks) + cb_params.train_dataset_element = None - if dataset_sink_mode: - if context.get_context("mode") == context.PYNATIVE_MODE: + # build callback list + with _CallbackManager(callbacks) as list_callback: + if not dataset_sink_mode: + self._train_process(epoch, train_dataset, list_callback, cb_params) + elif context.get_context("mode") == 
context.PYNATIVE_MODE: logger.warning("The pynative mode cannot support dataset sink mode currently." "So the training process will be performed with dataset not sink.") self._train_process(epoch, train_dataset, list_callback, cb_params) else: self._train_dataset_sink_process(epoch, train_dataset, list_callback, cb_params) - else: - self._train_process(epoch, train_dataset, list_callback, cb_params) + + @staticmethod + def _transform_callbacks(callbacks): + """Transform callback to a list.""" + if callbacks is None: + return [] + + if isinstance(callbacks, Iterable): + return list(callbacks) + + return [callbacks] def _train_dataset_sink_process(self, epoch, train_dataset, list_callback=None, cb_params=None): """ @@ -365,7 +384,7 @@ class Model: returned and passed to the network. Otherwise, a tuple (data, label) should be returned, and the data and label are passed to the network and loss function respectively. - list_callback (_ListCallback): Executor of callback list. Default: None. + list_callback (Callback): Executor of callback list. Default: None. cb_params (_InternalCallbackParam): Callback parameters. Default: None. """ dataset_helper, train_network = self._exec_preprocess(self._train_network, @@ -413,7 +432,7 @@ class Model: returned and passed to the network. Otherwise, a tuple (data, label) should be returned, and the data and label are passed to the network and loss function respectively. - list_callback (_ListCallback): Executor of callback list. Default: None. + list_callback (Callback): Executor of callback list. Default: None. cb_params (_InternalCallbackParam): Callback parameters. Default: None. 
""" dataset_helper, _ = self._exec_preprocess(self._train_network, @@ -445,6 +464,7 @@ class Model: scaling_sens = self._get_scaling_sens() next_element = tuple(next_element) + (Tensor(scaling_sens, mstype.float32),) + cb_params.train_dataset_element = next_element outputs = self._train_network(*next_element) cb_params.net_outputs = outputs if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update(): @@ -520,7 +540,7 @@ class Model: Args: valid_dataset (Dataset): Dataset to evaluate the model. - list_callback (ListCallback): Executor of callback list. Default: None. + list_callback (Callback): Executor of callback list. Default: None. cb_params (_InternalCallbackParam): Callback parameters. Default: None. Returns: @@ -559,7 +579,7 @@ class Model: Args: valid_dataset (Dataset): Dataset to evaluate the model. - list_callback (ListCallback): Executor of callback list. Default: None. + list_callback (Callback): Executor of callback list. Default: None. cb_params (_InternalCallbackParam): Callback parameters. Default: None. 
Returns: @@ -618,22 +638,23 @@ class Model: if not self._metric_fns: raise ValueError("metric fn can not be None or empty.") - list_callback = _build_callbacks(callbacks) cb_params = _InternalCallbackParam() cb_params.eval_network = self._eval_network cb_params.valid_dataset = valid_dataset cb_params.batch_num = valid_dataset.get_dataset_size() cb_params.mode = "eval" cb_params.cur_step_num = 0 + cb_params.list_callback = self._transform_callbacks(callbacks) self._eval_network.set_train(mode=False) self._eval_network.phase = 'eval' self._clear_metrics() - if dataset_sink_mode: - return self._eval_dataset_sink_process(valid_dataset, list_callback, cb_params) - return self._eval_process(valid_dataset, list_callback, cb_params) + with _CallbackManager(callbacks) as list_callback: + if dataset_sink_mode: + return self._eval_dataset_sink_process(valid_dataset, list_callback, cb_params) + return self._eval_process(valid_dataset, list_callback, cb_params) def predict(self, *predict_data): """ diff --git a/mindspore/train/quant/__init__.py b/mindspore/train/quant/__init__.py index 531db34b2b..51e8c20ded 100644 --- a/mindspore/train/quant/__init__.py +++ b/mindspore/train/quant/__init__.py @@ -15,10 +15,10 @@ """ quantization. -User can use aware quantization to train a model. Mindspore supports quantization aware training, +User can use quantization aware to train a model. MindSpore supports quantization aware training, which models quantization errors in both the forward and backward passes using fake-quantization ops. Note that the entire computation is carried out in floating point. At the end of quantization -aware training, Mindspore provides conversion functions to convert the trained model into lower precision. +aware training, MindSpore provides conversion functions to convert the trained model into lower precision. 
""" from .quant import convert_quant_network diff --git a/mindspore/train/quant/quant.py b/mindspore/train/quant/quant.py index e2a035bc77..937e54a7e4 100644 --- a/mindspore/train/quant/quant.py +++ b/mindspore/train/quant/quant.py @@ -12,15 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -"""aware quantization.""" +"""quantization aware.""" +import copy import re -from ... import nn -from ... import ops + +import numpy as np +import mindspore.context as context + +from ... import log as logger +from ... import nn, ops from ..._checkparam import ParamValidator as validator from ..._checkparam import Rel -from ...nn.layer import combined +from ...common import Tensor +from ...common import dtype as mstype +from ...common.api import _executor from ...nn.layer import quant +from ...ops import functional as F +from ...ops.operations import _inner_ops as inner +from ...train import serialization +from . import quant_utils + _ACTIVATION_MAP = {nn.ReLU: quant.ReLUQuant, nn.ReLU6: quant.ReLU6Quant, @@ -28,25 +40,21 @@ _ACTIVATION_MAP = {nn.ReLU: quant.ReLUQuant, nn.HSwish: quant.HSwishQuant} -class _AddFakeQuantInputOutput(nn.Cell): +class _AddFakeQuantInput(nn.Cell): """ Add FakeQuant at input and output of the Network. Only support one input and one output case. 
""" def __init__(self, network, quant_delay=0): - super(_AddFakeQuantInputOutput, self).__init__(auto_prefix=False) + super(_AddFakeQuantInput, self).__init__(auto_prefix=False) self.network = network self.fake_quant_input = quant.FakeQuantWithMinMax( min_init=-6, max_init=6, quant_delay=quant_delay, ema=True) self.fake_quant_input.update_parameters_name('fake_quant_input') - self.fake_quant_output = quant.FakeQuantWithMinMax( - min_init=-6, max_init=6, quant_delay=quant_delay, ema=True) - self.fake_quant_output.update_parameters_name('fake_quant_output') def construct(self, data): data = self.fake_quant_input(data) output = self.network(data) - output = self.fake_quant_output(output) return output @@ -55,14 +63,17 @@ class _AddFakeQuantAfterSubCell(nn.Cell): Add FakeQuant after of the sub Cell. """ - def __init__(self, subcell, quant_delay=0, num_bits=8): + def __init__(self, subcell, **kwargs): super(_AddFakeQuantAfterSubCell, self).__init__(auto_prefix=False) self.subcell = subcell self.fake_quant_act = quant.FakeQuantWithMinMax(min_init=-6, max_init=6, - num_bits=num_bits, - quant_delay=quant_delay, - ema=True) + ema=True, + num_bits=kwargs["num_bits"], + quant_delay=kwargs["quant_delay"], + per_channel=kwargs["per_channel"], + symmetric=kwargs["symmetric"], + narrow_range=kwargs["narrow_range"]) def construct(self, *data): output = self.subcell(*data) @@ -76,30 +87,22 @@ class ConvertToQuantNetwork: """ __quant_op_name__ = ["TensorAdd", "Sub", "Mul", "RealDiv"] - def __init__(self, - network, - quant_delay=0, - bn_fold=False, - freeze_bn=0, - weight_bits=8, - act_bits=8, - per_channel=False, - symmetric=False, - narrow_range=False): - self.network = validator.check_isinstance( - 'network', network, (nn.Cell,)) - self.quant_delay = validator.check_integer( - "quant delay", quant_delay, 0, Rel.GE) - self.freeze_bn = validator.check_integer( - "freeze bn", freeze_bn, 0, Rel.GE) - self.weight_bits = validator.check_integer( - "weights bit", weight_bits, 0, Rel.GE) 
- self.act_bits = validator.check_integer( - "activations bit", act_bits, 0, Rel.GE) - self.bn_fold = validator.check_bool("bn fold", bn_fold) - self.per_channel = validator.check_bool("per channel", per_channel) - self.symmetric = validator.check_bool("symmetric", symmetric) - self.narrow_range = validator.check_bool("narrow range", narrow_range) + def __init__(self, **kwargs): + self.network = validator.check_isinstance('network', kwargs["network"], (nn.Cell,)) + self.weight_qdelay = validator.check_integer("quant delay", kwargs["quant_delay"][0], 0, Rel.GE) + self.act_qdelay = validator.check_integer("quant delay", kwargs["quant_delay"][-1], 0, Rel.GE) + self.bn_fold = validator.check_bool("bn fold", kwargs["bn_fold"]) + self.freeze_bn = validator.check_integer("freeze bn", kwargs["freeze_bn"], 0, Rel.GE) + self.weight_bits = validator.check_integer("weights bit", kwargs["num_bits"][0], 0, Rel.GE) + self.act_bits = validator.check_integer("activations bit", kwargs["num_bits"][-1], 0, Rel.GE) + self.weight_channel = validator.check_bool("per channel", kwargs["per_channel"][0]) + self.act_channel = validator.check_bool("per channel", kwargs["per_channel"][-1]) + self.weight_symmetric = validator.check_bool("symmetric", kwargs["symmetric"][0]) + self.act_symmetric = validator.check_bool("symmetric", kwargs["symmetric"][-1]) + self.weight_range = validator.check_bool("narrow range", kwargs["narrow_range"][0]) + self.act_range = validator.check_bool("narrow range", kwargs["narrow_range"][-1]) + self._convert_method_map = {quant.Conv2dBnAct: self._convert_conv, + quant.DenseBnAct: self._convert_dense} def _convert_op_name(self, name): pattern = re.compile(r'([A-Z]{1})') @@ -111,6 +114,7 @@ class ConvertToQuantNetwork: def run(self): self.network.update_cell_prefix() network = self._convert_subcells2quant(self.network) + network = _AddFakeQuantInput(network) return network def _convert_subcells2quant(self, network): @@ -123,15 +127,9 @@ class ConvertToQuantNetwork: 
subcell = cells[name] + if subcell == network: + continue - elif isinstance(subcell, combined.Conv2d): - prefix = subcell.param_prefix - new_subcell = self._convert_conv(subcell) - new_subcell.update_parameters_name(prefix + '.') - network.insert_child_to_cell(name, new_subcell) - change = True - elif isinstance(subcell, combined.Dense): + elif isinstance(subcell, (quant.Conv2dBnAct, quant.DenseBnAct)): prefix = subcell.param_prefix - new_subcell = self._convert_dense(subcell) + new_subcell = self._convert_method_map[type(subcell)](subcell) new_subcell.update_parameters_name(prefix + '.') network.insert_child_to_cell(name, new_subcell) change = True @@ -150,7 +148,12 @@ class ConvertToQuantNetwork: add_list.append((name, attr)) for name, prim_op in add_list: prefix = name - add_quant = _AddFakeQuantAfterSubCell(prim_op) # quant.TensorAddQuant() + add_quant = _AddFakeQuantAfterSubCell(prim_op, + num_bits=self.act_bits, + quant_delay=self.act_qdelay, + per_channel=self.act_channel, + symmetric=self.act_symmetric, + narrow_range=self.act_range) prefix = '.'.join([network.param_prefix, self._convert_op_name(prim_op.name)]) add_quant.update_parameters_name(prefix + '.') del network.__dict__[name] @@ -159,7 +162,7 @@ class ConvertToQuantNetwork: def _convert_conv(self, subcell): """ - convet conv cell to combine cell + convert conv cell to quant cell """ conv_inner = subcell.conv bn_inner = subcell.batchnorm @@ -174,13 +177,13 @@ class ConvertToQuantNetwork: group=conv_inner.group, eps=bn_inner.eps, momentum=bn_inner.momentum, - quant_delay=self.quant_delay, + quant_delay=self.weight_qdelay, freeze_bn=self.freeze_bn, - per_channel=self.per_channel, + per_channel=self.weight_channel, num_bits=self.weight_bits, fake=True, - symmetric=self.symmetric, - narrow_range=self.narrow_range) + symmetric=self.weight_symmetric, + narrow_range=self.weight_range) del subcell.batchnorm subcell.batchnorm = None subcell.has_bn = False @@ -194,16 +197,22 @@ class ConvertToQuantNetwork: 
dilation=conv_inner.dilation, group=conv_inner.group, has_bias=conv_inner.has_bias, - quant_delay=self.quant_delay, - per_channel=self.per_channel, + quant_delay=self.weight_qdelay, + per_channel=self.weight_channel, num_bits=self.weight_bits, - symmetric=self.symmetric, - narrow_range=self.narrow_range) + symmetric=self.weight_symmetric, + narrow_range=self.weight_range) subcell.conv = conv_inner - if subcell.activation is not None: + if subcell.has_act and subcell.activation is not None: subcell.activation = self._convert_activation(subcell.activation) else: - subcell = _AddFakeQuantAfterSubCell(subcell) + subcell.has_act = True + subcell.activation = _AddFakeQuantAfterSubCell(F.identity, + num_bits=self.act_bits, + quant_delay=self.act_qdelay, + per_channel=self.act_channel, + symmetric=self.act_symmetric, + narrow_range=self.act_range) return subcell def _convert_dense(self, subcell): @@ -214,12 +223,22 @@ class ConvertToQuantNetwork: dense_inner = quant.DenseQuant(dense_inner.in_channels, dense_inner.out_channels, has_bias=dense_inner.has_bias, - quant_delay=self.quant_delay, - per_channel=self.per_channel, - num_bits=self.weight_bits) + num_bits=self.weight_bits, + quant_delay=self.weight_qdelay, + per_channel=self.weight_channel, + symmetric=self.weight_symmetric, + narrow_range=self.weight_range) subcell.dense = dense_inner - if subcell.activation is not None: + if subcell.has_act and subcell.activation is not None: subcell.activation = self._convert_activation(subcell.activation) + else: + subcell.has_act = True + subcell.activation = _AddFakeQuantAfterSubCell(F.identity, + num_bits=self.act_bits, + quant_delay=self.act_qdelay, + per_channel=self.act_channel, + symmetric=self.act_symmetric, + narrow_range=self.act_range) return subcell def _convert_activation(self, activation): @@ -227,36 +246,210 @@ class ConvertToQuantNetwork: if act_class not in _ACTIVATION_MAP: raise ValueError( "Unsupported activation in auto Quant: ", act_class) - return 
_ACTIVATION_MAP[act_class](num_bits=self.act_bits, quant_delay=self.quant_delay) + return _ACTIVATION_MAP[act_class](num_bits=self.act_bits, + quant_delay=self.act_qdelay, + per_channel=self.act_channel, + symmetric=self.weight_symmetric, + narrow_range=self.weight_range) + + +class ExportQuantNetworkDeploy: + """ + Convert quantization aware network to deploy network. + + Args: + network (Cell): MindSpore network produced by `convert_quant_network`. + inputs (Tensor): Inputs of the `network`. + + Returns: + Cell, converted network. + """ + __quant_op_name__ = ["TensorAdd", "Sub", "Mul", "RealDiv"] + + def __init__(self, + network, + *inputs): + network = validator.check_isinstance('network', network, (nn.Cell,)) + self.data_type = mstype.int8 + self.network = copy.deepcopy(network) + self.all_paramters = {p.name: p for p in self.network.get_parameters()} + self.get_inputs_table(inputs) + + def get_inputs_table(self, inputs): + """Get the support info for quant export.""" + phase_name = 'export_quant' + graph_id, _ = _executor.compile(self.network, *inputs, phase=phase_name, do_convert=False) + self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id) + + def run(self): + """Start to convert.""" + self.network.update_cell_prefix() + network = self.network + if isinstance(network, _AddFakeQuantInput): + network = network.network + network = self._convert_quant2deploy(network) + return network + + def _get_quant_block(self, cell_core, activation, fake_quant_a_out): + """convet network's quant subcell to deploy subcell""" + # Calculate the scale and zero point + w_minq_name = cell_core.fake_quant_weight.minq.name + np_type = mstype.dtype_to_nptype(self.data_type) + scale_w, zp_w = quant_utils.scale_zp_from_fack_quant_cell(cell_core.fake_quant_weight, np_type) + scale_a_out, _ = quant_utils.scale_zp_from_fack_quant_cell(fake_quant_a_out, np_type) + info = self.quant_info_table.get(w_minq_name, None) + if info: + fack_quant_a_in_op, minq_name = info + maxq 
= self.all_paramters[minq_name[:-4] + "maxq"] + minq = self.all_paramters[minq_name] + scale_a_in, zp_a_in = quant_utils.scale_zp_from_data(fack_quant_a_in_op, maxq, minq, np_type) + else: + logger.warning(f"Do not find `fake_quant` from input with `fack_quant.minq` {w_minq_name}") + return None + + # Build the `Quant` `Dequant` op. + # AscendQuant only support perlayer version. Need check here. + quant_op = inner.AscendQuant(float(scale_a_in), float(zp_a_in)) + sqrt_mode = False + scale_deq = scale_a_out * scale_w + if scale_deq < 2 ** -14: + scale_deq = np.sqrt(scale_deq) + sqrt_mode = True + dequant_op = inner.AscendDequant(sqrt_mode) + + # get op + op_core = cell_core.matmul if isinstance(cell_core, quant.DenseQuant) else cell_core.conv + if isinstance(activation, _AddFakeQuantAfterSubCell): + activation = activation.subcell + elif hasattr(activation, "get_origin"): + activation = activation.get_origin() + + # get the `weight` and `bias` + weight = cell_core.weight.data.asnumpy() + bias = None + if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)): + if cell_core.has_bias: + bias = cell_core.bias.data.asnumpy() + elif isinstance(cell_core, quant.Conv2dBatchNormQuant): + weight, bias = quant_utils.fold_batchnorm(weight, cell_core) + + # apply the quant + weight = Tensor(quant_utils.weight2int(weight, scale_w, zp_w), self.data_type) + if bias is not None: + bias = Tensor(scale_a_in * scale_w * bias, mstype.int32) + scale_deq = Tensor(scale_deq, mstype.float16) + block = quant.QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation) + return block + + def _convert_quant2deploy(self, network): + """Convet network's all quant subcell to deploy subcell.""" + cells = network.name_cells() + change = False + for name in cells: + subcell = cells[name] + if subcell == network: + continue + cell_core = None + fake_quant_act = None + activation = None + if isinstance(subcell, quant.Conv2dBnAct): + cell_core = subcell.conv + activation = 
subcell.activation + fake_quant_act = activation.fake_quant_act + elif isinstance(subcell, quant.DenseBnAct): + cell_core = subcell.dense + activation = subcell.activation + fake_quant_act = activation.fake_quant_act + if cell_core is not None: + new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act) + if new_subcell: + prefix = subcell.param_prefix + new_subcell.update_parameters_name(prefix + '.') + network.insert_child_to_cell(name, new_subcell) + change = True + elif isinstance(subcell, _AddFakeQuantAfterSubCell): + op = subcell.subcell + if op.name in ConvertToQuantNetwork.__quant_op_name__ and isinstance(op, ops.Primitive): + network.__delattr__(name) + network.__setattr__(name, op) + change = True + else: + self._convert_quant2deploy(subcell) + if isinstance(network, nn.SequentialCell) and change: + network.cell_list = list(network.cells()) + return network + + +def export_geir(network, *inputs, file_name): + """ + Exports MindSpore quant predict model to deploy with GEIR. + + Args: + network (Cell): MindSpore network produced by `convert_quant_network`. + inputs (Tensor): Inputs of the `network`. + file_name (str): File name of model to export. + """ + exporter = ExportQuantNetworkDeploy(network, *inputs) + deploy_net = exporter.run() + serialization.export(deploy_net, *inputs, file_name=file_name, file_format="GEIR") def convert_quant_network(network, - quant_delay=0, bn_fold=False, freeze_bn=0, - weight_bits=8, - act_bits=8, - per_channel=False, - symmetric=False, - narrow_range=False + quant_delay=(0, 0), + num_bits=(8, 8), + per_channel=(False, False), + symmetric=(False, False), + narrow_range=(False, False) ): r""" - Create aware quantizaiton training network. + Create quantization aware training network. Args: network (Cell): Obtain a pipeline through network for saving graph summary. - quant_delay (int): Number of steps after which weights and activations are quantized during eval. Default: 0. 
+ quant_delay (int or tuple): Number of steps after which weights and activations are quantized during + eval. The first element represent weights and second element represent data flow. Default: (0, 0) bn_fold (bool): Flag to used bn fold ops for simulation inference operation. Default: False. - freeze_bn (bool): Number of steps after which BN parameters used total mean and variance. Default: 0. - weight_bits (int): Number of bits to use for quantizing weights. Default: 8. - act_bits (int): Number of bits to use for quantizing activations. Default: 8. - per_channel (bool): Quantization granularity based on layer or on channel. Default: False. - symmetric (bool): Quantization algorithm use symmetric or not. Default: False. - narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. - - returns: - Cell, Network which has change to aware quantization training network. + freeze_bn (int): Number of steps after which BatchNorm OP parameters use total mean and variance. Default: 0. + num_bits (int or tuple): Number of bits to use for quantizing weights and activations. The first + element represent weights and second element represent data flow. Default: (8, 8) + per_channel (int or tuple): Quantization granularity based on layer or on channel. If `True` + then base on per channel otherwise base on per layer. The first element represent weights + and second element represent data flow. Default: (False, False) + symmetric (int or tuple): Quantization algorithm use symmetric or not. If `True` then base on + symmetric otherwise base on asymmetric. The first element represent weights and second + element represent data flow. Default: (False, False) + narrow_range (int or tuple): Quantization algorithm use narrow range or not. If `True` then base + on narrow range otherwise base on off narrow range. The first element represent weights and + second element represent data flow. 
Default: (False, False) + + Returns: + Cell, Network which has change to quantization aware training network cell. """ - net = ConvertToQuantNetwork( - network, quant_delay, bn_fold, freeze_bn, weight_bits, act_bits, per_channel, symmetric, narrow_range) + support_device = ["Ascend", "GPU"] + def convert2list(name, value): + if not isinstance(value, list) and not isinstance(value, tuple): + value = [value] + elif len(value) > 2: + raise ValueError("input `{}` len should less then 2".format(name)) + return value + + quant_delay = convert2list("quant delay", quant_delay) + num_bits = convert2list("num bits", num_bits) + per_channel = convert2list("per channel", per_channel) + symmetric = convert2list("symmetric", symmetric) + narrow_range = convert2list("narrow range", narrow_range) + + if context.get_context('device_target') not in support_device: + raise KeyError("Not support {} backend.".format(context.get_context('device_target'))) + + net = ConvertToQuantNetwork(network=network, + quant_delay=quant_delay, + bn_fold=bn_fold, + freeze_bn=freeze_bn, + num_bits=num_bits, + per_channel=per_channel, + symmetric=symmetric, + narrow_range=narrow_range) return net.run() diff --git a/mindspore/train/quant/quant_utils.py b/mindspore/train/quant/quant_utils.py new file mode 100644 index 0000000000..c9e6ac92e1 --- /dev/null +++ b/mindspore/train/quant/quant_utils.py @@ -0,0 +1,191 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Quantization utils.""" + +import numpy as np + + +def cal_quantization_params(input_min, + input_max, + data_type, + num_bits=8, + symmetric=False, + narrow_range=False): + r""" + Calculate quantization params for scale and zero point. + + Args: + input_min (numpy.ndarray): The dimension of channel or 1. + input_max (numpy.ndarray): The dimension of channel or 1. + data_type (numpy type) : Can be numpy int8, numpy uint8. + num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. + symmetric (bool): Quantization algorithm use symmetric or not. Default: False. + narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. + + Returns: + scale (numpy.ndarray): quantization param. + zero point (numpy.ndarray): quantization param. + """ + input_max = np.maximum(0.0, input_max) + input_min = np.minimum(0.0, input_min) + + if input_min.shape != input_max.shape: + raise ValueError("input min shape should equal to input max.") + if len(input_min.shape) > 1: + raise ValueError("input min and max shape should be one dim.") + if input_min > input_max: + raise ValueError("input_min min should less than input max.") + if (input_max == input_min).all(): + # scale = 1.0, zp = 0.0 + return np.ones(input_min.shape), np.zeros(input_min.shape) + + if data_type == np.int8: + quant_min = 0 - 2 ** (num_bits - 1) + quant_max = 2 ** (num_bits - 1) + else: + quant_min = 0 + quant_max = 2 ** num_bits - 1 + if narrow_range: + quant_min = quant_min + 1 + + # calculate scale + if symmetric: + input_max = np.maximum(-input_min, input_max) + input_min = -input_max + scale = (input_max - input_min) / (quant_max - quant_min) + + # calculate zero point + if symmetric: + zp = np.zeros(input_min.shape) + else: + zp_from_min = quant_min - input_min / scale + zp_from_max = quant_max - input_max / scale + zp_from_min_error = np.abs(quant_min) + np.abs(input_min / scale) +
zp_from_max_error = np.abs(quant_max) + np.abs(input_max / scale) + zp_double = zp_from_min if zp_from_min_error < zp_from_max_error else zp_from_max + if zp_double < quant_min: + zp = quant_min + elif zp_double > quant_max: + zp = quant_max + else: + zp = np.floor(zp_double + 0.5) + + return scale, zp + + +def weight2int(data, + scale, + zero_point): + r""" + Calculate int8/uint8 weight from fp32. The formula is defined as: + + .. math:: + int8/uint8 = round(float/scale) + offset + + Args: + data (numpy.ndarray): The dimension of channel or 1. Should be NCHW. + scale (numpy.ndarray): The dimension of channel or 1. + zero_point (numpy.ndarray): The dimension of channel or 1. + + Returns: + weight (numpy.ndarray): The dimension of channel or 1. + """ + if scale.shape != zero_point.shape: + raise ValueError("scale and zero_point should have the same shape.") + if scale.shape[0] > 0: + scale = scale.reshape(1, -1) + zero_point = zero_point.reshape(1, -1) + + return np.round((data/scale) + zero_point) + + +def scale_zp_from_fack_quant_cell(cell, data_type): + r""" + Get calculate quantization params for scale and zero point From `FakeQuantWithMinMax`. + + Args: + cell (Cell): `mindspore.nn.layer.FakeQuantWithMinMax` + data_type (numpy type): Can be `numpy.int8` or `numpy.uint8`. + + Returns: + scale (numpy.ndarray): quantization param. + zero point (numpy.ndarray): quantization param. + """ + minq = cell.minq.data.asnumpy() + maxq = cell.maxq.data.asnumpy() + op = cell.fake_quant + + scale, zp = cal_quantization_params( + minq, maxq, data_type, + num_bits=op.num_bits, + symmetric=op.symmetric, + narrow_range=op.narrow_range) + return scale, zp + + +def scale_zp_from_data(op, minq, maxq, data_type): + r""" + Get calculate quantization params for scale and zero point. + + Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive.
+ + Args: + op (Primitive): Fake quant primitive `mindspore.ops.operation.FakeQuantPerLayer` or + `mindspore.ops.operation.FakeQuantPerChannel` + minq (Parameter): Parameter `minq` of `mindspore.nn.layer.FakeQuantWithMinMax` + maxq (Parameter): Parameter `maxq` of `mindspore.nn.layer.FakeQuantWithMinMax` + data_type (numpy type): Can be `numpy.int8` or `numpy.uint8`. + + Returns: + scale (numpy.ndarray): quantization param. + zero point (numpy.ndarray): quantization param. + """ + minq = minq.data.asnumpy() + maxq = maxq.data.asnumpy() + + scale, zp = cal_quantization_params( + minq, maxq, data_type, + num_bits=op.num_bits, + symmetric=op.symmetric, + narrow_range=op.narrow_range) + return scale, zp + + +def fold_batchnorm(weight, cell_quant): + r""" + Fold the batchnorm in `Conv2dBatchNormQuant` to weight. + + Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive. + + Args: + weight (numpy.ndarray): Weight of `cell_quant`. + cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBatchNormQuant`. + + Returns: + weight (numpy.ndarray): Folded weight. + bias (numpy.ndarray): Folded bias.
+ """ + variance = cell_quant.moving_variance.data.asnumpy() + mean = cell_quant.moving_mean.data.asnumpy() + gamma = cell_quant.gamma.data.asnumpy() + beta = cell_quant.beta.data.asnumpy() + epsilon = cell_quant.eps + sigma = np.sqrt(variance + epsilon) + gamma = gamma.reshape(-1, 1, 1, 1) + sigma = sigma.reshape(-1, 1, 1, 1) + mean = mean.reshape(-1, 1, 1, 1) + weight = weight * gamma / sigma + bias = beta - gamma * mean / sigma + return weight, bias diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py index 502f00572f..c39104c6ff 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -21,6 +21,7 @@ import mindspore.nn as nn import mindspore.context as context from mindspore import log as logger from mindspore.train.checkpoint_pb2 import Checkpoint +from mindspore.train.print_pb2 import Print from mindspore.common.tensor import Tensor from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter @@ -30,11 +31,15 @@ from mindspore._checkparam import check_input_data __all__ = ["save_checkpoint", "load_checkpoint", "load_param_into_net", "export"] -tensor_to_ms_type = {"Int8": mstype.int8, "Int16": mstype.int16, "Int32": mstype.int32, "Int64": mstype.int64, - "Float16": mstype.float16, "Float32": mstype.float32, "Float64": mstype.float64} +tensor_to_ms_type = {"Int8": mstype.int8, "Uint8": mstype.uint8, "Int16": mstype.int16, "Uint16": mstype.uint16, + "Int32": mstype.int32, "Uint32": mstype.uint32, "Int64": mstype.int64, "Uint64": mstype.uint64, + "Float16": mstype.float16, "Float32": mstype.float32, "Float64": mstype.float64, + "Bool": mstype.bool_} + +tensor_to_np_type = {"Int8": np.int8, "Uint8": np.uint8, "Int16": np.int16, "Uint16": np.uint16, + "Int32": np.int32, "Uint32": np.uint32, "Int64": np.int64, "Uint64": np.uint64, + "Float16": np.float16, "Float32": np.float32, "Float64": np.float64, "Bool": np.bool_} -tensor_to_np_type = {"Int8": np.int8, "Int16": 
np.int16, "Int32": np.int32, "Int64": np.int64, - "Float16": np.float16, "Float32": np.float32, "Float64": np.float64} def _special_process_par(par, new_par): """ @@ -42,17 +47,17 @@ def _special_process_par(par, new_par): Like (12,2048,1,1)->(12,2048), this case is caused by GE 4 dimensions tensor. """ - par_shape_len = len(par.data.shape()) - new_par_shape_len = len(new_par.data.shape()) + par_shape_len = len(par.data.shape) + new_par_shape_len = len(new_par.data.shape) delta_len = new_par_shape_len - par_shape_len delta_i = 0 for delta_i in range(delta_len): - if new_par.data.shape()[par_shape_len + delta_i] != 1: + if new_par.data.shape[par_shape_len + delta_i] != 1: break if delta_i == delta_len - 1: new_val = new_par.data.asnumpy() - new_val = new_val.reshape(par.data.shape()) - par.set_parameter_data(Tensor(new_val, par.data.dtype())) + new_val = new_val.reshape(par.data.shape) + par.set_parameter_data(Tensor(new_val, par.data.dtype)) return True return False @@ -61,17 +66,17 @@ def _update_param(param, new_param): """Updates param's data from new_param's data.""" if isinstance(param.data, Tensor) and isinstance(new_param.data, Tensor): - if param.data.dtype() != new_param.data.dtype(): + if param.data.dtype != new_param.data.dtype: logger.error("Failed to combine the net and the parameters for param %s.", param.name) msg = ("Net parameters {} type({}) different from parameter_dict's({})" - .format(param.name, param.data.dtype(), new_param.data.dtype())) + .format(param.name, param.data.dtype, new_param.data.dtype)) raise RuntimeError(msg) - if param.data.shape() != new_param.data.shape(): + if param.data.shape != new_param.data.shape: if not _special_process_par(param, new_param): logger.error("Failed to combine the net and the parameters for param %s.", param.name) msg = ("Net parameters {} shape({}) different from parameter_dict's({})" - .format(param.name, param.data.shape(), new_param.data.shape())) + .format(param.name, param.data.shape, 
new_param.data.shape)) raise RuntimeError(msg) return @@ -79,12 +84,12 @@ def _update_param(param, new_param): return if isinstance(param.data, Tensor) and not isinstance(new_param.data, Tensor): - if param.data.shape() != (1,) and param.data.shape() != (): + if param.data.shape != (1,) and param.data.shape != (): logger.error("Failed to combine the net and the parameters for param %s.", param.name) msg = ("Net parameters {} shape({}) is not (1,), inconsitent with parameter_dict's(scalar)." - .format(param.name, param.data.shape())) + .format(param.name, param.data.shape)) raise RuntimeError(msg) - param.set_parameter_data(initializer(new_param.data, param.data.shape(), param.data.dtype())) + param.set_parameter_data(initializer(new_param.data, param.data.shape, param.data.dtype)) elif isinstance(new_param.data, Tensor) and not isinstance(param.data, Tensor): logger.error("Failed to combine the net and the parameters for param %s.", param.name) @@ -120,12 +125,12 @@ def save_checkpoint(parameter_list, ckpoint_file_name): param["data"].init_data() param_data = param["data"].asnumpy().reshape(-1) param_tensor.tensor_content = param_data.tostring() - param_tensor.tensor_type = str(param["data"].dtype()) + param_tensor.tensor_type = str(param["data"].dtype) - if param['data'].shape() == (): + if param['data'].shape == (): param_tensor.dims.append(0) else: - for dim in param['data'].shape(): + for dim in param['data'].shape: param_tensor.dims.append(dim) with open(ckpoint_file_name, "wb") as f: @@ -398,17 +403,18 @@ def export(net, *inputs, file_name, file_format='GEIR'): net (Cell): MindSpore network. inputs (Tensor): Inputs of the `net`. file_name (str): File name of model to export. - file_format (str): MindSpore currently supports 'GEIR', 'ONNX' and 'LITE' format for exported model. + file_format (str): MindSpore currently supports 'GEIR', 'ONNX' 'LITE' and 'BINARY' format for exported model. - GEIR: Graph Engine Intermidiate Representation. 
An intermidiate representation format of Ascend model. - ONNX: Open Neural Network eXchange. An open format built to represent machine learning models. - LITE: Huawei model format for mobile. A lite model only for the MindSpore Lite + - BINARY: Binary format for model. An intermidiate representation format for models. """ logger.info("exporting model file:%s format:%s.", file_name, file_format) check_input_data(*inputs, data_class=Tensor) - supported_formats = ['GEIR', 'ONNX', 'LITE'] + supported_formats = ['GEIR', 'ONNX', 'LITE', 'BINARY'] if file_format not in supported_formats: raise ValueError(f'Illegal file format {file_format}, it must be one of {supported_formats}') # switch network mode to infer when it is training @@ -428,9 +434,77 @@ def export(net, *inputs, file_name, file_format='GEIR'): with open(file_name, 'wb') as f: os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR) f.write(onnx_stream) + elif file_format == 'BINARY': # file_format is 'BINARY' + phase_name = 'export_binary' + graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False) + onnx_stream = _executor._get_func_graph_proto(graph_id, 'binary_ir') + with open(file_name, 'wb') as f: + os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR) + f.write(onnx_stream) elif file_format == 'LITE': # file_format is 'LITE' context.set_context(save_ms_model=True, save_ms_model_path=file_name) net(*inputs) # restore network training mode if is_training: net.set_train(mode=True) + + +def parse_print(print_file_name): + """ + Loads Print data from a specified file. + + Args: + print_file_name (str): The file name of save print data. + + Returns: + List, element of list is Tensor. + + Raises: + ValueError: Print file is incorrect. 
+ """ + if not os.path.realpath(print_file_name): + raise ValueError("Please input the correct print file name.") + + if os.path.getsize(print_file_name) == 0: + raise ValueError("The print file may be empty, please make sure enter the correct file name.") + + logger.info("Execute load print process.") + print_list = Print() + + try: + with open(print_file_name, "rb") as f: + pb_content = f.read() + print_list.ParseFromString(pb_content) + except BaseException as e: + logger.error("Failed to read the print file %s, please check the correct of the file.", print_file_name) + raise ValueError(e.__str__()) + + tensor_list = [] + + try: + for print_ in print_list.value: + # String type + if print_.HasField("desc"): + tensor_list.append(print_.desc) + elif print_.HasField("tensor"): + dims = print_.tensor.dims + data_type = print_.tensor.tensor_type + data = print_.tensor.tensor_content + np_type = tensor_to_np_type[data_type] + param_data = np.fromstring(data, np_type) + ms_type = tensor_to_ms_type[data_type] + param_dim = [] + for dim in dims: + param_dim.append(dim) + if param_dim: + param_value = param_data.reshape(param_dim) + tensor_list.append(Tensor(param_value, ms_type)) + # Scale type + else: + tensor_list.append(Tensor(param_data, ms_type)) + + except BaseException as e: + logger.error("Failed to load the print file %s.", print_list) + raise RuntimeError(e.__str__()) + + return tensor_list diff --git a/mindspore/train/summary/_event_writer.py b/mindspore/train/summary/_event_writer.py deleted file mode 100644 index ae347135f6..0000000000 --- a/mindspore/train/summary/_event_writer.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Writes events to disk in a logdir.""" -import os -import stat -from collections import deque -from multiprocessing import Pool, Process, Queue, cpu_count - -from ..._c_expression import EventWriter_ -from ._summary_adapter import package_summary_event - - -def _pack(result, step): - summary_event = package_summary_event(result, step) - return summary_event.SerializeToString() - - -class EventWriter(Process): - """ - Creates a `EventWriter` and write event to file. - - Args: - filepath (str): Summary event file path and file name. - flush_interval (int): The flush seconds to flush the pending events to disk. Default: 120. - """ - - def __init__(self, filepath: str, flush_interval: int) -> None: - super().__init__() - _ = flush_interval - with open(filepath, 'w'): - os.chmod(filepath, stat.S_IWUSR | stat.S_IRUSR) - self._writer = EventWriter_(filepath) - self._queue = Queue(cpu_count() * 2) - self.start() - - def run(self): - - with Pool() as pool: - deq = deque() - while True: - while deq and deq[0].ready(): - self._writer.Write(deq.popleft().get()) - - if not self._queue.empty(): - action, data = self._queue.get() - if action == 'WRITE': - if not isinstance(data, (str, bytes)): - deq.append(pool.apply_async(_pack, data)) - else: - self._writer.Write(data) - elif action == 'FLUSH': - self._writer.Flush() - elif action == 'END': - break - for res in deq: - self._writer.Write(res.get()) - - self._writer.Shut() - - def write(self, data) -> None: - """ - Write the event to file. 
- - Args: - data (Optional[str, Tuple[list, int]]): The data to write. - """ - self._queue.put(('WRITE', data)) - - def flush(self): - """Flush the writer.""" - self._queue.put(('FLUSH', None)) - - def close(self) -> None: - """Close the writer.""" - self._queue.put(('END', None)) - self.join() diff --git a/mindspore/train/summary/_lineage_adapter.py b/mindspore/train/summary/_lineage_adapter.py new file mode 100644 index 0000000000..d85d16b49d --- /dev/null +++ b/mindspore/train/summary/_lineage_adapter.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Generate the lineage event which conform to proto format.""" +import time + +from ..lineage_pb2 import LineageEvent + + +def serialize_to_lineage_event(name, value): + """Serialize value to lineage event.""" + event = LineageEvent() + event.wall_time = time.time() + content = _get_lineage_content(name, event) + content.ParseFromString(value) + return event.SerializeToString() + + +def _get_lineage_content(name, event): + if name == 'dataset_graph': + return event.dataset_graph + if name == 'eval_lineage': + return event.evaluation_lineage + if name == 'train_lineage': + return event.train_lineage + if name == 'custom_lineage_data': + return event.user_defined_info + raise KeyError(f'No such field in LineageEvent') diff --git a/mindspore/train/summary/_summary_adapter.py b/mindspore/train/summary/_summary_adapter.py index 47ed0a7b90..1ae5bdd2d5 100644 --- a/mindspore/train/summary/_summary_adapter.py +++ b/mindspore/train/summary/_summary_adapter.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================ """Generate the summary event which conform to proto format.""" -import socket +import platform import time import numpy as np @@ -30,7 +30,7 @@ MS_IMAGE_TENSOR_FORMAT = 'NCHW' # Set the Event mark EVENT_FILE_NAME_MARK = ".out.events.summary." 
# Set the init event of version and mark -EVENT_FILE_INIT_VERSION_MARK = "Mindspore.Event:" +EVENT_FILE_INIT_VERSION_MARK = "MindSpore.Event:" EVENT_FILE_INIT_VERSION = 1 F32_MIN, F32_MAX = np.finfo(np.float32).min, np.finfo(np.float32).max @@ -51,7 +51,7 @@ def get_event_file_name(prefix, suffix): _check_str_by_regular(suffix) file_name = "" time_second = str(int(time.time())) - hostname = socket.gethostname() + hostname = platform.node() if prefix is not None: file_name = file_name + prefix @@ -113,7 +113,7 @@ def package_summary_event(data_list, step): data = value["data"] tag = value["name"] - logger.debug("Now process %r summary, tag = %r", summary_type, tag) + logger.debug(f"Now process {summary_type} summary, tag = {tag}") summary_value = summary.value.add() summary_value.tag = tag @@ -130,7 +130,7 @@ def package_summary_event(data_list, step): _fill_histogram_summary(tag, data, summary_value.histogram) else: # The data is invalid ,jump the data - logger.error("Summary type(%r) is error, tag = %r", summary_type, tag) + logger.error(f"Summary type({summary_type}) is error, tag = {tag}") del summary.value[-1] return summary_event @@ -186,17 +186,17 @@ def _fill_scalar_summary(tag: str, np_value, summary): Returns: Summary, return scalar summary content. 
""" - logger.debug("Set(%r) the scalar summary value", tag) + logger.debug(f"Set({tag}) the scalar summary value") if np_value.size == 1: # is scalar summary.scalar_value = np_value.item() return True if np_value.size > 1: - logger.warning("The tensor is not a single scalar, tag = %r, ndim = %r, shape = %r", tag, np_value.ndim, - np_value.shape) + logger.warning( + f"The tensor is not a single scalar, tag = {tag}, ndim = {np_value.ndim}, shape = {np_value.shape}") summary.scalar_value = next(np_value.flat).item() return True - logger.error("There no values inside tensor, tag = %r, size = %r", tag, np_value.size) + logger.error(f"There no values inside tensor, tag = {tag}, size = {np_value.size}") return False @@ -212,7 +212,7 @@ def _fill_tensor_summary(tag: str, np_value, summary_tensor): Retruns: Summary, return tensor summary content. """ - logger.debug("Set(%r) the tensor summary value", tag) + logger.debug(f"Set({tag}) the tensor summary value") # get tensor dtype tensor_dtype = _nptype_to_prototype(np_value) summary_tensor.data_type = DataType.Value(tensor_dtype) @@ -266,7 +266,7 @@ def _fill_histogram_summary(tag: str, np_value: np.ndarray, summary) -> None: np_value (np.ndarray): Summary data. summary (summary_pb2.Summary.Histogram): Summary histogram data. """ - logger.debug("Set(%r) the histogram summary value", tag) + logger.debug(f"Set({tag}) the histogram summary value") # Default bucket for tensor with no valid data. 
ma_value = np.ma.masked_invalid(np_value) total, valid = np_value.size, ma_value.count() @@ -281,7 +281,7 @@ def _fill_histogram_summary(tag: str, np_value: np.ndarray, summary) -> None: summary.count = total summary.nan_count, summary.pos_inf_count, summary.neg_inf_count = invalids if not valid: - logger.warning('There are no valid values in the ndarray(size=%d, shape=%d)', total, np_value.shape) + logger.warning(f'There are no valid values in the ndarray(size={total}, shape={np_value.shape})') # summary.{min, max, sum} are 0s by default, no need to explicitly set else: # BUG: max of a masked array with dtype np.float16 returns inf @@ -290,9 +290,8 @@ def _fill_histogram_summary(tag: str, np_value: np.ndarray, summary) -> None: summary.min = ma_value.min(fill_value=np.PINF) summary.max = ma_value.max(fill_value=np.NINF) if summary.min < F32_MIN or summary.max > F32_MAX: - logger.warning( - 'Values(%r, %r) are too large, ' - 'you may encounter some undefined behaviours hereafter.', summary.min, summary.max) + logger.warning(f'Values({summary.min}, {summary.max}) are too large, ' + f'you may encounter some undefined behaviours hereafter.') else: summary.min = ma_value.min() summary.max = ma_value.max() @@ -327,14 +326,14 @@ def _fill_image_summary(tag: str, np_value, summary_image, input_format='NCHW'): Returns: Summary, return image summary content. 
""" - logger.debug("Set(%r) the image summary value", tag) + logger.debug(f"Set({tag}) the image summary value") if np_value.ndim != 4 or np_value.shape[1] not in (1, 3): - logger.error("The value is not Image, tag = %r, ndim = %r, shape=%r", tag, np_value.ndim, np_value.shape) + logger.error(f"The value is not Image, tag = {tag}, ndim = {np_value.ndim}, shape={np_value.shape}") return False if np_value.ndim != len(input_format): - logger.error("The tensor with dim(%r) can't convert the format(%r) because dim not same", np_value.ndim, - input_format) + logger.error( + f"The tensor with dim({np_value.ndim}) can't convert the format({input_format}) because dim not same") return False # convert the tensor format diff --git a/mindspore/train/summary/_summary_writer.py b/mindspore/train/summary/_summary_writer.py new file mode 100644 index 0000000000..36d020819a --- /dev/null +++ b/mindspore/train/summary/_summary_writer.py @@ -0,0 +1,79 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Writes events to disk in a logdir.""" +import os +import stat + +from ..._c_expression import EventWriter_ +from ._summary_adapter import package_init_event + + +class BaseWriter: + """BaseWriter to be subclass.""" + + def __init__(self, filepath) -> None: + self._filepath = filepath + self._writer: EventWriter_ = None + + def init_writer(self): + """Write some metadata etc.""" + + @property + def writer(self) -> EventWriter_: + """Get the writer.""" + if self._writer is not None: + return self._writer + + with open(self._filepath, 'w'): + os.chmod(self._filepath, stat.S_IWUSR | stat.S_IRUSR) + self._writer = EventWriter_(self._filepath) + self.init_writer() + return self._writer + + def write(self, plugin, mode, data): + """Write data to file.""" + raise NotImplementedError() + + def flush(self): + """Flush the writer.""" + if self._writer is not None: + self._writer.Flush() + + def close(self): + """Close the writer.""" + if self._writer is not None: + self._writer.Shut() + + +class SummaryWriter(BaseWriter): + """SummaryWriter for write summaries.""" + + def init_writer(self): + """Write some metadata etc.""" + self.writer.Write(package_init_event().SerializeToString()) + + def write(self, plugin, mode, data): + """Write data to file.""" + if plugin in ('summary', 'graph'): + self.writer.Write(data) + + +class LineageWriter(BaseWriter): + """LineageWriter for write lineage.""" + + def write(self, plugin, mode, data): + """Write data to file.""" + if plugin in ('dataset_graph', 'train_lineage', 'eval_lineage', 'custom_lineage_data'): + self.writer.Write(data) diff --git a/mindspore/train/summary/_writer_pool.py b/mindspore/train/summary/_writer_pool.py new file mode 100644 index 0000000000..2d219743de --- /dev/null +++ b/mindspore/train/summary/_writer_pool.py @@ -0,0 +1,114 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Write events to disk in a base directory.""" +import os +from collections import deque +from multiprocessing import Pool, Process, Queue, cpu_count + +from ._lineage_adapter import serialize_to_lineage_event +from ._summary_adapter import package_graph_event, package_summary_event +from ._summary_writer import SummaryWriter, LineageWriter + + +def _pack_data(datadict): + """Pack data according to which plugin.""" + result = [] + summaries, step, mode = [], None, None + for plugin, datalist in datadict.items(): + for data in datalist: + if plugin == 'graph': + result.append([plugin, data.get('mode'), package_graph_event(data.get('value')).SerializeToString()]) + elif plugin in ('train_lineage', 'eval_lineage', 'custom_lineage_data', 'dataset_graph'): + result.append([plugin, data.get('mode'), serialize_to_lineage_event(plugin, data.get('value'))]) + elif plugin in ('scalar', 'tensor', 'histogram', 'image'): + summaries.append({'_type': plugin.title(), 'name': data.get('tag'), 'data': data.get('value')}) + step = data.get('step') + mode = data.get('mode') + if summaries: + result.append(['summary', mode, package_summary_event(summaries, step).SerializeToString()]) + return result + + +class WriterPool(Process): + """ + Use a set of pooled resident processes for writing a list of file. + + Args: + base_dir (str): The base directory to hold all the files. 
+ filelist (str): The mapping from short name to long filename. + """ + + def __init__(self, base_dir, **filedict) -> None: + super().__init__() + self._base_dir, self._filedict = base_dir, filedict + self._queue = Queue(cpu_count() * 2) + self.start() + + def run(self): + writers = self._get_writers() + + with Pool() as pool: + deq = deque() + while True: + while deq and deq[0].ready(): + for plugin, mode, data in deq.popleft().get(): + for writer in writers: + writer.write(plugin, mode, data) + + if not self._queue.empty(): + action, data = self._queue.get() + if action == 'WRITE': + deq.append(pool.apply_async(_pack_data, (data,))) + elif action == 'FLUSH': + for writer in writers: + writer.flush() + elif action == 'END': + break + for result in deq: + for plugin, mode, data in result.get(): + for writer in writers: + writer.write(plugin, mode, data) + + for writer in writers: + writer.close() + + def _get_writers(self): + writers = [] + for plugin, filename in self._filedict.items(): + filepath = os.path.join(self._base_dir, filename) + if plugin == 'summary': + writers.append(SummaryWriter(filepath)) + elif plugin == 'lineage': + writers.append(LineageWriter(filepath)) + return writers + + def write(self, data) -> None: + """ + Write the event to file. + + Args: + name (str): The key of a specified file. + data (Optional[str, Tuple[list, int]]): The data to write. 
+ """ + self._queue.put(('WRITE', data)) + + def flush(self): + """Flush the writer and sync data to disk.""" + self._queue.put(('FLUSH', None)) + + def close(self) -> None: + """Close the writer.""" + self._queue.put(('END', None)) + self.join() diff --git a/mindspore/train/summary/enum.py b/mindspore/train/summary/enum.py new file mode 100644 index 0000000000..84044eab6c --- /dev/null +++ b/mindspore/train/summary/enum.py @@ -0,0 +1,43 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Summary's enumeration file.""" +from enum import Enum + + +class BaseEnum(Enum): + """The base enum class.""" + + @classmethod + def to_list(cls): + """Converts the enumeration into a list.""" + return [member.value for member in cls.__members__.values()] + + +class PluginEnum(BaseEnum): + """The list of plugins currently supported by the summary.""" + GRAPH = 'graph' + SCALAR = 'scalar' + IMAGE = 'image' + TENSOR = 'tensor' + HISTOGRAM = 'histogram' + TRAIN_LINEAGE = 'train_lineage' + EVAL_LINEAGE = 'eval_lineage' + DATASET_GRAPH = 'dataset_graph' + + +class ModeEnum(BaseEnum): + """The modes currently supported by the summary.""" + TRAIN = 'train' + EVAL = 'eval' diff --git a/mindspore/train/summary/summary_record.py b/mindspore/train/summary/summary_record.py index b2bc872a1f..61c2c8adeb 100644 --- a/mindspore/train/summary/summary_record.py +++ b/mindspore/train/summary/summary_record.py @@ -21,9 +21,9 @@ from mindspore import log as logger from ..._c_expression import Tensor from ..._checkparam import _check_str_by_regular -from .._utils import _make_directory -from ._event_writer import EventWriter -from ._summary_adapter import get_event_file_name, package_graph_event, package_init_event +from .._utils import _make_directory, _check_to_numpy, _check_lineage_value +from ._summary_adapter import get_event_file_name, package_graph_event +from ._writer_pool import WriterPool # for the moment, this lock is for caution's sake, # there are actually no any concurrencies happening. @@ -53,16 +53,20 @@ def _get_summary_tensor_data(): return data +def _dictlist(): + from collections import defaultdict + return defaultdict(list) + + class SummaryRecord: """ - SummaryRecord is used to record the summary value. + SummaryRecord is used to record the summary data and lineage data. Note: - The API will create an event file in a given directory and add summaries and events to it. 
- It writes the event log to a file by executing the record method. In addition, - if the SummaryRecord object is created and the summary operator is used in the network, - even if the record method is not called, the event in the cache will be written to the - file at the end of execution. Make sure to close the SummaryRecord object at the end. + The API will create a summary file and a lineage file lazily in a given directory and writes data to them. + It writes the data to files by executing the record method. In addition to record the data bubbled up from + the network by defining the summary operators, SummaryRecord also supports to record extra data which + can be added by calling add_value. Finally, make sure to close the SummaryRecord object at the end. Args: log_dir (str): The log_dir is a directory location to save the summary. @@ -89,8 +93,12 @@ class SummaryRecord: file_suffix="_MS", network=None): + self._closed, self._mode = False, 'train' + self._data_pool = _dictlist() + _check_str_by_regular(file_prefix) _check_str_by_regular(file_suffix) + self.log_path = _make_directory(log_dir) if not isinstance(queue_max_size, int) or not isinstance(flush_time, int): @@ -113,7 +121,6 @@ class SummaryRecord: self.suffix = file_suffix self.network = network self.has_graph = False - self._closed = False # create the summary writer file self.event_file_name = get_event_file_name(self.prefix, self.suffix) @@ -122,18 +129,12 @@ class SummaryRecord: except Exception as ex: raise RuntimeError(ex) - self._event_writer = None - - def _init_event_writer(self): - """Init event writer and write metadata.""" - event_writer = EventWriter(self.full_file_name, self.flush_time) - event_writer.write(package_init_event().SerializeToString()) - return event_writer + self._event_writer = WriterPool(log_dir, + summary=self.full_file_name, + lineage=get_event_file_name('events', '_lineage')) def __enter__(self): """Enter the context manager.""" - if not self._event_writer: - 
self._event_writer = self._init_event_writer() if self._closed: raise ValueError('SummaryRecord has been closed.') return self @@ -142,6 +143,76 @@ class SummaryRecord: """Exit the context manager.""" self.close() + def set_mode(self, mode): + """ + Set the mode for the recorder to be aware. The mode is set 'train' by default. + + Args: + mode (str): The mode to set, which should be 'train' or 'eval'. + + Raises: + ValueError: When the mode is not recognized. + + Examples: + >>> with SummaryRecord(log_dir="/opt/log", file_prefix="xxx_", file_suffix="_yyy") as summary_record: + >>> summary_record.set_mode('eval') + """ + mode_spec = 'train', 'eval' + if mode not in mode_spec: + raise ValueError(f'{repr(mode)} is not a recognized mode.') + self._mode = mode + + def add_value(self, plugin, name, value): + """ + Add value to be record later on. + + When the plugin is 'tensor', 'scalar', 'image' or 'histogram', + the name should be the tag name, and the value should be a Tensor. + + When the plugin plugin is 'graph', the value should be a GraphProto. + + When the plugin 'dataset_graph', 'train_lineage', 'eval_lineage', + or 'custom_lineage_data', the value should be a proto message. + + + Args: + plugin (str): The plugin for the value. + name (str): The name for the value. + value (Union[Tensor, GraphProto, TrainLineage, EvaluationLineage, DatasetGraph, UserDefinedInfo]): \ + The value to store. + + - GraphProto: The 'value' should be a serialized string this type when the plugin is 'graph'. + - Tensor: The 'value' should be this type when the plugin is 'scalar', 'image', 'tensor' or 'histogram'. + - TrainLineage: The 'value' should be this type when the plugin is 'train_lineage'. + - EvaluationLineage: The 'value' should be this type when the plugin is 'eval_lineage'. + - DatasetGraph: The 'value' should be this type when the plugin is 'dataset_graph'. + - UserDefinedInfo: The 'value' should be this type when the plugin is 'custom_lineage_data'. 
+ + Raises: + ValueError: When the name is not valid. + TypeError: When the value is not a Tensor. + + Examples: + >>> with SummaryRecord(log_dir="/opt/log", file_prefix="xxx_", file_suffix="_yyy") as summary_record: + >>> summary_record.add_value('scalar', 'loss', Tensor(0.1)) + """ + if plugin in ('tensor', 'scalar', 'image', 'histogram'): + if not name or not isinstance(name, str): + raise ValueError(f'{repr(name)} is not a valid tag name.') + if not isinstance(value, Tensor): + raise TypeError(f'Expect the value to be Tensor, but got {type(value).__name__}') + np_value = _check_to_numpy(plugin, value) + self._data_pool[plugin].append(dict(tag=name, mode=self._mode, value=np_value)) + + elif plugin in ('train_lineage', 'eval_lineage', 'dataset_graph', 'custom_lineage_data'): + _check_lineage_value(plugin, value) + self._data_pool[plugin].append(dict(mode=self._mode, value=value.SerializeToString())) + elif plugin == 'graph': + package_graph_event(value) + self._data_pool[plugin].append(dict(mode=self._mode, value=value)) + else: + raise ValueError(f'No such plugin of {repr(plugin)}') + def record(self, step, train_network=None): """ Record the summary. @@ -150,12 +221,12 @@ class SummaryRecord: step (int): Represents training step number. train_network (Cell): The network that called the callback. + Returns: + bool, whether the record process is successful or not. + Examples: >>> with SummaryRecord(log_dir="/opt/log", file_prefix="xxx_", file_suffix="_yyy") as summary_record: >>> summary_record.record(step=2) - - Returns: - bool, whether the record process is successful or not. 
""" logger.info("SummaryRecord step is %r.", step) if self._closed: @@ -164,10 +235,6 @@ class SummaryRecord: if not isinstance(step, int) or isinstance(step, bool): raise ValueError("`step` should be int") # Set the current summary of train step - if not self._event_writer: - self._event_writer = self._init_event_writer() - logger.warning('SummaryRecord should be used as context manager for a with statement.') - if self.network is not None and not self.has_graph: graph_proto = self.network.get_func_graph_proto() if graph_proto is None and train_network is not None: @@ -175,39 +242,48 @@ class SummaryRecord: if graph_proto is None: logger.error("Failed to get proto for graph") else: - self._event_writer.write(package_graph_event(graph_proto).SerializeToString()) + self._event_writer.write({'graph': [{'step': step, 'value': graph_proto}]}) self.has_graph = True if not _summary_tensor_cache: return True - data = _get_summary_tensor_data() - if not data: - logger.error("The step(%r) does not have record data.", step) - return False - if self.queue_max_size > 0 and len(data) > self.queue_max_size: - logger.error("The size of data record is %r, which is greater than queue_max_size %r.", len(data), - self.queue_max_size) - - # process the data - result = self._data_convert(data) - if not result: - logger.error("The step(%r) summary data is invalid.", step) - return False - self._event_writer.write((result, step)) - logger.debug("Send the summary data to scheduler for saving, step = %d", step) + if self._mode == 'train': + self._add_summary_tensor_data() + + self._event_writer.write(self._consume_data_pool(step)) return True + def _add_summary_tensor_data(self): + summary_data = _get_summary_tensor_data() + if not summary_data: + logger.debug(f'No summary data bubbled from the network.') + for name, tensor in summary_data.items(): + tag, plugin = SummaryRecord._parse_from(name) + if (tag, plugin) == (None, None): + logger.warning("The name(%r) is invalid, expected 
'TAG[:TYPE]'.", name) + else: + self.add_value(plugin.lower(), tag, tensor) + + def _consume_data_pool(self, step): + try: + for values in self._data_pool.values(): + for value in values: + value['step'] = step + return self._data_pool + finally: + self._data_pool = _dictlist() + @property def log_dir(self): """ Get the full path of the log file. + Returns: + str, the full path of log file. + Examples: >>> with SummaryRecord(log_dir="/opt/log", file_prefix="xxx_", file_suffix="_yyy") as summary_record: >>> print(summary_record.log_dir) - - Returns: - String, the full path of log file. """ return self.full_file_name @@ -236,46 +312,19 @@ class SummaryRecord: """ if not self._closed and self._event_writer: # event writer flush and close + logger.info('Please wait it may take quite some time to finish writing and closing.') self._event_writer.close() self._closed = True def __del__(self) -> None: self.close() - def _data_convert(self, summary): - """Convert the data.""" - # convert the summary to numpy - result = [] - for name, data in summary.items(): - # confirm the data is valid - summary_tag, summary_type = SummaryRecord._parse_from(name) - if summary_tag is None: - logger.error("The data type is invalid, name = %r, tensor = %r", name, data) - return None - if isinstance(data, Tensor): - result.append({'name': summary_tag, 'data': data.asnumpy(), '_type': summary_type}) - else: - logger.error("The data type is invalid, name = %r, tensor = %r", name, data) - return None - - return result - @staticmethod def _parse_from(name: str = None): - """ - Parse the tag and type from name. - - Args: - name (str): Format: TAG[:TYPE]. - - Returns: - Tuple, (summary_tag, summary_type). 
- """ - if name is None: - logger.error("The name is None") + """Parse the tag and type from name.""" + if not isinstance(name, str): return None, None match = re.match(r'(.+)\[:(.+)\]', name) if match: return match.groups() - logger.error("The name(%r) format is invalid, expected 'TAG[:TYPE]'.", name) return None, None diff --git a/model_zoo/README.md b/model_zoo/README.md new file mode 100644 index 0000000000..24be683b22 --- /dev/null +++ b/model_zoo/README.md @@ -0,0 +1,306 @@ +![](https://www.mindspore.cn/static/img/logo.a3e472c9.png) + + +# Welcome to the Model Zoo for MindSpore + +In order to facilitate developers to enjoy the benefits of MindSpore framework and Huawei chips, we will continue to add typical networks and models . If you have needs for the model zoo, you can file an issue on [gitee](https://gitee.com/mindspore/mindspore/issues) or [MindSpore](https://bbs.huaweicloud.com/forum/forum-1076-1.html), We will consider it in time. + +- SOTA models using the latest MindSpore APIs + +- The best benefits from MindSpore and Huawei chips + +- Officially maintained and supported + + + +# Table of Contents + +- [Models and Implementations](#models-and-implementations) + - [Computer Vision](#computer-vision) + - [Image Classification](#image-classification) + - [GoogleNet](#googlenet) + - [ResNet50[benchmark]](#resnet50) + - [ResNet101](#resnet101) + - [VGG16](#vgg16) + - [AlexNet](#alexnet) + - [LeNet](#lenet) + - [Object Detection and Segmentation](#object-detection-and-segmentation) + - [YoloV3](#yolov3) + - [MobileNetV2](#mobilenetv2) + - [MobileNetV3](#mobilenetv3) + - [SSD](#ssd) + - [Natural Language Processing](#natural-language-processing) + - [BERT](#bert) + - [MASS](#mass) + + +# Announcements +| Date | News | +| ------------ | ------------------------------------------------------------ | +| May 31, 2020 | Support [MindSpore v0.3.0-alpha](https://www.mindspore.cn/news/newschildren?id=215) | + + +# Models and Implementations + +## Computer Vision + 
+### Image Classification + +#### [GoogleNet](#table-of-contents) +| Parameters | GoogleNet | +| -------------------------- | ------------------------------------------------------------ | +| Published Year | 2014 | +| Paper | [Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842) | +| Resource | Ascend 910 | +| Features | • Mixed Precision • Multi-GPU training support with Ascend | +| MindSpore Version | 0.3.0-alpha | +| Dataset | CIFAR-10 | +| Training Parameters | epoch=125, batch_size = 128, lr=0.1 | +| Optimizer | Momentum | +| Loss Function | Softmax Cross Entropy | +| Accuracy | 1pc: 93.4%; 8pcs: 92.17% | +| Speed | 79 ms/Step | +| Loss | 0.0016 | +| Params (M) | 6.8 | +| Checkpoint for Fine tuning | 43.07M (.ckpt file) | +| Model for inference | 21.50M (.onnx file), 21.60M(.geir file) | +| Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/googlenet | + +#### [ResNet50](#table-of-contents) + +| Parameters | ResNet50 | +| -------------------------- | -------- | +| Published Year | | +| Paper | | +| Resource | | +| Features | | +| MindSpore Version | | +| Dataset | | +| Training Parameters | | +| Optimizer | | +| Loss Function | | +| Accuracy | | +| Speed | | +| Loss | | +| Params (M) | | +| Checkpoint for Fine tuning | | +| Model for inference | | +| Scripts | | + +#### [ResNet101](#table-of-contents) + +| Parameters | ResNet101 | +| -------------------------- | --------- | +| Published Year | | +| Paper | | +| Resource | | +| Features | | +| MindSpore Version | | +| Dataset | | +| Training Parameters | | +| Optimizer | | +| Loss Function | | +| Accuracy | | +| Speed | | +| Loss | | +| Params (M) | | +| Checkpoint for Fine tuning | | +| Model for inference | | +| Scripts | | + +#### [VGG16](#table-of-contents) + +| Parameters | VGG16 | +| -------------------------- | ----- | +| Published Year | | +| Paper | | +| Resource | | +| Features | | +| MindSpore Version | | +| Dataset | | +| Training Parameters | | +| Optimizer | | +| 
Loss Function | | +| Accuracy | | +| Speed | | +| Loss | | +| Params (M) | | +| Checkpoint for Fine tuning | | +| Model for inference | | +| Scripts | | + +#### [AlexNet](#table-of-contents) + +| Parameters | AlexNet | +| -------------------------- | ------- | +| Published Year | | +| Paper | | +| Resource | | +| Features | | +| MindSpore Version | | +| Dataset | | +| Training Parameters | | +| Optimizer | | +| Loss Function | | +| Accuracy | | +| Speed | | +| Loss | | +| Params (M) | | +| Checkpoint for Fine tuning | | +| Model for inference | | +| Scripts | | + +#### [LeNet](#table-of-contents) + +| Parameters | LeNet | +| -------------------------- | ----- | +| Published Year | | +| Paper | | +| Resource | | +| Features | | +| MindSpore Version | | +| Dataset | | +| Training Parameters | | +| Optimizer | | +| Loss Function | | +| Accuracy | | +| Speed | | +| Loss | | +| Params (M) | | +| Checkpoint for Fine tuning | | +| Model for inference | | +| Scripts | | + +### Object Detection and Segmentation + +#### [YoloV3](#table-of-contents) + +| Parameters | YoLoV3 | +| -------------------------------- | ------ | +| Published Year | | +| Paper | | +| Resource | | +| Features | | +| MindSpore Version | | +| Dataset | | +| Training Parameters | | +| Optimizer | | +| Loss Function | | +| Mean Average Precision (mAP@0.5) | | +| Speed | | +| Loss | | +| Params (M) | | +| Checkpoint for Fine tuning | | +| Model for inference | | +| Scripts | | + +#### [MobileNetV2](#table-of-contents) + +| Parameters | MobileNetV2 | +| -------------------------------- | ----------- | +| Published Year | | +| Paper | | +| Resource | | +| Features | | +| MindSpore Version | | +| Dataset | | +| Training Parameters | | +| Optimizer | | +| Loss Function | | +| Mean Average Precision (mAP@0.5) | | +| Speed | | +| Loss | | +| Params (M) | | +| Checkpoint for Fine tuning | | +| Model for inference | | +| Scripts | | + +#### [MobileNetV3](#table-of-contents) + +| Parameters | MobileNetV3 | +| 
-------------------------------- | ----------- | +| Published Year | | +| Paper | | +| Resource | | +| Features | | +| MindSpore Version | | +| Dataset | | +| Training Parameters | | +| Optimizer | | +| Loss Function | | +| Mean Average Precision (mAP@0.5) | | +| Speed | | +| Loss | | +| Params (M) | | +| Checkpoint for Fine tuning | | +| Model for inference | | +| Scripts | | + +#### [SSD](#table-of-contents) + +| Parameters | SSD | +| -------------------------------- | ---- | +| Published Year | | +| Paper | | +| Resource | | +| Features | | +| MindSpore Version | | +| Dataset | | +| Training Parameters | | +| Optimizer | | +| Loss Function | | +| Mean Average Precision (mAP@0.5) | | +| Speed | | +| Loss | | +| Params (M) | | +| Checkpoint for Fine tuning | | +| Model for inference | | +| Scripts | | + +## Natural Language Processing + +#### [BERT](#table-of-contents) + +| Parameters | BERT | +| -------------------------- | ---- | +| Published Year | | +| Paper | | +| Resource | | +| Features | | +| MindSpore Version | | +| Dataset | | +| Training Parameters | | +| Optimizer | | +| Loss Function | | +| GLUE Score | | +| Speed | | +| Loss | | +| Params (M) | | +| Checkpoint for Fine tuning | | +| Model for inference | | +| Scripts | | + +#### [MASS](#table-of-contents) + +| Parameters | MASS | +| -------------------------- | ---- | +| Published Year | | +| Paper | | +| Resource | | +| Features | | +| MindSpore Version | | +| Dataset | | +| Training Parameters | | +| Optimizer | | +| Loss Function | | +| ROUGE Score | | +| Speed | | +| Loss | | +| Params (M) | | +| Checkpoint for Fine tuning | | +| Model for inference | | +| Scripts | | + +#### License + +[Apache License 2.0](https://github.com/mindspore-ai/mindspore/blob/master/LICENSE) diff --git a/model_zoo/Transformer/eval.py b/model_zoo/Transformer/eval.py index 26d00f1c58..5ced75ba33 100644 --- a/model_zoo/Transformer/eval.py +++ b/model_zoo/Transformer/eval.py @@ -78,9 +78,8 @@ def load_weights(model_path): 
weights = {} for msname in ms_ckpt: - infer_name = msname.replace("transformer.transformer.", "") + infer_name = msname if "tfm_decoder" in msname: - infer_name = infer_name.replace(".layers.", ".layer") infer_name = "tfm_decoder.decoder." + infer_name if is_npz: weights[infer_name] = ms_ckpt[msname] diff --git a/model_zoo/Transformer/src/transformer_model.py b/model_zoo/Transformer/src/transformer_model.py index 17b5127dca..409f8965eb 100644 --- a/model_zoo/Transformer/src/transformer_model.py +++ b/model_zoo/Transformer/src/transformer_model.py @@ -20,11 +20,11 @@ import numpy as np import mindspore.common.dtype as mstype import mindspore.nn as nn import mindspore.ops.functional as F -from mindspore.common.initializer import TruncatedNormal, initializer from mindspore.ops import operations as P from mindspore.common.tensor import Tensor from mindspore.common.parameter import Parameter from .beam_search import BeamSearchDecoder, TileBeam +from .weight_init import normal_weight, weight_variable class TransformerConfig: """ @@ -118,9 +118,7 @@ class EmbeddingLookup(nn.Cell): self.vocab_size = vocab_size self.embedding_size = embedding_size self.use_one_hot_embeddings = use_one_hot_embeddings - self.embedding_table = Parameter(initializer - (TruncatedNormal(initializer_range), - [vocab_size, embedding_size]), + self.embedding_table = Parameter(normal_weight([vocab_size, embedding_size], embedding_size), name='embedding_table') self.expand = P.ExpandDims() self.shape_flat = (-1,) @@ -138,8 +136,7 @@ class EmbeddingLookup(nn.Cell): flat_ids = self.reshape(input_ids, self.shape_flat) if self.use_one_hot_embeddings: one_hot_ids = self.one_hot(flat_ids, self.vocab_size, self.on_value, self.off_value) - output_for_reshape = self.array_mul( - one_hot_ids, self.embedding_table) + output_for_reshape = self.array_mul(one_hot_ids, self.embedding_table) else: output_for_reshape = self.gather(self.embedding_table, flat_ids, 0) @@ -329,22 +326,22 @@ class 
MultiheadAttention(nn.Cell): units, activation=query_act, has_bias=False, - weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) + weight_init=weight_variable([units, from_tensor_width])).to_float(compute_type) self.key_layer = nn.Dense(to_tensor_width, units, activation=key_act, has_bias=False, - weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) + weight_init=weight_variable([units, to_tensor_width])).to_float(compute_type) self.value_layer = nn.Dense(to_tensor_width, units, activation=value_act, has_bias=False, - weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) + weight_init=weight_variable([units, to_tensor_width])).to_float(compute_type) self.out_layer = nn.Dense(units, out_tensor_width, activation=out_act, has_bias=False, - weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) + weight_init=weight_variable([out_tensor_width, units])).to_float(compute_type) self.shape_from = (batch_size, from_seq_length, num_attention_heads, size_per_head) self.shape_to = (batch_size, to_seq_length, num_attention_heads, size_per_head) @@ -518,10 +515,10 @@ class FeedForward(nn.Cell): self.conv1 = nn.Dense(in_channels, hidden_size, activation=hidden_act, - weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) + weight_init=weight_variable([hidden_size, in_channels])).to_float(compute_type) self.conv2 = nn.Dense(hidden_size, out_channels, - weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) + weight_init=weight_variable([out_channels, hidden_size])).to_float(compute_type) self.preprocess = LayerPreprocess(in_channels=in_channels) self.postprocess = LayerPostprocess(dropout_prob=hidden_dropout_prob) @@ -784,95 +781,22 @@ class TransformerDecoder(nn.Cell): super(TransformerDecoder, self).__init__() self.num_hidden_layers = num_hidden_layers - # wait to be supported - # layers = [] - # for _ in range(num_hidden_layers): - # layer = DecoderCell(batch_size=batch_size, - # 
hidden_size=hidden_size, - # seq_length=seq_length, - # enc_seq_length=enc_seq_length, - # num_attention_heads=num_attention_heads, - # intermediate_size=intermediate_size, - # attention_probs_dropout_prob=attention_probs_dropout_prob, - # use_one_hot_embeddings=use_one_hot_embeddings, - # initializer_range=initializer_range, - # hidden_dropout_prob=hidden_dropout_prob, - # hidden_act=hidden_act, - # compute_type=compute_type) - # layers.append(layer) - # self.layers = nn.CellList(layers) - self.layer0 = DecoderCell(batch_size=batch_size, - hidden_size=hidden_size, - seq_length=seq_length, - enc_seq_length=enc_seq_length, - num_attention_heads=num_attention_heads, - intermediate_size=intermediate_size, - attention_probs_dropout_prob=attention_probs_dropout_prob, - use_one_hot_embeddings=use_one_hot_embeddings, - initializer_range=initializer_range, - hidden_dropout_prob=hidden_dropout_prob, - hidden_act=hidden_act, - compute_type=compute_type) - self.layer1 = DecoderCell(batch_size=batch_size, - hidden_size=hidden_size, - seq_length=seq_length, - enc_seq_length=enc_seq_length, - num_attention_heads=num_attention_heads, - intermediate_size=intermediate_size, - attention_probs_dropout_prob=attention_probs_dropout_prob, - use_one_hot_embeddings=use_one_hot_embeddings, - initializer_range=initializer_range, - hidden_dropout_prob=hidden_dropout_prob, - hidden_act=hidden_act, - compute_type=compute_type) - self.layer2 = DecoderCell(batch_size=batch_size, - hidden_size=hidden_size, - seq_length=seq_length, - enc_seq_length=enc_seq_length, - num_attention_heads=num_attention_heads, - intermediate_size=intermediate_size, - attention_probs_dropout_prob=attention_probs_dropout_prob, - use_one_hot_embeddings=use_one_hot_embeddings, - initializer_range=initializer_range, - hidden_dropout_prob=hidden_dropout_prob, - hidden_act=hidden_act, - compute_type=compute_type) - self.layer3 = DecoderCell(batch_size=batch_size, - hidden_size=hidden_size, - seq_length=seq_length, - 
enc_seq_length=enc_seq_length, - num_attention_heads=num_attention_heads, - intermediate_size=intermediate_size, - attention_probs_dropout_prob=attention_probs_dropout_prob, - use_one_hot_embeddings=use_one_hot_embeddings, - initializer_range=initializer_range, - hidden_dropout_prob=hidden_dropout_prob, - hidden_act=hidden_act, - compute_type=compute_type) - self.layer4 = DecoderCell(batch_size=batch_size, - hidden_size=hidden_size, - seq_length=seq_length, - enc_seq_length=enc_seq_length, - num_attention_heads=num_attention_heads, - intermediate_size=intermediate_size, - attention_probs_dropout_prob=attention_probs_dropout_prob, - use_one_hot_embeddings=use_one_hot_embeddings, - initializer_range=initializer_range, - hidden_dropout_prob=hidden_dropout_prob, - hidden_act=hidden_act, - compute_type=compute_type) - self.layer5 = DecoderCell(batch_size=batch_size, - hidden_size=hidden_size, - seq_length=seq_length, - enc_seq_length=enc_seq_length, - num_attention_heads=num_attention_heads, - intermediate_size=intermediate_size, - attention_probs_dropout_prob=attention_probs_dropout_prob, - use_one_hot_embeddings=use_one_hot_embeddings, - initializer_range=initializer_range, - hidden_dropout_prob=hidden_dropout_prob, - hidden_act=hidden_act, - compute_type=compute_type) + layers = [] + for _ in range(num_hidden_layers): + layer = DecoderCell(batch_size=batch_size, + hidden_size=hidden_size, + seq_length=seq_length, + enc_seq_length=enc_seq_length, + num_attention_heads=num_attention_heads, + intermediate_size=intermediate_size, + attention_probs_dropout_prob=attention_probs_dropout_prob, + use_one_hot_embeddings=use_one_hot_embeddings, + initializer_range=initializer_range, + hidden_dropout_prob=hidden_dropout_prob, + hidden_act=hidden_act, + compute_type=compute_type) + layers.append(layer) + self.layers = nn.CellList(layers) self.layer_preprocess = LayerPreprocess(in_channels=hidden_size) @@ -883,16 +807,9 @@ class TransformerDecoder(nn.Cell): def construct(self, 
input_tensor, attention_mask, enc_states, enc_attention_mask): prev_output = self.reshape(input_tensor, self.shape) - # wait to be supported - # for layer_module in self.layers: - # layer_output = layer_module(prev_output, attention_mask, enc_states, enc_attention_mask) - # prev_output = layer_output - prev_output = self.layer0(prev_output, attention_mask, enc_states, enc_attention_mask) - prev_output = self.layer1(prev_output, attention_mask, enc_states, enc_attention_mask) - prev_output = self.layer2(prev_output, attention_mask, enc_states, enc_attention_mask) - prev_output = self.layer3(prev_output, attention_mask, enc_states, enc_attention_mask) - prev_output = self.layer4(prev_output, attention_mask, enc_states, enc_attention_mask) - prev_output = self.layer5(prev_output, attention_mask, enc_states, enc_attention_mask) + for layer_module in self.layers: + layer_output = layer_module(prev_output, attention_mask, enc_states, enc_attention_mask) + prev_output = layer_output prev_output = self.layer_preprocess(prev_output) output = self.reshape(prev_output, self.out_shape) @@ -1108,7 +1025,13 @@ class TransformerModel(nn.Cell): embedding_size=self.embedding_size, use_one_hot_embeddings=use_one_hot_embeddings, initializer_range=config.initializer_range) - self.tfm_embedding_postprocessor = EmbeddingPostprocessor( + self.tfm_embedding_postprocessor_for_encoder = EmbeddingPostprocessor( + embedding_size=self.embedding_size, + use_one_hot_embeddings=use_one_hot_embeddings, + initializer_range=0.02, + max_position_embeddings=config.max_position_embeddings, + dropout_prob=config.hidden_dropout_prob) + self.tfm_embedding_postprocessor_for_decoder = EmbeddingPostprocessor( embedding_size=self.embedding_size, use_one_hot_embeddings=use_one_hot_embeddings, initializer_range=0.02, @@ -1171,7 +1094,7 @@ class TransformerModel(nn.Cell): hidden_act=config.hidden_act, compute_type=config.compute_type, embedding_lookup=self.tfm_embedding_lookup, - 
embedding_processor=self.tfm_embedding_postprocessor, + embedding_processor=self.tfm_embedding_postprocessor_for_decoder, projection=self.projection) self.tfm_decoder = BeamSearchDecoder( batch_size=config.batch_size, @@ -1195,15 +1118,14 @@ class TransformerModel(nn.Cell): ones = np.ones(shape=(self.seq_length, self.seq_length)) self.future_mask = Tensor(np.tril(ones), dtype=mstype.float32) else: - self.tile_beam = TileBeam( - beam_width=config.beam_width) + self.tile_beam = TileBeam(beam_width=config.beam_width) ones = np.ones(shape=(config.batch_size, config.max_decode_length)) self.encdec_mask = Tensor(ones, dtype=mstype.float32) def construct(self, source_ids, source_mask, target_ids=None, target_mask=None): # process source sentence src_word_embeddings, embedding_tables = self.tfm_embedding_lookup(source_ids) - src_embedding_output = self.tfm_embedding_postprocessor(src_word_embeddings) + src_embedding_output = self.tfm_embedding_postprocessor_for_encoder(src_word_embeddings) # attention mask [batch_size, seq_length, seq_length] enc_attention_mask = self._create_attention_mask_from_input_mask(source_mask) # transformer encoder @@ -1213,7 +1135,7 @@ class TransformerModel(nn.Cell): if self.is_training: # process target sentence tgt_word_embeddings, _ = self.tfm_embedding_lookup(target_ids) - tgt_embedding_output = self.tfm_embedding_postprocessor(tgt_word_embeddings) + tgt_embedding_output = self.tfm_embedding_postprocessor_for_decoder(tgt_word_embeddings) # attention mask [batch_size, seq_length, seq_length] tgt_attention_mask = self._create_attention_mask_from_input_mask(target_mask) tgt_attention_mask = self.multiply(tgt_attention_mask, self.expand(self.future_mask, 0)) @@ -1223,15 +1145,14 @@ class TransformerModel(nn.Cell): encoder_output, enc_attention_mask) # calculate logits and log_probs log_probs = self.projection(decoder_output, embedding_tables) - return log_probs - - beam_encoder_output = self.tile_beam(encoder_output) + ret = log_probs + else: + 
beam_encoder_output = self.tile_beam(encoder_output) - enc_attention_mask = self.multiply( - enc_attention_mask[::, 0:1:1, ::], - self.expand(self.encdec_mask, -1)) + enc_attention_mask = self.multiply(enc_attention_mask[::, 0:1:1, ::], self.expand(self.encdec_mask, -1)) - beam_enc_attention_mask = self.tile_beam(enc_attention_mask) - beam_enc_attention_mask = self.cast_compute_type(beam_enc_attention_mask) - predicted_ids = self.tfm_decoder(beam_encoder_output, beam_enc_attention_mask) - return predicted_ids + beam_enc_attention_mask = self.tile_beam(enc_attention_mask) + beam_enc_attention_mask = self.cast_compute_type(beam_enc_attention_mask) + predicted_ids = self.tfm_decoder(beam_encoder_output, beam_enc_attention_mask) + ret = predicted_ids + return ret diff --git a/model_zoo/Transformer/train.py b/model_zoo/Transformer/train.py index 37165a6c20..23c0eb78fd 100644 --- a/model_zoo/Transformer/train.py +++ b/model_zoo/Transformer/train.py @@ -16,9 +16,10 @@ import time import argparse +import random +import numpy as np import mindspore.common.dtype as mstype -from mindspore.common.parameter import Parameter from mindspore.common.tensor import Tensor from mindspore.nn.optim import Adam from mindspore.train.model import Model @@ -26,6 +27,7 @@ from mindspore.train.loss_scale_manager import DynamicLossScaleManager from mindspore.train.callback import CheckpointConfig, ModelCheckpoint from mindspore.train.callback import Callback, TimeMonitor from mindspore.train.serialization import load_checkpoint, load_param_into_net +import mindspore.dataset.engine as de import mindspore.communication.management as D from mindspore.train.parallel_utils import ParallelMode from mindspore import context @@ -34,9 +36,12 @@ from src.transformer_for_train import TransformerTrainOneStepCell, TransformerNe TransformerTrainOneStepWithLossScaleCell from src.config import cfg, transformer_net_cfg from src.dataset import create_transformer_dataset -from src.weight_init import 
weight_variable, one_weight, zero_weight, normal_weight from src.lr_schedule import create_dynamic_lr +random_seed = 1 +random.seed(random_seed) +np.random.seed(random_seed) +de.config.set_seed(random_seed) def get_ms_timestamp(): t = time.time() @@ -108,7 +113,7 @@ def run_transformer_train(): parser = argparse_init() args, _ = parser.parse_known_args() context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args.device_id) - context.set_context(save_graphs=True, reserve_class_name_in_scope=False, enable_auto_mixed_precision=False) + context.set_context(reserve_class_name_in_scope=False, enable_auto_mixed_precision=False) if args.distribute == "true": device_num = args.device_num @@ -129,29 +134,15 @@ def run_transformer_train(): if args.checkpoint_path: parameter_dict = load_checkpoint(args.checkpoint_path) - else: - parameter_dict = {} - params = netwithloss.trainable_params() - for param in params: - name = param.name - value = param.default_input - if isinstance(value, Tensor): - if name.endswith(".gamma"): - parameter_dict[name] = Parameter(one_weight(value.asnumpy().shape), name=name) - elif name.endswith(".beta") or name.endswith(".bias"): - parameter_dict[name] = Parameter(zero_weight(value.asnumpy().shape), name=name) - elif "embedding" in name: - parameter_dict[name] = Parameter(normal_weight(value.asnumpy().shape, - transformer_net_cfg.hidden_size), name=name) - else: - parameter_dict[name] = Parameter(weight_variable(value.asnumpy().shape), name=name) - load_param_into_net(netwithloss, parameter_dict) + load_param_into_net(netwithloss, parameter_dict) lr = Tensor(create_dynamic_lr(schedule="constant*rsqrt_hidden*linear_warmup*rsqrt_decay", training_steps=dataset.get_dataset_size()*args.epoch_size, learning_rate=cfg.lr_schedule.learning_rate, warmup_steps=cfg.lr_schedule.warmup_steps, - hidden_size=transformer_net_cfg.hidden_size), mstype.float32) + hidden_size=transformer_net_cfg.hidden_size, + 
start_decay_step=cfg.lr_schedule.start_decay_step, + min_lr=cfg.lr_schedule.min_lr), mstype.float32) optimizer = Adam(netwithloss.trainable_params(), lr) callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack()] diff --git a/model_zoo/__init__.py b/model_zoo/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/example/alexnet_cifar10/README.md b/model_zoo/alexnet/README.md similarity index 69% rename from example/alexnet_cifar10/README.md rename to model_zoo/alexnet/README.md index e6649e4055..1059e22aae 100644 --- a/example/alexnet_cifar10/README.md +++ b/model_zoo/alexnet/README.md @@ -2,7 +2,7 @@ ## Description -Training AlexNet with CIFAR-10 dataset in MindSpore. +Training AlexNet with dataset in MindSpore. This is the simple tutorial for training AlexNet in MindSpore. @@ -10,19 +10,19 @@ This is the simple tutorial for training AlexNet in MindSpore. - Install [MindSpore](https://www.mindspore.cn/install/en). -- Download the CIFAR-10 dataset, the directory structure is as follows: +- Download the dataset, the directory structure is as follows: ``` -├─cifar-10-batches-bin +├─10-batches-bin │ -└─cifar-10-verify-bin +└─10-verify-bin ``` ## Running the example ```python # train AlexNet, hyperparameter setting in config.py -python train.py --data_path cifar-10-batches-bin +python train.py --data_path 10-batches-bin ``` You will get the loss value of each step as following: @@ -38,8 +38,8 @@ epoch: 1 step: 1538, loss is 1.0221305 Then, evaluate AlexNet according to network model ```python -# evaluate AlexNet, 1 epoch training accuracy is up to 51.1%; 10 epoch training accuracy is up to 81.2% -python eval.py --data_path cifar-10-verify-bin --mode test --ckpt_path checkpoint_alexnet-1_1562.ckpt +# evaluate AlexNet +python eval.py --data_path 10-verify-bin --ckpt_path checkpoint_alexnet-1_1562.ckpt ``` ## Note diff --git a/example/alexnet_cifar10/eval.py b/model_zoo/alexnet/eval.py similarity index 84% rename from 
example/alexnet_cifar10/eval.py rename to model_zoo/alexnet/eval.py index 2efc6d15f6..4190451632 100644 --- a/example/alexnet_cifar10/eval.py +++ b/model_zoo/alexnet/eval.py @@ -19,11 +19,11 @@ python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt """ import argparse -from config import alexnet_cfg as cfg -from dataset import create_dataset +from src.config import alexnet_cfg as cfg +from src.dataset import create_dataset_mnist +from src.alexnet import AlexNet import mindspore.nn as nn from mindspore import context -from mindspore.model_zoo.alexnet import AlexNet from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.train import Model from mindspore.nn.metrics import Accuracy @@ -36,7 +36,7 @@ if __name__ == "__main__": parser.add_argument('--data_path', type=str, default="./", help='path where the dataset is saved') parser.add_argument('--ckpt_path', type=str, default="./ckpt", help='if is test, must provide\ path where the trained ckpt file') - parser.add_argument('--dataset_sink_mode', type=bool, default=False, help='dataset_sink_mode is False or True') + parser.add_argument('--dataset_sink_mode', type=bool, default=True, help='dataset_sink_mode is False or True') args = parser.parse_args() context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) @@ -45,14 +45,13 @@ if __name__ == "__main__": loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") repeat_size = cfg.epoch_size opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum) - model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()}) # test + model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()}) print("============== Starting Testing ==============") param_dict = load_checkpoint(args.ckpt_path) load_param_into_net(network, param_dict) - ds_eval = create_dataset(args.data_path, - cfg.batch_size, - 1, - "test") + ds_eval = create_dataset_mnist(args.data_path, + 
cfg.batch_size, + status="test") acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode) - print("============== Accuracy:{} ==============".format(acc)) + print("============== {} ==============".format(acc)) diff --git a/model_zoo/alexnet/src/__init__.py b/model_zoo/alexnet/src/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mindspore/model_zoo/alexnet.py b/model_zoo/alexnet/src/alexnet.py similarity index 95% rename from mindspore/model_zoo/alexnet.py rename to model_zoo/alexnet/src/alexnet.py index 7ad1c8e37b..c528ae39e9 100644 --- a/mindspore/model_zoo/alexnet.py +++ b/model_zoo/alexnet/src/alexnet.py @@ -36,10 +36,9 @@ class AlexNet(nn.Cell): """ Alexnet """ - def __init__(self, num_classes=10): + def __init__(self, num_classes=10, channel=3): super(AlexNet, self).__init__() - self.batch_size = 32 - self.conv1 = conv(3, 96, 11, stride=4) + self.conv1 = conv(channel, 96, 11, stride=4) self.conv2 = conv(96, 256, 5, pad_mode="same") self.conv3 = conv(256, 384, 3, pad_mode="same") self.conv4 = conv(384, 384, 3, pad_mode="same") diff --git a/example/alexnet_cifar10/config.py b/model_zoo/alexnet/src/config.py similarity index 100% rename from example/alexnet_cifar10/config.py rename to model_zoo/alexnet/src/config.py diff --git a/example/alexnet_cifar10/dataset.py b/model_zoo/alexnet/src/dataset.py similarity index 94% rename from example/alexnet_cifar10/dataset.py rename to model_zoo/alexnet/src/dataset.py index d62ed2852d..6e9f310bed 100644 --- a/example/alexnet_cifar10/dataset.py +++ b/model_zoo/alexnet/src/dataset.py @@ -16,14 +16,14 @@ Produce the dataset """ -from config import alexnet_cfg as cfg import mindspore.dataset as ds import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.vision.c_transforms as CV from mindspore.common import dtype as mstype +from .config import alexnet_cfg as cfg -def create_dataset(data_path, batch_size=32, repeat_size=1, status="train"): +def 
create_dataset_mnist(data_path, batch_size=32, repeat_size=1, status="train"): """ create dataset for train or test """ diff --git a/example/alexnet_cifar10/generator_lr.py b/model_zoo/alexnet/src/generator_lr.py similarity index 100% rename from example/alexnet_cifar10/generator_lr.py rename to model_zoo/alexnet/src/generator_lr.py diff --git a/example/alexnet_cifar10/train.py b/model_zoo/alexnet/train.py similarity index 85% rename from example/alexnet_cifar10/train.py rename to model_zoo/alexnet/train.py index 0a288ea1db..184290c26c 100644 --- a/example/alexnet_cifar10/train.py +++ b/model_zoo/alexnet/train.py @@ -19,15 +19,15 @@ python train.py --data_path /YourDataPath """ import argparse -from config import alexnet_cfg as cfg -from dataset import create_dataset -from generator_lr import get_lr +from src.config import alexnet_cfg as cfg +from src.dataset import create_dataset_mnist +from src.generator_lr import get_lr +from src.alexnet import AlexNet import mindspore.nn as nn from mindspore import context from mindspore import Tensor from mindspore.train import Model from mindspore.nn.metrics import Accuracy -from mindspore.model_zoo.alexnet import AlexNet from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor @@ -38,24 +38,22 @@ if __name__ == "__main__": parser.add_argument('--data_path', type=str, default="./", help='path where the dataset is saved') parser.add_argument('--ckpt_path', type=str, default="./ckpt", help='if is test, must provide\ path where the trained ckpt file') - parser.add_argument('--dataset_sink_mode', type=bool, default=False, help='dataset_sink_mode is False or True') + parser.add_argument('--dataset_sink_mode', type=bool, default=True, help='dataset_sink_mode is False or True') args = parser.parse_args() context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) + ds_train = create_dataset_mnist(args.data_path, cfg.batch_size, cfg.epoch_size) network = 
AlexNet(cfg.num_classes) loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") - lr = Tensor(get_lr(0, cfg.learning_rate, cfg.epoch_size, cfg.save_checkpoint_steps)) + lr = Tensor(get_lr(0, cfg.learning_rate, cfg.epoch_size, ds_train.get_dataset_size())) opt = nn.Momentum(network.trainable_params(), lr, cfg.momentum) - model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()}) # test - - print("============== Starting Training ==============") - ds_train = create_dataset(args.data_path, - cfg.batch_size, - cfg.epoch_size) + model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()}) time_cb = TimeMonitor(data_size=ds_train.get_dataset_size()) config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps, keep_checkpoint_max=cfg.keep_checkpoint_max) ckpoint_cb = ModelCheckpoint(prefix="checkpoint_alexnet", directory=args.ckpt_path, config=config_ck) + + print("============== Starting Training ==============") model.train(cfg.epoch_size, ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()], dataset_sink_mode=args.dataset_sink_mode) diff --git a/model_zoo/bert/evaluation.py b/model_zoo/bert/evaluation.py index c58bf836fd..4877b60cef 100644 --- a/model_zoo/bert/evaluation.py +++ b/model_zoo/bert/evaluation.py @@ -18,9 +18,11 @@ Bert evaluation script. 
""" import os +import argparse import numpy as np import mindspore.common.dtype as mstype from mindspore import context +from mindspore import log as logger from mindspore.common.tensor import Tensor import mindspore.dataset as de import mindspore.dataset.transforms.c_transforms as C @@ -105,8 +107,17 @@ def bert_predict(Evaluation): ''' prediction function ''' - devid = int(os.getenv('DEVICE_ID')) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=devid) + target = args_opt.device_target + if target == "Ascend": + devid = int(os.getenv('DEVICE_ID')) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=devid) + elif target == "GPU": + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + if bert_net_cfg.compute_type != mstype.float32: + logger.warning('GPU only support fp32 temporarily, run with fp32.') + bert_net_cfg.compute_type = mstype.float32 + else: + raise Exception("Target error, GPU or Ascend is supported.") dataset = get_dataset(bert_net_cfg.batch_size, 1) if cfg.use_crf: net_for_pretraining = Evaluation(bert_net_cfg, False, num_labels=len(tag_to_index), use_crf=True, @@ -141,12 +152,15 @@ def test_eval(): if cfg.task == "NER": print("Precision {:.6f} ".format(callback.TP / (callback.TP + callback.FP))) print("Recall {:.6f} ".format(callback.TP / (callback.TP + callback.FN))) - print("F1 {:.6f} ".format(2*callback.TP / (2*callback.TP + callback.FP + callback.FP))) + print("F1 {:.6f} ".format(2*callback.TP / (2*callback.TP + callback.FP + callback.FN))) else: print("acc_num {} , total_num {}, accuracy {:.6f}".format(callback.acc_num, callback.total_num, callback.acc_num / callback.total_num)) print("==============================================================") +parser = argparse.ArgumentParser(description='Bert eval') +parser.add_argument('--device_target', type=str, default='Ascend', help='Device target') +args_opt = parser.parse_args() if __name__ == "__main__": num_labels = 
cfg.num_labels test_eval() diff --git a/model_zoo/bert/finetune.py b/model_zoo/bert/finetune.py index 646f7cc73b..df16e3c91d 100644 --- a/model_zoo/bert/finetune.py +++ b/model_zoo/bert/finetune.py @@ -18,10 +18,12 @@ Bert finetune script. ''' import os +import argparse from src.utils import BertFinetuneCell, BertCLS, BertNER, BertSquad, BertSquadCell from src.finetune_config import cfg, bert_net_cfg, tag_to_index import mindspore.common.dtype as mstype from mindspore import context +from mindspore import log as logger import mindspore.dataset as de import mindspore.dataset.transforms.c_transforms as C from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell @@ -98,8 +100,17 @@ def test_train(): ''' finetune function ''' - devid = int(os.getenv('DEVICE_ID')) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=devid) + target = args_opt.device_target + if target == "Ascend": + devid = int(os.getenv('DEVICE_ID')) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=devid) + elif target == "GPU": + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + if bert_net_cfg.compute_type != mstype.float32: + logger.warning('GPU only support fp32 temporarily, run with fp32.') + bert_net_cfg.compute_type = mstype.float32 + else: + raise Exception("Target error, GPU or Ascend is supported.") #BertCLSTrain for classification #BertNERTrain for sequence labeling if cfg.task == 'NER': @@ -151,5 +162,9 @@ def test_train(): model = Model(netwithgrads) model.train(cfg.epoch_num, dataset, callbacks=[LossCallBack(), ckpoint_cb]) + +parser = argparse.ArgumentParser(description='Bert finetune') +parser.add_argument('--device_target', type=str, default='Ascend', help='Device target') +args_opt = parser.parse_args() if __name__ == "__main__": test_train() diff --git a/model_zoo/bert/pretrain_eval.py b/model_zoo/bert/pretrain_eval.py new file mode 100644 index 0000000000..5089d88459 --- /dev/null +++ 
b/model_zoo/bert/pretrain_eval.py @@ -0,0 +1,158 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +""" +Bert evaluation script. +""" + +import os +from src import BertModel, GetMaskedLMOutput +from src.evaluation_config import cfg, bert_net_cfg +import mindspore.common.dtype as mstype +from mindspore import context +from mindspore.common.tensor import Tensor +import mindspore.dataset as de +import mindspore.dataset.transforms.c_transforms as C +from mindspore.train.model import Model +from mindspore.train.serialization import load_checkpoint, load_param_into_net +import mindspore.nn as nn +from mindspore.nn.metrics import Metric +from mindspore.ops import operations as P +from mindspore.common.parameter import Parameter + +class myMetric(Metric): + ''' + Self-defined Metric as a callback. 
+ ''' + def __init__(self): + super(myMetric, self).__init__() + self.clear() + + def clear(self): + self.total_num = 0 + self.acc_num = 0 + + def update(self, *inputs): + total_num = self._convert_data(inputs[0]) + acc_num = self._convert_data(inputs[1]) + self.total_num = total_num + self.acc_num = acc_num + + def eval(self): + return self.acc_num/self.total_num + + +class GetLogProbs(nn.Cell): + ''' + Get MaskedLM prediction scores + ''' + def __init__(self, config): + super(GetLogProbs, self).__init__() + self.bert = BertModel(config, False) + self.cls1 = GetMaskedLMOutput(config) + + def construct(self, input_ids, input_mask, token_type_id, masked_pos): + sequence_output, _, embedding_table = self.bert(input_ids, token_type_id, input_mask) + prediction_scores = self.cls1(sequence_output, embedding_table, masked_pos) + return prediction_scores + + +class BertPretrainEva(nn.Cell): + ''' + Evaluate MaskedLM prediction scores + ''' + def __init__(self, config): + super(BertPretrainEva, self).__init__() + self.bert = GetLogProbs(config) + self.argmax = P.Argmax(axis=-1, output_type=mstype.int32) + self.equal = P.Equal() + self.mean = P.ReduceMean() + self.sum = P.ReduceSum() + self.total = Parameter(Tensor([0], mstype.float32), name='total') + self.acc = Parameter(Tensor([0], mstype.float32), name='acc') + self.reshape = P.Reshape() + self.shape = P.Shape() + self.cast = P.Cast() + + + def construct(self, input_ids, input_mask, token_type_id, masked_pos, masked_ids, masked_weights, nsp_label): + bs, _ = self.shape(input_ids) + probs = self.bert(input_ids, input_mask, token_type_id, masked_pos) + index = self.argmax(probs) + index = self.reshape(index, (bs, -1)) + eval_acc = self.equal(index, masked_ids) + eval_acc1 = self.cast(eval_acc, mstype.float32) + real_acc = eval_acc1 * masked_weights + acc = self.sum(real_acc) + total = self.sum(masked_weights) + self.total += total + self.acc += acc + return acc, self.total, self.acc + + +def 
get_enwiki_512_dataset(batch_size=1, repeat_count=1, distribute_file=''): + ''' + Get enwiki seq_length=512 dataset + ''' + ds = de.TFRecordDataset([cfg.data_file], cfg.schema_file, columns_list=["input_ids", "input_mask", "segment_ids", + "masked_lm_positions", "masked_lm_ids", + "masked_lm_weights", + "next_sentence_labels"]) + type_cast_op = C.TypeCast(mstype.int32) + ds = ds.map(input_columns="segment_ids", operations=type_cast_op) + ds = ds.map(input_columns="input_mask", operations=type_cast_op) + ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) + ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) + ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) + ds = ds.repeat(repeat_count) + + # apply batch operations + ds = ds.batch(batch_size, drop_remainder=True) + return ds + + +def bert_predict(): + ''' + Predict function + ''' + devid = int(os.getenv('DEVICE_ID')) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=devid) + dataset = get_enwiki_512_dataset(bert_net_cfg.batch_size, 1) + net_for_pretraining = BertPretrainEva(bert_net_cfg) + net_for_pretraining.set_train(False) + param_dict = load_checkpoint(cfg.finetune_ckpt) + load_param_into_net(net_for_pretraining, param_dict) + model = Model(net_for_pretraining) + return model, dataset, net_for_pretraining + + +def MLM_eval(): + ''' + Evaluate function + ''' + _, dataset, net_for_pretraining = bert_predict() + net = Model(net_for_pretraining, eval_network=net_for_pretraining, eval_indexes=[0, 1, 2], + metrics={'name': myMetric()}) + res = net.eval(dataset, dataset_sink_mode=False) + print("==============================================================") + for _, v in res.items(): + print("Accuracy is: ") + print(v) + print("==============================================================") + + +if __name__ == "__main__": + MLM_eval() diff --git 
a/model_zoo/bert/run_pretrain.py b/model_zoo/bert/run_pretrain.py index 1a267b93ff..65768946c1 100644 --- a/model_zoo/bert/run_pretrain.py +++ b/model_zoo/bert/run_pretrain.py @@ -19,7 +19,9 @@ python run_pretrain.py import os import argparse +import numpy import mindspore.communication.management as D +import mindspore.common.dtype as mstype from mindspore import context from mindspore.train.model import Model from mindspore.train.parallel_utils import ParallelMode @@ -27,6 +29,7 @@ from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell from mindspore.train.callback import Callback, ModelCheckpoint, CheckpointConfig, TimeMonitor from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.nn.optim import Lamb, Momentum, AdamWeightDecayDynamicLR +from mindspore import log as logger from src import BertNetworkWithLoss, BertTrainOneStepCell, BertTrainOneStepWithLossScaleCell from src.dataset import create_bert_dataset from src.config import cfg, bert_net_cfg @@ -54,6 +57,8 @@ class LossCallBack(Callback): def run_pretrain(): """pre-train bert_clue""" parser = argparse.ArgumentParser(description='bert pre_training') + parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'], + help='device where the code will be implemented. 
(Default: Ascend)') parser.add_argument("--distribute", type=str, default="false", help="Run distribute, default is false.") parser.add_argument("--epoch_size", type=int, default="1", help="Epoch size, default is 1.") parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") @@ -63,41 +68,64 @@ def run_pretrain(): parser.add_argument("--do_shuffle", type=str, default="true", help="Enable shuffle for dataset, default is true.") parser.add_argument("--enable_data_sink", type=str, default="true", help="Enable data sink, default is true.") parser.add_argument("--data_sink_steps", type=int, default="1", help="Sink steps for each epoch, default is 1.") - parser.add_argument("--checkpoint_path", type=str, default="", help="Checkpoint file path") + parser.add_argument("--save_checkpoint_path", type=str, default="", help="Save checkpoint path") + parser.add_argument("--load_checkpoint_path", type=str, default="", help="Load checkpoint file path") parser.add_argument("--save_checkpoint_steps", type=int, default=1000, help="Save checkpoint steps, " "default is 1000.") + parser.add_argument("--train_steps", type=int, default=-1, help="Training Steps, default is -1, " + "meaning run all steps according to epoch number.") parser.add_argument("--save_checkpoint_num", type=int, default=1, help="Save checkpoint numbers, default is 1.") parser.add_argument("--data_dir", type=str, default="", help="Data path, it is better to use absolute path") parser.add_argument("--schema_dir", type=str, default="", help="Schema path, it is better to use absolute path") args_opt = parser.parse_args() - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) + context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, device_id=args_opt.device_id) context.set_context(reserve_class_name_in_scope=False) - + context.set_context(variable_memory_max_size="30GB") + ckpt_save_dir = 
args_opt.save_checkpoint_path if args_opt.distribute == "true": - device_num = args_opt.device_num + if args_opt.device_target == 'Ascend': + D.init('hccl') + device_num = args_opt.device_num + rank = args_opt.device_id % device_num + else: + D.init('nccl') + device_num = D.get_group_size() + rank = D.get_rank() + ckpt_save_dir = args_opt.save_checkpoint_path + 'ckpt_' + str(rank) + '/' + context.reset_auto_parallel_context() context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, device_num=device_num) from mindspore.parallel._auto_parallel_context import auto_parallel_context if bert_net_cfg.num_hidden_layers == 12: - auto_parallel_context().set_all_reduce_fusion_split_indices([28, 55, 82, 109, 136, 163, 190, 205]) + if bert_net_cfg.use_relative_positions: + auto_parallel_context().set_all_reduce_fusion_split_indices([29, 58, 87, 116, 145, 174, 203, 217]) + else: + auto_parallel_context().set_all_reduce_fusion_split_indices([28, 55, 82, 109, 136, 163, 190, 205]) elif bert_net_cfg.num_hidden_layers == 24: - auto_parallel_context().set_all_reduce_fusion_split_indices([38, 93, 148, 203, 258, 313, 368, 397]) - D.init() - rank = args_opt.device_id % device_num + if bert_net_cfg.use_relative_positions: + auto_parallel_context().set_all_reduce_fusion_split_indices([30, 90, 150, 210, 270, 330, 390, 421]) + else: + auto_parallel_context().set_all_reduce_fusion_split_indices([38, 93, 148, 203, 258, 313, 368, 397]) else: rank = 0 device_num = 1 + if args_opt.device_target == 'GPU' and bert_net_cfg.compute_type != mstype.float32: + logger.warning('Gpu only support fp32 temporarily, run with fp32.') + bert_net_cfg.compute_type = mstype.float32 + + ds, new_repeat_count = create_bert_dataset(args_opt.epoch_size, device_num, rank, args_opt.do_shuffle, args_opt.enable_data_sink, args_opt.data_sink_steps, args_opt.data_dir, args_opt.schema_dir) - + if args_opt.train_steps > 0: + new_repeat_count = min(new_repeat_count, args_opt.train_steps // 
args_opt.data_sink_steps) netwithloss = BertNetworkWithLoss(bert_net_cfg, True) if cfg.optimizer == 'Lamb': - optimizer = Lamb(netwithloss.trainable_params(), decay_steps=ds.get_dataset_size() * ds.get_repeat_count(), + optimizer = Lamb(netwithloss.trainable_params(), decay_steps=ds.get_dataset_size() * new_repeat_count, start_learning_rate=cfg.Lamb.start_learning_rate, end_learning_rate=cfg.Lamb.end_learning_rate, power=cfg.Lamb.power, warmup_steps=cfg.Lamb.warmup_steps, weight_decay=cfg.Lamb.weight_decay, eps=cfg.Lamb.eps) @@ -106,7 +134,7 @@ def run_pretrain(): momentum=cfg.Momentum.momentum) elif cfg.optimizer == 'AdamWeightDecayDynamicLR': optimizer = AdamWeightDecayDynamicLR(netwithloss.trainable_params(), - decay_steps=ds.get_dataset_size() * ds.get_repeat_count(), + decay_steps=ds.get_dataset_size() * new_repeat_count, learning_rate=cfg.AdamWeightDecayDynamicLR.learning_rate, end_learning_rate=cfg.AdamWeightDecayDynamicLR.end_learning_rate, power=cfg.AdamWeightDecayDynamicLR.power, @@ -120,11 +148,11 @@ def run_pretrain(): if args_opt.enable_save_ckpt == "true": config_ck = CheckpointConfig(save_checkpoint_steps=args_opt.save_checkpoint_steps, keep_checkpoint_max=args_opt.save_checkpoint_num) - ckpoint_cb = ModelCheckpoint(prefix='checkpoint_bert', config=config_ck) + ckpoint_cb = ModelCheckpoint(prefix='checkpoint_bert', directory=ckpt_save_dir, config=config_ck) callback.append(ckpoint_cb) - if args_opt.checkpoint_path: - param_dict = load_checkpoint(args_opt.checkpoint_path) + if args_opt.load_checkpoint_path: + param_dict = load_checkpoint(args_opt.load_checkpoint_path) load_param_into_net(netwithloss, param_dict) if args_opt.enable_lossscale == "true": @@ -139,4 +167,5 @@ def run_pretrain(): model = Model(netwithgrads) model.train(new_repeat_count, ds, callbacks=callback, dataset_sink_mode=(args_opt.enable_data_sink == "true")) if __name__ == '__main__': + numpy.random.seed(0) run_pretrain() diff --git 
a/model_zoo/bert/scripts/run_distribute_pretrain.sh b/model_zoo/bert/scripts/run_distribute_pretrain.sh index 1d77ff8119..5a9f8735aa 100644 --- a/model_zoo/bert/scripts/run_distribute_pretrain.sh +++ b/model_zoo/bert/scripts/run_distribute_pretrain.sh @@ -64,7 +64,7 @@ do --do_shuffle="true" \ --enable_data_sink="true" \ --data_sink_steps=100 \ - --checkpoint_path="" \ + --load_checkpoint_path="" \ --save_checkpoint_steps=10000 \ --save_checkpoint_num=1 \ --data_dir=$DATA_DIR \ diff --git a/model_zoo/bert/scripts/run_distribute_pretrain_for_gpu.sh b/model_zoo/bert/scripts/run_distribute_pretrain_for_gpu.sh new file mode 100644 index 0000000000..8deff766b9 --- /dev/null +++ b/model_zoo/bert/scripts/run_distribute_pretrain_for_gpu.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the scipt as: " +echo "bash run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR" +echo "for example: bash run_distribute_pretrain.sh 8 40 /path/zh-wiki/ /path/Schema.json" +echo "It is better to use absolute path." 
+echo "==============================================================================================================" + +RANK_SIZE=$1 +EPOCH_SIZE=$2 +DATA_DIR=$3 +SCHEMA_DIR=$4 + +mpirun --allow-run-as-root -n $RANK_SIZE \ + python run_pretrain.py \ + --device_target="GPU" \ + --distribute="true" \ + --epoch_size=$EPOCH_SIZE \ + --enable_save_ckpt="true" \ + --enable_lossscale="false" \ + --do_shuffle="true" \ + --enable_data_sink="true" \ + --data_sink_steps=1 \ + --load_checkpoint_path="" \ + --save_checkpoint_steps=10000 \ + --save_checkpoint_num=1 \ + --data_dir=$DATA_DIR \ + --schema_dir=$SCHEMA_DIR > log.txt 2>&1 & + diff --git a/model_zoo/bert/scripts/run_standalone_pretrain.sh b/model_zoo/bert/scripts/run_standalone_pretrain.sh index 438dda58c3..3cd9545f7f 100644 --- a/model_zoo/bert/scripts/run_standalone_pretrain.sh +++ b/model_zoo/bert/scripts/run_standalone_pretrain.sh @@ -37,8 +37,8 @@ python run_pretrain.py \ --enable_lossscale="true" \ --do_shuffle="true" \ --enable_data_sink="true" \ - --data_sink_steps=100 \ - --checkpoint_path="" \ + --data_sink_steps=1 \ + --load_checkpoint_path="" \ --save_checkpoint_steps=10000 \ --save_checkpoint_num=1 \ --data_dir=$DATA_DIR \ diff --git a/model_zoo/bert/scripts/run_standalone_pretrain_for_gpu.sh b/model_zoo/bert/scripts/run_standalone_pretrain_for_gpu.sh new file mode 100644 index 0000000000..1e9f1ec3e7 --- /dev/null +++ b/model_zoo/bert/scripts/run_standalone_pretrain_for_gpu.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the scipt as: " +echo "bash run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR" +echo "for example: bash run_standalone_pretrain.sh 0 40 /path/zh-wiki/ /path/Schema.json" +echo "==============================================================================================================" + +DEVICE_ID=$1 +EPOCH_SIZE=$2 +DATA_DIR=$3 +SCHEMA_DIR=$4 + +export CUDA_VISIBLE_DEVICES=$DEVICE_ID + +mkdir -p ms_log +CUR_DIR=`pwd` +export GLOG_log_dir=${CUR_DIR}/ms_log +export GLOG_logtostderr=0 +python run_pretrain.py \ + --device_target="GPU" \ + --distribute="false" \ + --epoch_size=$EPOCH_SIZE \ + --enable_save_ckpt="true" \ + --enable_lossscale="false" \ + --do_shuffle="true" \ + --enable_data_sink="true" \ + --data_sink_steps=1 \ + --load_checkpoint_path="" \ + --save_checkpoint_path="" \ + --save_checkpoint_steps=10000 \ + --save_checkpoint_num=1 \ + --data_dir=$DATA_DIR \ + --schema_dir=$SCHEMA_DIR > log.txt 2>&1 & diff --git a/model_zoo/bert/src/bert_for_pre_training.py b/model_zoo/bert/src/bert_for_pre_training.py index 600512b4a7..5e014f02ba 100644 --- a/model_zoo/bert/src/bert_for_pre_training.py +++ b/model_zoo/bert/src/bert_for_pre_training.py @@ -27,12 +27,12 @@ from mindspore.nn.wrap.grad_reducer import DistributedGradReducer from mindspore.train.parallel_utils import ParallelMode from mindspore.communication.management import get_group_size from mindspore import context +from mindspore.ops import _selected_ops from .bert_model import BertModel GRADIENT_CLIP_TYPE = 1 GRADIENT_CLIP_VALUE = 1.0 -_nn_clip_by_norm = nn.ClipByNorm() clip_grad = C.MultitypeFuncGraph("clip_grad") @@ -57,7 +57,7 @@ def _clip_grad(clip_type, 
clip_value, grad): new_grad = C.clip_by_value(grad, F.cast(F.tuple_to_array((-clip_value,)), dt), F.cast(F.tuple_to_array((clip_value,)), dt)) else: - new_grad = _nn_clip_by_norm(grad, F.cast(F.tuple_to_array((clip_value,)), dt)) + new_grad = nn.ClipByNorm()(grad, F.cast(F.tuple_to_array((clip_value,)), dt)) return new_grad @@ -131,7 +131,7 @@ class GetNextSentenceOutput(nn.Cell): """ def __init__(self, config): super(GetNextSentenceOutput, self).__init__() - self.log_softmax = P.LogSoftmax() + self.log_softmax = _selected_ops.LogSoftmax() self.weight_init = TruncatedNormal(config.initializer_range) self.dense = nn.Dense(config.hidden_size, 2, weight_init=self.weight_init, has_bias=True).to_float(config.compute_type) diff --git a/model_zoo/bert/src/bert_model.py b/model_zoo/bert/src/bert_model.py index 310d330daa..5cd90ab84b 100644 --- a/model_zoo/bert/src/bert_model.py +++ b/model_zoo/bert/src/bert_model.py @@ -261,7 +261,7 @@ class BertOutput(nn.Cell): def construct(self, hidden_status, input_tensor): output = self.dense(hidden_status) output = self.dropout(output) - output = self.add(output, input_tensor) + output = self.add(input_tensor, output) output = self.layernorm(output) return output @@ -832,8 +832,7 @@ class CreateAttentionMaskFromInputMask(nn.Cell): if not self.input_mask_from_dataset: input_mask = self.input_mask - input_mask = self.cast(self.reshape(input_mask, self.shape), mstype.float32) - attention_mask = self.batch_matmul(self.broadcast_ones, input_mask) + attention_mask = self.cast(self.reshape(input_mask, self.shape), mstype.float32) return attention_mask diff --git a/model_zoo/bert/src/cluener_evaluation.py b/model_zoo/bert/src/cluener_evaluation.py index c2c6770a4a..09de6bf0b3 100644 --- a/model_zoo/bert/src/cluener_evaluation.py +++ b/model_zoo/bert/src/cluener_evaluation.py @@ -19,8 +19,8 @@ import json import numpy as np import mindspore.common.dtype as mstype from mindspore.common.tensor import Tensor -import tokenization -from 
sample_process import label_generation, process_one_example_p +from . import tokenization +from .sample_process import label_generation, process_one_example_p from .evaluation_config import cfg from .CRF import postprocess diff --git a/model_zoo/bert/src/config.py b/model_zoo/bert/src/config.py index d1062b78ee..812f0c2f18 100644 --- a/model_zoo/bert/src/config.py +++ b/model_zoo/bert/src/config.py @@ -56,7 +56,7 @@ if cfg.bert_network == 'base': bert_net_cfg = BertConfig( batch_size=32, seq_length=128, - vocab_size=21136, + vocab_size=21128, hidden_size=768, num_hidden_layers=12, num_attention_heads=12, @@ -77,7 +77,7 @@ if cfg.bert_network == 'nezha': bert_net_cfg = BertConfig( batch_size=32, seq_length=128, - vocab_size=21136, + vocab_size=21128, hidden_size=1024, num_hidden_layers=24, num_attention_heads=16, @@ -98,7 +98,7 @@ if cfg.bert_network == 'large': bert_net_cfg = BertConfig( batch_size=16, seq_length=512, - vocab_size=30528, + vocab_size=30522, hidden_size=1024, num_hidden_layers=24, num_attention_heads=16, diff --git a/model_zoo/bert/src/dataset.py b/model_zoo/bert/src/dataset.py index 1828fac454..7985ca8559 100644 --- a/model_zoo/bert/src/dataset.py +++ b/model_zoo/bert/src/dataset.py @@ -39,6 +39,7 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e shuffle=(do_shuffle == "true"), num_shards=device_num, shard_id=rank, shard_equal_rows=True) ori_dataset_size = ds.get_dataset_size() + print('origin dataset size: ', ori_dataset_size) new_size = ori_dataset_size if enable_data_sink == "true": new_size = data_sink_steps * bert_net_cfg.batch_size @@ -53,7 +54,7 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e ds = ds.map(input_columns="input_ids", operations=type_cast_op) # apply batch operations ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True) - ds = ds.repeat(new_repeat_count) + ds = ds.repeat(max(new_repeat_count, repeat_count)) logger.info("data size: 
{}".format(ds.get_dataset_size())) logger.info("repeatcount: {}".format(ds.get_repeat_count())) return ds, new_repeat_count diff --git a/model_zoo/bert/src/fused_layer_norm.py b/model_zoo/bert/src/fused_layer_norm.py index ee3160b036..5dbe9999ad 100644 --- a/model_zoo/bert/src/fused_layer_norm.py +++ b/model_zoo/bert/src/fused_layer_norm.py @@ -73,7 +73,7 @@ class FusedLayerNorm(Cell): Examples: >>> x = Tensor(np.ones([20, 5, 10, 10]), mindspore.float32) - >>> shape1 = x.shape()[1:] + >>> shape1 = x.shape[1:] >>> m = nn.LayerNorm(shape1, begin_norm_axis=1, begin_params_axis=1) >>> m(x) """ diff --git a/model_zoo/bert/src/utils.py b/model_zoo/bert/src/utils.py index 50925708fc..9b5383877b 100644 --- a/model_zoo/bert/src/utils.py +++ b/model_zoo/bert/src/utils.py @@ -42,6 +42,13 @@ reciprocal = P.Reciprocal() def tensor_grad_scale(scale, grad): return grad * reciprocal(scale) +_grad_overflow = C.MultitypeFuncGraph("_grad_overflow") +grad_overflow = P.FloatStatus() + +@_grad_overflow.register("Tensor") +def _tensor_grad_overflow(grad): + return grad_overflow(grad) + class BertFinetuneCell(nn.Cell): """ Especifically defined for finetuning where only four inputs tensor are needed. 
@@ -67,9 +74,16 @@ class BertFinetuneCell(nn.Cell): self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE) self.cast = P.Cast() - self.alloc_status = P.NPUAllocFloatStatus() - self.get_status = P.NPUGetFloatStatus() - self.clear_before_grad = P.NPUClearFloatStatus() + self.gpu_target = False + if context.get_context("device_target") == "GPU": + self.gpu_target = True + self.float_status = P.FloatStatus() + self.addn = P.AddN() + self.reshape = P.Reshape() + else: + self.alloc_status = P.NPUAllocFloatStatus() + self.get_status = P.NPUGetFloatStatus() + self.clear_before_grad = P.NPUClearFloatStatus() self.reduce_sum = P.ReduceSum(keep_dims=False) self.depend_parameter_use = P.ControlDepend(depend_mode=1) self.base = Tensor(1, mstype.float32) @@ -90,7 +104,7 @@ class BertFinetuneCell(nn.Cell): weights = self.weights - init = self.alloc_status() + init = False loss = self.network(input_ids, input_mask, token_type_id, @@ -99,28 +113,36 @@ class BertFinetuneCell(nn.Cell): scaling_sens = self.loss_scale else: scaling_sens = sens + + if not self.gpu_target: + init = self.alloc_status() + clear_before_grad = self.clear_before_grad(init) + F.control_depend(loss, init) + self.depend_parameter_use(clear_before_grad, scaling_sens) grads = self.grad(self.network, weights)(input_ids, input_mask, token_type_id, label_ids, self.cast(scaling_sens, mstype.float32)) - clear_before_grad = self.clear_before_grad(init) - F.control_depend(loss, init) - self.depend_parameter_use(clear_before_grad, scaling_sens) grads = self.hyper_map(F.partial(grad_scale, scaling_sens), grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) if self.reducer_flag: grads = self.grad_reducer(grads) - flag = self.get_status(init) - flag_sum = self.reduce_sum(init, (0,)) + if not self.gpu_target: + flag = self.get_status(init) + flag_sum = self.reduce_sum(init, (0,)) + 
F.control_depend(grads, flag) + F.control_depend(flag, flag_sum) + else: + flag_sum = self.hyper_map(F.partial(_grad_overflow), grads) + flag_sum = self.addn(flag_sum) + flag_sum = self.reshape(flag_sum, (())) if self.is_distributed: flag_reduce = self.allreduce(flag_sum) cond = self.less_equal(self.base, flag_reduce) else: cond = self.less_equal(self.base, flag_sum) - F.control_depend(grads, flag) - F.control_depend(flag, flag_sum) overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) diff --git a/model_zoo/deeplabv3/README.md b/model_zoo/deeplabv3/README.md index b178a83e6d..c8df3dab8d 100644 --- a/model_zoo/deeplabv3/README.md +++ b/model_zoo/deeplabv3/README.md @@ -16,17 +16,17 @@ This is an example of training DeepLabv3 with PASCAL VOC 2012 dataset in MindSpo - Set options in config.py. - Run `run_standalone_train.sh` for non-distributed training. ``` bash - sh scripts/run_standalone_train.sh DEVICE_ID EPOCH_SIZE DATA_DIR + sh scripts/run_standalone_train.sh DEVICE_ID DATA_PATH ``` - Run `run_distribute_train.sh` for distributed training. ``` bash - sh scripts/run_distribute_train.sh DEVICE_NUM EPOCH_SIZE DATA_DIR MINDSPORE_HCCL_CONFIG_PATH + sh scripts/run_distribute_train.sh MINDSPORE_HCCL_CONFIG_PATH DATA_PATH ``` ### Evaluation Set options in evaluation_config.py. Make sure the 'data_file' and 'finetune_ckpt' are set to your own path. - Run run_eval.sh for evaluation. ``` bash - sh scripts/run_eval.sh DEVICE_ID DATA_DIR + sh scripts/run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH ``` ## Options and Parameters @@ -49,6 +49,11 @@ config.py: decoder_output_stride The ratio of input to output spatial resolution when employing decoder to refine segmentation results, default is None. image_pyramid Input scales for multi-scale feature extraction, default is None. + epoch_size Epoch size, default is 6. + batch_size batch size of input dataset: N, default is 2. + enable_save_ckpt Enable save checkpoint, default is true. 
+ save_checkpoint_steps Save checkpoint steps, default is 1000. + save_checkpoint_num Save checkpoint numbers, default is 1. ``` @@ -56,11 +61,6 @@ config.py: ``` Parameters for dataset and network: distribute Run distribute, default is false. - epoch_size Epoch size, default is 6. - batch_size batch size of input dataset: N, default is 2. data_url Train/Evaluation data url, required. checkpoint_url Checkpoint path, default is None. - enable_save_ckpt Enable save checkpoint, default is true. - save_checkpoint_steps Save checkpoint steps, default is 1000. - save_checkpoint_num Save checkpoint numbers, default is 1. ``` \ No newline at end of file diff --git a/model_zoo/deeplabv3/evaluation.py b/model_zoo/deeplabv3/eval.py similarity index 85% rename from model_zoo/deeplabv3/evaluation.py rename to model_zoo/deeplabv3/eval.py index e54b2d717b..7e43571982 100644 --- a/model_zoo/deeplabv3/evaluation.py +++ b/model_zoo/deeplabv3/eval.py @@ -25,9 +25,7 @@ from src.config import config parser = argparse.ArgumentParser(description="Deeplabv3 evaluation") -parser.add_argument('--epoch_size', type=int, default=2, help='Epoch size.') parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") -parser.add_argument('--batch_size', type=int, default=2, help='Batch size.') parser.add_argument('--data_url', required=True, default=None, help='Evaluation data url') parser.add_argument('--checkpoint_url', default=None, help='Checkpoint path') @@ -39,8 +37,8 @@ print(args_opt) if __name__ == "__main__": args_opt.crop_size = config.crop_size args_opt.base_size = config.crop_size - eval_dataset = create_dataset(args_opt, args_opt.data_url, args_opt.epoch_size, args_opt.batch_size, usage="eval") - net = deeplabv3_resnet50(config.seg_num_classes, [args_opt.batch_size, 3, args_opt.crop_size, args_opt.crop_size], + eval_dataset = create_dataset(args_opt, args_opt.data_url, config.epoch_size, config.batch_size, usage="eval") + net = 
deeplabv3_resnet50(config.seg_num_classes, [config.batch_size, 3, args_opt.crop_size, args_opt.crop_size], infer_scale_sizes=config.eval_scales, atrous_rates=config.atrous_rates, decoder_output_stride=config.decoder_output_stride, output_stride=config.output_stride, fine_tune_batch_norm=config.fine_tune_batch_norm, image_pyramid=config.image_pyramid) diff --git a/model_zoo/deeplabv3/scripts/run_distribute_train.sh b/model_zoo/deeplabv3/scripts/run_distribute_train.sh index 514b0229af..4dcd8d9768 100644 --- a/model_zoo/deeplabv3/scripts/run_distribute_train.sh +++ b/model_zoo/deeplabv3/scripts/run_distribute_train.sh @@ -16,17 +16,21 @@ echo "==============================================================================================================" echo "Please run the scipt as: " -echo "bash run_distribute_train.sh DEVICE_NUM EPOCH_SIZE DATA_DIR MINDSPORE_HCCL_CONFIG_PATH" -echo "for example: bash run_distribute_train.sh 8 40 /path/zh-wiki/ /path/hccl.json" +echo "bash run_distribute_train.sh MINDSPORE_HCCL_CONFIG_PATH DATA_PATH" +echo "for example: bash run_distribute_train.sh MINDSPORE_HCCL_CONFIG_PATH DATA_PATH [PRETRAINED_CKPT_PATH](option)" echo "It is better to use absolute path." 
echo "==============================================================================================================" -EPOCH_SIZE=$2 -DATA_DIR=$3 +DATA_DIR=$2 -export MINDSPORE_HCCL_CONFIG_PATH=$4 -export RANK_TABLE_FILE=$4 -export RANK_SIZE=$1 +export MINDSPORE_HCCL_CONFIG_PATH=$1 +export RANK_TABLE_FILE=$1 +export RANK_SIZE=8 +PATH_CHECKPOINT="" +if [ $# == 3 ] +then + PATH_CHECKPOINT=$3 +fi cores=`cat /proc/cpuinfo|grep "processor" |wc -l` echo "the number of logical core" $cores avg_core_per_rank=`expr $cores \/ $RANK_SIZE` @@ -55,12 +59,8 @@ do env > env.log taskset -c $cmdopt python ../train.py \ --distribute="true" \ - --epoch_size=$EPOCH_SIZE \ --device_id=$DEVICE_ID \ - --enable_save_ckpt="true" \ - --checkpoint_url="" \ - --save_checkpoint_steps=10000 \ - --save_checkpoint_num=1 \ + --checkpoint_url=$PATH_CHECKPOINT \ --data_url=$DATA_DIR > log.txt 2>&1 & cd ../ done \ No newline at end of file diff --git a/model_zoo/deeplabv3/scripts/run_eval.sh b/model_zoo/deeplabv3/scripts/run_eval.sh index 2470138c33..735dce4cbe 100644 --- a/model_zoo/deeplabv3/scripts/run_eval.sh +++ b/model_zoo/deeplabv3/scripts/run_eval.sh @@ -15,18 +15,20 @@ # ============================================================================ echo "==============================================================================================================" echo "Please run the scipt as: " -echo "bash run_eval.sh DEVICE_ID DATA_DIR" -echo "for example: bash run_eval.sh /path/zh-wiki/ " +echo "bash run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH" +echo "for example: bash run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH" echo "==============================================================================================================" DEVICE_ID=$1 DATA_DIR=$2 +PATH_CHECKPOINT=$3 + mkdir -p ms_log CUR_DIR=`pwd` export GLOG_log_dir=${CUR_DIR}/ms_log export GLOG_logtostderr=0 -python evaluation.py \ +python eval.py \ --device_id=$DEVICE_ID \ - --checkpoint_url="" \ + 
--checkpoint_url=$PATH_CHECKPOINT \ --data_url=$DATA_DIR > log.txt 2>&1 & \ No newline at end of file diff --git a/model_zoo/deeplabv3/scripts/run_standalone_train.sh b/model_zoo/deeplabv3/scripts/run_standalone_train.sh index 1b84f9d583..6f5e8dbe52 100644 --- a/model_zoo/deeplabv3/scripts/run_standalone_train.sh +++ b/model_zoo/deeplabv3/scripts/run_standalone_train.sh @@ -15,13 +15,17 @@ # ============================================================================ echo "==============================================================================================================" echo "Please run the scipt as: " -echo "bash run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR" -echo "for example: bash run_standalone_train.sh 0 40 /path/zh-wiki/ " +echo "bash run_standalone_pretrain.sh DEVICE_ID DATA_PATH" +echo "for example: bash run_standalone_train.sh DEVICE_ID DATA_PATH [PRETRAINED_CKPT_PATH](option)" echo "==============================================================================================================" DEVICE_ID=$1 -EPOCH_SIZE=$2 -DATA_DIR=$3 +DATA_DIR=$2 +PATH_CHECKPOINT="" +if [ $# == 3 ] +then + PATH_CHECKPOINT=$3 +fi mkdir -p ms_log CUR_DIR=`pwd` @@ -29,10 +33,6 @@ export GLOG_log_dir=${CUR_DIR}/ms_log export GLOG_logtostderr=0 python train.py \ --distribute="false" \ - --epoch_size=$EPOCH_SIZE \ --device_id=$DEVICE_ID \ - --enable_save_ckpt="true" \ - --checkpoint_url="" \ - --save_checkpoint_steps=10000 \ - --save_checkpoint_num=1 \ + --checkpoint_url=$PATH_CHECKPOINT \ --data_url=$DATA_DIR > log.txt 2>&1 & \ No newline at end of file diff --git a/model_zoo/deeplabv3/src/config.py b/model_zoo/deeplabv3/src/config.py index c3b73e1097..6b5519e46c 100644 --- a/model_zoo/deeplabv3/src/config.py +++ b/model_zoo/deeplabv3/src/config.py @@ -29,5 +29,10 @@ config = ed({ "fine_tune_batch_norm": False, "ignore_label": 255, "decoder_output_stride": None, - "seg_num_classes": 21 + "seg_num_classes": 21, + "epoch_size": 6, + "batch_size": 2, + 
"enable_save_ckpt": True, + "save_checkpoint_steps": 10000, + "save_checkpoint_num": 1 }) diff --git a/model_zoo/deeplabv3/src/md_dataset.py b/model_zoo/deeplabv3/src/md_dataset.py index 37b57d1033..e136da23e1 100644 --- a/model_zoo/deeplabv3/src/md_dataset.py +++ b/model_zoo/deeplabv3/src/md_dataset.py @@ -16,6 +16,7 @@ from PIL import Image import mindspore.dataset as de import mindspore.dataset.transforms.vision.c_transforms as C +import numpy as np from .ei_dataset import HwVocRawDataset from .utils import custom_transforms as tr @@ -52,8 +53,8 @@ class DataTransform: rhf_tr = tr.RandomHorizontalFlip() image, label = rhf_tr(image, label) - nor_tr = tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) - image, label = nor_tr(image, label) + image = np.array(image).astype(np.float32) + label = np.array(label).astype(np.float32) return image, label @@ -71,13 +72,13 @@ class DataTransform: fsc_tr = tr.FixScaleCrop(crop_size=self.args.crop_size) image, label = fsc_tr(image, label) - nor_tr = tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) - image, label = nor_tr(image, label) + image = np.array(image).astype(np.float32) + label = np.array(label).astype(np.float32) return image, label -def create_dataset(args, data_url, epoch_num=1, batch_size=1, usage="train"): +def create_dataset(args, data_url, epoch_num=1, batch_size=1, usage="train", shuffle=True): """ Create Dataset for DeepLabV3. 
@@ -106,7 +107,7 @@ def create_dataset(args, data_url, epoch_num=1, batch_size=1, usage="train"): # 1464 samples / batch_size 8 = 183 batches # epoch_num is num of steps # 3658 steps / 183 = 20 epochs - if usage == "train": + if usage == "train" and shuffle: dataset = dataset.shuffle(1464) dataset = dataset.batch(batch_size, drop_remainder=(usage == "train")) dataset = dataset.repeat(count=epoch_num) diff --git a/model_zoo/deeplabv3/src/utils/custom_transforms.py b/model_zoo/deeplabv3/src/utils/custom_transforms.py index 3473f7eef5..75c78e1240 100644 --- a/model_zoo/deeplabv3/src/utils/custom_transforms.py +++ b/model_zoo/deeplabv3/src/utils/custom_transforms.py @@ -33,6 +33,7 @@ class Normalize: def __call__(self, img, mask): img = np.array(img).astype(np.float32) mask = np.array(mask).astype(np.float32) + img = ((img - self.mean) / self.std).astype(np.float32) return img, mask diff --git a/model_zoo/deeplabv3/train.py b/model_zoo/deeplabv3/train.py index 2135b0abf5..d096613977 100644 --- a/model_zoo/deeplabv3/train.py +++ b/model_zoo/deeplabv3/train.py @@ -27,14 +27,10 @@ from src.config import config parser = argparse.ArgumentParser(description="Deeplabv3 training") parser.add_argument("--distribute", type=str, default="false", help="Run distribute, default is false.") -parser.add_argument('--epoch_size', type=int, default=6, help='Epoch size.') -parser.add_argument('--batch_size', type=int, default=2, help='Batch size.') parser.add_argument('--data_url', required=True, default=None, help='Train data url') parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") parser.add_argument('--checkpoint_url', default=None, help='Checkpoint path') -parser.add_argument("--enable_save_ckpt", type=str, default="true", help="Enable save checkpoint, default is true.") -parser.add_argument("--save_checkpoint_steps", type=int, default=1000, help="Save checkpoint steps, default is 1000.") -parser.add_argument("--save_checkpoint_num", type=int, 
default=1, help="Save checkpoint numbers, default is 1.") + args_opt = parser.parse_args() print(args_opt) context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) @@ -70,16 +66,16 @@ if __name__ == "__main__": init() args_opt.base_size = config.crop_size args_opt.crop_size = config.crop_size - train_dataset = create_dataset(args_opt, args_opt.data_url, args_opt.epoch_size, args_opt.batch_size, usage="train") + train_dataset = create_dataset(args_opt, args_opt.data_url, config.epoch_size, config.batch_size, usage="train") dataset_size = train_dataset.get_dataset_size() time_cb = TimeMonitor(data_size=dataset_size) callback = [time_cb, LossCallBack()] - if args_opt.enable_save_ckpt == "true": - config_ck = CheckpointConfig(save_checkpoint_steps=args_opt.save_checkpoint_steps, - keep_checkpoint_max=args_opt.save_checkpoint_num) + if config.enable_save_ckpt: + config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps, + keep_checkpoint_max=config.save_checkpoint_num) ckpoint_cb = ModelCheckpoint(prefix='checkpoint_deeplabv3', config=config_ck) callback.append(ckpoint_cb) - net = deeplabv3_resnet50(config.seg_num_classes, [args_opt.batch_size, 3, args_opt.crop_size, args_opt.crop_size], + net = deeplabv3_resnet50(config.seg_num_classes, [config.batch_size, 3, args_opt.crop_size, args_opt.crop_size], infer_scale_sizes=config.eval_scales, atrous_rates=config.atrous_rates, decoder_output_stride=config.decoder_output_stride, output_stride=config.output_stride, fine_tune_batch_norm=config.fine_tune_batch_norm, image_pyramid=config.image_pyramid) @@ -88,5 +84,5 @@ if __name__ == "__main__": loss = OhemLoss(config.seg_num_classes, config.ignore_label) opt = Momentum(filter(lambda x: 'beta' not in x.name and 'gamma' not in x.name and 'depth' not in x.name and 'bias' not in x.name, net.trainable_params()), learning_rate=config.learning_rate, momentum=config.momentum, weight_decay=config.weight_decay) model = 
Model(net, loss, opt) - model.train(args_opt.epoch_size, train_dataset, callback) + model.train(config.epoch_size, train_dataset, callback) \ No newline at end of file diff --git a/model_zoo/faster_rcnn/src/dataset.py b/model_zoo/faster_rcnn/src/dataset.py index e384534f77..d64de09391 100644 --- a/model_zoo/faster_rcnn/src/dataset.py +++ b/model_zoo/faster_rcnn/src/dataset.py @@ -23,6 +23,8 @@ from numpy import random import mmcv import mindspore.dataset as de import mindspore.dataset.transforms.vision.c_transforms as C +import mindspore.dataset.transforms.c_transforms as CC +import mindspore.common.dtype as mstype from mindspore.mindrecord import FileWriter from src.config import config @@ -229,6 +231,21 @@ def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num): return (img_data, img_shape, flipped, gt_label, gt_num) +def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num): + """flipped generation""" + img_data = img + flipped = gt_bboxes.copy() + _, w, _ = img_data.shape + + flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1 + flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1 + + return (img_data, img_shape, flipped, gt_label, gt_num) + +def image_bgr_rgb(img, img_shape, gt_bboxes, gt_label, gt_num): + img_data = img[:, :, ::-1] + return (img_data, img_shape, gt_bboxes, gt_label, gt_num) + def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num): """transpose operation for image""" img_data = img.transpose(2, 0, 1).copy() @@ -264,9 +281,10 @@ def preprocess_fn(image, box, is_training): input_data = rescale_column(*input_data) else: input_data = resize_column_test(*input_data) - input_data = imnormalize_column(*input_data) - output_data = transpose_column(*input_data) + input_data = image_bgr_rgb(*input_data) + + output_data = input_data return output_data def _data_aug(image, box, is_training): @@ -289,24 +307,24 @@ def preprocess_fn(image, box, is_training): if not is_training: return _infer_data(image_bgr, image_shape, gt_box_new, 
gt_label_new, gt_iscrowd_new_revert) - flip = (np.random.rand() < config.flip_ratio) - photo = (np.random.rand() < config.photo_ratio) - expand = (np.random.rand() < config.expand_ratio) input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert + expand = (np.random.rand() < config.expand_ratio) if expand: input_data = expand_column(*input_data) + if config.keep_ratio: input_data = rescale_column(*input_data) else: input_data = resize_column(*input_data) + + photo = (np.random.rand() < config.photo_ratio) if photo: input_data = photo_crop_column(*input_data) - input_data = imnormalize_column(*input_data) - if flip: - input_data = flip_column(*input_data) - output_data = transpose_column(*input_data) + input_data = image_bgr_rgb(*input_data) + + output_data = input_data return output_data return _data_aug(image, box, is_training) @@ -423,19 +441,46 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi ds = ds.map(input_columns=["image"], operations=decode) compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training)) + hwc_to_chw = C.HWC2CHW() + normalize_op = C.Normalize((123.675, 116.28, 103.53), (58.395, 57.12, 57.375)) + horizontally_op = C.RandomHorizontalFlip(1) + type_cast0 = CC.TypeCast(mstype.float32) + type_cast1 = CC.TypeCast(mstype.float16) + type_cast2 = CC.TypeCast(mstype.int32) + type_cast3 = CC.TypeCast(mstype.bool_) + if is_training: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], columns_order=["image", "image_shape", "box", "label", "valid_num"], - operations=compose_map_func, python_multiprocessing=True, num_parallel_workers=num_parallel_workers) - ds = ds.batch(batch_size, drop_remainder=True) - ds = ds.repeat(repeat_num) + operations=compose_map_func, num_parallel_workers=4) + + ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0], + 
num_parallel_workers=num_parallel_workers) + + flip = (np.random.rand() < config.flip_ratio) + if flip: + ds = ds.map(input_columns=["image"], operations=[horizontally_op], + num_parallel_workers=num_parallel_workers) + ds = ds.map(input_columns=["image", "image_shape", "box", "label", "valid_num"], + operations=flipped_generation, num_parallel_workers=4) else: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], columns_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, num_parallel_workers=num_parallel_workers) - ds = ds.batch(batch_size, drop_remainder=True) - ds = ds.repeat(repeat_num) + + ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0], + num_parallel_workers=num_parallel_workers) + + # transpose_column from python to c + ds = ds.map(input_columns=["image"], operations=[hwc_to_chw, type_cast1]) + ds = ds.map(input_columns=["image_shape"], operations=[type_cast1]) + ds = ds.map(input_columns=["box"], operations=[type_cast1]) + ds = ds.map(input_columns=["label"], operations=[type_cast2]) + ds = ds.map(input_columns=["valid_num"], operations=[type_cast3]) + ds = ds.batch(batch_size, drop_remainder=True) + ds = ds.repeat(repeat_num) + return ds diff --git a/model_zoo/gat/README.md b/model_zoo/gat/README.md new file mode 100644 index 0000000000..7c30e08851 --- /dev/null +++ b/model_zoo/gat/README.md @@ -0,0 +1,166 @@ + + +- [Graph Attention Networks Description](#graph-attention-networks-description) +- [Model architecture](#model-architecture) +- [Dataset](#dataset) + - [Data Preparation](#data-preparation) +- [Features](#features) + - [Mixed Precision](#mixed-precision) +- [Environment Requirements](#environment-requirements) +- [Structure](#structure) + - [Parameter configuration](#parameter-configuration) +- [Running the example](#running-the-example) + - [Usage](#usage) + - [Result](#result) +- [Description of random 
situation](#description-of-random-situation) +- [Others](#others) + +# Graph Attention Networks Description + +Graph Attention Networks (GAT) was proposed in 2017 by Petar Veličković et al. By leveraging masked self-attentional layers to address shortcomings of prior graph-based methods, GAT achieved or matched state-of-the-art performance on both transductive datasets like Cora and inductive datasets like PPI. This is an example of training GAT with Cora dataset in MindSpore. + +[Paper](https://arxiv.org/abs/1710.10903): Veličković, P., Cucurull, G., Casanova, A., Romero, A., Lio, P., & Bengio, Y. (2017). Graph attention networks. arXiv preprint arXiv:1710.10903. + +# Model architecture + +An illustration of multi-head attention (with K = 3 heads) by node 1 on its neighborhood can be found below: + +![](https://camo.githubusercontent.com/4fe1a90e67d17a2330d7cfcddc930d5f7501750c/68747470733a2f2f7777772e64726f70626f782e636f6d2f732f71327a703170366b37396a6a6431352f6761745f6c617965722e706e673f7261773d31) + +Note that according to whether this attention layer is the output layer of the network or not, the node update function can be concatenate or average. + +# Dataset +Statistics of dataset used are summarized as below: + +| | Cora | Citeseer | +| ------------------ | -------------: | -------------: | +| Task | Transductive | Transductive | +| # Nodes | 2708 (1 graph) | 3327 (1 graph) | +| # Edges | 5429 | 4732 | +| # Features/Node | 1433 | 3703 | +| # Classes | 7 | 6 | +| # Training Nodes | 140 | 120 | +| # Validation Nodes | 500 | 500 | +| # Test Nodes | 1000 | 1000 | + +## Data Preparation +Download the dataset Cora or Citeseer provided by /kimiyoung/planetoid from GitHub. + +> Place the dataset to any path you want, the folder should include files as follows (we use Cora dataset as an example): + +``` +.
+└─data + ├─ind.cora.allx + ├─ind.cora.ally + ├─ind.cora.graph + ├─ind.cora.test.index + ├─ind.cora.tx + ├─ind.cora.ty + ├─ind.cora.x + └─ind.cora.y +``` + +> Generate dataset in mindrecord format for cora or citeseer. +>> Usage +```buildoutcfg +cd ./scripts +# SRC_PATH is the dataset file path you downloaded, DATASET_NAME is cora or citeseer +sh run_process_data.sh [SRC_PATH] [DATASET_NAME] +``` + +>> Launch +``` +#Generate dataset in mindrecord format for cora +sh run_process_data.sh ./data cora +#Generate dataset in mindrecord format for citeseer +sh run_process_data.sh ./data citeseer +``` + +# Features + +## Mixed Precision + +To utilize the strong computation power of Ascend chip, and accelerate the training process, the mixed training method is used. MindSpore is able to cope with FP32 inputs and FP16 operators. In the GAT example, the model is set to FP16 mode except for the loss calculation part. + +# Environment Requirements + +- Hardware (Ascend) +- Install [MindSpore](https://www.mindspore.cn/install/en). + +# Structure + +```shell +. +└─gat + ├─README.md + ├─scripts + | ├─run_process_data.sh # Generate dataset in mindrecord format + | └─run_train.sh # Launch training + | + ├─src + | ├─config.py # Training configurations + | ├─dataset.py # Data preprocessing + | ├─gat.py # GAT model + | └─utils.py # Utils for training gat + | + └─train.py # Train net +``` + +## Parameter configuration + +Parameters for training can be set in config.py. + +``` +"learning_rate": 0.005, # Learning rate +"num_epochs": 200, # Epoch sizes for training +"hid_units": [8], # Hidden units for attention head at each layer +"n_heads": [8, 1], # Num heads for each layer +"early_stopping": 100, # Early stop patience +"l2_coeff": 0.0005 # l2 coefficient +"attn_dropout": 0.6 # Attention dropout ratio +"feature_dropout":0.6 # Feature dropout ratio +``` + +# Running the example +## Usage +After Dataset is correctly generated.
+```
+# run train with cora dataset, DATASET_NAME is cora
+sh run_train.sh [DATASET_NAME]
+```
+
+## Result
+
+Training result will be stored in the scripts path, whose folder name begins with "train". You can find the result like the following in the log.
+
+
+```
+Epoch:0, train loss=1.98498 train acc=0.17143 | val loss=1.97946 val acc=0.27200
+Epoch:1, train loss=1.98345 train acc=0.15000 | val loss=1.97233 val acc=0.32600
+Epoch:2, train loss=1.96968 train acc=0.21429 | val loss=1.96747 val acc=0.37400
+Epoch:3, train loss=1.97061 train acc=0.20714 | val loss=1.96410 val acc=0.47600
+Epoch:4, train loss=1.96864 train acc=0.13571 | val loss=1.96066 val acc=0.59600
+...
+Epoch:195, train loss=1.45111 train_acc=0.56429 | val_loss=1.44325 val_acc=0.81200
+Epoch:196, train loss=1.52476 train_acc=0.52143 | val_loss=1.43871 val_acc=0.81200
+Epoch:197, train loss=1.35807 train_acc=0.62857 | val_loss=1.43364 val_acc=0.81400
+Epoch:198, train loss=1.47566 train_acc=0.51429 | val_loss=1.42948 val_acc=0.81000
+Epoch:199, train loss=1.56411 train_acc=0.55000 | val_loss=1.42632 val_acc=0.80600
+Test loss=1.5366285, test acc=0.84199995
+...
+```
+
+Results on the Cora dataset are shown in the table below:
+
+|                                      | MindSpore + Ascend910 | Tensorflow + V100 |
+| ------------------------------------ | --------------------: | ----------------: |
+| Accuracy                             | 0.830933271           | 0.828649968       |
+| Training Cost(200 epochs)            | 27.62298311s          | 36.711862s        |
+| End to End Training Cost(200 epochs) | 39.074s               | 50.894s           |
+
+# Description of random situation
+GAT model contains lots of dropout operations; if you want to disable dropout, set the attn_dropout and feature_dropout to 0 in src/config.py. Note that this operation will cause the accuracy drop to approximately 80%.
+
+# Others
+GAT model is verified on Ascend environment, not on CPU or GPU.
\ No newline at end of file
diff --git a/model_zoo/gat/scripts/run_process_data.sh b/model_zoo/gat/scripts/run_process_data.sh
new file mode 100755
index 0000000000..4501f3c67f
--- /dev/null
+++ b/model_zoo/gat/scripts/run_process_data.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 2 ]
+then
+    echo "Usage: sh run_process_data.sh [SRC_PATH] [DATASET_NAME]"
+exit 1
+fi
+
+get_real_path(){
+    if [ "${1:0:1}" == "/" ]; then
+        echo "$1"
+    else
+        echo "$(realpath -m $PWD/$1)"
+    fi
+}
+SRC_PATH=$(get_real_path $1)
+echo $SRC_PATH
+
+DATASET_NAME=$2
+echo $DATASET_NAME
+
+if [ !
-d data_mr ]; then + mkdir data_mr +else + echo data_mr exist +fi +MINDRECORD_PATH=`pwd`/data_mr + +rm -f $MINDRECORD_PATH/* + +cd ../../../example/graph_to_mindrecord || exit + +python writer.py --mindrecord_script $DATASET_NAME \ +--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \ +--mindrecord_partitions 1 \ +--mindrecord_header_size_by_bit 18 \ +--mindrecord_page_size_by_bit 20 \ +--graph_api_args "$SRC_PATH" + +cd - || exit diff --git a/model_zoo/gat/scripts/run_train.sh b/model_zoo/gat/scripts/run_train.sh new file mode 100644 index 0000000000..3e9213712d --- /dev/null +++ b/model_zoo/gat/scripts/run_train.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +if [ $# != 1 ] +then + echo "Usage: sh run_train.sh [DATASET_NAME]" +exit 1 +fi + +DATASET_NAME=$1 +echo $DATASET_NAME + +ulimit -u unlimited +export DEVICE_NUM=1 +export RANK_SIZE=$DEVICE_NUM +export DEVICE_ID=0 +export RANK_ID=0 + +if [ -d "train" ]; +then + rm -rf ./train +fi +mkdir ./train +cp ../*.py ./train +cp *.sh ./train +cp -r ../src ./train +cd ./train || exit +env > env.log +echo "start training for device $DEVICE_ID" + + +if [ $DATASET_NAME == cora ] +then + python train.py --data_dir=../data_mr/$DATASET_NAME &> log & +fi + +if [ $DATASET_NAME == citeseer ] +then + python train.py --data_dir=../data_mr/$DATASET_NAME --train_nodes_num=120 &> log & +fi +cd .. diff --git a/model_zoo/gat/src/__init__.py b/model_zoo/gat/src/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/model_zoo/gat/src/config.py b/model_zoo/gat/src/config.py new file mode 100644 index 0000000000..8e22ab5a78 --- /dev/null +++ b/model_zoo/gat/src/config.py @@ -0,0 +1,26 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Train configs for training gat""" + + +class GatConfig(): + lr = 0.005 + num_epochs = 200 + hid_units = [8] + n_heads = [8, 1] + early_stopping = 100 + l2_coeff = 0.0005 + attn_dropout = 0.6 + feature_dropout = 0.6 diff --git a/model_zoo/gat/src/dataset.py b/model_zoo/gat/src/dataset.py new file mode 100644 index 0000000000..0d0b544514 --- /dev/null +++ b/model_zoo/gat/src/dataset.py @@ -0,0 +1,87 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Preprocess data obtained for training""" +import numpy as np +import mindspore.dataset as ds + + +def adj_to_bias(adj): + """Add self loop to adj and make sure only one hop neighbors are engaged in computing""" + num_graphs = adj.shape[0] + adj_temp = np.empty(adj.shape) + for i in range(num_graphs): + adj_temp[i] = adj[i] + np.eye(adj.shape[1]) + return -1e9 * (1.0 - adj_temp) + + +def get_biases_features_labels(data_dir): + """Get biases, features, labels from Dataset""" + g = ds.GraphData(data_dir) + nodes = g.get_all_nodes(0) + nodes_list = nodes.tolist() + row_tensor = g.get_node_feature(nodes_list, [1, 2]) + features = row_tensor[0] + features = features[np.newaxis] + + labels = row_tensor[1] + + nodes_num = labels.shape[0] + class_num = labels.max() + 1 + labels_onehot = np.eye(nodes_num, class_num)[labels].astype(np.float32) + + neighbor = g.get_all_neighbors(nodes_list, 0) + node_map = {node_id: index for index, node_id in enumerate(nodes_list)} + adj = np.zeros([nodes_num, nodes_num], dtype=np.float32) + for index, value in np.ndenumerate(neighbor): + if value >= 0 and index[1] > 0: + adj[node_map[neighbor[index[0], 0]], node_map[value]] = 1 + adj = adj[np.newaxis] + biases = adj_to_bias(adj) + + return biases, features, labels_onehot + + +def get_mask(total, begin, end): + """Generate mask according to begin and end position""" + mask = np.zeros([total]).astype(np.float32) + mask[begin:end] = 1 + return np.array(mask, dtype=np.bool) + + +def load_and_process(data_dir, train_node_num, eval_node_num, test_node_num): + """Load cora dataset and preprocessing""" + biases, feature, label = get_biases_features_labels(data_dir) + # split training, validation and testing set + nodes_num = label.shape[0] + train_mask = get_mask(nodes_num, 0, train_node_num) + eval_mask = get_mask(nodes_num, train_node_num, train_node_num + eval_node_num) + test_mask = get_mask(nodes_num, nodes_num 
- test_node_num, nodes_num) + + y_train = np.zeros(label.shape) + y_val = np.zeros(label.shape) + y_test = np.zeros(label.shape) + + y_train[train_mask, :] = label[train_mask, :] + y_val[eval_mask, :] = label[eval_mask, :] + y_test[test_mask, :] = label[test_mask, :] + + y_train = y_train[np.newaxis] + y_val = y_val[np.newaxis] + y_test = y_test[np.newaxis] + train_mask = train_mask[np.newaxis] + eval_mask = eval_mask[np.newaxis] + test_mask = test_mask[np.newaxis] + + return feature, biases, y_train, train_mask, y_val, eval_mask, y_test, test_mask diff --git a/model_zoo/gat/src/gat.py b/model_zoo/gat/src/gat.py new file mode 100644 index 0000000000..3cb3cc1106 --- /dev/null +++ b/model_zoo/gat/src/gat.py @@ -0,0 +1,496 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Aggregator.""" +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore._extends import cell_attr_register +from mindspore import Tensor, Parameter +from mindspore.common.initializer import initializer +from mindspore._checkparam import check_int_positive, check_bool +from mindspore.nn.layer.activation import get_activation + + +class GNNFeatureTransform(nn.Cell): + r""" + The GNN featuren transform layer for input. + + Applies linear transformation for the input feature. 
This layer implements the operation as: + + .. math:: + \text{outputs} = \text{inputs} * \text{kernel} + \text{bias}, + + where :math:`\text{activation}` is the activation function passed as the activation + argument (if passed in),:math:`\text{activation}` is a weight matrix with the same + data type as the inputs created by the layer, and :math:`\text{bias}` is a bias vector + with the same data type as the inputs created by the layer (only if has_bias is True). + + Args: + in_channels (int): The number of channels in the input space. + out_channels (int): The number of channels in the output space. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as input x. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + + Raises: + ValueError: If weight_init or bias_init shape is incorrect. + + Inputs: + - **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(*B, N, C)`, + where :math:`*B` represents the batch size which can be multidimensional, :math:`N` and :math:`C` are the + size of the last two dimensions. If `transpose_a` is True, its shape should be :math:`(*B, C, N)`. + + Outputs: + Tensor, the shape of the output tensor is :math:`(*B, N, M)`. 
+ + Examples: + >>> net = nn.Dense(3, 4) + >>> input = Tensor(np.random.randint(0, 255, [2, 3]), mindspore.float32) + >>> net(input) + [[ 2.5246444 2.2738023 0.5711005 -3.9399147 ] + [ 1.0739875 4.0155234 0.94188046 -5.459526 ]] + """ + @cell_attr_register + def __init__(self, + in_channels, + out_channels, + weight_init='normal', + bias_init='zeros', + has_bias=True): + super(GNNFeatureTransform, self).__init__() + self.in_channels = check_int_positive(in_channels) + self.out_channels = check_int_positive(out_channels) + self.has_bias = check_bool(has_bias) + + if isinstance(weight_init, Tensor): + if weight_init.dim() != 2 or weight_init.shape()[0] != out_channels or \ + weight_init.shape()[1] != in_channels: + raise ValueError("weight_init shape error") + + self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight") + + if self.has_bias: + if isinstance(bias_init, Tensor): + if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels: + raise ValueError("bias_init shape error") + + self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias") + + self.matmul = P.MatMul(transpose_b=True) + self.bias_add = P.BiasAdd() + + def construct(self, x): + tensor_shape = F.shape(x) + input_feature = F.reshape(x, (tensor_shape[0] * tensor_shape[1], tensor_shape[2])) + output = self.matmul(input_feature, self.weight) + if self.has_bias: + output = self.bias_add(output, self.bias) + output = F.reshape(output, (tensor_shape[0], tensor_shape[1], self.out_channels)) + return output + + def extend_repr(self): + str_info = 'in_channels={}, out_channels={}, weight={}, has_bias={}' \ + .format(self.in_channels, self.out_channels, self.weight, self.has_bias) + if self.has_bias: + str_info = str_info + ', bias={}'.format(self.bias) + + return str_info + + +class _BaseAggregator(nn.Cell): + """ + Base Aggregator of GNN + + Args: + feature_in_dim (int): Node or edge input feature dim. 
+ feature_out_dim (int): Node or edge outpout feature dim. + use_fc (bool): Specifies whether a linear transformation before message is aggregated. Default: True + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as input x. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + dropout_ratio (float): The keep rate of dropout layer, greater than 0 and less equal than 1. Default: None. + activation (str): Regularizer function applied to the output of the layer, eg. 'relu'. Default: None. + + Examples: + >>> class MyAggregator(_BaseAggregator): + >>> def __init__(self): + >>> super(MyAggregator, self).__init__(self, feature_in_dim, feature_out_dim) + >>> self.reduce_mean = P.ReduceSum() + >>> + >>> def construct(self, x): + >>> return self.reduce_mean(x, 1) + """ + def __init__(self, + feature_in_dim, + feature_out_dim, + use_fc=True, + weight_init="normal", + bias_init="zeros", + has_bias=True, + dropout_ratio=None, + activation=None): + super(_BaseAggregator, self).__init__() + self.in_dim = feature_in_dim + self.out_dim = feature_out_dim + self.use_fc = use_fc + if self.use_fc: + self.weight_init = weight_init + self.bias_init = bias_init + self.has_bias = has_bias + self.fc = GNNFeatureTransform(self.in_dim, + self.out_dim, + weight_init=self.weight_init, + bias_init=self.bias_init, + has_bias=self.has_bias) + self.dropout_ratio = dropout_ratio + if self.dropout_ratio is not None: + self.dropout = nn.Dropout(keep_prob=self.dropout_ratio) + self.dropout_flag = self.dropout_ratio is not None + self.activation = get_activation(activation) + self.activation_flag = self.activation is not None + + 
def construct(self, **kward): + """Must be overridden by all subclasses.""" + raise NotImplementedError + + +class MeanAggregator(_BaseAggregator): + """ + Mean Aggregator of GNN + + Args: + feature_in_dim (int): Node or edge input feature dim. + feature_out_dim (int): Node or edge outpout feature dim. + use_fc (bool): Specifies whether a linear transformation before message is aggregated. Default: True + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as input x. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + dropout_ratio (float): The keep rate of dropout layer, greater than 0 and less equal than 1. Default: None. + activation (str): Regularizer function applied to the output of the layer, eg. 'relu'. Default: None. 
+ + Examples: + >>> net = MeanAggregator(32, 64, activation="relu", dropout=0.5) + >>> input_data = Tensor(np.array(np.random.rand(32, 3, 32), dtypy=np.float32)) + >>> output = net(input_data) + """ + def __init__(self, + feature_in_dim, + feature_out_dim, + use_fc=True, + weight_init="normal", + bias_init="zeros", + has_bias=True, + dropout_ratio=None, + activation=None): + super(MeanAggregator, self).__init__( + feature_in_dim, + feature_out_dim, + use_fc, + weight_init, + bias_init, + has_bias, + dropout_ratio, + activation) + self.reduce_mean = P.ReduceMean(keep_dims=False) + + def construct(self, input_feature): + if self.use_fc: + input_feature = self.fc(input_feature) + if self.dropout_flag: + input_feature = self.dropout(input_feature) + if self.activation_flag: + input_feature = self.activation(input_feature) + output_feature = self.reduce_mean(input_feature, 1) + return output_feature + + +class AttentionHead(nn.Cell): + """ + Attention Head for Graph Attention Networks. + + Args: + in_channel (int): The number of input channel, input feature dim. + out_channel (int): The number of output channel, output feature dim. + in_drop_ratio (float): Input feature dropout ratio, default 0.0. + coef_drop_ratio (float): Coefficient dropout ratio, default 0.0. + residual (bool): Whether to use residual connection, default False. + coef_activation (Cell): The attention coefficient activation function, + default nn.LeakyReLU(). + activation (Cell): The output activation function, default nn.ELU(). + + Inputs: + - **input_feature** (Tensor) - Tensor of shape : (batch_size, num_nodes, feature_dim). + - **bias_mat** (Tensor) - Tensor of shape : (batch_size, num_nodes, num_nodes). 
+ + Examples: + >>> head = AttentionHead(1433, + 8, + in_drop_ratio=0.6, + coef_drop_ratio=0.6, + residual=False) + >>> input_data = Tensor(np.array(np.random.rand(1, 2708, 1433), dtypy=np.float32)) + >>> output = net(input_data) + """ + + def __init__(self, + in_channel, + out_channel, + in_drop_ratio=0.0, + coef_drop_ratio=0.0, + residual=False, + coef_activation=nn.LeakyReLU(), + activation=nn.ELU()): + super(AttentionHead, self).__init__() + self.in_channel = check_int_positive(in_channel) + self.out_channel = check_int_positive(out_channel) + self.in_drop_ratio = in_drop_ratio + self.in_drop = nn.Dropout(keep_prob=1 - in_drop_ratio) + self.in_drop_2 = nn.Dropout(keep_prob=1 - in_drop_ratio) + self.feature_transform = GNNFeatureTransform( + in_channels=self.in_channel, + out_channels=self.out_channel, + has_bias=False, + weight_init='XavierUniform') + + self.f_1_transform = GNNFeatureTransform( + in_channels=self.out_channel, + out_channels=1, + weight_init='XavierUniform') + self.f_2_transform = GNNFeatureTransform( + in_channels=self.out_channel, + out_channels=1, + weight_init='XavierUniform') + self.softmax = nn.Softmax() + + self.coef_drop = nn.Dropout(keep_prob=1 - coef_drop_ratio) + self.matmul = P.MatMul() + self.bias_add = P.BiasAdd() + self.bias = Parameter(initializer('zeros', self.out_channel), name='bias') + self.residual = check_bool(residual) + if self.residual: + if in_channel != out_channel: + self.residual_transform_flag = True + self.residual_transform = GNNFeatureTransform( + in_channels=self.in_channel, + out_channels=self.out_channel) + else: + self.residual_transform = None + self.coef_activation = coef_activation + self.activation = activation + + def construct(self, input_feature, bias_mat, training=True): + if training is True: + input_feature = self.in_drop(input_feature) + + feature = self.feature_transform(input_feature) + # self attention + f_1 = self.f_1_transform(feature) + f_2 = self.f_2_transform(feature) + logits = f_1 + 
P.Transpose()(f_2, (0, 2, 1)) + logits = self.coef_activation(logits) + bias_mat + coefs = self.softmax(logits) + if training is True: + coefs = self.coef_drop(coefs) + feature = self.in_drop_2(feature) + + coefs = P.Squeeze(0)(coefs) + feature = P.Squeeze(0)(feature) + + ret = self.matmul(coefs, feature) + ret = self.bias_add(ret, self.bias) + ret = P.ExpandDims()(ret, 0) + # residual connection + if self.residual: + if self.residual_transform_flag: + res = self.residual_transform(input_feature) + ret = ret + res + else: + ret = ret + input_feature + # activation + if self.activation is not None: + ret = self.activation(ret) + return ret + + +class AttentionAggregator(nn.Cell): + """ + Attention Head for Graph Attention Networks,can be regarded as one + GAT layer. + + Args: + in_channel (int): Input channel. + out_channel (int): Output channel. + num_heads (int): Number of attention heads for this layer, default 1. + in_drop_ratio (float): Input feature dropout ratio, default 0.0. + coef_drop_ratio (float): Coefficient dropout ratio, default 0.0. + activation (Cell): The output activation function, default nn.ELU(). + residual (bool): Whether to use residual connection, default False. + output_transform (str['concat', 'sum']): output transform for a layer, + default 'concat' + + Inputs: + - **input_feature** (Tensor) - Tensor of shape : (batch_size, num_nodes, feature_dim). + - **bias_mat** (Tensor) - Tensor of shape : (batch_size, num_nodes, num_nodes). 
+ + Examples: + >>> input_data = Tensor(np.array(np.random.rand(1, 2708, 1433), dtype=np.float32)) + >>> biases = Tensor(np.array(np.random.rand(1, 2708, 2708), dtype=np.float32)) + >>> net = AttentionAggregator(1433, + 8, + 8) + >>> net(input_data, biases) + """ + def __init__(self, + in_channels, + out_channels, + num_heads=1, + in_drop=0.0, + coef_drop=0.0, + activation=nn.ELU(), + residual=False, + output_transform='concat'): + super(AttentionAggregator, self).__init__() + self.num_heads = num_heads + self.attns = [] + for _ in range(num_heads): + self.attns.append(AttentionHead(in_channels, + out_channels, + in_drop_ratio=in_drop, + coef_drop_ratio=coef_drop, + activation=activation, + residual=residual)) + self.attns = nn.layer.CellList(self.attns) + if output_transform == 'concat': + self.out_trans = P.Concat(-1) + elif output_transform == 'sum': + self.out_trans = P.AddN() + else: + raise ValueError("output_transform must be either 'concat' or 'sum'") + + def construct(self, input_data, bias_mat, training=True): + res = () + for i in range(self.num_heads): + res += (self.attns[i](input_data, bias_mat, training),) + return self.out_trans(res) + + +class GAT(nn.Cell): + """ + Graph Attention Network + + Args: + ftr_dims (int): Initial feature dimensions. + num_class (int): Num of class to identify. + num_nodes (int): Num of nodes in this graph. + hidden_units (list[int]): Num of hidden units at each layer. + num_heads (list[int]): Num of heads at each layer. + attn_drop (float): Drop out ratio of attention coefficient, + default 0.0. + ftr_drop (float): Drop out ratio of feature, default 0.0. + activation (Cell): Activation Function for output layer, default + nn.Elu(). + residual (bool): Whether to use residual connection between + intermediate layers, default False. 
+ + Examples: + >>> ft_sizes = 1433 + >>> num_class = 7 + >>> num_nodes = 2708 + >>> hid_units = [8] + >>> n_heads = [8, 1] + >>> activation = nn.ELU() + >>> residual = False + >>> input_data = np.array(np.random.rand(1, 2708, 1433)) + >>> biases = np.array(np.random.rand(1, 2708, 2708)) + >>> net = GAT(ft_sizes, + num_class, + num_nodes, + hidden_units=hid_units, + num_heads=n_heads, + attn_drop=0.6, + ftr_drop=0.6, + activation=activation, + residual=residual) + >>> output = net(input_data, biases) + """ + + def __init__(self, + features, + biases, + ftr_dims, + num_class, + num_nodes, + hidden_units, + num_heads, + attn_drop=0.0, + ftr_drop=0.0, + activation=nn.ELU(), + residual=False): + super(GAT, self).__init__() + self.features = Tensor(features) + self.biases = Tensor(biases) + self.ftr_dims = check_int_positive(ftr_dims) + self.num_class = check_int_positive(num_class) + self.num_nodes = check_int_positive(num_nodes) + self.hidden_units = hidden_units + self.num_heads = num_heads + self.attn_drop = attn_drop + self.ftr_drop = ftr_drop + self.activation = activation + self.residual = check_bool(residual) + self.layers = [] + # first layer + self.layers.append(AttentionAggregator( + self.ftr_dims, + self.hidden_units[0], + self.num_heads[0], + self.ftr_drop, + self.attn_drop, + self.activation, + residual=False)) + # intermediate layer + for i in range(1, len(self.hidden_units)): + self.layers.append(AttentionAggregator( + self.hidden_units[i-1]*self.num_heads[i-1], + self.hidden_units[i], + self.num_heads[i], + self.ftr_drop, + self.attn_drop, + self.activation, + residual=self.residual)) + # output layer + self.layers.append(AttentionAggregator( + self.hidden_units[-1]*self.num_heads[-2], + self.num_class, + self.num_heads[-1], + self.ftr_drop, + self.attn_drop, + activation=None, + residual=False, + output_transform='sum')) + self.layers = nn.layer.CellList(self.layers) + + def construct(self, training=True): + input_data = self.features + bias_mat = 
self.biases + for cell in self.layers: + input_data = cell(input_data, bias_mat, training) + return input_data/self.num_heads[-1] diff --git a/model_zoo/gat/src/utils.py b/model_zoo/gat/src/utils.py new file mode 100644 index 0000000000..03305ca3d3 --- /dev/null +++ b/model_zoo/gat/src/utils.py @@ -0,0 +1,178 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Utils for training gat""" +from mindspore import nn +from mindspore.common.parameter import ParameterTuple +from mindspore import Tensor +from mindspore.common import dtype as mstype +from mindspore.ops import composite as C +from mindspore.ops import functional as F +from mindspore.ops import operations as P + + +class MaskedSoftMaxLoss(nn.Cell): + """Calculate masked softmax loss with l2 loss""" + def __init__(self, num_class, label, mask, l2_coeff, params): + super(MaskedSoftMaxLoss, self).__init__() + self.num_class = num_class + self.label = label + self.mask = mask + self.softmax = P.SoftmaxCrossEntropyWithLogits() + self.reduce_mean = P.ReduceMean() + self.cast = P.Cast() + self.l2_coeff = l2_coeff + self.params = ParameterTuple(list(param for param in params if param.name[-4:] != 'bias')) + self.reduce_sum = P.ReduceSum() + self.num_params = len(self.params) + + def construct(self, logits): + # calc l2 loss + l2_loss = 0 + for i in range(self.num_params): + l2_loss = l2_loss + 
self.l2_coeff * P.L2Loss()(self.params[i]) + + logits = P.Reshape()(logits, (-1, self.num_class)) + label = P.Reshape()(self.label, (-1, self.num_class)) + mask = P.Reshape()(self.mask, (-1,)) + + logits = self.cast(logits, mstype.float32) + loss = self.softmax(logits, label)[0] + mask /= self.reduce_mean(mask) + loss *= mask + loss = self.reduce_mean(loss) + l2_loss = P.Cast()(l2_loss, mstype.float32) + return loss+l2_loss + + +class MaskedAccuracy(nn.Cell): + """Calculate accuracy with mask""" + def __init__(self, num_class, label, mask): + super(MaskedAccuracy, self).__init__() + self.argmax = P.Argmax(axis=1) + self.cast = P.Cast() + self.reduce_mean = P.ReduceMean() + self.equal = P.Equal() + self.num_class = num_class + self.label = Tensor(label, dtype=mstype.float32) + self.mask = Tensor(mask, dtype=mstype.float32) + + def construct(self, logits): + logits = P.Reshape()(logits, (-1, self.num_class)) + labels = P.Reshape()(self.label, (-1, self.num_class)) + mask = P.Reshape()(self.mask, (-1,)) + + labels = self.cast(labels, mstype.float32) + + correct_prediction = self.equal(self.argmax(logits), self.argmax(labels)) + accuracy_all = self.cast(correct_prediction, mstype.float32) + mask = self.cast(mask, mstype.float32) + mask /= self.reduce_mean(mask) + accuracy_all *= mask + return self.reduce_mean(accuracy_all) + + +class LossAccuracyWrapper(nn.Cell): + """ + Warp GAT model with loss calculation and accuracy calculation, loss is calculated with l2 loss. + + Args: + network (Cell): GAT network with logits calculation as output. + num_class (int): num of class for classification. + label (numpy.ndarray): Train Dataset label. + mask (numpy.ndarray): Train Dataset mask. + l2_coeff (float): l2 loss discount rate. 
+ """ + def __init__(self, network, num_class, label, mask, l2_coeff): + super(LossAccuracyWrapper, self).__init__() + self.network = network + label = Tensor(label, dtype=mstype.float32) + mask = Tensor(mask, dtype=mstype.float32) + self.loss_func = MaskedSoftMaxLoss(num_class, label, mask, l2_coeff, self.network.trainable_params()) + self.acc_func = MaskedAccuracy(num_class, label, mask) + + def construct(self): + logits = self.network(training=False) + loss = self.loss_func(logits) + accuracy = self.acc_func(logits) + return loss, accuracy + + +class LossNetWrapper(nn.Cell): + """Wrap GAT model with loss calculation""" + def __init__(self, network, num_class, label, mask, l2_coeff): + super(LossNetWrapper, self).__init__() + self.network = network + label = Tensor(label, dtype=mstype.float32) + mask = Tensor(mask, dtype=mstype.float32) + params = list(param for param in self.network.trainable_params() if param.name[-4:] != 'bias') + self.loss_func = MaskedSoftMaxLoss(num_class, label, mask, l2_coeff, params) + + def construct(self): + logits = self.network() + loss = self.loss_func(logits) + return loss + + +class TrainOneStepCell(nn.Cell): + """ + For network training. Warp the loss net with optimizer. + + Args: + network (Cell): GAT network with loss calculation as the output. + optimizer (Cell): Optimizer for minimize the loss. + sens (Float): Backpropagation input number, default 1.0. 
+ """ + def __init__(self, network, optimizer, sens=1.0): + super(TrainOneStepCell, self).__init__(auto_prefix=True) + self.network = network + self.network.add_flags(defer_inline=True) + self.weights = ParameterTuple(network.trainable_params()) + self.optimizer = optimizer + self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) + self.sens = sens + + def construct(self): + weights = self.weights + loss = self.network() + sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) + grads = self.grad(self.network, weights)(sens) + return F.depend(loss, self.optimizer(grads)) + + +class TrainGAT(nn.Cell): + """ + Warp GAT model with everything needed for training, include loss, optimizer ,etc. + + Args: + network (Cell): GAT network. + num_class (int): num of class for classification. + label (numpy.ndarray): Train Dataset label. + mask (numpy.ndarray): Train Dataset mask. + learning_rate (float): Learning rate. + l2_coeff (float): l2 loss discount rate. + """ + def __init__(self, network, num_class, label, mask, learning_rate, l2_coeff): + super(TrainGAT, self).__init__(auto_prefix=False) + self.network = network + loss_net = LossNetWrapper(network, num_class, label, mask, l2_coeff) + optimizer = nn.Adam(loss_net.trainable_params(), + learning_rate=learning_rate) + self.loss_train_net = TrainOneStepCell(loss_net, optimizer) + self.accuracy_func = MaskedAccuracy(num_class, label, mask) + + def construct(self): + loss = self.loss_train_net() + accuracy = self.accuracy_func(self.network()) + return loss, accuracy diff --git a/model_zoo/gat/train.py b/model_zoo/gat/train.py new file mode 100644 index 0000000000..af1808b995 --- /dev/null +++ b/model_zoo/gat/train.py @@ -0,0 +1,131 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Test train gat""" +import argparse +import os + +import numpy as np +import mindspore.context as context +from mindspore.train.serialization import _exec_save_checkpoint, load_checkpoint + +from src.config import GatConfig +from src.dataset import load_and_process +from src.gat import GAT +from src.utils import LossAccuracyWrapper, TrainGAT + + +def train(): + """Train GAT model.""" + parser = argparse.ArgumentParser() + parser.add_argument('--data_dir', type=str, default='./data/cora/cora_mr', help='Data dir') + parser.add_argument('--train_nodes_num', type=int, default=140, help='Nodes numbers for training') + parser.add_argument('--eval_nodes_num', type=int, default=500, help='Nodes numbers for evaluation') + parser.add_argument('--test_nodes_num', type=int, default=1000, help='Nodes numbers for test') + args = parser.parse_args() + if not os.path.exists("ckpts"): + os.mkdir("ckpts") + context.set_context(mode=context.GRAPH_MODE, + device_target="Ascend", + save_graphs=False) + # train parameters + hid_units = GatConfig.hid_units + n_heads = GatConfig.n_heads + early_stopping = GatConfig.early_stopping + lr = GatConfig.lr + l2_coeff = GatConfig.l2_coeff + num_epochs = GatConfig.num_epochs + feature, biases, y_train, train_mask, y_val, eval_mask, y_test, test_mask = load_and_process(args.data_dir, + args.train_nodes_num, + args.eval_nodes_num, + args.test_nodes_num) + feature_size = feature.shape[2] + num_nodes = feature.shape[1] + num_class = y_train.shape[2] + + gat_net 
= GAT(feature, + biases, + feature_size, + num_class, + num_nodes, + hid_units, + n_heads, + attn_drop=GatConfig.attn_dropout, + ftr_drop=GatConfig.feature_dropout) + gat_net.add_flags_recursive(fp16=True) + + eval_net = LossAccuracyWrapper(gat_net, + num_class, + y_val, + eval_mask, + l2_coeff) + + train_net = TrainGAT(gat_net, + num_class, + y_train, + train_mask, + lr, + l2_coeff) + + train_net.set_train(True) + val_acc_max = 0.0 + val_loss_min = np.inf + for _epoch in range(num_epochs): + train_result = train_net() + train_loss = train_result[0].asnumpy() + train_acc = train_result[1].asnumpy() + + eval_result = eval_net() + eval_loss = eval_result[0].asnumpy() + eval_acc = eval_result[1].asnumpy() + + print("Epoch:{}, train loss={:.5f}, train acc={:.5f} | val loss={:.5f}, val acc={:.5f}".format( + _epoch, train_loss, train_acc, eval_loss, eval_acc)) + if eval_acc >= val_acc_max or eval_loss < val_loss_min: + if eval_acc >= val_acc_max and eval_loss < val_loss_min: + val_acc_model = eval_acc + val_loss_model = eval_loss + _exec_save_checkpoint(train_net.network, "ckpts/gat.ckpt") + val_acc_max = np.max((val_acc_max, eval_acc)) + val_loss_min = np.min((val_loss_min, eval_loss)) + curr_step = 0 + else: + curr_step += 1 + if curr_step == early_stopping: + print("Early Stop Triggered!, Min loss: {}, Max accuracy: {}".format(val_loss_min, val_acc_max)) + print("Early stop model validation loss: {}, accuracy{}".format(val_loss_model, val_acc_model)) + break + gat_net_test = GAT(feature, + biases, + feature_size, + num_class, + num_nodes, + hid_units, + n_heads, + attn_drop=0.0, + ftr_drop=0.0) + load_checkpoint("ckpts/gat.ckpt", net=gat_net_test) + gat_net_test.add_flags_recursive(fp16=True) + + test_net = LossAccuracyWrapper(gat_net_test, + num_class, + y_test, + test_mask, + l2_coeff) + test_result = test_net() + print("Test loss={}, test acc={}".format(test_result[0], test_result[1])) + + +if __name__ == "__main__": + train() diff --git a/model_zoo/gcn/README.md 
b/model_zoo/gcn/README.md new file mode 100644 index 0000000000..310c307474 --- /dev/null +++ b/model_zoo/gcn/README.md @@ -0,0 +1,115 @@ +# GCN Example + +## Description + +This is an example of training GCN with Cora and Citeseer dataset in MindSpore. + +## Requirements + +- Install [MindSpore](https://www.mindspore.cn/install/en). + +- Download the dataset Cora or Citeseer provided by kimiyoung/planetoid on GitHub. + +> Place the dataset to any path you want, the folder should include files as follows (we use Cora dataset as an example): + +``` +. +└─data + ├─ind.cora.allx + ├─ind.cora.ally + ├─ind.cora.graph + ├─ind.cora.test.index + ├─ind.cora.tx + ├─ind.cora.ty + ├─ind.cora.x + └─ind.cora.y +``` + +> Generate dataset in mindrecord format for cora or citeseer. +>> Usage +```buildoutcfg +cd ./scripts +# SRC_PATH is the dataset file path you downloaded, DATASET_NAME is cora or citeseer +sh run_process_data.sh [SRC_PATH] [DATASET_NAME] +``` + +>> Launch +``` +#Generate dataset in mindrecord format for cora +sh run_process_data.sh ./data cora +#Generate dataset in mindrecord format for citeseer +sh run_process_data.sh ./data citeseer +``` + +## Structure + +```shell +. +└─gcn + ├─README.md + ├─scripts + | ├─run_process_data.sh # Generate dataset in mindrecord format + | └─run_train.sh # Launch training + | + ├─src + | ├─config.py # Parameter configuration + | ├─dataset.py # Data preprocessing + | ├─gcn.py # GCN backbone + | └─metrics.py # Loss and accuracy + | + └─train.py # Train net +``` + +## Parameter configuration + +Parameters for training can be set in config.py.
+ +``` +"learning_rate": 0.01, # Learning rate +"epochs": 200, # Epoch sizes for training +"hidden1": 16, # Hidden size for the first graph convolution layer +"dropout": 0.5, # Dropout ratio for the first graph convolution layer +"weight_decay": 5e-4, # Weight decay for the parameter of the first graph convolution layer +"early_stopping": 10, # Tolerance for early stopping +``` + +## Running the example + +### Train + +#### Usage + +``` +# run train with cora or citeseer dataset, DATASET_NAME is cora or citeseer +sh run_train.sh [DATASET_NAME] +``` + +#### Launch + +```bash +sh run_train.sh cora +``` + +#### Result + +Training result will be stored in the scripts path, whose folder name begins with "train". You can find the result like the followings in log. + + +``` +Epoch: 0001 train_loss= 1.95373 train_acc= 0.09286 val_loss= 1.95075 val_acc= 0.20200 time= 7.25737 +Epoch: 0002 train_loss= 1.94812 train_acc= 0.32857 val_loss= 1.94717 val_acc= 0.34000 time= 0.00438 +Epoch: 0003 train_loss= 1.94249 train_acc= 0.47857 val_loss= 1.94337 val_acc= 0.43000 time= 0.00428 +Epoch: 0004 train_loss= 1.93550 train_acc= 0.55000 val_loss= 1.93957 val_acc= 0.46400 time= 0.00421 +Epoch: 0005 train_loss= 1.92617 train_acc= 0.67143 val_loss= 1.93558 val_acc= 0.45400 time= 0.00430 +... +Epoch: 0196 train_loss= 0.60326 train_acc= 0.97857 val_loss= 1.05155 val_acc= 0.78200 time= 0.00418 +Epoch: 0197 train_loss= 0.60377 train_acc= 0.97143 val_loss= 1.04940 val_acc= 0.78000 time= 0.00418 +Epoch: 0198 train_loss= 0.60680 train_acc= 0.95000 val_loss= 1.04847 val_acc= 0.78000 time= 0.00414 +Epoch: 0199 train_loss= 0.61920 train_acc= 0.96429 val_loss= 1.04797 val_acc= 0.78400 time= 0.00413 +Epoch: 0200 train_loss= 0.57948 train_acc= 0.96429 val_loss= 1.04753 val_acc= 0.78600 time= 0.00415 +Optimization Finished! +Test set results: cost= 1.00983 accuracy= 0.81300 time= 0.39083 +... 
+``` + + diff --git a/model_zoo/gcn/scripts/run_process_data.sh b/model_zoo/gcn/scripts/run_process_data.sh new file mode 100755 index 0000000000..d51d915943 --- /dev/null +++ b/model_zoo/gcn/scripts/run_process_data.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] +then + echo "Usage: sh run_train.sh [SRC_PATH] [DATASET_NAME]" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} +SRC_PATH=$(get_real_path $1) +echo $SRC_PATH + +DATASET_NAME=$2 +echo $DATASET_NAME + +if [ ! 
-d data_mr ]; then + mkdir data_mr +else + echo data_mr exist +fi +MINDRECORD_PATH=`pwd`/data_mr + +rm -f $MINDRECORD_PATH/$DATASET_NAME +rm -f $MINDRECORD_PATH/$DATASET_NAME.db + +cd ../../../example/graph_to_mindrecord || exit + +python writer.py --mindrecord_script $DATASET_NAME \ +--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \ +--mindrecord_partitions 1 \ +--mindrecord_header_size_by_bit 18 \ +--mindrecord_page_size_by_bit 20 \ +--graph_api_args "$SRC_PATH" + +cd - || exit diff --git a/model_zoo/gcn/scripts/run_train.sh b/model_zoo/gcn/scripts/run_train.sh new file mode 100755 index 0000000000..46dee49b0d --- /dev/null +++ b/model_zoo/gcn/scripts/run_train.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +if [ $# != 1 ] +then + echo "Usage: sh run_train.sh [DATASET_NAME]" +exit 1 +fi + +DATASET_NAME=$1 +echo $DATASET_NAME + +ulimit -u unlimited +export DEVICE_NUM=1 +export RANK_SIZE=$DEVICE_NUM +export DEVICE_ID=0 +export RANK_ID=0 + +if [ -d "train" ]; +then + rm -rf ./train +fi +mkdir ./train +cp ../*.py ./train +cp *.sh ./train +cp -r ../src ./train +cd ./train || exit +env > env.log +echo "start training for device $DEVICE_ID" + + +if [ $DATASET_NAME == cora ] +then + python train.py --data_dir=../data_mr/$DATASET_NAME --train_nodes_num=140 &> log & +fi + +if [ $DATASET_NAME == citeseer ] +then + python train.py --data_dir=../data_mr/$DATASET_NAME --train_nodes_num=120 &> log & +fi +cd .. + diff --git a/model_zoo/gcn/src/config.py b/model_zoo/gcn/src/config.py new file mode 100644 index 0000000000..83974d706c --- /dev/null +++ b/model_zoo/gcn/src/config.py @@ -0,0 +1,26 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +network config setting, will be used in train.py +""" + + +class ConfigGCN(): + learning_rate = 0.01 + epochs = 200 + hidden1 = 16 + dropout = 0.5 + weight_decay = 5e-4 + early_stopping = 10 diff --git a/model_zoo/gcn/src/dataset.py b/model_zoo/gcn/src/dataset.py new file mode 100644 index 0000000000..7962f6f550 --- /dev/null +++ b/model_zoo/gcn/src/dataset.py @@ -0,0 +1,65 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +create adjacency matrix, node features, labels, and mask for training. +""" +import numpy as np +import scipy.sparse as sp +import mindspore.dataset as ds + + +def normalize_adj(adj): + """Symmetrically normalize adjacency matrix.""" + rowsum = np.array(adj.sum(1)) + d_inv_sqrt = np.power(rowsum, -0.5).flatten() + d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 
+ d_mat_inv_sqrt = sp.diags(d_inv_sqrt) + return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() + + +def get_adj_features_labels(data_dir): + """Get adjacency matrix, node features and labels from dataset.""" + g = ds.GraphData(data_dir) + nodes = g.get_all_nodes(0) + nodes_list = nodes.tolist() + row_tensor = g.get_node_feature(nodes_list, [1, 2]) + features = row_tensor[0] + labels = row_tensor[1] + + nodes_num = labels.shape[0] + class_num = labels.max() + 1 + labels_onehot = np.eye(nodes_num, class_num)[labels].astype(np.float32) + + neighbor = g.get_all_neighbors(nodes_list, 0) + node_map = {node_id: index for index, node_id in enumerate(nodes_list)} + adj = np.zeros([nodes_num, nodes_num], dtype=np.float32) + for index, value in np.ndenumerate(neighbor): + # The first column of neighbor is node_id, second column to last column are neighbors of the first column. + # So we only care index[1] > 1. + # If the node does not have that many neighbors, -1 is padded. So if value < 0, we will not deal with it. + if value >= 0 and index[1] > 0: + adj[node_map[neighbor[index[0], 0]], node_map[value]] = 1 + adj = sp.coo_matrix(adj) + adj = adj + adj.T.multiply(adj.T > adj) + sp.eye(nodes_num) + nor_adj = normalize_adj(adj) + nor_adj = np.array(nor_adj.todense()) + return nor_adj, features, labels_onehot, labels + + +def get_mask(total, begin, end): + """Generate mask.""" + mask = np.zeros([total]).astype(np.float32) + mask[begin:end] = 1 + return mask diff --git a/model_zoo/gcn/src/gcn.py b/model_zoo/gcn/src/gcn.py new file mode 100644 index 0000000000..74199490b6 --- /dev/null +++ b/model_zoo/gcn/src/gcn.py @@ -0,0 +1,220 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""GCN.""" +import numpy as np +from mindspore import nn +from mindspore.common.parameter import ParameterTuple +from mindspore.ops import composite as C +from mindspore.ops import functional as F +from mindspore.ops import operations as P +from mindspore import Tensor +from mindspore.nn.layer.activation import get_activation +from model_zoo.gcn.src.metrics import Loss, Accuracy + + +def glorot(shape): + init_range = np.sqrt(6.0/(shape[0]+shape[1])) + initial = np.random.uniform(-init_range, init_range, shape).astype(np.float32) + return Tensor(initial) + + +class GraphConvolution(nn.Cell): + """ + GCN graph convolution layer. + + Args: + feature_in_dim (int): The input feature dimension. + feature_out_dim (int): The output feature dimension. + dropout_ratio (float): Dropout ratio for the dropout layer. Default: None. + activation (str): Activation function applied to the output of the layer, eg. 'relu'. Default: None. + + Inputs: + - **adj** (Tensor) - Tensor of shape :math:`(N, N)`. + - **input_feature** (Tensor) - Tensor of shape :math:`(N, C)`. + + Outputs: + Tensor, output tensor. 
+ """ + + def __init__(self, + feature_in_dim, + feature_out_dim, + dropout_ratio=None, + activation=None): + super(GraphConvolution, self).__init__() + self.in_dim = feature_in_dim + self.out_dim = feature_out_dim + self.weight_init = glorot([self.out_dim, self.in_dim]) + self.fc = nn.Dense(self.in_dim, + self.out_dim, + weight_init=self.weight_init, + has_bias=False) + self.dropout_ratio = dropout_ratio + if self.dropout_ratio is not None: + self.dropout = nn.Dropout(keep_prob=1-self.dropout_ratio) + self.dropout_flag = self.dropout_ratio is not None + self.activation = get_activation(activation) + self.activation_flag = self.activation is not None + self.matmul = P.MatMul() + + def construct(self, adj, input_feature): + dropout = input_feature + if self.dropout_flag: + dropout = self.dropout(dropout) + + fc = self.fc(dropout) + output_feature = self.matmul(adj, fc) + + if self.activation_flag: + output_feature = self.activation(output_feature) + return output_feature + + + class GCN(nn.Cell): + """ + GCN architecture. + + Args: + config (ConfigGCN): Configuration for GCN. + adj (numpy.ndarray): Normalized adjacency matrix of the graph, shape (N, N). + feature (numpy.ndarray): Node feature matrix, shape (N, input_dim). + output_dim (int): The number of output channels, equal to classes num. + """ + + def __init__(self, config, adj, feature, output_dim): + super(GCN, self).__init__() + self.adj = Tensor(adj) + self.feature = Tensor(feature) + input_dim = feature.shape[1] + self.layer0 = GraphConvolution(input_dim, config.hidden1, activation="relu", dropout_ratio=config.dropout) + self.layer1 = GraphConvolution(config.hidden1, output_dim, dropout_ratio=None) + + def construct(self): + output0 = self.layer0(self.adj, self.feature) + output1 = self.layer1(self.adj, output0) + return output1 + + + class LossAccuracyWrapper(nn.Cell): + """ + Wraps the GCN model with loss and accuracy cell. + + Args: + network (Cell): GCN network. + label (numpy.ndarray): Dataset labels.
+ mask (numpy.ndarray): Mask for training, evaluation or test. + weight_decay (float): Weight decay parameter for weight of the first convolution layer. + """ + + def __init__(self, network, label, mask, weight_decay): + super(LossAccuracyWrapper, self).__init__() + self.network = network + self.loss = Loss(label, mask, weight_decay, network.trainable_params()[0]) + self.accuracy = Accuracy(label, mask) + + def construct(self): + preds = self.network() + loss = self.loss(preds) + accuracy = self.accuracy(preds) + return loss, accuracy + + + class LossWrapper(nn.Cell): + """ + Wraps the GCN model with loss. + + Args: + network (Cell): GCN network. + label (numpy.ndarray): Dataset labels. + mask (numpy.ndarray): Mask for training. + weight_decay (float): Weight decay parameter for weight of the first convolution layer. + """ + + def __init__(self, network, label, mask, weight_decay): + super(LossWrapper, self).__init__() + self.network = network + self.loss = Loss(label, mask, weight_decay, network.trainable_params()[0]) + + def construct(self): + preds = self.network() + loss = self.loss(preds) + return loss + + + class TrainOneStepCell(nn.Cell): + r""" + Network training package class. + + Wraps the network with an optimizer. The resulting Cell can be trained without inputs. + Backward graph will be created in the construct function to do parameter updating. Different + parallel modes are available to run the training. + + Args: + network (Cell): The training network. + optimizer (Cell): Optimizer for updating the weights. + sens (Number): The scaling number to be filled as the input of backpropagation. Default value is 1.0. + + Outputs: + Tensor, a scalar Tensor with shape :math:`()`.
+ + Examples: + >>> net = Net() + >>> loss_fn = nn.SoftmaxCrossEntropyWithLogits() + >>> optim = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) + >>> loss_net = nn.WithLossCell(net, loss_fn) + >>> train_net = nn.TrainOneStepCell(loss_net, optim) + """ + + def __init__(self, network, optimizer, sens=1.0): + super(TrainOneStepCell, self).__init__(auto_prefix=False) + self.network = network + self.network.add_flags(defer_inline=True) + self.weights = ParameterTuple(network.trainable_params()) + self.optimizer = optimizer + self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) + self.sens = sens + + def construct(self): + weights = self.weights + loss = self.network() + sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) + grads = self.grad(self.network, weights)(sens) + return F.depend(loss, self.optimizer(grads)) + + +class TrainNetWrapper(nn.Cell): + """ + Wraps the GCN model with optimizer. + + Args: + network (Cell): GCN network. + label (numpy.ndarray): Dataset labels. + mask (numpy.ndarray): Mask for training, evaluation or test. + config (ConfigGCN): Configuration for GCN. 
+ """ + + def __init__(self, network, label, mask, config): + super(TrainNetWrapper, self).__init__(auto_prefix=True) + self.network = network + loss_net = LossWrapper(network, label, mask, config.weight_decay) + optimizer = nn.Adam(loss_net.trainable_params(), + learning_rate=config.learning_rate) + self.loss_train_net = TrainOneStepCell(loss_net, optimizer) + self.accuracy = Accuracy(label, mask) + + def construct(self): + loss = self.loss_train_net() + accuracy = self.accuracy(self.network()) + return loss, accuracy diff --git a/model_zoo/gcn/src/metrics.py b/model_zoo/gcn/src/metrics.py new file mode 100644 index 0000000000..5930956776 --- /dev/null +++ b/model_zoo/gcn/src/metrics.py @@ -0,0 +1,70 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Loss and accuracy.""" +from mindspore import nn +from mindspore import Tensor +from mindspore.common import dtype as mstype +from mindspore.ops import operations as P + + +class Loss(nn.Cell): + """Softmax cross-entropy loss with masking.""" + def __init__(self, label, mask, weight_decay, param): + super(Loss, self).__init__() + self.label = Tensor(label) + self.mask = Tensor(mask) + self.loss = P.SoftmaxCrossEntropyWithLogits() + self.one = Tensor(1.0, mstype.float32) + self.zero = Tensor(0.0, mstype.float32) + self.mean = P.ReduceMean() + self.cast = P.Cast() + self.l2_loss = P.L2Loss() + self.reduce_sum = P.ReduceSum() + self.weight_decay = weight_decay + self.param = param + + def construct(self, preds): + param = self.l2_loss(self.param) + loss = self.weight_decay * param + preds = self.cast(preds, mstype.float32) + loss = loss + self.loss(preds, self.label)[0] + mask = self.cast(self.mask, mstype.float32) + mask_reduce = self.mean(mask) + mask = mask / mask_reduce + loss = loss * mask + loss = self.mean(loss) + return loss + + +class Accuracy(nn.Cell): + """Accuracy with masking.""" + def __init__(self, label, mask): + super(Accuracy, self).__init__() + self.label = Tensor(label) + self.mask = Tensor(mask) + self.equal = P.Equal() + self.argmax = P.Argmax() + self.cast = P.Cast() + self.mean = P.ReduceMean() + + def construct(self, preds): + preds = self.cast(preds, mstype.float32) + correct_prediction = self.equal(self.argmax(preds), self.argmax(self.label)) + accuracy_all = self.cast(correct_prediction, mstype.float32) + mask = self.cast(self.mask, mstype.float32) + mask_reduce = self.mean(mask) + mask = mask / mask_reduce + accuracy_all *= mask + return self.mean(accuracy_all) diff --git a/model_zoo/gcn/t-SNE_visualization_on_Cora.gif b/model_zoo/gcn/t-SNE_visualization_on_Cora.gif new file mode 100644 index 0000000000..ae5aada9eb Binary files /dev/null and 
b/model_zoo/gcn/t-SNE_visualization_on_Cora.gif differ diff --git a/model_zoo/gcn/train.py b/model_zoo/gcn/train.py new file mode 100644 index 0000000000..220d2ecd6b --- /dev/null +++ b/model_zoo/gcn/train.py @@ -0,0 +1,127 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +""" +GCN training script. +""" + +import time +import argparse + +import numpy as np +from matplotlib import pyplot as plt +from matplotlib import animation +from sklearn import manifold +from mindspore import context + +from model_zoo.gcn.src.gcn import GCN, LossAccuracyWrapper, TrainNetWrapper +from model_zoo.gcn.src.config import ConfigGCN +from model_zoo.gcn.src.dataset import get_adj_features_labels, get_mask + + +def t_SNE(out_feature, dim): + t_sne = manifold.TSNE(n_components=dim, init='pca', random_state=0) + return t_sne.fit_transform(out_feature) + + +def update_graph(i, data, scat, plot): + scat.set_offsets(data[i]) + plt.title('t-SNE visualization of Epoch:{0}'.format(i)) + return scat, plot + + +def train(): + """Train model.""" + parser = argparse.ArgumentParser(description='GCN') + parser.add_argument('--data_dir', type=str, default='./data/cora/cora_mr', help='Dataset directory') + parser.add_argument('--seed', type=int, default=123, help='Random seed') + parser.add_argument('--train_nodes_num', type=int, default=140, help='Nodes numbers for training') + 
parser.add_argument('--eval_nodes_num', type=int, default=500, help='Nodes numbers for evaluation') + parser.add_argument('--test_nodes_num', type=int, default=1000, help='Nodes numbers for test') + parser.add_argument('--save_TSNE', type=bool, default=False, help='Whether to save t-SNE graph') + args_opt = parser.parse_args() + + np.random.seed(args_opt.seed) + context.set_context(mode=context.GRAPH_MODE, + device_target="Ascend", save_graphs=False) + config = ConfigGCN() + adj, feature, label_onehot, label = get_adj_features_labels(args_opt.data_dir) + + nodes_num = label_onehot.shape[0] + train_mask = get_mask(nodes_num, 0, args_opt.train_nodes_num) + eval_mask = get_mask(nodes_num, args_opt.train_nodes_num, args_opt.train_nodes_num + args_opt.eval_nodes_num) + test_mask = get_mask(nodes_num, nodes_num - args_opt.test_nodes_num, nodes_num) + + class_num = label_onehot.shape[1] + gcn_net = GCN(config, adj, feature, class_num) + gcn_net.add_flags_recursive(fp16=True) + + eval_net = LossAccuracyWrapper(gcn_net, label_onehot, eval_mask, config.weight_decay) + test_net = LossAccuracyWrapper(gcn_net, label_onehot, test_mask, config.weight_decay) + train_net = TrainNetWrapper(gcn_net, label_onehot, train_mask, config) + + loss_list = [] + + if args_opt.save_TSNE: + out_feature = gcn_net() + tsne_result = t_SNE(out_feature.asnumpy(), 2) + graph_data = [] + graph_data.append(tsne_result) + fig = plt.figure() + scat = plt.scatter(tsne_result[:, 0], tsne_result[:, 1], s=2, c=label, cmap='rainbow') + plt.title('t-SNE visualization of Epoch:0', fontsize='large', fontweight='bold', verticalalignment='center') + + for epoch in range(config.epochs): + t = time.time() + + train_net.set_train() + train_result = train_net() + train_loss = train_result[0].asnumpy() + train_accuracy = train_result[1].asnumpy() + + eval_net.set_train(False) + eval_result = eval_net() + eval_loss = eval_result[0].asnumpy() + eval_accuracy = eval_result[1].asnumpy() + + loss_list.append(eval_loss) + 
print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(train_loss), + "train_acc=", "{:.5f}".format(train_accuracy), "val_loss=", "{:.5f}".format(eval_loss), + "val_acc=", "{:.5f}".format(eval_accuracy), "time=", "{:.5f}".format(time.time() - t)) + + if args_opt.save_TSNE: + out_feature = gcn_net() + tsne_result = t_SNE(out_feature.asnumpy(), 2) + graph_data.append(tsne_result) + + if epoch > config.early_stopping and loss_list[-1] > np.mean(loss_list[-(config.early_stopping+1):-1]): + print("Early stopping...") + break + + t_test = time.time() + test_net.set_train(False) + test_result = test_net() + test_loss = test_result[0].asnumpy() + test_accuracy = test_result[1].asnumpy() + print("Test set results:", "loss=", "{:.5f}".format(test_loss), + "accuracy=", "{:.5f}".format(test_accuracy), "time=", "{:.5f}".format(time.time() - t_test)) + + if args_opt.save_TSNE: + ani = animation.FuncAnimation(fig, update_graph, frames=range(config.epochs + 1), fargs=(graph_data, scat, plt)) + ani.save('t-SNE_visualization.gif', writer='imagemagick') + + +if __name__ == '__main__': + train() diff --git a/model_zoo/googlenet/README.md b/model_zoo/googlenet/README.md new file mode 100644 index 0000000000..92cdd8af43 --- /dev/null +++ b/model_zoo/googlenet/README.md @@ -0,0 +1,324 @@ +# Contents + +- [GoogleNet Description](#googlenet-description) +- [Model Architecture](#model-architecture) +- [Dataset](#dataset) +- [Features](#features) + - [Mixed Precision](#mixed-precision) +- [Environment Requirements](#environment-requirements) +- [Quick Start](#quick-start) +- [Script Description](#script-description) + - [Script and Sample Code](#script-and-sample-code) + - [Script Parameters](#script-parameters) + - [Training Process](#training-process) + - [Training](#training) + - [Distributed Training](#distributed-training) + - [Evaluation Process](#evaluation-process) + - [Evaluation](#evaluation) +- [Model Description](#model-description) + - [Performance](#performance) + 
- [Evaluation Performance](#evaluation-performance) + - [Inference Performance](#evaluation-performance) + - [How to use](#how-to-use) + - [Inference](#inference) + - [Continue Training on the Pretrained Model](#continue-training-on-the-pretrained-model) + - [Transfer Learning](#transfer-learning) +- [Description of Random Situation](#description-of-random-situation) +- [ModelZoo Homepage](#modelzoo-homepage) + + +# [GoogleNet Description](#contents) + +GoogleNet, a 22 layers deep network, was proposed in 2014 and won the first place in the ImageNet Large-Scale Visual Recognition Challenge 2014 (ILSVRC14). GoogleNet, also called Inception v1, has significant improvement over ZFNet (The winner in 2013) and AlexNet (The winner in 2012), and has relatively lower error rate compared to VGGNet. Typically deeper deep learning network means larger number of parameters, which makes it more prone to overfitting. Furthermore, the increased network size leads to increased use of computational resources. To tackle these issues, GoogleNet adopts 1*1 convolution middle of the network to reduce dimension, and thus further reduce the computation. Global average pooling is used at the end of the network, instead of using fully connected layers. Another technique, called inception module, is to have different sizes of convolutions for the same input and stacking all the outputs. + +[Paper](https://arxiv.org/abs/1409.4842): Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich. "Going deeper with convolutions." *Proceedings of the IEEE conference on computer vision and pattern recognition*. 2015. + + +# [Model Architecture](#contents) + +The overall network architecture of GoogleNet is shown below: + +![](https://miro.medium.com/max/3780/1*ZFPOSAted10TPd3hBQU8iQ.png) + +Specifically, the GoogleNet contains numerous inception modules, which are connected together to go deeper. 
In general, an inception module with dimensionality reduction consists of **1×1 conv**, **3×3 conv**, **5×5 conv**, and **3×3 max pooling**, which are done altogether for the previous input, and stack together again at output. + +![](https://miro.medium.com/max/1108/1*sezFsYW1MyM9YOMa1q909A.png) + + + +# [Dataset](#contents) + +Dataset used: [CIFAR-10]() + +- Dataset size:175M,60,000 32*32 colorful images in 10 classes + - Train:146M,50,000 images + - Test:29.3M,10,000 images +- Data format:binary files + - Note:Data will be processed in dataset.py + + + +# [Features](#contents) + +## Mixed Precision + +The [mixed precision](https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/mixed_precision.html) training method accelerates the deep learning neural network training process by using both the single-precision and half-precision data formats, and maintains the network precision achieved by the single-precision training at the same time. Mixed precision training can accelerate the computation process, reduce memory usage, and enable a larger model or batch size to be trained on specific hardware. +For FP16 operators, if the input data type is FP32, the backend of MindSpore will automatically handle it with reduced precision. Users could check the reduced-precision operators by enabling INFO log and then searching ‘reduce precision’. + + + +# [Environment Requirements](#contents) + +- Hardware(Ascend/GPU) + - Prepare hardware environment with Ascend or GPU processor. If you want to try Ascend , please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources. 
+- Framework + - [MindSpore](http://10.90.67.50/mindspore/archive/20200506/OpenSource/me_vm_x86/) +- For more information, please check the resources below: + - [MindSpore tutorials](https://www.mindspore.cn/tutorial/zh-CN/master/index.html) + - [MindSpore API](https://www.mindspore.cn/api/zh-CN/master/index.html) + + + +# [Quick Start](#contents) + +After installing MindSpore via the official website, you can start training and evaluation as follows: + +```python +# run training example +python train.py > train.log 2>&1 & + +# run distributed training example +sh scripts/run_train.sh rank_table.json + +# run evaluation example +python eval.py > eval.log 2>&1 & OR sh run_eval.sh +``` + + + +# [Script Description](#contents) + +## [Script and Sample Code](#contents) + +``` +├── model_zoo + ├── README.md // descriptions about all the models + ├── googlenet + ├── README.md // descriptions about googlenet + ├── scripts + │ ├──run_train.sh // shell script for distributed + │ ├──run_eval.sh // shell script for evaluation + ├── src + │ ├──dataset.py // creating dataset + │ ├──googlenet.py // googlenet architecture + │ ├──config.py // parameter configuration + ├── train.py // training script + ├── eval.py // evaluation script + ├── export.py // export checkpoint files into geir/onnx +``` + +## [Script Parameters](#contents) + +```python +Major parameters in train.py and config.py are: + +--data_path: The absolute full path to the train and evaluation datasets. +--epoch_size: Total training epochs. +--batch_size: Training batch size. +--lr_init: Initial learning rate. +--num_classes: The number of classes in the training set. +--weight_decay: Weight decay value. +--image_height: Image height used as input to the model. +--image_width: Image width used as input the model. +--pre_trained: Whether training from scratch or training based on the + pre-trained model.Optional values are True, False. +--device_target: Device where the code will be implemented. 
Optional values + are "Ascend", "GPU". +--device_id: Device ID used to train or evaluate the dataset. Ignore it + when you use run_train.sh for distributed training. +--checkpoint_path: The absolute full path to the checkpoint file saved + after training. +--onnx_filename: File name of the onnx model used in export.py. +--geir_filename: File name of the geir model used in export.py. +``` + + +## [Training Process](#contents) + +### Training + +``` +python train.py > train.log 2>&1 & +``` + +The python command above will run in the background, you can view the results through the file `train.log`. + +After training, you'll get some checkpoint files under the script folder by default. The loss value will be achieved as follows: + +``` +# grep "loss is " train.log +epoch: 1 step: 390, loss is 1.4842823 +epcoh: 2 step: 390, loss is 1.0897788 +... +``` + +The model checkpoint will be saved in the current directory. + +### Distributed Training + +``` +sh scripts/run_train.sh rank_table.json +``` + +The above shell script will run distribute training in the background. You can view the results through the file `train_parallel[X]/log`. The loss value will be achieved as follows: + +``` +# grep "result: " train_parallel*/log +train_parallel0/log:epoch: 1 step: 48, loss is 1.4302931 +train_parallel0/log:epcoh: 2 step: 48, loss is 1.4023874 +... +train_parallel1/log:epoch: 1 step: 48, loss is 1.3458025 +train_parallel1/log:epcoh: 2 step: 48, loss is 1.3729336 +... +... +``` + + +## [Evaluation Process](#contents) + +### Evaluation + +Before running the command below, please check the checkpoint path used for evaluation. Please set the checkpoint path to be the absolute full path, e.g., "username/googlenet/train_googlenet_cifar10-125_390.ckpt". + +``` +python eval.py > eval.log 2>&1 & +OR +sh scripts/run_eval.sh +``` + +The above python command will run in the background. You can view the results through the file "eval.log". 
The accuracy of the test dataset will be as follows: + +``` +# grep "accuracy: " eval.log +accuracy: {'acc': 0.934} +``` + +Note that for evaluation after distributed training, please set the checkpoint_path to be the last saved checkpoint file such as "username/googlenet/train_parallel0/train_googlenet_cifar10-125_48.ckpt". The accuracy of the test dataset will be as follows: + +``` +# grep "accuracy: " dist.eval.log +accuracy: {'acc': 0.9217} +``` + + +# [Model Description](#contents) +## [Performance](#contents) + +### Evaluation Performance + +| Parameters | GoogleNet | +| -------------------------- | ----------------------------------------------------------- | +| Model Version | Inception V1 | +| Resource | Ascend 910 ;CPU 2.60GHz,56cores;Memory,314G | +| uploaded Date | 06/09/2020 (month/day/year) | +| MindSpore Version | 0.3.0-alpha | +| Dataset | CIFAR-10 | +| Training Parameters | epoch=125, steps=390, batch_size = 128, lr=0.1 | +| Optimizer | SGD | +| Loss Function | Softmax Cross Entropy | +| outputs | probability | +| Loss | 0.0016 | +| Speed | 1pc: 79 ms/step; 8pcs: 82 ms/step | +| Total time | 1pc: 63.85 mins; 8pcs: 11.28 mins | +| Parameters (M) | 6.8 | +| Checkpoint for Fine tuning | 43.07M (.ckpt file) | +| Model for inference | 21.50M (.onnx file), 21.60M(.geir file) | +| Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/googlenet | + + +### Inference Performance + +| Parameters | GoogleNet | +| ------------------- | --------------------------- | +| Model Version | Inception V1 | +| Resource | Ascend 910 | +| Uploaded Date | 06/09/2020 (month/day/year) | +| MindSpore Version | 0.3.0-alpha | +| Dataset | CIFAR-10, 10,000 images | +| batch_size | 128 | +| outputs | probability | +| Accuracy | 1pc: 93.4%; 8pcs: 92.17% | +| Model for inference | 21.50M (.onnx file) | + +## [How to use](#contents) +### Inference + +If you need to use the trained model to perform inference on multiple hardware platforms, such as GPU, Ascend 910 or 
Ascend 310, you can refer to this [Link](https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/network_migration.html). Following the steps below, this is a simple example: + +``` +# Load unseen dataset for inference +dataset = dataset.create_dataset(cfg.data_path, 1, False) + +# Define model +net = GoogleNet(num_classes=cfg.num_classes) +opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, + cfg.momentum, weight_decay=cfg.weight_decay) +loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', + is_grad=False) +model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) + +# Load pre-trained model +param_dict = load_checkpoint(cfg.checkpoint_path) +load_param_into_net(net, param_dict) +net.set_train(False) + +# Make predictions on the unseen dataset +acc = model.eval(dataset) +print("accuracy: ", acc) +``` + +### Continue Training on the Pretrained Model + +``` +# Load dataset +dataset = create_dataset(cfg.data_path, cfg.epoch_size) +batch_num = dataset.get_dataset_size() + +# Define model +net = GoogleNet(num_classes=cfg.num_classes) +# Continue training if set pre_trained to be True +if cfg.pre_trained: + param_dict = load_checkpoint(cfg.checkpoint_path) + load_param_into_net(net, param_dict) +lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, + steps_per_epoch=batch_num) +opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), + Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay) +loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) +model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}, + amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None) + +# Set callbacks +config_ck = CheckpointConfig(save_checkpoint_steps=batch_num * 5, + keep_checkpoint_max=cfg.keep_checkpoint_max) +time_cb = TimeMonitor(data_size=batch_num) +ckpoint_cb = ModelCheckpoint(prefix="train_googlenet_cifar10", directory="./", + config=config_ck) 
+loss_cb = LossMonitor() + +# Start training +model.train(cfg.epoch_size, dataset, callbacks=[time_cb, ckpoint_cb, loss_cb]) +print("train success") +``` + +### Transfer Learning +To be added. + + +# [Description of Random Situation](#contents) + +In dataset.py, we set the seed inside “create_dataset" function. We also use random seed in train.py. + + +# [ModelZoo Homepage](#contents) + Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo). diff --git a/example/googlenet_cifar10/eval.py b/model_zoo/googlenet/eval.py similarity index 53% rename from example/googlenet_cifar10/eval.py rename to model_zoo/googlenet/eval.py index cc09539aa7..fc469879e7 100644 --- a/example/googlenet_cifar10/eval.py +++ b/model_zoo/googlenet/eval.py @@ -14,42 +14,32 @@ # ============================================================================ """ ##############test googlenet example on cifar10################# -python eval.py --data_path=$DATA_HOME --device_id=$DEVICE_ID +python eval.py """ -import argparse - import mindspore.nn as nn from mindspore import context -from mindspore.model_zoo.googlenet import GooGLeNet from mindspore.nn.optim.momentum import Momentum from mindspore.train.model import Model from mindspore.train.serialization import load_checkpoint, load_param_into_net -import dataset -from config import cifar_cfg as cfg +from src.config import cifar_cfg as cfg +from src.dataset import create_dataset +from src.googlenet import GoogleNet if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Cifar10 classification') - parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'], - help='device where the code will be implemented. 
(Default: Ascend)') - parser.add_argument('--data_path', type=str, default='./cifar', help='path where the dataset is saved') - parser.add_argument('--checkpoint_path', type=str, default=None, help='checkpoint file path.') - parser.add_argument('--device_id', type=int, default=None, help='device id of GPU or Ascend. (Default: None)') - args_opt = parser.parse_args() - - context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) - context.set_context(device_id=args_opt.device_id) + context.set_context(mode=context.GRAPH_MODE, device_target=cfg.device_target) + context.set_context(device_id=cfg.device_id) - net = GooGLeNet(num_classes=cfg.num_classes) + net = GoogleNet(num_classes=cfg.num_classes) opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, cfg.momentum, weight_decay=cfg.weight_decay) loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) - param_dict = load_checkpoint(args_opt.checkpoint_path) + param_dict = load_checkpoint(cfg.checkpoint_path) load_param_into_net(net, param_dict) net.set_train(False) - dataset = dataset.create_dataset(args_opt.data_path, 1, False) - res = model.eval(dataset) - print("result: ", res) + dataset = create_dataset(cfg.data_path, 1, False) + acc = model.eval(dataset) + print("accuracy: ", acc) diff --git a/model_zoo/googlenet/export.py b/model_zoo/googlenet/export.py new file mode 100644 index 0000000000..d1a6de9b8d --- /dev/null +++ b/model_zoo/googlenet/export.py @@ -0,0 +1,36 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +##############export checkpoint file into geir and onnx models################# +python export.py +""" +import numpy as np + +import mindspore as ms +from mindspore import Tensor +from mindspore.train.serialization import load_checkpoint, load_param_into_net, export + +from src.config import cifar_cfg as cfg +from src.googlenet import GoogleNet + + +if __name__ == '__main__': + net = GoogleNet(num_classes=cfg.num_classes) + param_dict = load_checkpoint(cfg.checkpoint_path) + load_param_into_net(net, param_dict) + + input_arr = Tensor(np.random.uniform(0.0, 1.0, size=[1, 3, 224, 224]), ms.float32) + export(net, input_arr, file_name=cfg.onnx_filename, file_format="ONNX") + export(net, input_arr, file_name=cfg.geir_filename, file_format="GEIR") diff --git a/model_zoo/googlenet/scripts/run_eval.sh b/model_zoo/googlenet/scripts/run_eval.sh new file mode 100644 index 0000000000..4aad02a4af --- /dev/null +++ b/model_zoo/googlenet/scripts/run_eval.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +ulimit -u unlimited + +BASEPATH=$(cd "`dirname $0`" || exit; pwd) +export PYTHONPATH=${BASEPATH}:$PYTHONPATH +export DEVICE_ID=0 + +python ${BASEPATH}/../eval.py > ./eval.log 2>&1 & diff --git a/example/googlenet_cifar10/run_distribute_train.sh b/model_zoo/googlenet/scripts/run_train.sh old mode 100755 new mode 100644 similarity index 73% rename from example/googlenet_cifar10/run_distribute_train.sh rename to model_zoo/googlenet/scripts/run_train.sh index c9b8dfc48f..c21c2f04b6 --- a/example/googlenet_cifar10/run_distribute_train.sh +++ b/model_zoo/googlenet/scripts/run_train.sh @@ -14,28 +14,24 @@ # limitations under the License. # ============================================================================ -if [ $# != 2 ] -then - echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]" +if [ $# != 1 ] +then + echo "Usage: sh run_train.sh [MINDSPORE_HCCL_CONFIG_PATH]" exit 1 fi if [ ! -f $1 ] -then +then echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" exit 1 -fi - -if [ ! 
-d $2 ] -then - echo "error: DATA_PATH=$2 is not a directory" -exit 1 -fi +fi ulimit -u unlimited export DEVICE_NUM=8 export RANK_SIZE=8 -export MINDSPORE_HCCL_CONFIG_PATH=$1 +MINDSPORE_HCCL_CONFIG_PATH=$(realpath $1) +export MINDSPORE_HCCL_CONFIG_PATH +echo "MINDSPORE_HCCL_CONFIG_PATH=${MINDSPORE_HCCL_CONFIG_PATH}" for((i=0; i<${DEVICE_NUM}; i++)) do @@ -43,11 +39,11 @@ do export RANK_ID=$i rm -rf ./train_parallel$i mkdir ./train_parallel$i - cp *.py ./train_parallel$i - cp *.sh ./train_parallel$i - cd ./train_parallel$i || exit + cp -r ./src ./train_parallel$i + cp ./train.py ./train_parallel$i echo "start training for rank $RANK_ID, device $DEVICE_ID" + cd ./train_parallel$i ||exit env > env.log - python train.py --data_path=$2 --device_id=$i &> log & + python train.py --device_id=$i > log 2>&1 & cd .. done diff --git a/example/googlenet_cifar10/config.py b/model_zoo/googlenet/src/config.py similarity index 78% rename from example/googlenet_cifar10/config.py rename to model_zoo/googlenet/src/config.py index 4b134f68da..5f803ad325 100644 --- a/example/googlenet_cifar10/config.py +++ b/model_zoo/googlenet/src/config.py @@ -18,6 +18,7 @@ network config setting, will be used in main.py from easydict import EasyDict as edict cifar_cfg = edict({ + 'pre_trained': False, 'num_classes': 10, 'lr_init': 0.1, 'batch_size': 128, @@ -27,5 +28,11 @@ cifar_cfg = edict({ 'buffer_size': 10, 'image_height': 224, 'image_width': 224, - 'keep_checkpoint_max': 10 + 'data_path': './cifar10', + 'device_target': 'Ascend', + 'device_id': 4, + 'keep_checkpoint_max': 10, + 'checkpoint_path': './train_googlenet_cifar10-125_390.ckpt', + 'onnx_filename': 'googlenet.onnx', + 'geir_filename': 'googlenet.geir' }) diff --git a/example/googlenet_cifar10/dataset.py b/model_zoo/googlenet/src/dataset.py similarity index 98% rename from example/googlenet_cifar10/dataset.py rename to model_zoo/googlenet/src/dataset.py index e7b6abfb56..a1cbc2cdab 100644 --- a/example/googlenet_cifar10/dataset.py +++ 
b/model_zoo/googlenet/src/dataset.py @@ -21,7 +21,7 @@ import mindspore.common.dtype as mstype import mindspore.dataset as ds import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.vision.c_transforms as vision -from config import cifar_cfg as cfg +from src.config import cifar_cfg as cfg def create_dataset(data_home, repeat_num=1, training=True): diff --git a/mindspore/model_zoo/googlenet.py b/model_zoo/googlenet/src/googlenet.py similarity index 97% rename from mindspore/model_zoo/googlenet.py rename to model_zoo/googlenet/src/googlenet.py index 4a572828de..701b3aeb5a 100644 --- a/mindspore/model_zoo/googlenet.py +++ b/model_zoo/googlenet/src/googlenet.py @@ -40,8 +40,7 @@ class Conv2dBlock(nn.Cell): def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, pad_mode="same"): super(Conv2dBlock, self).__init__() self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, - padding=padding, pad_mode=pad_mode, weight_init=weight_variable(), - bias_init=False) + padding=padding, pad_mode=pad_mode, weight_init=weight_variable()) self.bn = nn.BatchNorm2d(out_channels, eps=0.001) self.relu = nn.ReLU() @@ -78,13 +77,13 @@ class Inception(nn.Cell): return self.concat((branch1, branch2, branch3, branch4)) -class GooGLeNet(nn.Cell): +class GoogleNet(nn.Cell): """ Googlenet architecture """ def __init__(self, num_classes): - super(GooGLeNet, self).__init__() + super(GoogleNet, self).__init__() self.conv1 = Conv2dBlock(3, 64, kernel_size=7, stride=2, padding=0) self.maxpool1 = P.MaxPoolWithArgmax(ksize=3, strides=2, padding="same") diff --git a/example/googlenet_cifar10/train.py b/model_zoo/googlenet/train.py similarity index 82% rename from example/googlenet_cifar10/train.py rename to model_zoo/googlenet/train.py index bee0297bb3..0129176510 100644 --- a/example/googlenet_cifar10/train.py +++ b/model_zoo/googlenet/train.py @@ -14,7 +14,7 @@ # 
============================================================================ """ #################train googlent example on cifar10######################## -python train.py --data_path=$DATA_HOME --device_id=$DEVICE_ID +python train.py """ import argparse import os @@ -26,14 +26,14 @@ import mindspore.nn as nn from mindspore import Tensor from mindspore import context from mindspore.communication.management import init -from mindspore.model_zoo.googlenet import GooGLeNet from mindspore.nn.optim.momentum import Momentum from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor from mindspore.train.model import Model, ParallelMode +from mindspore.train.serialization import load_checkpoint, load_param_into_net - -from dataset import create_dataset -from config import cifar_cfg as cfg +from src.config import cifar_cfg as cfg +from src.dataset import create_dataset +from src.googlenet import GoogleNet random.seed(1) np.random.seed(1) @@ -62,14 +62,14 @@ def lr_steps(global_step, lr_max=None, total_epochs=None, steps_per_epoch=None): if __name__ == '__main__': parser = argparse.ArgumentParser(description='Cifar10 classification') - parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'], - help='device where the code will be implemented. (Default: Ascend)') - parser.add_argument('--data_path', type=str, default='./cifar', help='path where the dataset is saved') parser.add_argument('--device_id', type=int, default=None, help='device id of GPU or Ascend. 
(Default: None)') args_opt = parser.parse_args() - context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) - context.set_context(device_id=args_opt.device_id) + context.set_context(mode=context.GRAPH_MODE, device_target=cfg.device_target) + if args_opt.device_id is not None: + context.set_context(device_id=args_opt.device_id) + else: + context.set_context(device_id=cfg.device_id) device_num = int(os.environ.get("DEVICE_NUM", 1)) if device_num > 1: @@ -78,10 +78,14 @@ if __name__ == '__main__': mirror_mean=True) init() - dataset = create_dataset(args_opt.data_path, cfg.epoch_size) + dataset = create_dataset(cfg.data_path, cfg.epoch_size) batch_num = dataset.get_dataset_size() - net = GooGLeNet(num_classes=cfg.num_classes) + net = GoogleNet(num_classes=cfg.num_classes) + # Continue training if set pre_trained to be True + if cfg.pre_trained: + param_dict = load_checkpoint(cfg.checkpoint_path) + load_param_into_net(net, param_dict) lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num) opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay) diff --git a/example/lenet_mnist/README.md b/model_zoo/lenet/README.md similarity index 78% rename from example/lenet_mnist/README.md rename to model_zoo/lenet/README.md index 88c8769e05..579c9894b2 100644 --- a/example/lenet_mnist/README.md +++ b/model_zoo/lenet/README.md @@ -2,7 +2,7 @@ ## Description -Training LeNet with MNIST dataset in MindSpore. +Training LeNet with dataset in MindSpore. This is the simple and basic tutorial for constructing a network in MindSpore. @@ -10,10 +10,10 @@ This is the simple and basic tutorial for constructing a network in MindSpore. - Install [MindSpore](https://www.mindspore.cn/install/en). 
-- Download the MNIST dataset, the directory structure is as follows: +- Download the dataset, the directory structure is as follows: ``` -└─MNIST_Data +└─Data ├─test │ t10k-images.idx3-ubyte │ t10k-labels.idx1-ubyte @@ -27,7 +27,7 @@ This is the simple and basic tutorial for constructing a network in MindSpore. ```python # train LeNet, hyperparameter setting in config.py -python train.py --data_path MNIST_Data +python train.py --data_path Data ``` You will get the loss value of each step as following: @@ -43,8 +43,8 @@ epoch: 1 step: 1741, loss is 0.05018193 Then, evaluate LeNet according to network model ```python -# evaluate LeNet, after 1 epoch training, the accuracy is up to 96.5% -python eval.py --data_path MNIST_Data --mode test --ckpt_path checkpoint_lenet-1_1875.ckpt +# evaluate LeNet +python eval.py --data_path Data --ckpt_path checkpoint_lenet-1_1875.ckpt ``` ## Note diff --git a/example/lenet_mnist/eval.py b/model_zoo/lenet/eval.py similarity index 89% rename from example/lenet_mnist/eval.py rename to model_zoo/lenet/eval.py index 8317785a66..a9842f4426 100644 --- a/example/lenet_mnist/eval.py +++ b/model_zoo/lenet/eval.py @@ -20,10 +20,10 @@ python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt import os import argparse -from dataset import create_dataset -from config import mnist_cfg as cfg +from src.dataset import create_dataset +from src.config import mnist_cfg as cfg +from src.lenet import LeNet5 import mindspore.nn as nn -from mindspore.model_zoo.lenet import LeNet5 from mindspore import context from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.train.callback import ModelCheckpoint, CheckpointConfig @@ -32,10 +32,10 @@ from mindspore.nn.metrics import Accuracy if __name__ == "__main__": - parser = argparse.ArgumentParser(description='MindSpore MNIST Example') + parser = argparse.ArgumentParser(description='MindSpore Lenet Example') parser.add_argument('--device_target', type=str, 
default="Ascend", choices=['Ascend', 'GPU', 'CPU'], help='device where the code will be implemented (default: Ascend)') - parser.add_argument('--data_path', type=str, default="./MNIST_Data", + parser.add_argument('--data_path', type=str, default="./Data", help='path where the dataset is saved') parser.add_argument('--ckpt_path', type=str, default="", help='if mode is test, must provide\ path where the trained ckpt file') @@ -61,4 +61,4 @@ if __name__ == "__main__": cfg.batch_size, 1) acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode) - print("============== Accuracy:{} ==============".format(acc)) + print("============== {} ==============".format(acc)) diff --git a/model_zoo/lenet/src/__init__.py b/model_zoo/lenet/src/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/example/lenet_mnist/config.py b/model_zoo/lenet/src/config.py similarity index 100% rename from example/lenet_mnist/config.py rename to model_zoo/lenet/src/config.py diff --git a/example/lenet_mnist/dataset.py b/model_zoo/lenet/src/dataset.py similarity index 100% rename from example/lenet_mnist/dataset.py rename to model_zoo/lenet/src/dataset.py diff --git a/mindspore/model_zoo/lenet.py b/model_zoo/lenet/src/lenet.py similarity index 95% rename from mindspore/model_zoo/lenet.py rename to model_zoo/lenet/src/lenet.py index 6e39c439bf..3864315dba 100644 --- a/mindspore/model_zoo/lenet.py +++ b/model_zoo/lenet/src/lenet.py @@ -50,11 +50,10 @@ class LeNet5(nn.Cell): >>> LeNet(num_class=10) """ - def __init__(self, num_class=10): + def __init__(self, num_class=10, channel=1): super(LeNet5, self).__init__() self.num_class = num_class - self.batch_size = 32 - self.conv1 = conv(1, 6, 5) + self.conv1 = conv(channel, 6, 5) self.conv2 = conv(6, 16, 5) self.fc1 = fc_with_initialize(16 * 5 * 5, 120) self.fc2 = fc_with_initialize(120, 84) diff --git a/example/lenet_mnist/train.py b/model_zoo/lenet/train.py similarity index 86% rename from example/lenet_mnist/train.py rename 
to model_zoo/lenet/train.py index 3186f5fca7..740b6e8ca3 100644 --- a/example/lenet_mnist/train.py +++ b/model_zoo/lenet/train.py @@ -20,10 +20,10 @@ python train.py --data_path /YourDataPath import os import argparse -from config import mnist_cfg as cfg -from dataset import create_dataset +from src.config import mnist_cfg as cfg +from src.dataset import create_dataset +from src.lenet import LeNet5 import mindspore.nn as nn -from mindspore.model_zoo.lenet import LeNet5 from mindspore import context from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor from mindspore.train import Model @@ -31,15 +31,18 @@ from mindspore.nn.metrics import Accuracy if __name__ == "__main__": - parser = argparse.ArgumentParser(description='MindSpore MNIST Example') + parser = argparse.ArgumentParser(description='MindSpore Lenet Example') parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU', 'CPU'], help='device where the code will be implemented (default: Ascend)') - parser.add_argument('--data_path', type=str, default="./MNIST_Data", + parser.add_argument('--data_path', type=str, default="./Data", help='path where the dataset is saved') - parser.add_argument('--dataset_sink_mode', type=bool, default=False, help='dataset_sink_mode is False or True') + parser.add_argument('--dataset_sink_mode', type=bool, default=True, help='dataset_sink_mode is False or True') args = parser.parse_args() + if args.device_target == "CPU": + args.dataset_sink_mode = False + context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) ds_train = create_dataset(os.path.join(args.data_path, "train"), cfg.batch_size, diff --git a/model_zoo/lenet_quant/README.md b/model_zoo/lenet_quant/README.md new file mode 100644 index 0000000000..26cdcc3ecd --- /dev/null +++ b/model_zoo/lenet_quant/README.md @@ -0,0 +1,248 @@ +# LeNet Quantization Aware Training + +## Description + +Training LeNet with MNIST dataset in 
MindSpore with quantization aware training. + +This is the simple and basic tutorial for constructing a network in MindSpore with quantization aware. + +In this tutorial, you will: + +1. Train a MindSpore fusion model for MNIST from scratch using `nn.Conv2dBnAct` and `nn.DenseBnAct`. +2. Fine tune the fusion model by applying the quantization aware training auto network converter API `convert_quant_network`, after the network convergence then export a quantization aware model checkpoint file. +3. Use the quantization aware model to create an actually quantized model for the Ascend inference backend. +4. See the persistence of accuracy in inference backend and a 4x smaller model. To see the latency benefits on mobile, try out the Ascend inference backend examples. + + +## Train fusion model + +### Install + +Install MindSpore base on the ascend device and GPU device from [MindSpore](https://www.mindspore.cn/install/en). + + +```python +pip uninstall -y mindspore-ascend +pip uninstall -y mindspore-gpu +pip install mindspore-ascend.whl +``` + +Then you will get the following display + + +```bash +>>> Found existing installation: mindspore-ascend +>>> Uninstalling mindspore-ascend: +>>> Successfully uninstalled mindspore-ascend. +``` + +### Prepare Dataset + +Download the MNIST dataset, the directory structure is as follows: + +``` +└─MNIST_Data + ├─test + │ t10k-images.idx3-ubyte + │ t10k-labels.idx1-ubyte + └─train + train-images.idx3-ubyte + train-labels.idx1-ubyte +``` + +### Define fusion model + +Define a MindSpore fusion model using `nn.Conv2dBnAct` and `nn.DenseBnAct`. 
+ +```Python +class LeNet5(nn.Cell): + """ + Define Lenet fusion model + """ + + def __init__(self, num_class=10, channel=1): + super(LeNet5, self).__init__() + self.num_class = num_class + + # change `nn.Conv2d` to `nn.Conv2dBnAct` + self.conv1 = nn.Conv2dBnAct(channel, 6, 5, activation='relu') + self.conv2 = nn.Conv2dBnAct(6, 16, 5, activation='relu') + # change `nn.Dense` to `nn.DenseBnAct` + self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') + self.fc2 = nn.DenseBnAct(120, 84, activation='relu') + self.fc3 = nn.DenseBnAct(84, self.num_class) + + self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) + self.flatten = nn.Flatten() + + def construct(self, x): + x = self.conv1(x) + x = self.max_pool2d(x) + x = self.conv2(x) + x = self.max_pool2d(x) + x = self.flatten(x) + x = self.fc1(x) + x = self.fc2(x) + x = self.fc3(x) + return x +``` + +Get the MNIST from scratch dataset. + +```Python +ds_train = create_dataset(os.path.join(args.data_path, "train"), + cfg.batch_size, cfg.epoch_size) +step_size = ds_train.get_dataset_size() +``` + +### Train model + +Load the Lenet fusion network, training network using loss `nn.SoftmaxCrossEntropyWithLogits` with optimization `nn.Momentum`. + +```Python +# Define the network +network = LeNet5Fusion(cfg.num_classes) +# Define the loss +net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") +# Define optimization +net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) + +# Define model using loss and optimization. +time_cb = TimeMonitor(data_size=ds_train.get_dataset_size()) +config_ck = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size, + keep_checkpoint_max=cfg.keep_checkpoint_max) +ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck) +model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) +``` + +Now we can start training. 
+ +```Python +model.train(cfg['epoch_size'], ds_train, + callbacks=[time_cb, ckpoint_cb, LossMonitor()], + dataset_sink_mode=args.dataset_sink_mode) +``` + +After training, we will get the loss value of each step as follows: + +```bash +>>> Epoch: [ 1/ 10] step: [ 1/ 900], loss: [2.3040/2.5234], time: [1.300234] +>>> ... +>>> Epoch: [ 10/ 10] step: [887/ 900], loss: [0.0113/0.0223], time: [1.300234] +>>> Epoch: [ 10/ 10] step: [888/ 900], loss: [0.0334/0.0223], time: [1.300234] +>>> Epoch: [ 10/ 10] step: [889/ 900], loss: [0.0233/0.0223], time: [1.300234] +``` + +Also, you can just run this command instead. + +```python +python train.py --data_path MNIST_Data --device_target Ascend +``` + +### Evaluate fusion model + +After training completes, we can get the fusion model checkpoint file, such as `checkpoint_lenet.ckpt`. Meanwhile, we can evaluate this fusion model. + +```python +python eval.py --data_path MNIST_Data --device_target Ascend --ckpt_path checkpoint_lenet.ckpt +``` + +The top-1 accuracy will be displayed on the shell. + +```bash +>>> Accuracy: 98.53. +``` + +## Train quantization aware model + +### Define quantization aware model + +You will apply quantization aware training to the whole model, and layers of "fake quant op" are inserted into the whole model. All layers are now prepared with "fake quant op". + +Note that the resulting model is quantization aware but not quantized (e.g. the weights are float32 instead of int8). + +```python +# define fusion network +network = LeNet5Fusion(cfg.num_classes) + +# load quantization aware network checkpoint +param_dict = load_checkpoint(args.ckpt_path) +load_param_into_net(network, param_dict) + +# convert fusion network to quantization aware network +network = quant.convert_quant_network(network) +``` + +### Load checkpoint + +After converting to the quantization aware network, we can load the checkpoint file.
+ +```python +config_ck = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size, + keep_checkpoint_max=cfg.keep_checkpoint_max) +ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck) +model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) +``` + +### Train quantization aware model + +Also, you can just run this command instead. + +```python +python train_quant.py --data_path MNIST_Data --device_target Ascend --ckpt_path checkpoint_lenet.ckpt +``` + +After training, we will get the loss value of each step as follows: + +```bash +>>> Epoch: [ 1/ 10] step: [ 1/ 900], loss: [2.3040/2.5234], time: [1.300234] +>>> ... +>>> Epoch: [ 10/ 10] step: [887/ 900], loss: [0.0113/0.0223], time: [1.300234] +>>> Epoch: [ 10/ 10] step: [888/ 900], loss: [0.0334/0.0223], time: [1.300234] +>>> Epoch: [ 10/ 10] step: [889/ 900], loss: [0.0233/0.0223], time: [1.300234] +``` + +### Evaluate quantization aware model + +The evaluation procedure for the quantization aware model is different from the normal one. Because the checkpoint was created by the quantization aware model, we need to load the fusion model checkpoint before converting the fusion model to the quantization aware model. + +```python +# define fusion network +network = LeNet5Fusion(cfg.num_classes) + +# load quantization aware network checkpoint +param_dict = load_checkpoint(args.ckpt_path) +load_param_into_net(network, param_dict) + +# convert fusion network to quantization aware network +network = quant.convert_quant_network(network) +``` + +Also, you can just run this command instead. + +```python +python eval_quant.py --data_path MNIST_Data --device_target Ascend --ckpt_path checkpoint_lenet.ckpt +``` + +The top-1 accuracy will be displayed on the shell. + +```bash +>>> Accuracy: 98.54.
+``` + +## Note + +Here are some optional parameters: + +```bash +--device_target {Ascend,GPU,CPU} + device where the code will be implemented (default: Ascend) +--data_path DATA_PATH + path where the dataset is saved +--dataset_sink_mode DATASET_SINK_MODE + dataset_sink_mode is False or True +``` + +You can run ```python train.py -h``` or ```python eval.py -h``` to get more information. + +We encourage you to try this new capability, which can be particularly important for deployment in resource-constrained environments. \ No newline at end of file diff --git a/model_zoo/lenet_quant/eval.py b/model_zoo/lenet_quant/eval.py new file mode 100644 index 0000000000..c1e3a5fd8c --- /dev/null +++ b/model_zoo/lenet_quant/eval.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +######################## eval lenet example ######################## +eval lenet according to model file: +python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt +""" + +import os +import argparse +import mindspore.nn as nn +from mindspore import context +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig +from mindspore.train import Model +from mindspore.nn.metrics import Accuracy +from src.dataset import create_dataset +from src.config import mnist_cfg as cfg +from src.lenet_fusion import LeNet5 as LeNet5Fusion + +parser = argparse.ArgumentParser(description='MindSpore MNIST Example') +parser.add_argument('--device_target', type=str, default="Ascend", + choices=['Ascend', 'GPU', 'CPU'], + help='device where the code will be implemented (default: Ascend)') +parser.add_argument('--data_path', type=str, default="./MNIST_Data", + help='path where the dataset is saved') +parser.add_argument('--ckpt_path', type=str, default="", + help='if mode is test, must provide path where the trained ckpt file') +parser.add_argument('--dataset_sink_mode', type=bool, default=True, + help='dataset_sink_mode is False or True') +args = parser.parse_args() + +if __name__ == "__main__": + context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) + ds_eval = create_dataset(os.path.join(args.data_path, "test"), cfg.batch_size, 1) + step_size = ds_eval.get_dataset_size() + + network = LeNet5Fusion(cfg.num_classes) + net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + repeat_size = cfg.epoch_size + net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) + config_ck = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size, + keep_checkpoint_max=cfg.keep_checkpoint_max) + ckpoint_cb = 
ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck) + model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) + + param_dict = load_checkpoint(args.ckpt_path) + load_param_into_net(network, param_dict) + + print("============== Starting Testing ==============") + acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode) + print("============== {} ==============".format(acc)) diff --git a/model_zoo/lenet_quant/eval_quant.py b/model_zoo/lenet_quant/eval_quant.py new file mode 100644 index 0000000000..492f6d36b2 --- /dev/null +++ b/model_zoo/lenet_quant/eval_quant.py @@ -0,0 +1,69 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +######################## eval lenet example ######################## +eval lenet according to model file: +python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt +""" + +import os +import argparse +import mindspore.nn as nn +from mindspore import context +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig +from mindspore.train import Model +from mindspore.nn.metrics import Accuracy +from mindspore.train.quant import quant +from src.dataset import create_dataset +from src.config import mnist_cfg as cfg +from src.lenet_fusion import LeNet5 as LeNet5Fusion + +parser = argparse.ArgumentParser(description='MindSpore MNIST Example') +parser.add_argument('--device_target', type=str, default="Ascend", + choices=['Ascend', 'GPU', 'CPU'], + help='device where the code will be implemented (default: Ascend)') +parser.add_argument('--data_path', type=str, default="./MNIST_Data", + help='path where the dataset is saved') +parser.add_argument('--ckpt_path', type=str, default="", + help='if mode is test, must provide path where the trained ckpt file') +parser.add_argument('--dataset_sink_mode', type=bool, default=True, + help='dataset_sink_mode is False or True') +args = parser.parse_args() + +if __name__ == "__main__": + context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) + ds_eval = create_dataset(os.path.join(args.data_path, "test"), cfg.batch_size, 1) + step_size = ds_eval.get_dataset_size() + + # define funsion network + network = LeNet5Fusion(cfg.num_classes) + # convert funsion netwrok to quantization aware network + network = quant.convert_quant_network(network, quant_delay=0, bn_fold=False, freeze_bn=10000) + + net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_opt = nn.Momentum(network.trainable_params(), 
cfg.lr, cfg.momentum) + config_ck = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size, + keep_checkpoint_max=cfg.keep_checkpoint_max) + ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck) + model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) + + # load quantization aware network checkpoint + param_dict = load_checkpoint(args.ckpt_path) + load_param_into_net(network, param_dict) + + print("============== Starting Testing ==============") + acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode) + print("============== {} ==============".format(acc)) diff --git a/model_zoo/lenet_quant/src/config.py b/model_zoo/lenet_quant/src/config.py new file mode 100644 index 0000000000..ab4b2e4084 --- /dev/null +++ b/model_zoo/lenet_quant/src/config.py @@ -0,0 +1,31 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +network config setting, will be used in train.py +""" + +from easydict import EasyDict as edict + +mnist_cfg = edict({ + 'num_classes': 10, + 'lr': 0.01, + 'momentum': 0.9, + 'epoch_size': 10, + 'batch_size': 64, + 'buffer_size': 1000, + 'image_height': 32, + 'image_width': 32, + 'keep_checkpoint_max': 10, +}) diff --git a/model_zoo/lenet_quant/src/dataset.py b/model_zoo/lenet_quant/src/dataset.py new file mode 100644 index 0000000000..cef6973483 --- /dev/null +++ b/model_zoo/lenet_quant/src/dataset.py @@ -0,0 +1,60 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +Produce the dataset +""" + +import mindspore.dataset as ds +import mindspore.dataset.transforms.vision.c_transforms as CV +import mindspore.dataset.transforms.c_transforms as C +from mindspore.dataset.transforms.vision import Inter +from mindspore.common import dtype as mstype + + +def create_dataset(data_path, batch_size=32, repeat_size=1, + num_parallel_workers=1): + """ + create dataset for train or test + """ + # define dataset + mnist_ds = ds.MnistDataset(data_path) + + resize_height, resize_width = 32, 32 + rescale = 1.0 / 255.0 + shift = 0.0 + rescale_nml = 1 / 0.3081 + shift_nml = -1 * 0.1307 / 0.3081 + + # define map operations + resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode + rescale_nml_op = CV.Rescale(rescale_nml, shift_nml) + rescale_op = CV.Rescale(rescale, shift) + hwc2chw_op = CV.HWC2CHW() + type_cast_op = C.TypeCast(mstype.int32) + + # apply map operations on images + mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) + mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) + + # apply DatasetOps + buffer_size = 10000 + mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) # 10000 as in LeNet train script + mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True) + mnist_ds = mnist_ds.repeat(repeat_size) + + return mnist_ds diff --git a/model_zoo/lenet_quant/src/lenet.py b/model_zoo/lenet_quant/src/lenet.py new file mode 100644 index 
0000000000..026f1e8df5 --- /dev/null +++ b/model_zoo/lenet_quant/src/lenet.py @@ -0,0 +1,60 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""LeNet.""" +import mindspore.nn as nn + + +class LeNet5(nn.Cell): + """ + Lenet network + + Args: + num_class (int): Num classes. Default: 10. + + Returns: + Tensor, output tensor + Examples: + >>> LeNet(num_class=10) + + """ + + def __init__(self, num_class=10, channel=1): + super(LeNet5, self).__init__() + self.num_class = num_class + + self.conv1 = nn.Conv2d(channel, 6, 5) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Dense(16 * 5 * 5, 120) + self.fc2 = nn.Dense(120, 84) + self.fc3 = nn.Dense(84, self.num_class) + + self.relu = nn.ReLU() + self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) + self.flatten = nn.Flatten() + + def construct(self, x): + x = self.conv1(x) + x = self.relu(x) + x = self.max_pool2d(x) + x = self.conv2(x) + x = self.relu(x) + x = self.max_pool2d(x) + x = self.flatten(x) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.relu(x) + x = self.fc3(x) + return x diff --git a/model_zoo/lenet_quant/src/lenet_fusion.py b/model_zoo/lenet_quant/src/lenet_fusion.py new file mode 100644 index 0000000000..809276a482 --- /dev/null +++ b/model_zoo/lenet_quant/src/lenet_fusion.py @@ -0,0 +1,57 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the 
Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""LeNet.""" +import mindspore.nn as nn + + +class LeNet5(nn.Cell): + """ + Lenet network + + Args: + num_class (int): Num classes. Default: 10. + + Returns: + Tensor, output tensor + Examples: + >>> LeNet(num_class=10) + + """ + + def __init__(self, num_class=10, channel=1): + super(LeNet5, self).__init__() + self.num_class = num_class + + # change `nn.Conv2d` to `nn.Conv2dBnAct` + self.conv1 = nn.Conv2dBnAct(channel, 6, 5, activation='relu') + self.conv2 = nn.Conv2dBnAct(6, 16, 5, activation='relu') + # change `nn.Dense` to `nn.DenseBnAct` + self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') + self.fc2 = nn.DenseBnAct(120, 84, activation='relu') + self.fc3 = nn.DenseBnAct(84, self.num_class) + + self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) + self.flatten = nn.Flatten() + + def construct(self, x): + x = self.conv1(x) + x = self.max_pool2d(x) + x = self.conv2(x) + x = self.max_pool2d(x) + x = self.flatten(x) + x = self.fc1(x) + x = self.fc2(x) + x = self.fc3(x) + return x diff --git a/model_zoo/lenet_quant/train.py b/model_zoo/lenet_quant/train.py new file mode 100644 index 0000000000..6e7a46fb38 --- /dev/null +++ b/model_zoo/lenet_quant/train.py @@ -0,0 +1,61 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +######################## train lenet example ######################## +train lenet and get network model files(.ckpt) : +python train.py --data_path /YourDataPath +""" + +import os +import argparse +import mindspore.nn as nn +from mindspore import context +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor +from mindspore.train import Model +from mindspore.nn.metrics import Accuracy +from src.dataset import create_dataset +from src.config import mnist_cfg as cfg +from src.lenet_fusion import LeNet5 as LeNet5Fusion + +parser = argparse.ArgumentParser(description='MindSpore MNIST Example') +parser.add_argument('--device_target', type=str, default="Ascend", + choices=['Ascend', 'GPU', 'CPU'], + help='device where the code will be implemented (default: Ascend)') +parser.add_argument('--data_path', type=str, default="./MNIST_Data", + help='path where the dataset is saved') +parser.add_argument('--ckpt_path', type=str, default="", + help='if mode is test, must provide path where the trained ckpt file') +parser.add_argument('--dataset_sink_mode', type=bool, default=True, + help='dataset_sink_mode is False or True') +args = parser.parse_args() + +if __name__ == "__main__": + context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) + ds_train = create_dataset(os.path.join(args.data_path, "train"), cfg.batch_size, cfg.epoch_size) + step_size = ds_train.get_dataset_size() + + network = LeNet5Fusion(cfg.num_classes) + 
net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) + time_cb = TimeMonitor(data_size=ds_train.get_dataset_size()) + config_ck = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size, + keep_checkpoint_max=cfg.keep_checkpoint_max) + ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck) + model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) + + print("============== Starting Training ==============") + model.train(cfg['epoch_size'], ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()], + dataset_sink_mode=args.dataset_sink_mode) + print("============== End Training ==============") diff --git a/model_zoo/lenet_quant/train_quant.py b/model_zoo/lenet_quant/train_quant.py new file mode 100644 index 0000000000..04f595f322 --- /dev/null +++ b/model_zoo/lenet_quant/train_quant.py @@ -0,0 +1,70 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +######################## train lenet example ######################## +train lenet and get network model files(.ckpt) : +python train.py --data_path /YourDataPath +""" + +import os +import argparse +import mindspore.nn as nn +from mindspore import context +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor +from mindspore.train import Model +from mindspore.nn.metrics import Accuracy +from mindspore.train.quant import quant +from src.dataset import create_dataset +from src.config import mnist_cfg as cfg +from src.lenet_fusion import LeNet5 as LeNet5Fusion + +parser = argparse.ArgumentParser(description='MindSpore MNIST Example') +parser.add_argument('--device_target', type=str, default="Ascend", + choices=['Ascend', 'GPU', 'CPU'], + help='device where the code will be implemented (default: Ascend)') +parser.add_argument('--data_path', type=str, default="./MNIST_Data", + help='path where the dataset is saved') +parser.add_argument('--ckpt_path', type=str, default="", + help='if mode is test, must provide path where the trained ckpt file') +parser.add_argument('--dataset_sink_mode', type=bool, default=True, + help='dataset_sink_mode is False or True') +args = parser.parse_args() + +if __name__ == "__main__": + context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) + ds_train = create_dataset(os.path.join(args.data_path, "train"), cfg.batch_size, cfg.epoch_size) + step_size = ds_train.get_dataset_size() + + # define funsion network + network = LeNet5Fusion(cfg.num_classes) + # load quantization aware network checkpoint + param_dict = load_checkpoint(args.ckpt_path) + load_param_into_net(network, param_dict) + # convert funsion netwrok to quantization aware network + network = quant.convert_quant_network(network, quant_delay=0, 
bn_fold=False, freeze_bn=10000) + + net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) + time_cb = TimeMonitor(data_size=ds_train.get_dataset_size()) + config_ck = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size, + keep_checkpoint_max=cfg.keep_checkpoint_max) + ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck) + model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) + + print("============== Starting Training ==============") + model.train(cfg['epoch_size'], ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()], + dataset_sink_mode=args.dataset_sink_mode) + print("============== End Training ==============") diff --git a/example/lstm_aclImdb/README.md b/model_zoo/lstm/README.md similarity index 94% rename from example/lstm_aclImdb/README.md rename to model_zoo/lstm/README.md index 95ac30f3dc..00a3334968 100644 --- a/example/lstm_aclImdb/README.md +++ b/model_zoo/lstm/README.md @@ -72,7 +72,8 @@ result: {'acc': 0.83} ``` usage: train.py [--preprocess {true,false}] [--aclimdb_path ACLIMDB_PATH] [--glove_path GLOVE_PATH] [--preprocess_path PREPROCESS_PATH] - [--ckpt_path CKPT_PATH] [--device_target {GPU,CPU}] + [--ckpt_path CKPT_PATH] [--pre_trained PRE_TRAINED] + [--device_target {GPU,CPU}] parameters/options: --preprocess whether to preprocess data. @@ -80,6 +81,7 @@ parameters/options: --glove_path path where the GloVe is stored. --preprocess_path path where the pre-process data is stored. --ckpt_path the path to save the checkpoint file. + --pre_trained the pretrained checkpoint file path. --device_target the target device to run, support "GPU", "CPU". 
``` diff --git a/example/lstm_aclImdb/eval.py b/model_zoo/lstm/eval.py similarity index 93% rename from example/lstm_aclImdb/eval.py rename to model_zoo/lstm/eval.py index e76d40ac67..a9b81199c1 100644 --- a/example/lstm_aclImdb/eval.py +++ b/model_zoo/lstm/eval.py @@ -21,8 +21,8 @@ import os import numpy as np -from config import lstm_cfg as cfg -from dataset import create_dataset, convert_to_mindrecord +from src.config import lstm_cfg as cfg +from src.dataset import lstm_create_dataset, convert_to_mindrecord from mindspore import Tensor, nn, Model, context from mindspore.model_zoo.lstm import SentimentNet from mindspore.nn import Accuracy @@ -71,11 +71,11 @@ if __name__ == '__main__': model = Model(network, loss, opt, {'acc': Accuracy()}) print("============== Starting Testing ==============") - ds_eval = create_dataset(args.preprocess_path, cfg.batch_size, training=False) + ds_eval = lstm_create_dataset(args.preprocess_path, cfg.batch_size, training=False) param_dict = load_checkpoint(args.ckpt_path) load_param_into_net(network, param_dict) if args.device_target == "CPU": acc = model.eval(ds_eval, dataset_sink_mode=False) else: acc = model.eval(ds_eval) - print("============== Accuracy:{} ==============".format(acc)) + print("============== {} ==============".format(acc)) diff --git a/model_zoo/lstm/src/__init__.py b/model_zoo/lstm/src/__init__.py new file mode 100644 index 0000000000..301ef9dcb7 --- /dev/null +++ b/model_zoo/lstm/src/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the License); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# httpwww.apache.orglicensesLICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ diff --git a/example/lstm_aclImdb/config.py b/model_zoo/lstm/src/config.py similarity index 100% rename from example/lstm_aclImdb/config.py rename to model_zoo/lstm/src/config.py diff --git a/example/lstm_aclImdb/dataset.py b/model_zoo/lstm/src/dataset.py similarity index 96% rename from example/lstm_aclImdb/dataset.py rename to model_zoo/lstm/src/dataset.py index 24797198e0..03d4276dfd 100644 --- a/example/lstm_aclImdb/dataset.py +++ b/model_zoo/lstm/src/dataset.py @@ -19,12 +19,12 @@ import os import numpy as np -from imdb import ImdbParser import mindspore.dataset as ds from mindspore.mindrecord import FileWriter +from .imdb import ImdbParser -def create_dataset(data_home, batch_size, repeat_num=1, training=True): +def lstm_create_dataset(data_home, batch_size, repeat_num=1, training=True): """Data operations.""" ds.config.set_seed(1) data_dir = os.path.join(data_home, "aclImdb_train.mindrecord0") diff --git a/example/lstm_aclImdb/imdb.py b/model_zoo/lstm/src/imdb.py similarity index 100% rename from example/lstm_aclImdb/imdb.py rename to model_zoo/lstm/src/imdb.py index 66d04f1281..9888b4c36f 100644 --- a/example/lstm_aclImdb/imdb.py +++ b/model_zoo/lstm/src/imdb.py @@ -18,8 +18,8 @@ imdb dataset parser. import os from itertools import chain -import gensim import numpy as np +import gensim class ImdbParser(): diff --git a/mindspore/model_zoo/lstm.py b/model_zoo/lstm/src/lstm.py similarity index 63% rename from mindspore/model_zoo/lstm.py rename to model_zoo/lstm/src/lstm.py index 7368bbf8e5..f014eef8df 100644 --- a/mindspore/model_zoo/lstm.py +++ b/model_zoo/lstm/src/lstm.py @@ -13,43 +13,12 @@ # limitations under the License. 
# ============================================================================ """LSTM.""" -import math import numpy as np -from mindspore import Parameter, Tensor, nn, context, ParameterTuple -from mindspore.common.initializer import initializer +from mindspore import Tensor, nn, context from mindspore.ops import operations as P - -def init_lstm_weight( - input_size, - hidden_size, - num_layers, - bidirectional, - has_bias=True): - """Initialize lstm weight.""" - num_directions = 1 - if bidirectional: - num_directions = 2 - - weight_size = 0 - gate_size = 4 * hidden_size - for layer in range(num_layers): - for _ in range(num_directions): - input_layer_size = input_size if layer == 0 else hidden_size * num_directions - weight_size += gate_size * input_layer_size - weight_size += gate_size * hidden_size - if has_bias: - weight_size += 2 * gate_size - - stdv = 1 / math.sqrt(hidden_size) - w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32) - w = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight') - - return w - - # Initialize short-term memory (h) and long-term memory (c) to 0 def lstm_default_state(batch_size, hidden_size, num_layers, bidirectional): """init default input.""" @@ -60,19 +29,15 @@ def lstm_default_state(batch_size, hidden_size, num_layers, bidirectional): if context.get_context("device_target") == "CPU": h_list = [] c_list = [] - for i in range(num_layers): - hi = Parameter(initializer( - Tensor(np.zeros((num_directions, batch_size, hidden_size)).astype(np.float32)), - [num_directions, batch_size, hidden_size] - ), name='h' + str(i)) + i = 0 + while i < num_layers: + hi = Tensor(np.zeros((num_directions, batch_size, hidden_size)).astype(np.float32)) h_list.append(hi) - ci = Parameter(initializer( - Tensor(np.zeros((num_directions, batch_size, hidden_size)).astype(np.float32)), - [num_directions, batch_size, hidden_size] - ), name='c' + str(i)) + ci = Tensor(np.zeros((num_directions, batch_size, 
hidden_size)).astype(np.float32)) c_list.append(ci) - h = ParameterTuple(tuple(h_list)) - c = ParameterTuple(tuple(c_list)) + i = i + 1 + h = tuple(h_list) + c = tuple(c_list) return h, c h = Tensor( @@ -108,12 +73,7 @@ class SentimentNet(nn.Cell): has_bias=True, bidirectional=bidirectional, dropout=0.0) - w_init = init_lstm_weight( - embed_size, - num_hiddens, - num_layers, - bidirectional) - self.encoder.weight = w_init + self.h, self.c = lstm_default_state(batch_size, num_hiddens, num_layers, bidirectional) self.concat = P.Concat(1) @@ -128,6 +88,6 @@ class SentimentNet(nn.Cell): embeddings = self.trans(embeddings, self.perm) output, _ = self.encoder(embeddings, (self.h, self.c)) # states[i] size(64,200) -> encoding.size(64,400) - encoding = self.concat((output[0], output[1])) + encoding = self.concat((output[0], output[-1])) outputs = self.decoder(encoding) return outputs diff --git a/example/lstm_aclImdb/train.py b/model_zoo/lstm/train.py similarity index 87% rename from example/lstm_aclImdb/train.py rename to model_zoo/lstm/train.py index 08bea7c63d..732655f1de 100644 --- a/example/lstm_aclImdb/train.py +++ b/model_zoo/lstm/train.py @@ -21,13 +21,14 @@ import os import numpy as np -from config import lstm_cfg as cfg -from dataset import convert_to_mindrecord -from dataset import create_dataset +from src.config import lstm_cfg as cfg +from src.dataset import convert_to_mindrecord +from src.dataset import lstm_create_dataset from mindspore import Tensor, nn, Model, context from mindspore.model_zoo.lstm import SentimentNet from mindspore.nn import Accuracy from mindspore.train.callback import LossMonitor, CheckpointConfig, ModelCheckpoint, TimeMonitor +from mindspore.train.serialization import load_param_into_net, load_checkpoint if __name__ == '__main__': parser = argparse.ArgumentParser(description='MindSpore LSTM Example') @@ -41,6 +42,8 @@ if __name__ == '__main__': help='path where the pre-process data is stored.') parser.add_argument('--ckpt_path', 
type=str, default="./", help='the path to save the checkpoint file.') + parser.add_argument('--pre_trained', type=str, default=None, + help='the pretrained checkpoint file path.') parser.add_argument('--device_target', type=str, default="GPU", choices=['GPU', 'CPU'], help='the target device to run, support "GPU", "CPU". Default: "GPU".') args = parser.parse_args() @@ -63,6 +66,9 @@ if __name__ == '__main__': num_classes=cfg.num_classes, weight=Tensor(embedding_table), batch_size=cfg.batch_size) + # pre_trained + if args.pre_trained: + load_param_into_net(network, load_checkpoint(args.pre_trained)) loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum) @@ -71,7 +77,7 @@ if __name__ == '__main__': model = Model(network, loss, opt, {'acc': Accuracy()}) print("============== Starting Training ==============") - ds_train = create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs) + ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs) config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps, keep_checkpoint_max=cfg.keep_checkpoint_max) ckpoint_cb = ModelCheckpoint(prefix="lstm", directory=args.ckpt_path, config=config_ck) diff --git a/model_zoo/mass/README.md b/model_zoo/mass/README.md new file mode 100644 index 0000000000..d6b1c29186 --- /dev/null +++ b/model_zoo/mass/README.md @@ -0,0 +1,592 @@ +![](https://www.mindspore.cn/static/img/logo.a3e472c9.png) + + + +- [MASS: Masked Sequence to Sequence Pre-training for Language Generation Description](#googlenet-description) +- [Model architecture](#model-architecture) +- [Dataset](#dataset) +- [Features](#features) +- [Script description](#script-description) + - [Data Preparation](#Data-Preparation) + - [Tokenization](#Tokenization) + - [Byte Pair Encoding](#Byte-Pair-Encoding) + - [Build Vocabulary](#Build-Vocabulary) + - [Generate Dataset](#Generate-Dataset) + - [News 
Crawl Corpus](#News-Crawl-Corpus) + - [Gigaword Corpus](#Gigaword-Corpus) + - [Cornell Movie Dialog Corpus](#Cornell-Movie-Dialog-Corpus) + - [Configuration](#Configuration) + - [Training & Evaluation process](#Training-&-Evaluation-process) + - [Weights average](#Weights-average) + - [Learning rate scheduler](#Learning-rate-scheduler) +- [Model description](#model-description) + - [Performance](#performance) + - [Results](#results) + - [Training Performance](#training-performance) + - [Inference Performance](#inference-performance) +- [Environment Requirements](#environment-requirements) + - [Platform](#Platform) + - [Requirements](#Requirements) +- [Get started](#get-started) + - [Pre-training](#Pre-training) + - [Fine-tuning](#Fine-tuning) + - [Inference](#Inference) +- [Description of random situation](#description-of-random-situation) +- [others](#others) +- [ModelZoo Homepage](#modelzoo-homepage) + + + + +# MASS: Masked Sequence to Sequence Pre-training for Language Generation Description + +[MASS: Masked Sequence to Sequence Pre-training for Language Generation](https://www.microsoft.com/en-us/research/uploads/prod/2019/06/MASS-paper-updated-002.pdf) was released by MicroSoft in June 2019. + +BERT(Devlin et al., 2018) have achieved SOTA in natural language understanding area by pre-training the encoder part of Transformer(Vaswani et al., 2017) with masked rich-resource text. Likewise, GPT(Raddford et al., 2018) pre-trains the decoder part of Transformer with masked(encoder inputs are masked) rich-resource text. Both of them build a robust language model by pre-training with masked rich-resource text. + +Inspired by BERT, GPT and other language models, MicroSoft addressed [MASS: Masked Sequence to Sequence Pre-training for Language Generation](https://www.microsoft.com/en-us/research/uploads/prod/2019/06/MASS-paper-updated-002.pdf) which combines BERT's and GPT's idea. MASS has an important parameter k, which controls the masked fragment length. 
BERT and GPT are special cases when k equals 1 and the sentence length. + +[Introducing MASS – A pre-training method that outperforms BERT and GPT in sequence to sequence language generation tasks](https://www.microsoft.com/en-us/research/blog/introducing-mass-a-pre-training-method-that-outperforms-bert-and-gpt-in-sequence-to-sequence-language-generation-tasks/) + +[Paper](https://www.microsoft.com/en-us/research/uploads/prod/2019/06/MASS-paper-updated-002.pdf): Song, Kaitao, Xu Tan, Tao Qin, Jianfeng Lu and Tie-Yan Liu. “MASS: Masked Sequence to Sequence Pre-training for Language Generation.” ICML (2019). + + +# Model architecture + +The overall network architecture of MASS is shown below, which is Transformer(Vaswani et al., 2017): + +MASS consists of a 6-layer encoder and a 6-layer decoder with 1024 embedding/hidden size, and 4096 intermediate size between feed forward network which has two full connection layers. + +![Transformer architecture](https://cdn.analyticsvidhya.com/wp-content/uploads/2019/06/Screenshot-from-2019-06-17-19-53-10.png) + + +# Dataset + +Dataset used: +- monolingual English data from News Crawl dataset(WMT 2019) for pre-training. +- Gigaword Corpus(Graff et al., 2003) for Text Summarization. +- Cornell movie dialog corpus(DanescuNiculescu-Mizil & Lee, 2011). + +Details about those datasets can be found in [MASS: Masked Sequence to Sequence Pre-training for Language Generation](https://www.microsoft.com/en-us/research/uploads/prod/2019/06/MASS-paper-updated-002.pdf). + + +# Features + +MASS is designed to jointly pre-train the encoder and decoder to complete the task of language generation. +First of all, through a sequence to sequence framework, MASS only predicts the masked tokens, which forces the encoder to understand the meaning of the unmasked tokens, and encourages the decoder to extract useful information from the encoder. 
+Secondly, by predicting the continuous token of the decoder, the decoder can build better language modeling ability than only predicting discrete token. +Third, by further shielding the input token of the decoder which is not shielded in the encoder, the decoder is encouraged to extract more useful information from the encoder side, rather than using the rich information in the previous token. + + +# Script description + +MASS script and code structure are as follow: + +```text +├── mass + ├── README.md // Introduction of MASS model. + ├── config + │ ├──config.py // Configuration instance definition. + │ ├──config.json // Configuration file. + ├── src + │ ├──dataset + │ ├──bi_data_loader.py // Dataset loader for fine-tune or inferring. + │ ├──mono_data_loader.py // Dataset loader for pre-training. + │ ├──language_model + │ ├──noise_channel_language_model.p // Noisy channel language model for dataset generation. + │ ├──mass_language_model.py // MASS language model according to MASS paper. + │ ├──loose_masked_language_model.py // MASS language model according to MASS released code. + │ ├──masked_language_model.py // Masked language model according to MASS paper. + │ ├──transformer + │ ├──create_attn_mask.py // Generate mask matrix to remove padding positions. + │ ├──transformer.py // Transformer model architecture. + │ ├──encoder.py // Transformer encoder component. + │ ├──decoder.py // Transformer decoder component. + │ ├──self_attention.py // Self-Attention block component. + │ ├──multi_head_attention.py // Multi-Head Self-Attention component. + │ ├──embedding.py // Embedding component. + │ ├──positional_embedding.py // Positional embedding component. + │ ├──feed_forward_network.py // Feed forward network. + │ ├──residual_conn.py // Residual block. + │ ├──beam_search.py // Beam search decoder for inferring. + │ ├──transformer_for_infer.py // Use Transformer to infer. + │ ├──transformer_for_train.py // Use Transformer to train. 
+ │ ├──utils + │ ├──byte_pair_encoding.py // Apply BPE with subword-nmt. + │ ├──dictionary.py // Dictionary. + │ ├──loss_moniter.py // Callback of monitering loss during training step. + │ ├──lr_scheduler.py // Learning rate scheduler. + │ ├──ppl_score.py // Perplexity score based on N-gram. + │ ├──rouge_score.py // Calculate ROUGE score. + │ ├──load_weights.py // Load weights from a checkpoint or NPZ file. + │ ├──initializer.py // Parameters initializer. + ├── vocab + │ ├──all.bpe.codes // BPE codes table(this file should be generated by user). + │ ├──all_en.dict.bin // Learned vocabulary file(this file should be generated by user). + ├── scripts + │ ├──run.sh // Train & evaluate model script. + │ ├──learn_subword.sh // Learn BPE codes. + │ ├──stop_training.sh // Stop training. + ├── requirements.txt // Requirements of third party package. + ├── train.py // Train API entry. + ├── eval.py // Infer API entry. + ├── tokenize_corpus.py // Corpus tokenization. + ├── apply_bpe_encoding.py // Applying bpe encoding. + ├── weights_average.py // Average multi model checkpoints to NPZ format. + ├── news_crawl.py // Create News Crawl dataset for pre-training. + ├── gigaword.py // Create Gigaword Corpus. + ├── cornell_dialog.py // Create Cornell Movie Dialog dataset for conversation response. + +``` + + +## Data Preparation + +The data preparation of a natural language processing task contains data cleaning, tokenization, encoding and vocabulary generation steps. + +In our experiments, using [Byte Pair Encoding(BPE)](https://arxiv.org/abs/1508.07909) could reduce size of vocabulary, and relieve the OOV influence effectively. + +Vocabulary could be created using `src/utils/dictionary.py` with text dictionary which is learnt from BPE. +For more detail about BPE, please refer to [Subword-nmt lib](https://www.cnpython.com/pypi/subword-nmt) or [paper](https://arxiv.org/abs/1508.07909). 
+ +In our experiments, the vocabulary was learned based on 1.9M sentences from the News Crawl Dataset, and the size of the vocabulary is 45755. + +Here, we have a brief introduction of the data preparation scripts. + + +### Tokenization +`tokenize_corpus.py` can be used to tokenize corpora whose text files are in `.txt` format. + +Major parameters in `tokenize_corpus.py`: + +```bash +--corpus_folder: Corpus folder path, if multi-folders are provided, use ',' split folders. +--output_folder: Output folder path. +--tokenizer: Tokenizer to be used, nltk or jieba, if nltk is not installed fully, use jieba instead. +--pool_size: Processes pool size. +``` + +Sample code: +```bash +python tokenize_corpus.py --corpus_folder /{path}/corpus --output_folder /{path}/tokenized_corpus --tokenizer {nltk|jieba} --pool_size 16 +``` + + +### Byte Pair Encoding +After tokenization, BPE is applied to the tokenized corpus with the provided `all.bpe.codes`. + +The apply BPE script can be found in `apply_bpe_encoding.py`. + +Major parameters in `apply_bpe_encoding.py`: + +```bash +--codes: BPE codes file. +--src_folder: Corpus folders. +--output_folder: Output files folder. +--prefix: Prefix of text file in `src_folder`. +--vocab_path: Generated vocabulary output path. +--threshold: Filter out words that frequency is lower than threshold. +--processes: Size of process pool (to accelerate). Default: 2. +``` + +Sample code: +```bash +python apply_bpe_encoding.py --codes /{path}/all.bpe.codes \ + --src_folder /{path}/tokenized_corpus \ + --output_folder /{path}/tokenized_corpus/bpe \ + --prefix tokenized \ + --vocab_path /{path}/vocab_en.dict.bin \ + --processes 32 +``` + + +### Build Vocabulary +Suppose you want to create a new vocabulary, there are two options: +1. Learn BPE codes from scratch, and create vocabulary with multi vocabulary files from `subword-nmt`. +2. Create from an existing vocabulary file whose lines are in the format of `word frequency`. +3. 
*Optional*, Create a small vocabulary based on `vocab/all_en.dict.bin` with method of `shink` from `src/utils/dictionary.py`. +4. Persistent vocabulary to `vocab` folder with method `persistence()`. + +Major interface of `src/utils/dictionary.py` are as follow: + +1. `shrink(self, threshold=50)`: Shrink the size of vocabulary by filter out words frequency is lower than threshold. It returns a new vocabulary. +2. `load_from_text(cls, filepaths: List[str])`: Load existed text vocabulary which lines in the format of `word frequency`. +3. `load_from_persisted_dict(cls, filepath)`: Load from a persisted binary vocabulary which was saved by calling `persistence()` method. +4. `persistence(self, path)`: Save vocabulary object to binary file. + +Sample code: +```python +from src.utils import Dictionary + +vocabulary = Dictionary.load_from_persisted_dict("vocab/all_en.dict.bin") +tokens = [1, 2, 3, 4, 5] +# Convert ids to symbols. +print([vocabulary[t] for t in tokens]) + +sentence = ["Hello", "world"] +# Convert symbols to ids. +print([vocabulary.index[s] for s in sentence]) +``` + +For more detail, please refer to the source file. + + +### Generate Dataset +As mentioned above, three corpus are used in MASS mode, dataset generation scripts for them are provided. + +#### News Crawl Corpus +Script can be found in `news_crawl.py`. + +Major parameters in `news_crawl.py`: + +```bash +Note that please provide `--existed_vocab` or `--dict_folder` at least one. +A new vocabulary would be created in `output_folder` when pass `--dict_folder`. + +--src_folder: Corpus folders. +--existed_vocab: Optional, persisted vocabulary file. +--mask_ratio: Ratio of mask. +--output_folder: Output dataset files folder path. +--max_len: Maximum sentence length. If a sentence longer than `max_len`, then drop it. +--suffix: Optional, suffix of generated dataset files. +--processes: Optional, size of process pool (to accelerate). Default: 2. 
+``` + +Sample code: + +```bash +python news_crawl.py --src_folder /{path}/news_crawl \ + --existed_vocab /{path}/mass/vocab/all_en.dict.bin \ + --mask_ratio 0.5 \ + --output_folder /{path}/news_crawl_dataset \ + --max_len 32 \ + --processes 32 +``` + + +#### Gigaword Corpus +Script can be found in `gigaword.py`. + +Major parameters in `gigaword.py`: + +```bash +--train_src: Train source file path. +--train_ref: Train reference file path. +--test_src: Test source file path. +--test_ref: Test reference file path. +--existed_vocab: Persisted vocabulary file. +--output_folder: Output dataset files folder path. +--noise_prob: Optional, add noise prob. Default: 0. +--max_len: Optional, maximum sentence length. If a sentence longer than `max_len`, then drop it. Default: 64. +--format: Optional, dataset format, "mindrecord" or "tfrecord". Default: "tfrecord". +``` + +Sample code: + +```bash +python gigaword.py --train_src /{path}/gigaword/train_src.txt \ + --train_ref /{path}/gigaword/train_ref.txt \ + --test_src /{path}/gigaword/test_src.txt \ + --test_ref /{path}/gigaword/test_ref.txt \ + --existed_vocab /{path}/mass/vocab/all_en.dict.bin \ + --noise_prob 0.1 \ + --output_folder /{path}/gigaword_dataset \ + --max_len 64 +``` + + +#### Cornell Movie Dialog Corpus +Script can be found in `cornell_dialog.py`. + +Major parameters in `cornell_dialog.py`: + +```bash +--src_folder: Corpus folders. +--existed_vocab: Persisted vocabulary file. +--train_prefix: Train source and target file prefix. Default: train. +--test_prefix: Test source and target file prefix. Default: test. +--output_folder: Output dataset files folder path. +--max_len: Maximum sentence length. If a sentence longer than `max_len`, then drop it. +--valid_prefix: Optional, Valid source and target file prefix. Default: valid. 
+``` + +Sample code: + +```bash +python cornell_dialog.py --src_folder /{path}/cornell_dialog \ + --existed_vocab /{path}/mass/vocab/all_en.dict.bin \ + --train_prefix train \ + --test_prefix test \ + --noise_prob 0.1 \ + --output_folder /{path}/cornell_dialog_dataset \ + --max_len 64 +``` + + +## Configuration +Json file under the path `config/` is the template configuration file. +Almost all of the options and arguments needed could be assigned conveniently, including the training platform, configurations of dataset and model, arguments of optimizer etc. Optional features such as loss scale and checkpoint are also available by setting the options correspondingly. +For more detailed information about the attributes, refer to the file `config/config.py`. + +## Training & Evaluation process +For training a model, the shell script `run.sh` is all you need. In this scripts, the environment variable is set and the training script `train.py` under `mass` is executed. +You may start a task training with single device or multiple devices by assigning the options and run the command in bash: +```bash +sh run.sh [--options] +``` + +The usage is shown as bellow: +```text +Usage: run.sh [-h, --help] [-t, --task ] [-n, --device_num ] + [-i, --device_id ] [-j, --hccl_json ] + [-c, --config ] [-o, --output ] + [-v, --vocab ] + +options: + -h, --help show usage + -t, --task select task: CHAR, 't' for train and 'i' for inference". + -n, --device_num device number used for training: N, default is 1. + -i, --device_id device id used for training with single device: N, 0<=N<=7, default is 0. + -j, --hccl_json rank table file used for training with multiple devices: FILE. + -c, --config configuration file as shown in the path 'mass/config': FILE. + -o, --output assign output file of inference: FILE. + -v, --vocab set the vocabulary" +``` +Notes: Be sure to assign the hccl_json file while running a distributed-training. 
+ +The following command shows an example for training with 2 devices. +```bash +sh run.sh --task t --device_num 2 --hccl_json /{path}/rank_table.json --config /{path}/config.json +``` +P.S. Discontinuous device ids are not supported in `run.sh` at present; the device id in `rank_table.json` must start from 0. + + +If a single device is used, the command would be like this: +```bash +sh run.sh --task t --device_num 1 --device_id 0 --config /{path}/config.json +``` + + +## Weights average + +```python +python weights_average.py --input_files your_checkpoint_list --output_file model.npz +``` + +The `input_files` argument is a list of your checkpoint files. To use model.npz as the weights, add its path in config.json at "existed_ckpt". +```json +{ + ... + "checkpoint_options": { + "existed_ckpt": "/xxx/xxx/model.npz", + "save_ckpt_steps": 1000, + ... + }, + ... +} +``` + + +## Learning rate scheduler + +Two learning rate schedulers are provided in our model: + +1. [Polynomial decay scheduler](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1). +2. [Inverse square root scheduler](https://ece.uwaterloo.ca/~dwharder/aads/Algorithms/Inverse_square_root/). + +The LR scheduler can be configured in `config/config.json`. + +For the Polynomial decay scheduler, the config could be like: +```json +{ + ... + "learn_rate_config": { + "optimizer": "adam", + "lr": 1e-4, + "lr_scheduler": "poly", + "poly_lr_scheduler_power": 0.5, + "decay_steps": 10000, + "warmup_steps": 2000, + "min_lr": 1e-6 + }, + ... +} +``` + +For the Inverse square root scheduler, the config could be like: +```json +{ + ... + "learn_rate_config": { + "optimizer": "adam", + "lr": 1e-4, + "lr_scheduler": "isr", + "decay_start_step": 12000, + "warmup_steps": 2000, + "min_lr": 1e-6 + }, + ... +} +``` + +More details about the LR scheduler can be found in `src/utils/lr_scheduler.py`. 
+ + +# Model description + +The MASS network is implemented by Transformer, which has multi-encoder layers and multi-decoder layers. +For pre-training, we use the Adam optimizer and loss-scale to get the pre-trained model. +During fine-tuning, we fine-tune this pre-trained model with different datasets according to different tasks. +During testing, we use the fine-tuned model to predict the result, and adopt a beam search algorithm to +get the most probable prediction results. + + +![MASS framework](https://www.microsoft.com/en-us/research/uploads/prod/2019/06/MASS-Fig-2.png) + + +## Performance + +### Results + +#### Fine-Tuning on Text Summarization +The comparisons between MASS and two other pre-training methods in terms of ROUGE score on the text summarization task +with 3.8M training data are as follows: + +| Method | RG-1(F) | RG-2(F) | RG-L(F) | +|:---------------|:--------------|:-------------|:-------------| +| MASS | Ongoing | Ongoing | Ongoing | + +#### Fine-Tuning on Conversational Response Generation +The comparisons between MASS and other baseline methods in terms of PPL on Cornell Movie Dialog corpus are as follows: + +| Method | Data = 10K | Data = 110K | +|--------------------|------------------|-----------------| +| MASS | Ongoing | Ongoing | + +#### Training Performance + +| Parameters | Masked Sequence to Sequence Pre-training for Language Generation | +|:---------------------------|:--------------------------------------------------------------------------| +| Model Version | v1 | +| Resource | Ascend 910, cpu 2.60GHz, 56cores;memory, 314G | +| uploaded Date | 05/24/2020 | +| MindSpore Version | 0.2.0 | +| Dataset | News Crawl 2007-2017 English monolingual corpus, Gigaword corpus, Cornell Movie Dialog corpus | +| Training Parameters | Epoch=50, steps=XXX, batch_size=192, lr=1e-4 | +| Optimizer | Adam | +| Loss Function | Label smoothed cross-entropy criterion | +| outputs | Sentence and probability | +| Loss | Lower than 2 | +| Accuracy | For 
conversation response, ppl=23.52, for text summarization, RG-1=29.79. | +| Speed | 611.45 sentences/s | +| Total time | --/-- | +| Params (M) | 44.6M | +| Checkpoint for Fine tuning | ---Mb, --, [A link]() | +| Model for inference | ---Mb, --, [A link]() | +| Scripts | [A link]() | + + +#### Inference Performance + +| Parameters | Masked Sequence to Sequence Pre-training for Language Generation | +|:---------------------------|:-----------------------------------------------------------| +| Model Version | V1 | +| Resource | Huawei 910 | +| uploaded Date | 05/24/2020 | +| MindSpore Version | 0.2.0 | +| Dataset | Gigaword corpus, Cornell Movie Dialog corpus | +| batch_size | --- | +| outputs | Sentence and probability | +| Accuracy | ppl=23.52 for conversation response, RG-1=29.79 for text summarization. | +| Speed | ---- sentences/s | +| Total time | --/-- | +| Model for inference | ---Mb, --, [A link]() | + + +# Environment Requirements + +## Platform + +- Hardware(Ascend) + - Prepare hardware environment with Ascend processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you could get the resources for trial. +- Framework + - [MindSpore](http://10.90.67.50/mindspore/archive/20200506/OpenSource/me_vm_x86/) +- For more information, please check the resources below: + - [MindSpore tutorials](https://www.mindspore.cn/tutorial/zh-CN/master/index.html) + - [MindSpore API](https://www.mindspore.cn/api/zh-CN/master/index.html) + +## Requirements + +```txt +nltk +numpy +subword-nmt +rouge +``` + +https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/network_migration.html + + +# Get started +MASS pre-trains a sequence to sequence model by predicting the masked fragments in an input sequence. 
After this, downstream tasks including text summarization and conversation response are candidates for fine-tuning the model and for inference. +Here we provide a practical example to demonstrate the basic usage of MASS for pre-training, fine-tuning a model, and the inference process. The overall process is as follows: +1. Download and process the dataset. +2. Modify the `config.json` to configure the network. +3. Run a task for pre-training and fine-tuning. +4. Perform inference and validation. + +## Pre-training +For pre-training a model, configure the options in `config.json` first: +- Assign the `pre_train_dataset` under `dataset_config` node to the dataset path. +- Choose the optimizer ('momentum/adam/lamb' is available). +- Assign the 'ckpt_prefix' and 'ckpt_path' under `checkpoint_path` to save the model files. +- Set other arguments including dataset configurations and network configurations. +- If you have a trained model already, assign the `existed_ckpt` to the checkpoint file. + +Run the shell script `run.sh` as follows: + +```bash +sh run.sh -t t -n 1 -i 1 -c /mass/config/config.json +``` +Get the log and output files under the path `./run_mass_*/`, and the model file under the path assigned in the `config/config.json` file. + +## Fine-tuning +For fine-tuning a model, configure the options in `config.json` first: +- Assign the `fine_tune_dataset` under `dataset_config` node to the dataset path. +- Assign the `existed_ckpt` under `checkpoint_path` node to the existing model file generated by pre-training. +- Choose the optimizer ('momentum/adam/lamb' is available). +- Assign the `ckpt_prefix` and `ckpt_path` under `checkpoint_path` node to save the model files. +- Set other arguments including dataset configurations and network configurations. 
+ +Run the shell script `run.sh` as followed: +```bash +sh run.sh -t t -n 1 -i 1 -c config/config.json +``` +Get the log and output files under the path `./run_mass_*/`, and the model file under the path assigned in the `config/config.json` file. + +## Inference +If you need to use the trained model to perform inference on multiple hardware platforms, such as GPU, Ascend 910 or Ascend 310, you can refer to this [Link](https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/network_migration.html). +For inference, config the options in `config.json` firstly: +- Assign the `test_dataset` under `dataset_config` node to the dataset path. +- Assign the `existed_ckpt` under `checkpoint_path` node to the model file produced by fine-tuning. +- Choose the optimizer('momentum/adam/lamb' is available). +- Assign the `ckpt_prefix` and `ckpt_path` under `checkpoint_path` node to save the model files. +- Set other arguments including dataset configurations and network configurations. + +Run the shell script `run.sh` as followed: + +```bash +sh run.sh -t i -n 1 -i 1 -c config/config.json -o {outputfile} +``` + +# Description of random situation + +MASS model contains dropout operations, if you want to disable dropout, please set related dropout_rate to 0 in `config/config.json`. + + +# others +The model has been validated on Ascend environment, not validated on CPU and GPU. 
+ + +# ModelZoo Homepage + [Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo) diff --git a/model_zoo/mass/__init__.py b/model_zoo/mass/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/model_zoo/mass/apply_bpe_encoding.py b/model_zoo/mass/apply_bpe_encoding.py new file mode 100644 index 0000000000..24341a62ac --- /dev/null +++ b/model_zoo/mass/apply_bpe_encoding.py @@ -0,0 +1,84 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Apply bpe script.""" +import os +import argparse +from multiprocessing import Pool, cpu_count + +from src.utils import Dictionary +from src.utils import bpe_encode + +parser = argparse.ArgumentParser(description='Apply BPE.') +parser.add_argument("--codes", type=str, default="", required=True, + help="bpe codes path.") +parser.add_argument("--src_folder", type=str, default="", required=True, + help="raw corpus folder.") +parser.add_argument("--output_folder", type=str, default="", required=True, + help="encoded corpus output path.") +parser.add_argument("--prefix", type=str, default="", required=False, + help="Prefix of text file.") +parser.add_argument("--vocab_path", type=str, default="", required=True, + help="Generated vocabulary output path.") +parser.add_argument("--threshold", type=int, default=None, required=False, + help="Filter out words that frequency is lower than threshold.") +parser.add_argument("--processes", type=int, default=2, required=False, + help="Number of processes to use.") + +if __name__ == '__main__': + args, _ = parser.parse_known_args() + + if not (args.codes and args.src_folder and args.output_folder): + raise ValueError("Please enter required params.") + + source_folder = args.src_folder + output_folder = args.output_folder + codes = args.codes + + if not os.path.exists(codes): + raise FileNotFoundError("`--codes` is not existed.") + if not os.path.exists(source_folder) or not os.path.isdir(source_folder): + raise ValueError("`--src_folder` must be a dir and existed.") + if not os.path.exists(output_folder) or not os.path.isdir(output_folder): + raise ValueError("`--output_folder` must be a dir and existed.") + if not isinstance(args.prefix, str) or len(args.prefix) > 128: + raise ValueError("`--prefix` must be a str and len <= 128.") + if not isinstance(args.processes, int): + raise TypeError("`--processes` must be an integer.") + + available_dict = [] 
+ args_groups = [] + for file in os.listdir(source_folder): + if args.prefix and not file.startswith(args.prefix): + continue + if file.endswith(".txt"): + output_path = os.path.join(output_folder, file.replace(".txt", "_bpe.txt")) + dict_path = os.path.join(output_folder, file.replace(".txt", ".dict")) + available_dict.append(dict_path) + args_groups.append((codes, os.path.join(source_folder, file), + output_path, dict_path)) + + kernel_size = 1 if args.processes <= 0 else args.processes + kernel_size = min(kernel_size, cpu_count()) + pool = Pool(kernel_size) + for arg in args_groups: + pool.apply_async(bpe_encode, args=arg) + pool.close() + pool.join() + + vocab = Dictionary.load_from_text(available_dict) + if args.threshold is not None: + vocab = vocab.shrink(args.threshold) + vocab.persistence(args.vocab_path) + print(f" | Vocabulary Size: {len(vocab)}") diff --git a/model_zoo/mass/config/__init__.py b/model_zoo/mass/config/__init__.py new file mode 100644 index 0000000000..d5c6589ee7 --- /dev/null +++ b/model_zoo/mass/config/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""MASS model configuration.""" +from .config import TransformerConfig + +__all__ = [ + "TransformerConfig" +] diff --git a/model_zoo/mass/config/config.json b/model_zoo/mass/config/config.json new file mode 100644 index 0000000000..081fb4a72c --- /dev/null +++ b/model_zoo/mass/config/config.json @@ -0,0 +1,54 @@ +{ + "dataset_config": { + "epochs": 20, + "batch_size": 192, + "pre_train_dataset": "", + "fine_tune_dataset": "", + "test_dataset": "", + "valid_dataset": "", + "dataset_sink_mode": false, + "dataset_sink_step": 100 + }, + "model_config": { + "random_seed": 100, + "save_graphs": false, + "seq_length": 64, + "vocab_size": 45744, + "hidden_size": 1024, + "num_hidden_layers": 6, + "num_attention_heads": 8, + "intermediate_size": 4096, + "hidden_act": "relu", + "hidden_dropout_prob": 0.2, + "attention_dropout_prob": 0.2, + "max_position_embeddings": 64, + "initializer_range": 0.02, + "label_smoothing": 0.1, + "beam_width": 4, + "length_penalty_weight": 1.0, + "max_decode_length": 64, + "input_mask_from_dataset": true + }, + "loss_scale_config": { + "init_loss_scale": 65536, + "loss_scale_factor": 2, + "scale_window": 200 + }, + "learn_rate_config": { + "optimizer": "adam", + "lr": 1e-4, + "lr_scheduler": "poly", + "poly_lr_scheduler_power": 0.5, + "decay_steps": 10000, + "decay_start_step": 12000, + "warmup_steps": 4000, + "min_lr": 1e-6 + }, + "checkpoint_options": { + "existed_ckpt": "", + "save_ckpt_steps": 2500, + "keep_ckpt_max": 50, + "ckpt_prefix": "ckpt", + "ckpt_path": "checkpoints" + } +} diff --git a/model_zoo/mass/config/config.py b/model_zoo/mass/config/config.py new file mode 100644 index 0000000000..985f3aa318 --- /dev/null +++ b/model_zoo/mass/config/config.py @@ -0,0 +1,232 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Configuration class for Transformer.""" +import os +import json +import copy +from typing import List + +import mindspore.common.dtype as mstype + + +def _is_dataset_file(file: str): + return "tfrecord" in file.lower() or "mindrecord" in file.lower() + + +def _get_files_from_dir(folder: str): + _files = [] + for file in os.listdir(folder): + if _is_dataset_file(file): + _files.append(os.path.join(folder, file)) + return _files + + +def get_source_list(folder: str) -> List: + """ + Get file list from a folder. + + Returns: + list, file list. + """ + _list = [] + if not folder: + return _list + + if os.path.isdir(folder): + _list = _get_files_from_dir(folder) + else: + if _is_dataset_file(folder): + _list.append(folder) + return _list + + +PARAM_NODES = {"dataset_config", + "model_config", + "loss_scale_config", + "learn_rate_config", + "checkpoint_options"} + + +class TransformerConfig: + """ + Configuration for `Transformer`. + + Args: + random_seed (int): Random seed. + batch_size (int): Batch size of input dataset. + epochs (int): Epoch number. + dataset_sink_mode (bool): Whether enable dataset sink mode. + dataset_sink_step (int): Dataset sink step. + lr_scheduler (str): Whether use lr_scheduler, only support "ISR" now. + lr (float): Initial learning rate. + min_lr (float): Minimum learning rate. + decay_start_step (int): Step to decay. + warmup_steps (int): Warm up steps. + dataset_schema (str): Path of dataset schema file. 
+ pre_train_dataset (str): Path of pre-training dataset file or folder. + fine_tune_dataset (str): Path of fine-tune dataset file or folder. + test_dataset (str): Path of test dataset file or folder. + valid_dataset (str): Path of validation dataset file or folder. + ckpt_path (str): Checkpoints save path. + save_ckpt_steps (int): Interval of saving ckpt. + ckpt_prefix (str): Prefix of ckpt file. + keep_ckpt_max (int): Max ckpt files number. + seq_length (int): Length of input sequence. Default: 64. + vocab_size (int): The shape of each embedding vector. Default: 46192. + hidden_size (int): Size of embedding, attention, dim. Default: 512. + num_hidden_layers (int): Encoder, Decoder layers. + num_attention_heads (int): Number of hidden layers in the Transformer encoder/decoder + cell. Default: 6. + intermediate_size (int): Size of intermediate layer in the Transformer + encoder/decoder cell. Default: 4096. + hidden_act (str): Activation function used in the Transformer encoder/decoder + cell. Default: "relu". + init_loss_scale (int): Initialized loss scale. + loss_scale_factor (int): Loss scale factor. + scale_window (int): Window size of loss scale. + beam_width (int): Beam width for beam search in inferring. Default: 4. + length_penalty_weight (float): Penalty for sentence length. Default: 1.0. + label_smoothing (float): Label smoothing setting. Default: 0.1. + input_mask_from_dataset (bool): Specifies whether to use the input mask that loaded from + dataset. Default: True. + save_graphs (bool): Whether to save graphs, please set to True if mindinsight + is wanted. + dtype (mstype): Data type of the input. Default: mstype.float32. + max_decode_length (int): Max decode length for inferring. Default: 64. + hidden_dropout_prob (float): The dropout probability for hidden outputs. Default: 0.1. + attention_dropout_prob (float): The dropout probability for + Multi-head Self-Attention. Default: 0.1. 
+ max_position_embeddings (int): Maximum length of sequences used in this + model. Default: 512. + initializer_range (float): Initialization value of TruncatedNormal. Default: 0.02. + """ + + def __init__(self, + random_seed=74, + batch_size=64, epochs=1, + dataset_sink_mode=True, dataset_sink_step=1, + lr_scheduler="", optimizer="adam", + lr=1e-4, min_lr=1e-6, + decay_steps=10000, poly_lr_scheduler_power=1, + decay_start_step=-1, warmup_steps=2000, + pre_train_dataset: str = None, + fine_tune_dataset: str = None, + test_dataset: str = None, + valid_dataset: str = None, + ckpt_path: str = None, + save_ckpt_steps=2000, + ckpt_prefix="CKPT", + existed_ckpt="", + keep_ckpt_max=20, + seq_length=128, + vocab_size=46192, + hidden_size=512, + num_hidden_layers=6, + num_attention_heads=8, + intermediate_size=4096, + hidden_act="relu", + hidden_dropout_prob=0.1, + attention_dropout_prob=0.1, + max_position_embeddings=64, + initializer_range=0.02, + init_loss_scale=2 ** 10, + loss_scale_factor=2, scale_window=2000, + beam_width=5, + length_penalty_weight=1.0, + label_smoothing=0.1, + input_mask_from_dataset=True, + save_graphs=False, + dtype=mstype.float32, + max_decode_length=64): + + self.save_graphs = save_graphs + self.random_seed = random_seed + self.pre_train_dataset = get_source_list(pre_train_dataset) # type: List[str] + self.fine_tune_dataset = get_source_list(fine_tune_dataset) # type: List[str] + self.valid_dataset = get_source_list(valid_dataset) # type: List[str] + self.test_dataset = get_source_list(test_dataset) # type: List[str] + + if not isinstance(epochs, int) and epochs < 0: + raise ValueError("`epoch` must be type of int.") + + self.epochs = epochs + self.dataset_sink_mode = dataset_sink_mode + self.dataset_sink_step = dataset_sink_step + + self.ckpt_path = ckpt_path + self.keep_ckpt_max = keep_ckpt_max + self.save_ckpt_steps = save_ckpt_steps + self.ckpt_prefix = ckpt_prefix + self.existed_ckpt = existed_ckpt + + self.batch_size = batch_size + 
self.seq_length = seq_length + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_dropout_prob = attention_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.initializer_range = initializer_range + self.label_smoothing = label_smoothing + + self.beam_width = beam_width + self.length_penalty_weight = length_penalty_weight + self.max_decode_length = max_decode_length + self.input_mask_from_dataset = input_mask_from_dataset + self.compute_type = mstype.float16 + self.dtype = dtype + + self.scale_window = scale_window + self.loss_scale_factor = loss_scale_factor + self.init_loss_scale = init_loss_scale + + self.optimizer = optimizer + self.lr = lr + self.lr_scheduler = lr_scheduler + self.min_lr = min_lr + self.poly_lr_scheduler_power = poly_lr_scheduler_power + self.decay_steps = decay_steps + self.decay_start_step = decay_start_step + self.warmup_steps = warmup_steps + + self.train_url = "" + + @classmethod + def from_dict(cls, json_object: dict): + """Constructs a `TransformerConfig` from a Python dictionary of parameters.""" + _params = {} + for node in PARAM_NODES: + for key in json_object[node]: + _params[key] = json_object[node][key] + return cls(**_params) + + @classmethod + def from_json_file(cls, json_file): + """Constructs a `TransformerConfig` from a json file of parameters.""" + with open(json_file, "r") as reader: + return cls.from_dict(json.load(reader)) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" diff --git a/model_zoo/mass/cornell_dialog.py 
b/model_zoo/mass/cornell_dialog.py new file mode 100644 index 0000000000..e2e9e9155f --- /dev/null +++ b/model_zoo/mass/cornell_dialog.py @@ -0,0 +1,110 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Generate Cornell Movie Dialog dataset.""" +import os +import argparse +from src.dataset import BiLingualDataLoader +from src.language_model import NoiseChannelLanguageModel +from src.utils import Dictionary + +parser = argparse.ArgumentParser(description='Generate Cornell Movie Dialog dataset file.') +parser.add_argument("--src_folder", type=str, default="", required=True, + help="Raw corpus folder.") +parser.add_argument("--existed_vocab", type=str, default="", required=True, + help="Existed vocabulary.") +parser.add_argument("--train_prefix", type=str, default="train", required=False, + help="Prefix of train file.") +parser.add_argument("--test_prefix", type=str, default="test", required=False, + help="Prefix of test file.") +parser.add_argument("--valid_prefix", type=str, default=None, required=False, + help="Prefix of valid file.") +parser.add_argument("--noise_prob", type=float, default=0., required=False, + help="Add noise prob.") +parser.add_argument("--max_len", type=int, default=32, required=False, + help="Max length of sentence.") +parser.add_argument("--output_folder", type=str, default="", required=True, + help="Dataset output 
path.") + +if __name__ == '__main__': + args, _ = parser.parse_known_args() + + dicts = [] + train_src_file = "" + train_tgt_file = "" + test_src_file = "" + test_tgt_file = "" + valid_src_file = "" + valid_tgt_file = "" + for file in os.listdir(args.src_folder): + if file.startswith(args.train_prefix) and "src" in file and file.endswith(".txt"): + train_src_file = os.path.join(args.src_folder, file) + elif file.startswith(args.train_prefix) and "tgt" in file and file.endswith(".txt"): + train_tgt_file = os.path.join(args.src_folder, file) + elif file.startswith(args.test_prefix) and "src" in file and file.endswith(".txt"): + test_src_file = os.path.join(args.src_folder, file) + elif file.startswith(args.test_prefix) and "tgt" in file and file.endswith(".txt"): + test_tgt_file = os.path.join(args.src_folder, file) + elif args.valid_prefix and file.startswith(args.valid_prefix) and "src" in file and file.endswith(".txt"): + valid_src_file = os.path.join(args.src_folder, file) + elif args.valid_prefix and file.startswith(args.valid_prefix) and "tgt" in file and file.endswith(".txt"): + valid_tgt_file = os.path.join(args.src_folder, file) + else: + continue + + vocab = Dictionary.load_from_persisted_dict(args.existed_vocab) + + if train_src_file and train_tgt_file: + BiLingualDataLoader( + src_filepath=train_src_file, + tgt_filepath=train_tgt_file, + src_dict=vocab, tgt_dict=vocab, + src_lang="en", tgt_lang="en", + language_model=NoiseChannelLanguageModel(add_noise_prob=args.noise_prob), + max_sen_len=args.max_len + ).write_to_tfrecord( + path=os.path.join( + args.output_folder, "train_cornell_dialog.tfrecord" + ) + ) + + if test_src_file and test_tgt_file: + BiLingualDataLoader( + src_filepath=test_src_file, + tgt_filepath=test_tgt_file, + src_dict=vocab, tgt_dict=vocab, + src_lang="en", tgt_lang="en", + language_model=NoiseChannelLanguageModel(add_noise_prob=0.), + max_sen_len=args.max_len + ).write_to_tfrecord( + path=os.path.join( + args.output_folder, 
"test_cornell_dialog.tfrecord" + ) + ) + + if args.valid_prefix: + BiLingualDataLoader( + src_filepath=os.path.join(args.src_folder, valid_src_file), + tgt_filepath=os.path.join(args.src_folder, valid_tgt_file), + src_dict=vocab, tgt_dict=vocab, + src_lang="en", tgt_lang="en", + language_model=NoiseChannelLanguageModel(add_noise_prob=0.), + max_sen_len=args.max_len + ).write_to_tfrecord( + path=os.path.join( + args.output_folder, "valid_cornell_dialog.tfrecord" + ) + ) + + print(f" | Vocabulary size: {vocab.size}.") diff --git a/model_zoo/mass/eval.py b/model_zoo/mass/eval.py new file mode 100644 index 0000000000..4da63a7333 --- /dev/null +++ b/model_zoo/mass/eval.py @@ -0,0 +1,75 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Evaluation api.""" +import argparse +import pickle +import numpy as np + +from mindspore.common import dtype as mstype + +from config import TransformerConfig +from src.transformer import infer +from src.utils import ngram_ppl +from src.utils import Dictionary +from src.utils import rouge + +parser = argparse.ArgumentParser(description='Evaluation MASS.') +parser.add_argument("--config", type=str, required=True, + help="Model config json file path.") +parser.add_argument("--vocab", type=str, required=True, + help="Vocabulary to use.") +parser.add_argument("--output", type=str, required=True, + help="Result file path.") + + +def get_config(config): + config = TransformerConfig.from_json_file(config) + config.compute_type = mstype.float16 + config.dtype = mstype.float32 + return config + + +if __name__ == '__main__': + args, _ = parser.parse_known_args() + vocab = Dictionary.load_from_persisted_dict(args.vocab) + _config = get_config(args.config) + result = infer(_config) + with open(args.output, "wb") as f: + pickle.dump(result, f, 1) + + ppl_score = 0. 
+ preds = [] + tgts = [] + _count = 0 + for sample in result: + sentence_prob = np.array(sample['prediction_prob'], dtype=np.float32) + sentence_prob = sentence_prob[:, 1:] + _ppl = [] + for path in sentence_prob: + _ppl.append(ngram_ppl(path, log_softmax=True)) + ppl = np.min(_ppl) + preds.append(' '.join([vocab[t] for t in sample['prediction']])) + tgts.append(' '.join([vocab[t] for t in sample['target']])) + print(f" | source: {' '.join([vocab[t] for t in sample['source']])}") + print(f" | target: {tgts[-1]}") + print(f" | prediction: {preds[-1]}") + print(f" | ppl: {ppl}.") + if np.isinf(ppl): + continue + ppl_score += ppl + _count += 1 + + print(f" | PPL={ppl_score / _count}.") + rouge(preds, tgts) diff --git a/model_zoo/mass/gigaword.py b/model_zoo/mass/gigaword.py new file mode 100644 index 0000000000..f473ddd5ce --- /dev/null +++ b/model_zoo/mass/gigaword.py @@ -0,0 +1,84 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Generate Gigaword dataset.""" +import os +import argparse + +from src.dataset import BiLingualDataLoader +from src.language_model import NoiseChannelLanguageModel +from src.utils import Dictionary + +parser = argparse.ArgumentParser(description='Create Gigaword fine-tune Dataset.') +parser.add_argument("--train_src", type=str, default="", required=False, + help="train dataset source file path.") +parser.add_argument("--train_ref", type=str, default="", required=False, + help="train dataset reference file path.") +parser.add_argument("--test_src", type=str, default="", required=False, + help="test dataset source file path.") +parser.add_argument("--test_ref", type=str, default="", required=False, + help="test dataset reference file path.") +parser.add_argument("--noise_prob", type=float, default=0., required=False, + help="add noise prob.") +parser.add_argument("--existed_vocab", type=str, default="", required=False, + help="existed vocab path.") +parser.add_argument("--max_len", type=int, default=64, required=False, + help="max length of sentences.") +parser.add_argument("--output_folder", type=str, default="", required=True, + help="dataset output path.") +parser.add_argument("--format", type=str, default="tfrecord", required=False, + help="dataset format.") + +if __name__ == '__main__': + args, _ = parser.parse_known_args() + + vocab = Dictionary.load_from_persisted_dict(args.existed_vocab) + + if args.train_src and args.train_ref: + train = BiLingualDataLoader( + src_filepath=args.train_src, + tgt_filepath=args.train_ref, + src_dict=vocab, tgt_dict=vocab, + src_lang="en", tgt_lang="en", + language_model=NoiseChannelLanguageModel(add_noise_prob=args.noise_prob), + max_sen_len=args.max_len + ) + if "tf" in args.format.lower(): + train.write_to_tfrecord( + path=os.path.join(args.output_folder, "gigaword_train_dataset.tfrecord") + ) + else: + train.write_to_mindrecord( + 
path=os.path.join(args.output_folder, "gigaword_train_dataset.mindrecord") + ) + + if args.test_src and args.test_ref: + test = BiLingualDataLoader( + src_filepath=args.test_src, + tgt_filepath=args.test_ref, + src_dict=vocab, tgt_dict=vocab, + src_lang="en", tgt_lang="en", + language_model=NoiseChannelLanguageModel(add_noise_prob=0), + max_sen_len=args.max_len + ) + if "tf" in args.format.lower(): + test.write_to_tfrecord( + path=os.path.join(args.output_folder, "gigaword_test_dataset.tfrecord") + ) + else: + test.write_to_mindrecord( + path=os.path.join(args.output_folder, "gigaword_test_dataset.mindrecord") + ) + + print(f" | Vocabulary size: {vocab.size}.") diff --git a/model_zoo/mass/news_crawl.py b/model_zoo/mass/news_crawl.py new file mode 100644 index 0000000000..4481846cca --- /dev/null +++ b/model_zoo/mass/news_crawl.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Generate News Crawl corpus dataset.""" +import argparse + +from src.utils import Dictionary +from src.utils.preprocess import create_pre_training_dataset + +parser = argparse.ArgumentParser(description='Create News Crawl Pre-Training Dataset.') +parser.add_argument("--src_folder", type=str, default="", required=True, + help="Raw corpus folder.") +parser.add_argument("--existed_vocab", type=str, default="", required=True, + help="Existed vocab path.") +parser.add_argument("--mask_ratio", type=float, default=0.4, required=True, + help="Mask ratio.") +parser.add_argument("--output_folder", type=str, default="", required=True, + help="Dataset output path.") +parser.add_argument("--max_len", type=int, default=32, required=False, + help="Max length of sentences.") +parser.add_argument("--suffix", type=str, default="", required=False, + help="Add suffix to output file.") +parser.add_argument("--processes", type=int, default=2, required=False, + help="Size of processes pool.") + +if __name__ == '__main__': + args, _ = parser.parse_known_args() + if not (args.src_folder and args.output_folder): + raise ValueError("Please enter required params.") + + if not args.existed_vocab: + raise ValueError("`--existed_vocab` is required.") + + vocab = Dictionary.load_from_persisted_dict(args.existed_vocab) + + create_pre_training_dataset( + folder_path=args.src_folder, + output_folder_path=args.output_folder, + vocabulary=vocab, + prefix="news.20", suffix=args.suffix, + mask_ratio=args.mask_ratio, + min_sen_len=10, + max_sen_len=args.max_len, + dataset_type="tfrecord", + cores=args.processes + ) + print(f" | Vocabulary size: {vocab.size}.") diff --git a/model_zoo/mass/requirements.txt b/model_zoo/mass/requirements.txt new file mode 100644 index 0000000000..f70e569a82 --- /dev/null +++ b/model_zoo/mass/requirements.txt @@ -0,0 +1,5 @@ +nltk +jieba +numpy +subword-nmt +files2rouge diff --git 
a/model_zoo/mass/scripts/__init__.py b/model_zoo/mass/scripts/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/model_zoo/mass/scripts/learn_subword.sh b/model_zoo/mass/scripts/learn_subword.sh new file mode 100644 index 0000000000..05dd516880 --- /dev/null +++ b/model_zoo/mass/scripts/learn_subword.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +src_folder_path=$1 # source text folder path. + +cd $src_folder_path || exit +cat *.txt | subword-nmt learn-bpe -s 46000 -o all.bpe.codes diff --git a/model_zoo/mass/scripts/run.sh b/model_zoo/mass/scripts/run.sh new file mode 100644 index 0000000000..fc9606fcbd --- /dev/null +++ b/model_zoo/mass/scripts/run.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +export DEVICE_ID=0 +export RANK_ID=0 +export RANK_SIZE=1 + +options=`getopt -u -o ht:n:i:j:c:o:v: -l help,task:,device_num:,device_id:,hccl_json:,config:,output:,vocab -- "$@"` +eval set -- "$options" +echo $options + +echo_help() +{ + echo "Usage:" + echo "bash train.sh [-h] [-t t|i] [-n N] [-i N] [-j FILE] [-c FILE] [-o FILE] [-v FILE]" + echo "options:" + echo " -h --help show usage" + echo " -t --task select task, 't' for training and 'i' for inference" + echo " -n --device_num training with N devices" + echo " -i --device_id training with device i" + echo " -j --hccl_json set the rank table file" + echo " -c --config set the configuration file" + echo " -o --output set the output file of inference" + echo " -v --vocab set the vocabulary" +} + +set_hccl_json() +{ + while [ -n "$1" ] + do + if [[ "$1" == "-j" || "$1" == "--hccl_json" ]] + then + export MINDSPORE_HCCL_CONFIG_PATH=$2 #/data/wsc/hccl_2p_01.json + export RANK_TABLE_FILE=$2 #/data/wsc/hccl_2p_01.json + break + fi + shift + done +} +set_device_id() +{ + while [ -n "$1" ] + do + if [[ "$1" == "-i" || "$1" == "--device_id" ]] + then + if [[ $2 -ge 0 && $2 -le 7 ]] + then + export DEVICE_ID=$2 + fi + break + fi + shift + done +} + +while [ -n "$1" ] +do + case "$1" in + -h|--help) + echo_help + shift + ;; + -t|--task) + echo "task:" + if [ "$2" == "t" ] + then + task=train + elif [ "$2" == "i" ] + then + task=infer + fi + shift 2 + ;; + -n|--device_num) + echo "device_num" + if [ $2 -eq 1 ] + then + set_device_id $options + elif [ $2 -gt 1 ] + then + export HCCL_FLAG=1 + export DEPLOY_MODE=0 + + export RANK_SIZE=$2 + set_hccl_json $options + fi + shift 2 + ;; + -i|--device_id) + echo "set device id" + export DEVICE_ID=$2 + shift 2 + ;; + -c|--config) + echo "config"; + configurations=$2 + shift 2 + ;; + -o|--output) + echo "output"; + output=$2 + shift 2 + ;; + -v|--vocab) + echo "vocab"; + vocab=$2 + shift 2 + ;; + --) + 
shift + break + ;; + *) + shift + ;; +esac +done + +for((i=0; i < $RANK_SIZE; i++)) +do + if [ $RANK_SIZE -gt 1 ] + then + echo $RANK_SIZE + export RANK_ID=$i + export DEVICE_ID=$[i] + fi + echo "Working on device $i" + + file_path=$(cd "$(dirname $0)" || exit; pwd) + cd $file_path || exit + cd ../ || exit + + rm -rf ./run_mass_$DEVICE_ID + mkdir ./run_mass_$DEVICE_ID + + cp train.py ./run_mass_$DEVICE_ID + cp eval.py ./run_mass_$DEVICE_ID + cp $configurations ./run_mass_$DEVICE_ID + + if [ $vocab ] + then + cp $vocab ./run_mass_$DEVICE_ID + fi + + cd ./run_mass_$DEVICE_ID || exit + env > log.log + echo $task + if [ "$task" == "train" ] + then + python train.py --config ${configurations##*/} >>log.log 2>&1 & + elif [ "$task" == "infer" ] + then + python eval.py --config ${configurations##*/} --output ${output} --vocab ${vocab##*/} >>log_infer.log 2>&1 & + fi + cd ../ +done diff --git a/model_zoo/mass/src/__init__.py b/model_zoo/mass/src/__init__.py new file mode 100644 index 0000000000..7e943365a0 --- /dev/null +++ b/model_zoo/mass/src/__init__.py @@ -0,0 +1,44 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Source of mass model.""" +from .dataset import load_dataset +from .dataset import bi_data_loader +from .dataset import mono_data_loader +from .transformer import TransformerDecoder +from .transformer import TransformerEncoder +from .transformer import Transformer +from .transformer import TransformerNetworkWithLoss +from .transformer import LabelSmoothedCrossEntropyCriterion +from .transformer import TransformerTrainOneStepWithLossScaleCell +from .transformer import TransformerTraining +from .transformer import infer +from .language_model import LooseMaskedLanguageModel +from .language_model import MaskedLanguageModel +from .language_model import NoiseChannelLanguageModel + +__all__ = [ + "load_dataset", + "bi_data_loader", + "mono_data_loader", + "Transformer", + "infer", + "TransformerTraining", + "TransformerNetworkWithLoss", + "TransformerTrainOneStepWithLossScaleCell", + "LabelSmoothedCrossEntropyCriterion", + "LooseMaskedLanguageModel", + "MaskedLanguageModel", + "NoiseChannelLanguageModel" +] diff --git a/model_zoo/mass/src/dataset/__init__.py b/model_zoo/mass/src/dataset/__init__.py new file mode 100644 index 0000000000..b93504d922 --- /dev/null +++ b/model_zoo/mass/src/dataset/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Dataset module.""" +from .bi_data_loader import BiLingualDataLoader +from .mono_data_loader import MonoLingualDataLoader +from .load_dataset import load_dataset + +__all__ = [ + "load_dataset", + "BiLingualDataLoader", + "MonoLingualDataLoader" +] diff --git a/model_zoo/mass/src/dataset/base.py b/model_zoo/mass/src/dataset/base.py new file mode 100644 index 0000000000..79f1281513 --- /dev/null +++ b/model_zoo/mass/src/dataset/base.py @@ -0,0 +1,102 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Base class of data loader.""" +import os +import collections +import numpy as np + +from mindspore.mindrecord import FileWriter +from .schema import SCHEMA + + +class DataLoader: + """Data loader for dataset.""" + _SCHEMA = SCHEMA + + def __init__(self, max_sen_len=66): + self._examples = [] + self._max_sentence_len = max_sen_len + + def _load(self): + raise NotImplementedError + + def padding(self, sen, padding_idx, dtype=np.int64): + """Padding to sentence.""" + if sen.shape[0] > self._max_sentence_len: + return None + new_sen = np.array([padding_idx] * self._max_sentence_len, + dtype=dtype) + new_sen[:sen.shape[0]] = sen[:] + return new_sen + + def write_to_mindrecord(self, path, shard_num=1, desc=""): + """ + Write mindrecord file. 
+ + Args: + path (str): File path. + shard_num (int): Shard num. + desc (str): Description. + """ + if not os.path.isabs(path): + path = os.path.abspath(path) + + writer = FileWriter(file_name=path, shard_num=shard_num) + writer.add_schema(self._SCHEMA, desc) + if not self._examples: + self._load() + + writer.write_raw_data(self._examples) + writer.commit() + print(f"| Wrote to {path}.") + + def write_to_tfrecord(self, path, shard_num=1): + """ + Write to tfrecord. + + Args: + path (str): Output file path. + shard_num (int): Shard num. + """ + import tensorflow as tf + if not os.path.isabs(path): + path = os.path.abspath(path) + output_files = [] + for i in range(shard_num): + output_file = path + "-%03d-of-%03d" % (i + 1, shard_num) + output_files.append(output_file) + # create writers + writers = [] + for output_file in output_files: + writers.append(tf.io.TFRecordWriter(output_file)) + + if not self._examples: + self._load() + + # create feature + features = collections.OrderedDict() + for example in self._examples: + for key in example: + features[key] = tf.train.Feature(int64_list=tf.train.Int64List(value=example[key].tolist())) + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + for writer in writers: + writer.write(tf_example.SerializeToString()) + for writer in writers: + writer.close() + for p in output_files: + print(f" | Write to {p}.") + + def _add_example(self, example): + self._examples.append(example) diff --git a/model_zoo/mass/src/dataset/bi_data_loader.py b/model_zoo/mass/src/dataset/bi_data_loader.py new file mode 100644 index 0000000000..e2532662d9 --- /dev/null +++ b/model_zoo/mass/src/dataset/bi_data_loader.py @@ -0,0 +1,142 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Bilingual data loader.""" +import numpy as np + +from src.utils import Dictionary +from .base import DataLoader +from ..language_model.base import LanguageModel +from ..language_model.noise_channel_language_model import NoiseChannelLanguageModel + + +class BiLingualDataLoader(DataLoader): + """Loader for bilingual data.""" + + def __init__(self, src_filepath: str, tgt_filepath: str, + src_dict: Dictionary, tgt_dict: Dictionary, + src_lang: str, tgt_lang: str, + language_model: LanguageModel = NoiseChannelLanguageModel(add_noise_prob=0), + max_sen_len=66, + merge_dict=True): + super(BiLingualDataLoader, self).__init__(max_sen_len) + self._src_filepath = src_filepath + self._tgt_filepath = tgt_filepath + self._src_dict = src_dict + self._tgt_dict = tgt_dict + self.src_lang = src_lang + self.tgt_lang = tgt_lang + self._lm = language_model + self.max_sen_len = max_sen_len + self.share_dict = merge_dict + self._merge_dict() + + def _merge_dict(self): + if self.share_dict: + merged_dict = self._src_dict.merge_dict(self._tgt_dict, + new_dict=True) + self._src_dict = merged_dict + self._tgt_dict = merged_dict + + @property + def src_dict(self): + return self._src_dict + + @property + def tgt_dict(self): + return self._tgt_dict + + def _load(self): + _min_len = 9999999999 + _max_len = 0 + unk_count = 0 + tokens_count = 0 + count = 0 + with open(self._src_filepath, "r") as _src_file: + print(f" | Processing corpus {self._src_filepath}.") + print(f" | Processing corpus 
{self._tgt_filepath}.") + with open(self._tgt_filepath, "r") as _tgt_file: + _min, _max = 9999999, -1 + for _, _pair in enumerate(zip(_src_file, _tgt_file)): + src_tokens = [ + self._src_dict.index(t) + for t in _pair[0].strip().split(" ") if t + ] + tgt_tokens = [ + self._tgt_dict.index(t) + for t in _pair[1].strip().split(" ") if t + ] + src_tokens.append(self._src_dict.eos_index) + tgt_tokens.append(self._tgt_dict.eos_index) + opt = self._lm.emit( + sentence=np.array(src_tokens, dtype=np.int64), + target=np.array(tgt_tokens, dtype=np.int64), + mask_symbol_idx=self._src_dict.mask_index, + bos_symbol_idx=self._tgt_dict.bos_index + ) + src_len = opt["sentence_length"] + tgt_len = opt["tgt_sen_length"] + + _min_len = min(_min_len, opt["sentence_length"], opt["tgt_sen_length"]) + _max_len = max(_max_len, opt["sentence_length"], opt["tgt_sen_length"]) + + if src_len > self.max_sen_len or tgt_len > self.max_sen_len: + continue + + src_padding = np.zeros(shape=self.max_sen_len, dtype=np.int64) + tgt_padding = np.zeros(shape=self.max_sen_len, dtype=np.int64) + for i in range(src_len): + src_padding[i] = 1 + for j in range(tgt_len): + tgt_padding[j] = 1 + + tokens_count += opt["encoder_input"].shape[0] + tokens_count += opt["decoder_input"].shape[0] + tokens_count += opt["decoder_output"].shape[0] + unk_count += np.where(opt["encoder_input"] == self._src_dict.unk_index)[0].shape[0] + unk_count += np.where(opt["decoder_input"] == self._src_dict.unk_index)[0].shape[0] + unk_count += np.where(opt["decoder_output"] == self._src_dict.unk_index)[0].shape[0] + + encoder_input = self.padding(opt["encoder_input"], + self._src_dict.padding_index) + decoder_input = self.padding(opt["decoder_input"], + self._tgt_dict.padding_index) + decoder_output = self.padding(opt["decoder_output"], + self._tgt_dict.padding_index) + if encoder_input is None or decoder_input is None or decoder_output is None: + continue + + _min = np.min([np.min(encoder_input), + np.min(decoder_input), + 
np.min(decoder_output), _min]) + _max = np.max([np.max(encoder_input), + np.max(decoder_input), + np.max(decoder_output), _max]) + + example = { + "src_padding": src_padding, + "tgt_padding": tgt_padding, + "src": encoder_input, + "prev_opt": decoder_input, + "prev_padding": tgt_padding, + "target": decoder_output + } + self._add_example(example) + count += 1 + + print(f" | Shortest len = {_min_len}.") + print(f" | Longest len = {_max_len}.") + print(f" | Total sen = {count}.") + print(f" | Total token num={tokens_count}, " + f"{unk_count / tokens_count * 100}% replaced by .") diff --git a/model_zoo/mass/src/dataset/load_dataset.py b/model_zoo/mass/src/dataset/load_dataset.py new file mode 100644 index 0000000000..9d9d558cb6 --- /dev/null +++ b/model_zoo/mass/src/dataset/load_dataset.py @@ -0,0 +1,121 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Dataset loader to feed into model.""" +import os +import mindspore.common.dtype as mstype +import mindspore.dataset.engine as de +import mindspore.dataset.transforms.c_transforms as deC + + +def _load_dataset(input_files, batch_size, epoch_count=1, + sink_mode=False, sink_step=1, rank_size=1, rank_id=0, shuffle=True): + """ + Load dataset according to passed in params. + + Args: + input_files (list): Data files. + batch_size (int): Batch size. + epoch_count (int): Epoch count. 
+ sink_mode (bool): Whether enable sink mode. + sink_step (int): Step to sink. + rank_size (int): Rank size. + rank_id (int): Rank id. + shuffle (bool): Whether shuffle dataset. + + Returns: + Dataset, dataset instance. + """ + if not input_files: + raise FileNotFoundError("Require at least one dataset.") + + if not (schema_file and + os.path.exists(schema_file) + and os.path.isfile(schema_file) + and os.path.basename(schema_file).endswith(".json")): + raise FileNotFoundError("`dataset_schema` must be a existed json file.") + + if not isinstance(sink_mode, bool): + raise ValueError("`sink` must be type of bool.") + + for datafile in input_files: + print(f" | Loading {datafile}.") + + ds = de.TFRecordDataset( + input_files, + columns_list=[ + "src", "src_padding", + "prev_opt", "prev_padding", + "target", "tgt_padding" + ], + shuffle=shuffle, num_shards=rank_size, shard_id=rank_id, + shard_equal_rows=True, num_parallel_workers=8) + + ori_dataset_size = ds.get_dataset_size() + print(f" | Dataset size: {ori_dataset_size}.") + repeat_count = epoch_count + if sink_mode: + ds.set_dataset_size(sink_step * batch_size) + repeat_count = epoch_count * ori_dataset_size // ds.get_dataset_size() + + type_cast_op = deC.TypeCast(mstype.int32) + ds = ds.map(input_columns="src", operations=type_cast_op) + ds = ds.map(input_columns="src_padding", operations=type_cast_op) + ds = ds.map(input_columns="prev_opt", operations=type_cast_op) + ds = ds.map(input_columns="prev_padding", operations=type_cast_op) + ds = ds.map(input_columns="target", operations=type_cast_op) + ds = ds.map(input_columns="tgt_padding", operations=type_cast_op) + + ds = ds.rename( + input_columns=["src", + "src_padding", + "prev_opt", + "prev_padding", + "target", + "tgt_padding"], + output_columns=["source_eos_ids", + "source_eos_mask", + "target_sos_ids", + "target_sos_mask", + "target_eos_ids", + "target_eos_mask"] + ) + + ds = ds.batch(batch_size, drop_remainder=True) + ds = ds.repeat(repeat_count) + + 
ds.channel_name = 'transformer' + return ds + + +def load_dataset(data_files: list, batch_size: int, epoch_count: int, + sink_mode: bool, sink_step: int = 1, rank_size: int = 1, rank_id: int = 0, shuffle=True): + """ + Load dataset. + + Args: + data_files (list): Data files. + batch_size (int): Batch size. + epoch_count (int): Epoch count. + sink_mode (bool): Whether enable sink mode. + sink_step (int): Step to sink. + rank_size (int): Rank size. + rank_id (int): Rank id. + shuffle (bool): Whether shuffle dataset. + + Returns: + Dataset, dataset instance. + """ + return _load_dataset(data_files, batch_size, epoch_count, sink_mode, + sink_step, rank_size, rank_id, shuffle=shuffle) diff --git a/model_zoo/mass/src/dataset/mono_data_loader.py b/model_zoo/mass/src/dataset/mono_data_loader.py new file mode 100644 index 0000000000..13379a2f42 --- /dev/null +++ b/model_zoo/mass/src/dataset/mono_data_loader.py @@ -0,0 +1,109 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Mono data loader.""" +import numpy as np + +from src.utils import Dictionary + +from .base import DataLoader +from .schema import SCHEMA +from ..language_model.base import LanguageModel +from ..language_model import LooseMaskedLanguageModel + + +class MonoLingualDataLoader(DataLoader): + """Loader for monolingual data.""" + _SCHEMA = SCHEMA + + def __init__(self, src_filepath: str, lang: str, dictionary: Dictionary, + language_model: LanguageModel = LooseMaskedLanguageModel(mask_ratio=0.3), + max_sen_len=66, min_sen_len=16): + super(MonoLingualDataLoader, self).__init__(max_sen_len=max_sen_len) + self._file_path = src_filepath + self._lang = lang + self._dictionary = dictionary + self._lm = language_model + self.max_sen_len = max_sen_len + self.min_sen_len = min_sen_len + + @property + def dict(self): + return self._dictionary + + def generate_padding_mask(self, sentence, length, exclude_mask=False): + """Generate padding mask vector.""" + src_padding = np.zeros(shape=self.max_sen_len, dtype=np.int64) + if exclude_mask: + pos = np.where(sentence == self._dictionary.padding_index)[0] + else: + pos = np.where((sentence == self._dictionary.padding_index) | (sentence == self._dictionary.mask_index))[0] + src_padding[0:length] = 1 + if pos.shape[0] != 0: + src_padding[pos] = 0 + return src_padding + + def _load(self): + _min_len = 9999999999 + _max_len = 0 + count = 0 + with open(self._file_path, "r") as _file: + print(f" | Processing corpus {self._file_path}.") + for _, _line in enumerate(_file): + tokens = [self._dictionary.index(t.replace(" ", "")) + for t in _line.strip().split(" ") if t] + # In mass code, it doesn't add to sen. 
+ tokens.append(self._dictionary.eos_index) + opt = self._lm.emit(sentence=np.array(tokens, dtype=np.int32), + vocabulary=self._dictionary) + + src_len = opt["sentence_length"] + _min_len = min(_min_len, opt["sentence_length"], opt["tgt_sen_length"]) + _max_len = max(_max_len, opt["sentence_length"], opt["tgt_sen_length"]) + + if src_len > self.max_sen_len: + continue + if src_len < self.min_sen_len: + continue + + src_padding = self.generate_padding_mask(opt["encoder_input"], + opt["sentence_length"], + exclude_mask=False) + tgt_padding = self.generate_padding_mask(opt["decoder_input"], + opt["tgt_sen_length"], + exclude_mask=True) + + encoder_input = self.padding(opt["encoder_input"], + self._dictionary.padding_index) + decoder_input = self.padding(opt["decoder_input"], + self._dictionary.padding_index) + decoder_output = self.padding(opt["decoder_output"], + self._dictionary.padding_index) + if encoder_input is None or decoder_input is None or decoder_output is None: + continue + + example = { + "src": encoder_input, + "src_padding": src_padding, + "prev_opt": decoder_input, + "prev_padding": tgt_padding, + "target": decoder_output, + "tgt_padding": tgt_padding, + } + self._add_example(example) + count += 1 + + print(f" | Shortest len = {_min_len}.") + print(f" | Longest len = {_max_len}.") + print(f" | Total sen = {count}.") diff --git a/model_zoo/mass/src/dataset/schema.py b/model_zoo/mass/src/dataset/schema.py new file mode 100644 index 0000000000..9e92d7979b --- /dev/null +++ b/model_zoo/mass/src/dataset/schema.py @@ -0,0 +1,24 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Define schema of mindrecord.""" + +SCHEMA = { + "src": {"type": "int64", "shape": [-1]}, + "src_padding": {"type": "int64", "shape": [-1]}, + "prev_opt": {"type": "int64", "shape": [-1]}, + "prev_padding": {"type": "int64", "shape": [-1]}, + "target": {"type": "int64", "shape": [-1]}, + "tgt_padding": {"type": "int64", "shape": [-1]}, +} diff --git a/model_zoo/mass/src/language_model/__init__.py b/model_zoo/mass/src/language_model/__init__.py new file mode 100644 index 0000000000..329e39c128 --- /dev/null +++ b/model_zoo/mass/src/language_model/__init__.py @@ -0,0 +1,26 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Language model.""" +from .noise_channel_language_model import NoiseChannelLanguageModel +from .masked_language_model import MaskedLanguageModel +from .loose_masked_language_model import LooseMaskedLanguageModel +from .mass_language_model import MassLanguageModel + +__all__ = [ + "LooseMaskedLanguageModel", + "MassLanguageModel", + "MaskedLanguageModel", + "NoiseChannelLanguageModel" +] diff --git a/model_zoo/mass/src/language_model/base.py b/model_zoo/mass/src/language_model/base.py new file mode 100644 index 0000000000..1803a9ea13 --- /dev/null +++ b/model_zoo/mass/src/language_model/base.py @@ -0,0 +1,25 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Base language model.""" + + +class LanguageModel: + """Define base language model.""" + + def __init__(self): + pass + + def emit(self, **kwargs): + raise NotImplementedError diff --git a/model_zoo/mass/src/language_model/loose_masked_language_model.py b/model_zoo/mass/src/language_model/loose_masked_language_model.py new file mode 100644 index 0000000000..eb7df52a5f --- /dev/null +++ b/model_zoo/mass/src/language_model/loose_masked_language_model.py @@ -0,0 +1,130 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Modified masked language model.""" +import numpy as np + +from src.utils import Dictionary +from .base import LanguageModel + + +class LooseMaskedLanguageModel(LanguageModel): + """ + Modified mask operation on sentence. + + If k is assigned, then mask sentence with length k. + Otherwise, use mask_ratio. + + Args: + k (int): Length of fragment. + mask_ratio (float): Mask ratio. + """ + + def __init__(self, k: int = None, mask_ratio=0.5, + mask_all_prob=None): + super(LooseMaskedLanguageModel, self).__init__() + self.mask_ratio = mask_ratio + self._k = k + self._threshold = mask_all_prob + + def emit(self, sentence: np.ndarray, vocabulary: Dictionary): + """ + Mask mono source sentence. 
+ + A sample used to train model is processed with following step: + + encoder input (source): [x1, x2, x3, x4, x5, x6, x7, x8, ] + masked encoder input: [x1, x2, x3, _, _, _, x7, x8, ] + decoder input: [ -, x3, x4, x5] + | | | | + V V V V + decoder output: [x3, x4, x5, x6] + + Notes: + A simple rule is made that source sentence starts without + but end with . + + Args: + vocabulary (Dictionary): Vocabulary. + sentence (np.ndarray): Raw sentence instance. + + Returns: + dict, an example. + """ + # If v=0, then u must equal to 0. [u, v) + u, v = self._get_masked_interval(sentence.shape[0], + self._k, self._threshold) + + encoder_input = sentence.copy() + right_shifted_sentence = np.concatenate(([vocabulary.bos_index], sentence[:-1])) + + if u == 0: + _len = v - u if v - u != 0 else sentence.shape[0] + decoder_input = right_shifted_sentence[:_len] + decoder_input[0] = vocabulary.mask_index + decoder_output = sentence[:_len].copy() + else: + decoder_input = right_shifted_sentence[u - 1:v] + decoder_input[0] = vocabulary.mask_index + decoder_output = sentence[u - 1:v].copy() + + if v == 0: + decoder_input[:] = vocabulary.mask_index + else: + encoder_input[np.arange(start=u, stop=v)] = vocabulary.mask_index + + if u != v and u > 1: + padding = np.array([vocabulary.padding_index] * (u - 1), dtype=np.int32) + decoder_input = np.concatenate((padding, decoder_input)) + decoder_output = np.concatenate((padding, decoder_output)) + + if decoder_input.shape[0] != decoder_output.shape[0]: + raise ValueError("seq len must equal.") + + return { + "sentence_length": sentence.shape[0], + "tgt_sen_length": decoder_output.shape[0], + "encoder_input": encoder_input, # end with + "decoder_input": decoder_input, + "decoder_output": decoder_output # end with + } + + def _get_masked_interval(self, length, fix_length=None, + threshold_to_mask_all=None): + """ + Generate a sequence length according to length and mask_ratio. + + Args: + length (int): Sequence length. 
+ + Returns: + Tuple[int, int], [start position, end position]. + """ + # Can not larger than sequence length. + # Mask_length belongs to [0, length]. + if fix_length is not None: + interval_length = min(length, fix_length) + else: + interval_length = min(length, round(self.mask_ratio * length)) + + _magic = np.random.random() + if threshold_to_mask_all is not None and _magic <= threshold_to_mask_all: + return 0, length + + # If not sequence to be masked, then return 0, 0. + if interval_length == 0: + return 0, 0 + # Otherwise, return start position and interval length. + start_pos = np.random.randint(low=0, high=length - interval_length + 1) + return start_pos, start_pos + interval_length diff --git a/model_zoo/mass/src/language_model/masked_language_model.py b/model_zoo/mass/src/language_model/masked_language_model.py new file mode 100644 index 0000000000..52aed8d53e --- /dev/null +++ b/model_zoo/mass/src/language_model/masked_language_model.py @@ -0,0 +1,128 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Masked language model.""" +import numpy as np + +from .base import LanguageModel + + +class MaskedLanguageModel(LanguageModel): + """ + Do mask operation on sentence. + + If k is assigned, then mask sentence with length k. + Otherwise, use mask_ratio. + + Args: + k (int): Length of fragment. + mask_ratio (float): Mask ratio. 
+ """ + + def __init__(self, k: int = None, mask_ratio=0.5, + mask_all_prob=None): + super(MaskedLanguageModel, self).__init__() + self.mask_ratio = mask_ratio + self._k = k + self._threshold = mask_all_prob + + def emit(self, sentence: np.ndarray, vocabulary): + """ + Mask mono source sentence. + + A sample used to train model is processed with following step: + + encoder input (source): [x1, x2, x3, x4, x5, x6, x7, x8, ] + masked encoder input: [x1, x2, _, _, _, x6, x7, x8, ] + decoder input: [ _, x3, x4] + | | | + V V V + decoder output: [ x3, x4, x5] + + Notes: + A simple rule is made that source sentence starts without + but end with . + + Args: + vocabulary (Dictionary): Vocabulary. + sentence (np.ndarray): Raw sentence instance. + + Returns: + dict, an example. + """ + encoder_input = sentence.copy() + seq_len = encoder_input.shape[0] + + # If v=0, then u must equal to 0. [u, v) + u, v = self._get_masked_interval(len(encoder_input), + self._k, self._threshold) + + if u == 0: + _len = v - u if v - u != 0 else seq_len + decoder_input = np.array([vocabulary.mask_index] * _len, dtype=np.int32) + decoder_input[1:] = encoder_input[:_len - 1].copy() + else: + decoder_input = np.array([vocabulary.mask_index] * (v - u), dtype=np.int32) + decoder_input[1:] = encoder_input[u:v - 1].copy() + + if v == 0: + decoder_output = encoder_input.copy() + encoder_input[:] = vocabulary.mask_index + else: + decoder_output = encoder_input[u:v].copy() + encoder_input[np.arange(start=u, stop=v)] = vocabulary.mask_index + + if u != v and u > 0: + padding = np.array([vocabulary.padding_index] * u, dtype=np.int32) + decoder_input = np.concatenate((padding, decoder_input)) + decoder_output = np.concatenate((padding, decoder_output)) + + assert decoder_input.shape[0] == decoder_output.shape[0], "seq len must equal." 
+ + return { + "sentence_length": seq_len, + "tgt_sen_length": decoder_output.shape[0], + "encoder_input": encoder_input, # end with + "decoder_input": decoder_input, + "decoder_output": decoder_output # end with + } + + def _get_masked_interval(self, length, fix_length=None, + threshold_to_mask_all=None): + """ + Generate a sequence length according to length and mask_ratio. + + Args: + length (int): Sequence length. + + Returns: + Tuple[int, int], [start position, end position]. + """ + # Can not larger than sequence length. + # Mask_length belongs to [0, length]. + if fix_length is not None: + interval_length = min(length, fix_length) + else: + interval_length = min(length, round(self.mask_ratio * length)) + + _magic = np.random.random() + if threshold_to_mask_all is not None and _magic <= threshold_to_mask_all: + return 0, length + + # If not sequence to be masked, then return 0, 0. + if interval_length == 0: + return 0, 0 + # Otherwise, return start position and interval length. + start_pos = np.random.randint(low=0, high=length - interval_length + 1) + return start_pos, start_pos + interval_length diff --git a/model_zoo/mass/src/language_model/mass_language_model.py b/model_zoo/mass/src/language_model/mass_language_model.py new file mode 100644 index 0000000000..68b79265f8 --- /dev/null +++ b/model_zoo/mass/src/language_model/mass_language_model.py @@ -0,0 +1,202 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Masked language model.""" +import numpy as np + +from .base import LanguageModel + + +class MassLanguageModel(LanguageModel): + """ + Do mask operation on sentence. + + If k is assigned, then mask sentence with length k. + Otherwise, use mask_ratio. + + In mass paper, mask_ratio:keep_ratio:random_ratio=8:1:1, + fragment_ratio=0.5. + + Args: + fragment_ratio (float): Masked length of fragment. + mask_ratio (float): Total mask ratio. + keep_ratio (float): Keep ratio. + random_ratio (float): Random replacement ratio. + mask_all_prob (float): Mask all ratio. + """ + + def __init__(self, fragment_ratio: float = 0.5, + mask_ratio: float = 0.8, + keep_ratio: float = 0.1, + random_ratio: float = 0.1, + mask_all_prob=None): + if mask_ratio + keep_ratio + random_ratio > 1: + raise ValueError("The sum of `mask_ratio`, `keep_ratio` and `random_ratio` must less or equal to 1.") + + super(MassLanguageModel, self).__init__() + self.fragment_ratio = fragment_ratio + self.keep_ratio = keep_ratio + self.random_ratio = random_ratio + self._threshold = mask_all_prob + + def emit(self, sentence: np.ndarray, vocabulary): + """ + Mask mono source sentence. + + A sample used to train model is processed with following step: + + encoder input (source): [x1, x2, x3, x4, x5, x6, x7, x8, ] + masked encoder input: [x1, x2, _, _, _, x6, x7, x8, ] + decoder input: [ _, x3, x4] + | | | + V V V + decoder output: [ x3, x4, x5] + + Notes: + A simple rule is made that source sentence starts without + but end with . + + Args: + vocabulary (Dictionary): Vocabulary. + sentence (np.ndarray): Raw sentence instance. + + Returns: + dict, an example. + """ + encoder_input = sentence.copy() + seq_len = encoder_input.shape[0] + + # If v=0, then u must equal to 0. 
[u, v) + u, v = self._get_masked_interval( + len(encoder_input), + threshold_to_mask_all=self._threshold + ) + + if u == 0: + _len = v - u if v - u != 0 else seq_len + decoder_input = np.array([vocabulary.mask_index] * _len, dtype=np.int32) + decoder_input[1:] = encoder_input[:_len - 1].copy() + else: + decoder_input = np.array([vocabulary.mask_index] * (v - u), dtype=np.int32) + decoder_input[1:] = encoder_input[u:v - 1].copy() + + if v == 0: + decoder_output = encoder_input.copy() + encoder_input[:] = vocabulary.mask_index + else: + decoder_output = encoder_input[u:v].copy() + encoder_input[np.arange(start=u, stop=v)] = vocabulary.mask_index + + if u != v and u > 0: + padding = np.array([vocabulary.padding_index] * u, dtype=np.int32) + decoder_input = np.concatenate((padding, decoder_input)) + decoder_output = np.concatenate((padding, decoder_output)) + + assert decoder_input.shape[0] == decoder_output.shape[0], "seq len must equal." + + # Get masked tokens positions. + src_idx = np.where(encoder_input == vocabulary.mask_index)[0] + if src_idx.shape[0] != 0: + encoder_input = self._replace(encoder_input.copy(), + replacement=sentence, + position=src_idx, + vocabulary=vocabulary, + repl_prob=self.keep_ratio, + random_prob=self.random_ratio) + + prev_opt_idx = np.where(decoder_input != vocabulary.padding_index)[0] + if prev_opt_idx.shape[0] != 0: + decoder_input = self._replace(decoder_input.copy(), + replacement=vocabulary.mask_index, + position=prev_opt_idx, + vocabulary=vocabulary, + repl_prob=self.keep_ratio, + random_prob=self.random_ratio) + + return { + "sentence_length": seq_len, + "tgt_sen_length": decoder_output.shape[0], + "encoder_input": encoder_input, # end with + "decoder_input": decoder_input, + "decoder_output": decoder_output # end with + } + + @staticmethod + def _replace(sentence, replacement, position, vocabulary, repl_prob, random_prob): + """ + Do replacement randomly according to mass paper. + + Args: + sentence (np.ndarray): Sentence. 
+ replacement (Union[int, np.ndarray]): Replacement char. + position (np.ndarray): Position to be replaced. + vocabulary (Dictionary): Vocabulary. + repl_prob (float): Replace to mask prob. + random_prob (float): Replace randomly prob. + + Returns: + np.ndarray, a sentence. + """ + _probs = [repl_prob, random_prob] + _repl_len, _random_len = np.floor( + np.array(_probs) * position.shape[0] + ).astype(np.int32) + + if _repl_len + _random_len >= position.shape[0]: + return sentence + + if 0 < _repl_len < position.shape[0]: + _repl_idx = np.random.choice(a=position, size=_repl_len, replace=False) + if isinstance(replacement, np.ndarray): + sentence[_repl_idx] = replacement[_repl_idx] + else: + sentence[_repl_idx] = replacement + + if 0 < _random_len < position.shape[0]: + _random_idx = np.random.choice(a=position, size=_random_len, replace=False) + sentence[_random_idx] = np.random.randint( + low=5, high=vocabulary.size, + size=_random_idx.shape[0], dtype=np.int32 + ) + + return sentence + + def _get_masked_interval(self, length, fix_length=None, + threshold_to_mask_all=None): + """ + Generate a sequence length according to length and mask_ratio. + + Args: + length (int): Sequence length. + + Returns: + Tuple[int, int], [start position, end position]. + """ + # Can not larger than sequence length. + # Mask_length belongs to [0, length]. + if fix_length is not None: + interval_length = min(length, fix_length) + else: + interval_length = min(length, round(self.fragment_ratio * length)) + + _magic = np.random.random() + if threshold_to_mask_all is not None and _magic <= threshold_to_mask_all: + return 0, length + + # If not sequence to be masked, then return 0, 0. + if interval_length == 0: + return 0, 0 + # Otherwise, return start position and interval length. 
+ start_pos = np.random.randint(low=0, high=length - interval_length + 1) + return start_pos, start_pos + interval_length diff --git a/model_zoo/mass/src/language_model/noise_channel_language_model.py b/model_zoo/mass/src/language_model/noise_channel_language_model.py new file mode 100644 index 0000000000..2da89b659e --- /dev/null +++ b/model_zoo/mass/src/language_model/noise_channel_language_model.py @@ -0,0 +1,72 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Noise channel language model.""" +import numpy as np + +from .base import LanguageModel + + +class NoiseChannelLanguageModel(LanguageModel): + """Do mask on bilingual data.""" + + def __init__(self, add_noise_prob: float = 0.1): + super(NoiseChannelLanguageModel, self).__init__() + self._noisy_prob = add_noise_prob + + def emit(self, sentence: np.ndarray, target: np.ndarray, + mask_symbol_idx: int, + bos_symbol_idx: int): + """ + Add noise to sentence randomly. + + For example, given a sentence pair: + source sentence: [x1, x2, x3, x4, x5, x6, ] + target sentence: [y1, y2, y3, y4, ] + + After do random mask, data is looked like: + encoder input (source): [x1, x2, _, x4, x5, _, ] + decoder input: [, y1, y2, y3, y4] + | | | | | + V V V V V + decoder output: [ y1, y2, y3, y4, ] + + Args: + sentence (np.ndarray): Raw sentence. + target (np.ndarray): Target output (prediction). 
+ mask_symbol_idx (int): Index of MASK symbol. + bos_symbol_idx (int): Index of bos symbol. + + Returns: + dict, an example. + """ + encoder_input = sentence.copy() + tgt_seq_len = target.shape[0] + + for i, _ in enumerate(encoder_input): + _prob = np.random.random() + if _prob < self._noisy_prob: + encoder_input[i] = mask_symbol_idx + + decoder_input = np.empty(shape=tgt_seq_len, dtype=np.int64) + decoder_input[1:] = target[:-1] + decoder_input[0] = bos_symbol_idx + + return { + "sentence_length": encoder_input.shape[0], + "tgt_sen_length": tgt_seq_len, + "encoder_input": encoder_input, # end with + "decoder_input": decoder_input, # start with + "decoder_output": target # end with + } diff --git a/model_zoo/mass/src/transformer/__init__.py b/model_zoo/mass/src/transformer/__init__.py new file mode 100644 index 0000000000..7912e7f0dd --- /dev/null +++ b/model_zoo/mass/src/transformer/__init__.py @@ -0,0 +1,34 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Transformer model module.""" +from .transformer import Transformer +from .encoder import TransformerEncoder +from .decoder import TransformerDecoder +from .beam_search import BeamSearchDecoder +from .transformer_for_train import TransformerTraining, LabelSmoothedCrossEntropyCriterion, \ + TransformerNetworkWithLoss, TransformerTrainOneStepWithLossScaleCell +from .infer_mass import infer + +__all__ = [ + "infer", + "TransformerTraining", + "LabelSmoothedCrossEntropyCriterion", + "TransformerTrainOneStepWithLossScaleCell", + "TransformerNetworkWithLoss", + "Transformer", + "TransformerEncoder", + "TransformerDecoder", + "BeamSearchDecoder" +] diff --git a/model_zoo/mass/src/transformer/beam_search.py b/model_zoo/mass/src/transformer/beam_search.py new file mode 100644 index 0000000000..0c48aa3cf0 --- /dev/null +++ b/model_zoo/mass/src/transformer/beam_search.py @@ -0,0 +1,363 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Beam search decoder.""" +import numpy as np + +import mindspore.common.dtype as mstype +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor + +INF = 1. * 1e9 + + +class LengthPenalty(nn.Cell): + """ + Length penalty. + + Args: + weight (float): The length penalty weight. 
+ compute_type (mstype): Mindspore data type. Default: mstype.float32. + """ + + def __init__(self, weight=1.0, compute_type=mstype.float32): + super(LengthPenalty, self).__init__() + self.weight = weight + + self.add = P.TensorAdd() + self.pow = P.Pow() + self.div = P.RealDiv() + + self.five = Tensor(5.0, mstype.float32) + self.six = Tensor(6.0, mstype.float32) + + self.cast = P.Cast() + + def construct(self, length_tensor): + """ + Process source sentence + + Inputs: + length_tensor (Tensor): the input tensor. + + Returns: + Tensor, after punishment of length. + """ + length_tensor = self.cast(length_tensor, mstype.float32) + output = self.add(length_tensor, self.five) + output = self.div(output, self.six) + output = self.pow(output, self.weight) + return output + + +class TileBeam(nn.Cell): + """ + Beam Tile operation. + + Args: + beam_width (int): The Number of beam. + compute_type (mstype): Mindspore data type. Default: mstype.float32. + """ + + def __init__(self, beam_width, compute_type=mstype.float32): + super(TileBeam, self).__init__() + self.beam_width = beam_width + + self.expand = P.ExpandDims() + self.tile = P.Tile() + self.reshape = P.Reshape() + self.shape = P.Shape() + + def construct(self, input_tensor): + """ + Process source sentence + + Inputs: + input_tensor (Tensor): with shape (N, T, D). + + Returns: + Tensor, tiled tensor. + """ + shape = self.shape(input_tensor) + # add an dim + input_tensor = self.expand(input_tensor, 1) + # get tile shape: [1, beam, ...] + # shape = self.shape(input_tensor) + tile_shape = (1,) + (self.beam_width,) + for _ in range(len(shape) - 1): + tile_shape = tile_shape + (1,) + # tile + output = self.tile(input_tensor, tile_shape) + # reshape to [batch*beam, ...] + out_shape = (shape[0] * self.beam_width,) + shape[1:] + output = self.reshape(output, out_shape) + + return output + + +class Mod(nn.Cell): + """ + Mod operation. + + Args: + compute_type (mstype): Mindspore data type. Default: mstype.float32. 
+ """ + + def __init__(self, + compute_type=mstype.float32): + super(Mod, self).__init__() + self.compute_type = compute_type + + self.floor_div = P.FloorDiv() + self.sub = P.Sub() + self.multiply = P.Mul() + + def construct(self, input_x, input_y): + """ + Get the remainder of input_x and input_y. + + Inputs: + input_x (Tensor): Divisor. + input_y (Tensor): Dividend. + + Returns: + Tensor, remainder. + """ + x = self.floor_div(input_x, input_y) + x = self.multiply(x, input_y) + x = self.sub(input_x, x) + return x + + +class BeamSearchDecoder(nn.Cell): + """ + Beam search decoder. + + Args: + batch_size (int): Batch size of input dataset. + seq_length (int): Length of input sequence. + vocab_size (int): The shape of each embedding vector. + decoder (Cell): The transformrer decoder. + beam_width (int): Beam width for beam search in inferring. Default: 4. + length_penalty_weight (float): Penalty for sentence length. Default: 1.0. + max_decode_length (int): Max decode length for inferring. Default: 64. + sos_id (int): The index of start label . Default: 1. + eos_id (int): The index of end label . Default: 2. + compute_type (mstype): Compute type in TransformerAttention. + Default: mstype.float32. 
+ """ + + def __init__(self, + batch_size, + seq_length, + vocab_size, + decoder, + beam_width=4, + length_penalty_weight=1.0, + max_decode_length=64, + sos_id=1, + eos_id=2): + super(BeamSearchDecoder, self).__init__(auto_prefix=False) + + self.batch_size = batch_size + self.vocab_size = vocab_size + self.beam_width = beam_width + self.length_penalty_weight = length_penalty_weight + self.max_decode_length = max_decode_length + + self.decoder = decoder + + self.add = P.TensorAdd() + self.expand = P.ExpandDims() + self.reshape = P.Reshape() + self.shape_flat = (-1,) + self.shape = P.Shape() + + self.zero_tensor = Tensor(np.zeros([batch_size, beam_width]), mstype.float32) + self.ninf_tensor = Tensor(np.full([batch_size, beam_width], -INF), mstype.float32) + + self.select = P.Select() + self.flat_shape = (batch_size, beam_width * vocab_size) + self.topk = P.TopK(sorted=True) + self.floor_div = P.FloorDiv() + self.vocab_size_tensor = Tensor(self.vocab_size, mstype.int32) + self.real_div = P.RealDiv() + self.mod = Mod() + self.equal = P.Equal() + self.eos_ids = Tensor(np.full([batch_size, beam_width], eos_id), mstype.int32) + + beam_ids = np.tile(np.arange(beam_width).reshape((1, beam_width)), [batch_size, 1]) + self.beam_ids = Tensor(beam_ids, mstype.int32) + + batch_ids = np.arange(batch_size * beam_width).reshape((batch_size, beam_width)) // beam_width + self.batch_ids = Tensor(batch_ids, mstype.int32) + + self.concat = P.Concat(axis=-1) + self.gather_nd = P.GatherNd() + + # init inputs and states + self.start_ids = Tensor(np.full([batch_size * beam_width, 1], sos_id), mstype.int32) + self.init_seq = Tensor(np.full([batch_size, beam_width, 1], sos_id), mstype.int32) + + init_scores = np.tile(np.array([[0.] 
+ [-INF] * (beam_width - 1)]), [batch_size, 1]) + + self.init_total_log_probs = Tensor(np.zeros([batch_size, beam_width, 1]), mstype.float32) + self.init_scores = Tensor(init_scores, mstype.float32) + + self.init_attention = Tensor(np.zeros([batch_size, beam_width, seq_length, 1]), mstype.float32) + self.init_finished = Tensor(np.zeros([batch_size, beam_width], dtype=np.bool)) + self.init_length = Tensor(np.zeros([batch_size, beam_width], dtype=np.int32)) + + self.length_penalty = LengthPenalty(weight=length_penalty_weight) + + self.one = Tensor(1, mstype.int32) + self.prob_concat = P.Concat(axis=1) + + def one_step(self, cur_input_ids, enc_states, enc_attention_mask, state_log_probs, state_seq, state_finished, + state_length, entire_log_probs): + """ + Beam search one_step output. + + Inputs: + cur_input_ids (Tensor): with shape (batch_size * beam_width, m). + enc_states (Tensor): with shape (batch_size * beam_width, T, D). + enc_attention_mask (Tensor): with shape (batch_size * beam_width, T, D). + state_log_probs (Tensor): with shape (batch_size, beam_width). + state_seq (Tensor): with shape (batch_size, beam_width, m). + state_finished (Tensor): with shape (batch_size, beam_width). + state_length (Tensor): with shape (batch_size, beam_width). + entire_log_probs (Tensor): with shape (batch_size, beam_width, vocab_size). + + Return: + Update input parameters. 
+ """ + # log_probs, [batch_size * beam_width, 1, V] + log_probs = self.decoder(cur_input_ids, enc_states, enc_attention_mask) + # log_probs: [batch_size, beam_width, V] + log_probs = self.reshape(log_probs, (self.batch_size, self.beam_width, self.vocab_size)) + + # select topk indices, [batch_size, beam_width, V] + total_log_probs = self.add(log_probs, self.expand(state_log_probs, -1)) + + # mask finished beams, [batch_size, beam_width] + # t-1 has finished + mask_tensor = self.select(state_finished, self.ninf_tensor, self.zero_tensor) + # save the t-1 probability + total_log_probs = self.add(total_log_probs, self.expand(mask_tensor, -1)) + # [batch, beam*vocab] + flat_scores = self.reshape(total_log_probs, self.flat_shape) + # select topk, [batch, beam] + topk_scores, topk_indices = self.topk(flat_scores, self.beam_width) + + # convert to beam and word indices, [batch, beam] + beam_indices = self.floor_div(topk_indices, self.vocab_size_tensor) + word_indices = self.mod(topk_indices, self.vocab_size_tensor) + + current_word_pro = self.gather_nd( + log_probs, + self.concat((self.expand(self.batch_ids, -1), + self.expand(beam_indices, -1), + self.expand(word_indices, -1))) + ) + # [batch, beam] + current_word_pro = self.reshape(current_word_pro, (self.batch_size, self.beam_width)) + + # mask finished indices, [batch, beam] + beam_indices = self.select(state_finished, self.beam_ids, beam_indices) + word_indices = self.select(state_finished, self.eos_ids, word_indices) + topk_scores = self.select(state_finished, state_log_probs, topk_scores) + + current_word_pro = self.select(state_finished, self.ninf_tensor, current_word_pro) + + # sort according to scores with -inf for finished beams, [batch, beam] + # t ends + tmp_log_probs = self.select( + self.equal(word_indices, self.eos_ids), + self.ninf_tensor, + topk_scores) + + _, tmp_indices = self.topk(tmp_log_probs, self.beam_width) + # update, [batch_size, beam_width, 2] + tmp_gather_indices = 
self.concat((self.expand(self.batch_ids, -1), self.expand(tmp_indices, -1))) + # [batch_size, beam_width] + beam_indices = self.gather_nd(beam_indices, tmp_gather_indices) + word_indices = self.gather_nd(word_indices, tmp_gather_indices) + topk_scores = self.gather_nd(topk_scores, tmp_gather_indices) + # [batch_size, beam_width] + sorted_current_word_pro = self.gather_nd(current_word_pro, tmp_gather_indices) + + # gather indices for selecting alive beams + gather_indices = self.concat((self.expand(self.batch_ids, -1), self.expand(beam_indices, -1))) + + # length add 1 if not finished in the previous step, [batch_size, beam_width] + length_add = self.add(state_length, self.one) + state_length = self.select(state_finished, state_length, length_add) + state_length = self.gather_nd(state_length, gather_indices) + + # concat seq + seq = self.gather_nd(state_seq, gather_indices) + state_seq = self.concat((seq, self.expand(word_indices, -1))) + # update the probability of entire_log_probs + selected_entire_log_probs = self.gather_nd(entire_log_probs, gather_indices) + entire_log_probs = self.concat((selected_entire_log_probs, + self.expand(sorted_current_word_pro, -1))) + + # new finished flag and log_probs + state_finished = self.equal(word_indices, self.eos_ids) + state_log_probs = topk_scores + cur_input_ids = self.reshape(state_seq, (self.batch_size * self.beam_width, -1)) + + return cur_input_ids, state_log_probs, state_seq, state_finished, state_length, entire_log_probs + + def construct(self, enc_states, enc_attention_mask): + """ + Process source sentence + + Inputs: + enc_states (Tensor): Output of transformer encoder with shape (N, T, D). + enc_attention_mask (Tensor): encoder attention mask with shape (N, T, T). + + Returns: + Tensor, predictions output and prediction probs. 
+ """ + cur_input_ids = self.start_ids + # beam search states + state_log_probs = self.init_scores + state_seq = self.init_seq + state_finished = self.init_finished + state_length = self.init_length + entire_log_probs = self.init_total_log_probs + + for _ in range(self.max_decode_length): + # run one step decoder to get outputs of the current step + # shape [batch*beam, 1, vocab] + cur_input_ids, state_log_probs, state_seq, state_finished, state_length, entire_log_probs = self.one_step( + cur_input_ids, enc_states, enc_attention_mask, state_log_probs, + state_seq, state_finished, state_length, entire_log_probs) + + # add length penalty scores + penalty_len = self.length_penalty(state_length) + # return penalty_len + log_probs = self.real_div(state_log_probs, penalty_len) + + # sort according to scores + _, top_beam_indices = self.topk(log_probs, self.beam_width) + gather_indices = self.concat((self.expand(self.batch_ids, -1), self.expand(top_beam_indices, -1))) + # sort sequence and attention scores + predicted_ids = self.gather_nd(state_seq, gather_indices) + # take the first one + predicted_ids = predicted_ids[::, 0:1:1, ::] + + return predicted_ids, entire_log_probs diff --git a/model_zoo/mass/src/transformer/components.py b/model_zoo/mass/src/transformer/components.py new file mode 100644 index 0000000000..2efa1ee757 --- /dev/null +++ b/model_zoo/mass/src/transformer/components.py @@ -0,0 +1,66 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Components of model.""" +import mindspore.common.dtype as mstype +import mindspore.nn as nn +from mindspore.ops import operations as P + + +class SaturateCast(nn.Cell): + """Cast wrapper.""" + + def __init__(self, dst_type=mstype.float32): + super(SaturateCast, self).__init__() + self.cast = P.Cast() + self.dst_type = dst_type + + def construct(self, x): + return self.cast(x, self.dst_type) + + +class LayerNorm(nn.Cell): + """ + Do layer norm. + + Args: + in_channels (int): In channels number of layer norm. + return_2d (bool): Whether return 2d tensor. + + Returns: + Tensor, output. + """ + + def __init__(self, in_channels=None, return_2d=False): + super(LayerNorm, self).__init__() + self.return_2d = return_2d + self.layer_norm = nn.LayerNorm((in_channels,)) + self.cast = P.Cast() + self.get_dtype = P.DType() + self.reshape = P.Reshape() + self.get_shape = P.Shape() + + def construct(self, input_tensor): + shape = self.get_shape(input_tensor) + batch_size = shape[0] + max_len = shape[1] + embed_dim = shape[2] + + output = self.reshape(input_tensor, (-1, embed_dim)) + output = self.cast(output, mstype.float32) + output = self.layer_norm(output) + output = self.cast(output, self.get_dtype(input_tensor)) + if not self.return_2d: + output = self.reshape(output, (batch_size, max_len, embed_dim)) + return output diff --git a/model_zoo/mass/src/transformer/create_attn_mask.py b/model_zoo/mass/src/transformer/create_attn_mask.py new file mode 100644 index 0000000000..160e7ec3fb --- /dev/null +++ b/model_zoo/mass/src/transformer/create_attn_mask.py @@ -0,0 +1,76 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Create mask matrix for inputs.""" +import numpy as np +import mindspore.common.dtype as mstype +from mindspore import nn +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor + + +class CreateAttentionMaskFromInputMask(nn.Cell): + """ + Create attention mask according to input mask. + + Args: + config (TransformerConfig): Config class. + + Returns: + Tensor, shape of (N, T, T). + """ + + def __init__(self, config): + super(CreateAttentionMaskFromInputMask, self).__init__() + self.input_mask_from_dataset = config.input_mask_from_dataset + self.input_mask = None + + assert self.input_mask_from_dataset + + self.cast = P.Cast() + self.shape = P.Shape() + self.reshape = P.Reshape() + self.batch_matmul = P.BatchMatMul() + self.multiply = P.Mul() + self.shape = P.Shape() + # mask future positions + ones = np.ones(shape=(config.batch_size, config.seq_length, config.seq_length)) + self.lower_triangle_mask = Tensor(np.tril(ones), dtype=mstype.float32) + + def construct(self, input_mask, mask_future=False): + """ + Construct network. + + Args: + input_mask (Tensor): Tensor mask vectors with shape (N, T). + mask_future (bool): Whether mask future (for decoder training). + + Returns: + Tensor, shape of (N, T, T). + """ + input_shape = self.shape(input_mask) + # Add this for infer as the seq_length will increase. 
+ shape_right = (input_shape[0], 1, input_shape[1]) + shape_left = input_shape + (1,) + + input_mask = self.cast(input_mask, mstype.float32) + mask_left = self.reshape(input_mask, shape_left) + mask_right = self.reshape(input_mask, shape_right) + + attention_mask = self.batch_matmul(mask_left, mask_right) + + if mask_future: + attention_mask = self.multiply(attention_mask, self.lower_triangle_mask) + + return attention_mask diff --git a/model_zoo/mass/src/transformer/decoder.py b/model_zoo/mass/src/transformer/decoder.py new file mode 100644 index 0000000000..3e18dcf25f --- /dev/null +++ b/model_zoo/mass/src/transformer/decoder.py @@ -0,0 +1,221 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Decoder of Transformer.""" +import mindspore.common.dtype as mstype +from mindspore import nn + +from .feed_forward_network import FeedForwardNet +from .self_attention import SelfAttention +from .components import LayerNorm + + +class DecoderCell(nn.Cell): + """ + Decoder cells used in Transformer. + + Args: + attn_embed_dim (int): Dimensions of attention weight, e.g. Q, K, V. + num_attn_heads (int): Attention heads number. + intermediate_size (int): Hidden size in FFN. + attn_dropout_prob (float): Dropout rate in attention layer. Default: 0.1. + initializer_range (float): Initial range. Default: 0.02. 
+ dropout_prob (float): Dropout rate between layers. Default: 0.1. + hidden_act (str): Activation function in FFN. Default: "relu". + compute_type (mstype): Mindspore data type. Default: mstype.float32. + + Returns: + Tensor, output with shape (N, T', D). + """ + + def __init__(self, + attn_embed_dim=768, + num_attn_heads=12, + intermediate_size=3072, + attn_dropout_prob=0.02, + initializer_range=0.02, + dropout_prob=0.1, + hidden_act="relu", + compute_type=mstype.float32): + super(DecoderCell, self).__init__() + self.masked_attn = SelfAttention( + attn_embed_dim=attn_embed_dim, + num_attn_heads=num_attn_heads, + attn_dropout_prob=attn_dropout_prob, + initializer_range=initializer_range, + dropout_prob=dropout_prob, + compute_type=compute_type) + self.enc_dec_attn = SelfAttention( + attn_embed_dim=attn_embed_dim, + num_attn_heads=num_attn_heads, + attn_dropout_prob=attn_dropout_prob, + initializer_range=initializer_range, + dropout_prob=dropout_prob, + compute_type=compute_type) + self.feed_forward_net = FeedForwardNet( + in_channels=attn_embed_dim, + hidden_size=intermediate_size, + out_channels=attn_embed_dim, + hidden_act=hidden_act, + initializer_range=initializer_range, + hidden_dropout_prob=dropout_prob, + compute_type=compute_type) + + def construct(self, queries, attention_mask, encoder_out, enc_attention_mask): + """ + Construct network. + + Args: + queries (Tensor): With shape (N, T', D). + attention_mask (Tensor): With shape (N, T', T'). + encoder_out (Tensor): With shape (N, T, D). + enc_attention_mask (Tensor): With shape (N, T, T). + + Returns: + Tensor, output. + """ + attention_output = self.masked_attn( + queries, queries, queries, + attention_mask + ) + attention_output = self.enc_dec_attn( + attention_output, # (N, T', D) + encoder_out, encoder_out, # (N, T, D) + enc_attention_mask # (N, T, T) + ) + output = self.feed_forward_net(attention_output) + return output + + +class TransformerDecoder(nn.Cell): + """ + Implements of Transformer decoder. 
+ + Args: + attn_embed_dim (int): Dimensions of attention layer. + decoder_layers (int): Decoder layers. + num_attn_heads (int): Attention heads number. + intermediate_size (int): Hidden size of FFN. + attn_dropout_prob (float): Dropout rate in attention. Default: 0.1. + initializer_range (float): Initial range. Default: 0.02. + dropout_prob (float): Dropout rate between layers. Default: 0.1. + hidden_act (str): Non-linear activation function in FFN. Default: "relu". + compute_type (mstype): Mindspore data type. Default: mstype.float32. + + Returns: + Tensor, shape of (N, T', D). + """ + + def __init__(self, + attn_embed_dim, + decoder_layers, + num_attn_heads, + intermediate_size, + attn_dropout_prob=0.1, + initializer_range=0.02, + dropout_prob=0.1, + hidden_act="relu", + compute_type=mstype.float32): + super(TransformerDecoder, self).__init__() + self.num_layers = decoder_layers + self.attn_embed_dim = attn_embed_dim + + self.layer0 = DecoderCell( + attn_embed_dim=attn_embed_dim, + num_attn_heads=num_attn_heads, + intermediate_size=intermediate_size, + attn_dropout_prob=attn_dropout_prob, + initializer_range=initializer_range, + dropout_prob=dropout_prob, + hidden_act=hidden_act, + compute_type=compute_type + ) + self.layer1 = DecoderCell( + attn_embed_dim=attn_embed_dim, + num_attn_heads=num_attn_heads, + intermediate_size=intermediate_size, + attn_dropout_prob=attn_dropout_prob, + initializer_range=initializer_range, + dropout_prob=dropout_prob, + hidden_act=hidden_act, + compute_type=compute_type + ) + self.layer2 = DecoderCell( + attn_embed_dim=attn_embed_dim, + num_attn_heads=num_attn_heads, + intermediate_size=intermediate_size, + attn_dropout_prob=attn_dropout_prob, + initializer_range=initializer_range, + dropout_prob=dropout_prob, + hidden_act=hidden_act, + compute_type=compute_type + ) + self.layer3 = DecoderCell( + attn_embed_dim=attn_embed_dim, + num_attn_heads=num_attn_heads, + intermediate_size=intermediate_size, + 
attn_dropout_prob=attn_dropout_prob, + initializer_range=initializer_range, + dropout_prob=dropout_prob, + hidden_act=hidden_act, + compute_type=compute_type + ) + self.layer4 = DecoderCell( + attn_embed_dim=attn_embed_dim, + num_attn_heads=num_attn_heads, + intermediate_size=intermediate_size, + attn_dropout_prob=attn_dropout_prob, + initializer_range=initializer_range, + dropout_prob=dropout_prob, + hidden_act=hidden_act, + compute_type=compute_type + ) + self.layer5 = DecoderCell( + attn_embed_dim=attn_embed_dim, + num_attn_heads=num_attn_heads, + intermediate_size=intermediate_size, + attn_dropout_prob=attn_dropout_prob, + initializer_range=initializer_range, + dropout_prob=dropout_prob, + hidden_act=hidden_act, + compute_type=compute_type + ) + + self.layer_preprocess = LayerNorm(in_channels=attn_embed_dim, + return_2d=False) + + def construct(self, input_tensor, attention_mask, encoder_out, enc_attention_mask): + """ + Construct network. + + Args: + input_tensor (Tensor): With shape of (N, T', D). + attention_mask (Tensor): With shape of (N, T', T'). + encoder_out (Tensor): With shape of (N, T, D). + enc_attention_mask (Tensor): With shape of (N, T, T). + + Returns: + Tensor, shape of (N, T', D). + """ + prev_output = input_tensor + prev_output = self.layer0(prev_output, attention_mask, encoder_out, enc_attention_mask) + prev_output = self.layer1(prev_output, attention_mask, encoder_out, enc_attention_mask) + prev_output = self.layer2(prev_output, attention_mask, encoder_out, enc_attention_mask) + prev_output = self.layer3(prev_output, attention_mask, encoder_out, enc_attention_mask) + prev_output = self.layer4(prev_output, attention_mask, encoder_out, enc_attention_mask) + prev_output = self.layer5(prev_output, attention_mask, encoder_out, enc_attention_mask) + + # Add layer norm, and full connection layer. 
+ prev_output = self.layer_preprocess(prev_output) + return prev_output diff --git a/model_zoo/mass/src/transformer/embedding.py b/model_zoo/mass/src/transformer/embedding.py new file mode 100644 index 0000000000..bdce540416 --- /dev/null +++ b/model_zoo/mass/src/transformer/embedding.py @@ -0,0 +1,81 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Embedding.""" +import numpy as np +import mindspore.common.dtype as mstype +from mindspore import nn +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor +from mindspore.common.parameter import Parameter + + +class EmbeddingLookup(nn.Cell): + """Embeddings lookup table with a fixed dictionary and size.""" + + def __init__(self, + vocab_size, + embed_dim, + use_one_hot_embeddings=False): + """ + Embeddings lookup table with a fixed dictionary and size. + + Args: + vocab_size (int): Size of the dictionary of embeddings. + embed_dim (int): The size of word embedding. + use_one_hot_embeddings (bool): Whether use one-hot embedding. Default: False. + """ + super(EmbeddingLookup, self).__init__() + self.embedding_dim = embed_dim + self.vocab_size = vocab_size + self.use_one_hot_embeddings = use_one_hot_embeddings + + init_weight = np.random.normal(0, embed_dim ** -0.5, size=[vocab_size, embed_dim]) + # 0 is Padding index, thus init it as 0. 
+ init_weight[0, :] = 0 + self.embedding_table = Parameter(Tensor(init_weight), + name='embedding_table') + self.expand = P.ExpandDims() + self.gather = P.GatherV2() + self.one_hot = P.OneHot() + self.on_value = Tensor(1.0, mstype.float32) + self.off_value = Tensor(0.0, mstype.float32) + self.array_mul = P.MatMul() + self.reshape = P.Reshape() + self.get_shape = P.Shape() + + def construct(self, input_ids): + """ + Construct network. + + Args: + input_ids (Tensor): A batch of sentences with shape (N, T). + + Returns: + Tensor, word embeddings with shape (N, T, D) + """ + _shape = self.get_shape(input_ids) # (N, T). + _batch_size = _shape[0] + _max_len = _shape[1] + + flat_ids = self.reshape(input_ids, (_batch_size * _max_len,)) + if self.use_one_hot_embeddings: + one_hot_ids = self.one_hot(flat_ids, self.vocab_size, self.on_value, self.off_value) + output_for_reshape = self.array_mul( + one_hot_ids, self.embedding_table) + else: + output_for_reshape = self.gather(self.embedding_table, flat_ids, 0) + + output = self.reshape(output_for_reshape, (_batch_size, _max_len, self.embedding_dim)) + return output, self.embedding_table diff --git a/model_zoo/mass/src/transformer/encoder.py b/model_zoo/mass/src/transformer/encoder.py new file mode 100644 index 0000000000..35a112a2c3 --- /dev/null +++ b/model_zoo/mass/src/transformer/encoder.py @@ -0,0 +1,179 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Encoder of Transformer.""" +import mindspore.common.dtype as mstype +from mindspore import nn + +from .feed_forward_network import FeedForwardNet +from .self_attention import SelfAttention +from .components import LayerNorm + + +class EncoderCell(nn.Cell): + """ + Single Encoder layer. + + Layer structure is as below: + -> pre_LayerNorm + -> Multi-head Self-Attention + -> Dropout & Add + -> pre_LayerNorm + -> Fc1 + -> Activation Function + -> Dropout + -> Fc2 + -> Dropout & Add + + Args: + attn_embed_dim (int): Dimensions of attention weights. + num_attn_heads (int): Heads number. + intermediate_size (int): Hidden size in FFN. + attention_dropout_prob (float): Dropout rate in attention layer. + initializer_range (float): Initial range. + hidden_dropout_prob (float): Dropout rate in FFN. + hidden_act (str): Activation function in FFN. + compute_type (mstype): Mindspore data type. + + Returns: + Tensor, shape of (N, T, D). + """ + + def __init__(self, + attn_embed_dim=768, + num_attn_heads=12, + intermediate_size=3072, + attention_dropout_prob=0.02, + initializer_range=0.02, + hidden_dropout_prob=0.1, + hidden_act="relu", + compute_type=mstype.float32): + super(EncoderCell, self).__init__() + self.attention = SelfAttention( + attn_embed_dim=attn_embed_dim, + num_attn_heads=num_attn_heads, + attn_dropout_prob=attention_dropout_prob, + initializer_range=initializer_range, + dropout_prob=hidden_dropout_prob, + compute_type=compute_type) + self.feed_forward_net = FeedForwardNet( + in_channels=attn_embed_dim, + hidden_size=intermediate_size, + out_channels=attn_embed_dim, + hidden_act=hidden_act, + initializer_range=initializer_range, + hidden_dropout_prob=hidden_dropout_prob, + dropout=hidden_dropout_prob, + compute_type=compute_type) + + def construct(self, queries, attention_mask): + """ + Construct network. + + Args: + queries (Tensor): Shape (N, T, D). 
+ attention_mask (Tensor): Shape (N, T, T'). + + Returns: + Tensor, shape (N, T, D). + """ + attention_output = self.attention(queries, queries, queries, + attention_mask) # (N, T, D) + output = self.feed_forward_net(attention_output) # (N, T, D) + return output + + +class TransformerEncoder(nn.Cell): + """ + Implements of Transformer encoder. + + According to Google Tensor2Tensor lib experience, they found that + put layer norm behind the multi-head self-attention and ffn would + make model more robust. + + Thus, we take the same action. + + Encoder layer structure is as below: + -> pre_LayerNorm + -> Multi-head Self-Attention + -> Dropout & Add + -> pre_LayerNorm + -> Fc1 + -> Activation Function + -> Dropout + -> Fc2 + -> Dropout & Add + + Args: + attn_embed_dim (int): Dimensions of attention weights. + encoder_layers (int): Encoder layers. + num_attn_heads (int): Heads number. + intermediate_size (int): Hidden size in FFN. + attention_dropout_prob (float): Dropout rate in attention. + initializer_range (float): Initial range. + hidden_dropout_prob (float): Dropout rate in FFN. + hidden_act (str): Activation function. + compute_type (mstype): Mindspore data type. + + Returns: + Tensor, shape of (N, T, D). 
+ """ + + def __init__(self, + attn_embed_dim, + encoder_layers, + num_attn_heads=12, + intermediate_size=3072, + attention_dropout_prob=0.1, + initializer_range=0.02, + hidden_dropout_prob=0.1, + hidden_act="relu", + compute_type=mstype.float32): + super(TransformerEncoder, self).__init__() + self.num_layers = encoder_layers + + layers = [] + for _ in range(encoder_layers): + layer = EncoderCell( + attn_embed_dim=attn_embed_dim, + num_attn_heads=num_attn_heads, + intermediate_size=intermediate_size, + attention_dropout_prob=attention_dropout_prob, + initializer_range=initializer_range, + hidden_dropout_prob=hidden_dropout_prob, + hidden_act=hidden_act, + compute_type=compute_type + ) + layers.append(layer) + + self.layers = nn.CellList(layers) + self.layer_norm = LayerNorm(in_channels=attn_embed_dim) + + def construct(self, input_tensor, attention_mask): + """ + Construct network. + + Args: + input_tensor (Tensor): Shape (N, T, D). + attention_mask (Tensor): Shape (N, T, T). + + Returns: + Tensor, shape (N, T, D). + """ + prev_output = input_tensor + for layer_module in self.layers: + prev_output = layer_module(prev_output, + attention_mask) # (N, T, D) + prev_output = self.layer_norm(prev_output) # (N, T, D) + return prev_output diff --git a/model_zoo/mass/src/transformer/feed_forward_network.py b/model_zoo/mass/src/transformer/feed_forward_network.py new file mode 100644 index 0000000000..ca42e6a3e7 --- /dev/null +++ b/model_zoo/mass/src/transformer/feed_forward_network.py @@ -0,0 +1,92 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Forward network with two fc layers.""" +import mindspore.common.dtype as mstype +from mindspore import nn +from mindspore.common.initializer import TruncatedNormal +from mindspore.ops import operations as P + +from .residual_conn import ResidualConnection +from .components import LayerNorm + + +class FeedForwardNet(nn.Cell): + """ + Feed Forward Network (contain 2 fc layers). + + Args: + in_channels (int): Dimensions of input matrix. + hidden_size (int): Hidden size. + out_channels (int): Dimensions of output matrix. + hidden_act (str): Activation function. + initializer_range (float): Initialization value of TruncatedNormal. Default: 0.02. + hidden_dropout_prob (float): The dropout probability for hidden outputs. Default: 0.1. + dropout (float): Dropout in residual block. Default: 0.1. + compute_type (mstype): Compute type in FeedForward. Default: mstype.float32. + + Returns: + Tensor, shape of (N, T, D). 
+ """ + + def __init__(self, + in_channels, + hidden_size, + out_channels, + hidden_act="relu", + initializer_range=0.02, + hidden_dropout_prob=0.1, + dropout=None, + compute_type=mstype.float32): + super(FeedForwardNet, self).__init__() + + self.fc1 = nn.Dense(in_channels, + hidden_size, + activation=hidden_act, + weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) + self.fc2 = nn.Dense(hidden_size, + out_channels, + weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) + + self.layer_norm = LayerNorm(in_channels=in_channels, + return_2d=True) + self.residual = ResidualConnection( + dropout_prob=hidden_dropout_prob if dropout is None else dropout + ) + self.get_shape = P.Shape() + self.reshape = P.Reshape() + self.dropout = nn.Dropout(keep_prob=1 - hidden_dropout_prob) + + def construct(self, input_tensor): + """ + Construct network. + + Args: + input_tensor (Tensor): Shape (N, T, D). + + Returns: + Tensor, (N, T, D). + """ + shape = self.get_shape(input_tensor) + batch_size = shape[0] + max_len = shape[1] + embed_dim = shape[2] + + output = self.layer_norm(input_tensor) + output = self.fc1(output) + output = self.dropout(output) + output = self.fc2(output) # (-1, D) + output = self.residual(self.reshape(output, (batch_size, max_len, embed_dim)), + input_tensor) # (N, T, D) + return output diff --git a/model_zoo/mass/src/transformer/grad_clip.py b/model_zoo/mass/src/transformer/grad_clip.py new file mode 100644 index 0000000000..33a169967e --- /dev/null +++ b/model_zoo/mass/src/transformer/grad_clip.py @@ -0,0 +1,67 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Gradient clip.""" +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore.ops import composite as C + +GRADIENT_CLIP_TYPE = 1 +GRADIENT_CLIP_VALUE = 8.0 + + +class ClipGradients(nn.Cell): + """ + Clip gradients. + + Returns: + List, a list of clipped_grad tuples. + """ + + def __init__(self): + super(ClipGradients, self).__init__() + self.clip_by_norm = nn.ClipByNorm() + self.cast = P.Cast() + self.dtype = P.DType() + + def construct(self, + grads, + clip_type, + clip_value): + """ + Construct gradient clip network. + + Args: + grads (list): List of gradient tuples. + clip_type (Tensor): The way to clip, 'value' or 'norm'. + clip_value (Tensor): Specifies how much to clip. + + Returns: + List, a list of clipped_grad tuples. 
+ """ + if clip_type != 0 and clip_type != 1: # pylint: disable=R1714 + return grads + + new_grads = () + for grad in grads: + dt = self.dtype(grad) + if clip_type == 0: + t = C.clip_by_value(grad, self.cast(F.tuple_to_array((-clip_value,)), dt), + self.cast(F.tuple_to_array((clip_value,)), dt)) + else: + t = self.clip_by_norm(grad, self.cast(F.tuple_to_array((clip_value,)), dt)) + new_grads = new_grads + (t,) + + return new_grads diff --git a/model_zoo/mass/src/transformer/infer_mass.py b/model_zoo/mass/src/transformer/infer_mass.py new file mode 100644 index 0000000000..54a0b4e54f --- /dev/null +++ b/model_zoo/mass/src/transformer/infer_mass.py @@ -0,0 +1,158 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Infer api.""" +import time + +import mindspore.nn as nn +import mindspore.common.dtype as mstype +from mindspore.common.tensor import Tensor +from mindspore.train.model import Model + +from mindspore import context + +from src.dataset import load_dataset +from .transformer_for_infer import TransformerInferModel +from ..utils.load_weights import load_infer_weights + +context.set_context( + mode=context.GRAPH_MODE, + save_graphs=False, + device_target="Ascend", + reserve_class_name_in_scope=False) + + +class TransformerInferCell(nn.Cell): + """ + Encapsulation class of transformer network infer. 
+ + Args: + network (nn.Cell): Transformer model. + + Returns: + Tuple[Tensor, Tensor], predicted_ids and predicted_probs. + """ + + def __init__(self, network): + super(TransformerInferCell, self).__init__(auto_prefix=False) + self.network = network + + def construct(self, + source_ids, + source_mask): + """Defines the computation performed.""" + + predicted_ids, predicted_probs = self.network(source_ids, + source_mask) + + return predicted_ids, predicted_probs + + +def transformer_infer(config, dataset): + """ + Run infer with Transformer. + + Args: + config (TransformerConfig): Config. + dataset (Dataset): Dataset. + + Returns: + List[Dict], prediction, each example has 4 keys, "source", + "target", "prediction" and "prediction_prob". + """ + tfm_model = TransformerInferModel(config=config, use_one_hot_embeddings=False) + tfm_model.init_parameters_data() + + params = tfm_model.trainable_params() + weights = load_infer_weights(config) + + for param in params: + value = param.default_input + name = param.name + if name not in weights: + raise ValueError(f"{name} is not found in weights.") + + with open("weight_after_deal.txt", "a+") as f: + weights_name = name + f.write(weights_name + "\n") + if isinstance(value, Tensor): + print(name, value.asnumpy().shape) + if weights_name in weights: + assert weights_name in weights + param.default_input = Tensor(weights[weights_name], mstype.float32) + else: + raise ValueError(f"{weights_name} is not found in checkpoint.") + else: + raise TypeError(f"Type of {weights_name} is not Tensor.") + + print(" | Load weights successfully.") + tfm_infer = TransformerInferCell(tfm_model) + model = Model(tfm_infer) + + predictions = [] + probs = [] + source_sentences = [] + target_sentences = [] + for batch in dataset.create_dict_iterator(): + source_sentences.append(batch["source_eos_ids"]) + target_sentences.append(batch["target_eos_ids"]) + + source_ids = Tensor(batch["source_eos_ids"], mstype.int32) + source_mask = 
Tensor(batch["source_eos_mask"], mstype.int32) + + start_time = time.time() + predicted_ids, entire_probs = model.predict(source_ids, source_mask) + print(f" | Batch size: {config.batch_size}, " + f"Time cost: {time.time() - start_time}.") + + predictions.append(predicted_ids.asnumpy()) + probs.append(entire_probs.asnumpy()) + + output = [] + for inputs, ref, batch_out, batch_probs in zip(source_sentences, + target_sentences, + predictions, + probs): + for i in range(config.batch_size): + if batch_out.ndim == 3: + batch_out = batch_out[:, 0] + + example = { + "source": inputs[i].tolist(), + "target": ref[i].tolist(), + "prediction": batch_out[i].tolist(), + "prediction_prob": batch_probs[i].tolist() + } + output.append(example) + + return output + + +def infer(config): + """ + Transformer infer api. + + Args: + config (TransformerConfig): Config. + + Returns: + list, prediction results of the test dataset. + """ + eval_dataset = load_dataset(data_files=config.test_dataset, + batch_size=config.batch_size, + epoch_count=1, + sink_mode=config.dataset_sink_mode, + shuffle=False) if config.test_dataset else None + prediction = transformer_infer(config, eval_dataset) + return prediction diff --git a/model_zoo/mass/src/transformer/multi_head_attention.py b/model_zoo/mass/src/transformer/multi_head_attention.py new file mode 100644 index 0000000000..dbdf1716cf --- /dev/null +++ b/model_zoo/mass/src/transformer/multi_head_attention.py @@ -0,0 +1,226 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Multi-Head Self-Attention block.""" +import math + +import mindspore.common.dtype as mstype +import mindspore.nn as nn +import mindspore.ops.functional as F +from mindspore.common.initializer import TruncatedNormal +from mindspore.common.tensor import Tensor +from mindspore.ops import operations as P +from .components import SaturateCast + + +class MultiHeadAttention(nn.Cell): + """ + Implementation of multi-head self-attention. + + In the encoder, the calculation of single-head self-attention is as below. + + Inputs: [x1, x2, x3, x4...] (xi is a word embedding, with shape T*D, Inputs's shape is N*T*D); + Weights: Wq(D*embed_dim), Wk(D*embed_dim), Wv(D*embed_dim); + + Query, key, value are calculated in below formula: + Q = Input * Wq (N*T*embed_dim); + K = Input * Wk (N*T*embed_dim); + V = Input * Wv (N*T*embed_dim); + + Then, attention score is calculated: + A = K * Q.T (qi is dotted with each ki, A's shape is N*T*T. + e.g. q1 is dotted with k1, k2, k3, k4, + then vector of [a1.1, a1.2, a1.3, a1.4] will be available. + ai,j represents the importance of the j-th word embedding to the i-th.) + + A^ = Soft-max(A) (Normalize the score, N*T*T). + + Finally, the output of self-attention cell is: + O = A^ * V (N*T*embed_dim, each word embedding was represented with self-attention.) + + Multi-head self-attention is the same as single-head self-attention except that + Wq, Wk, Wv are repeated `head_num` times. + + In our implementation, Wq = Wk = Wv = attn_embed_dim // num_attn_heads. + + Args: + src_dim (int): Dimensions of queries. + tgt_dim (int): Dimensions of keys and values. + attn_embed_dim (int): Dimensions of attention weight, e.g. Q, K, V. + num_attn_heads (int): Attention heads number. Default: 1. + query_act (str): Activation function for Q. Default: None. 
+ key_act (str): Activation function for K. Default: None. + value_act (str): Activation function for V. Default: None. + has_attention_mask (bool): Whether has attention mask. Default: True. + attention_dropout_prob (float): Dropout rate in attention. Default: 0.1. + initializer_range (float): Initial range. + do_return_2d_tensor (bool): Whether return 2d matrix. Default: True. + compute_type (mstype): Mindspore data type. Default: mstype.float32. + + Returns: + Tensor, with shape (N, T, D). + """ + + def __init__(self, + src_dim, + tgt_dim, + attn_embed_dim, + num_attn_heads=1, + query_act=None, + key_act=None, + value_act=None, + out_act=None, + has_attention_mask=True, + attention_dropout_prob=0.0, + initializer_range=0.02, + do_return_2d_tensor=True, + compute_type=mstype.float32): + super(MultiHeadAttention, self).__init__() + if attn_embed_dim % num_attn_heads != 0: + raise ValueError(f"The hidden size {attn_embed_dim} is not a multiple of the " + f"number of attention heads {num_attn_heads}") + + self.attn_embed_dim = attn_embed_dim + self.num_attn_heads = num_attn_heads + self.size_per_head = attn_embed_dim // num_attn_heads + self.src_dim = src_dim + self.tgt_dim = tgt_dim + self.has_attention_mask = has_attention_mask + + if attn_embed_dim != self.num_attn_heads * self.size_per_head: + raise ValueError("`attn_embed_dim` must be divided by num_attn_heads.") + + self.scores_mul = Tensor([1.0 / math.sqrt(float(self.size_per_head))], + dtype=compute_type) + self.reshape = P.Reshape() + + self.query_layer = nn.Dense(src_dim, + attn_embed_dim, + activation=query_act, + has_bias=True, + weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) + self.key_layer = nn.Dense(tgt_dim, + attn_embed_dim, + activation=key_act, + has_bias=True, + weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) + self.value_layer = nn.Dense(tgt_dim, + attn_embed_dim, + activation=value_act, + has_bias=True, + 
weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) + self.out_layer = nn.Dense(attn_embed_dim, + attn_embed_dim, + activation=out_act, + has_bias=True, + weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) + + self.matmul_trans_b = P.BatchMatMul(transpose_b=True) + self.multiply = P.Mul() + self.transpose = P.Transpose() + self.multiply_data = Tensor([-10000.0], dtype=compute_type) + self.matmul = P.BatchMatMul() + + self.softmax = nn.Softmax() + self.dropout = nn.Dropout(1 - attention_dropout_prob) + + if self.has_attention_mask: + self.expand_dims = P.ExpandDims() + self.sub = P.Sub() + self.add = P.TensorAdd() + self.cast = P.Cast() + self.get_dtype = P.DType() + + self.do_return_2d_tensor = do_return_2d_tensor + self.cast_compute_type = SaturateCast(dst_type=compute_type) + self.softmax_cast = P.Cast() + self.get_shape = P.Shape() + self.transpose_orders = (0, 2, 1, 3) + + def construct(self, queries, keys, values, attention_mask): + """ + Construct network. + + For self attention operation, T==T'. + For encoder-decoder-attention, T!=T' + + Args: + queries (Tensor): Input queries, with shape (N, T, D). + keys (Tensor): Input keys, with shape (N, T', D). + values (Tensor): Input values, with shape (N, T', D). + attention_mask (Tensor): Mask matrix, with shape (N, T, T'). + + Returns: + Tensor, with shape (N, T, D). 
+ """ + q_shape = self.get_shape(queries) # (N, T, D) + batch_size = q_shape[0] + src_max_len = q_shape[1] + + k_shape = self.get_shape(keys) # (N, T', D) + tgt_max_len = k_shape[1] + + _src_4d_shape = (batch_size, src_max_len, self.num_attn_heads, self.size_per_head) + _tgt_4d_shape = (batch_size, tgt_max_len, self.num_attn_heads, self.size_per_head) + + queries_2d = self.reshape(queries, (-1, self.src_dim)) + keys_2d = self.reshape(keys, (-1, self.tgt_dim)) + values_2d = self.reshape(values, (-1, self.tgt_dim)) + + query_out = self.query_layer(queries_2d) # (N*T, D)*(D, D) -> (N*T, D) + key_out = self.key_layer(keys_2d) # (N*T, D)*(D, D) -> (N*T, D) + value_out = self.value_layer(values_2d) # (N*T, D)*(D, D) -> (N*T, D) + + query_out = self.multiply(query_out, self.scores_mul) + + query_layer = self.reshape(query_out, _src_4d_shape) + query_layer = self.transpose(query_layer, self.transpose_orders) # (N, h, T, D') + key_layer = self.reshape(key_out, _tgt_4d_shape) + key_layer = self.transpose(key_layer, self.transpose_orders) # (N, h, T', D') + value_layer = self.reshape(value_out, _tgt_4d_shape) + value_layer = self.transpose(value_layer, self.transpose_orders) # (N, h, T', D') + + # (N, h, T, D')*(N, h, D', T') -> (N, h, T, T') + attention_scores = self.matmul_trans_b(query_layer, key_layer) + + if self.has_attention_mask: + attention_mask = self.expand_dims(attention_mask, 1) + multiply_out = self.sub( + self.cast(F.tuple_to_array((1.0,)), self.get_dtype(attention_scores)), + self.cast(attention_mask, self.get_dtype(attention_scores)) + ) # make mask position into 1, unmask position into 0. 
+ adder = self.multiply(multiply_out, self.multiply_data) + adder = self.softmax_cast(adder, mstype.float32) + attention_scores = self.softmax_cast(attention_scores, mstype.float32) + attention_scores = self.add(adder, attention_scores) + + attention_scores = self.softmax_cast(attention_scores, mstype.float32) + attention_prob = self.softmax(attention_scores) + attention_prob = self.softmax_cast(attention_prob, self.get_dtype(key_layer)) + attention_prob = self.dropout(attention_prob) + + # (N, h, T, T')*(N, h, T', D') -> (N, h, T, D') + context_layer = self.matmul(attention_prob, value_layer) + context_layer = self.transpose(context_layer, self.transpose_orders) # (N, T, h, D') + context_layer = self.reshape(context_layer, + (batch_size * src_max_len, self.attn_embed_dim)) # (N*T, D) + + context_layer = self.out_layer(context_layer) + + if not self.do_return_2d_tensor: + context_layer = self.reshape( + context_layer, (batch_size, src_max_len, self.attn_embed_dim) + ) # (N, T, D) + + return context_layer diff --git a/model_zoo/mass/src/transformer/positional_embedding.py b/model_zoo/mass/src/transformer/positional_embedding.py new file mode 100644 index 0000000000..317077aef7 --- /dev/null +++ b/model_zoo/mass/src/transformer/positional_embedding.py @@ -0,0 +1,82 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Positional Embedding.""" +import numpy as np +from mindspore import nn +from mindspore import Tensor +import mindspore.common.dtype as mstype +from mindspore.ops import operations as P + + +def position_encoding(length, depth, + min_timescale=1, + max_timescale=1e4): + """ + Create Tensor of sinusoids of different frequencies. + + Args: + length (int): Length of the Tensor to create, i.e. Number of steps. + depth (int): Dimensions of embedding. + min_timescale (float): Minimum time scale. + max_timescale (float): Maximum time scale. + + Returns: + Tensor of shape (T, D) + """ + depth = depth // 2 + positions = np.arange(length, dtype=np.float32) + log_timescale_increment = (np.log(max_timescale / min_timescale) / (depth - 1)) + inv_timescales = min_timescale * np.exp( + np.arange(depth, dtype=np.float32) * -log_timescale_increment) + scaled_time = np.expand_dims(positions, 1) * np.expand_dims(inv_timescales, 0) + # instead of using SIN and COS interleaved + # it's the same to first use SIN then COS + # as they are applied to the same position + x = np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1) + return x + + +class PositionalEmbedding(nn.Cell): + """ + Add positional info to word embeddings. + + Args: + embedding_size (int): Size of word embedding. + max_position_embeddings (int): Maximum step in this model. + + Returns: + Tensor, shape of (N, T, D). 
+ """ + + def __init__(self, + embedding_size, + max_position_embeddings=512): + super(PositionalEmbedding, self).__init__() + self.add = P.TensorAdd() + self.expand_dims = P.ExpandDims() + self.position_embedding_table = Tensor( + position_encoding(max_position_embeddings, embedding_size), + mstype.float32 + ) + self.gather = P.GatherV2() + self.get_shape = P.Shape() + + def construct(self, word_embeddings): + input_shape = self.get_shape(word_embeddings) + input_len = input_shape[1] + position_embeddings = self.position_embedding_table[0:input_len:1, ::] + position_embeddings = self.expand_dims(position_embeddings, 0) + output = self.add(word_embeddings, position_embeddings) + return output diff --git a/model_zoo/mass/src/transformer/residual_conn.py b/model_zoo/mass/src/transformer/residual_conn.py new file mode 100644 index 0000000000..9d75a9b0c2 --- /dev/null +++ b/model_zoo/mass/src/transformer/residual_conn.py @@ -0,0 +1,49 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Residual block.""" +import mindspore.nn as nn +from mindspore.ops import operations as P + + +class ResidualConnection(nn.Cell): + """ + Add residual to output. + + Args: + dropout_prob (float): Dropout rate. + + Returns: + Tensor, with same shape of hidden_tensor. 
+ """ + + def __init__(self, dropout_prob=0.1): + super(ResidualConnection, self).__init__() + self.add = P.TensorAdd() + self.dropout = nn.Dropout(1 - dropout_prob) + + def construct(self, hidden_tensor, residual): + """ + Construct network. + + Args: + hidden_tensor (Tensor): Hidden tensor. + residual (Tensor): Input tensor. + + Returns: + Tensor, which has the same shape with hidden_tensor and residual. + """ + output = self.dropout(hidden_tensor) + output = self.add(output, residual) + return output diff --git a/model_zoo/mass/src/transformer/self_attention.py b/model_zoo/mass/src/transformer/self_attention.py new file mode 100644 index 0000000000..5a21c5aaf3 --- /dev/null +++ b/model_zoo/mass/src/transformer/self_attention.py @@ -0,0 +1,86 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Self-Attention block.""" +import mindspore.common.dtype as mstype +from mindspore import nn + +from .multi_head_attention import MultiHeadAttention +from .residual_conn import ResidualConnection +from .components import LayerNorm + + +class SelfAttention(nn.Cell): + """ + Self-Attention. + + Layer norm -> Multi-Head Self-Attention -> Add & Dropout. + + Args: + attn_embed_dim (int): Dimensions of attention weight, e.g. Q, K, V. + num_attn_heads (int): Attention heads number. Default: 1. + attn_dropout_prob (float): Dropout rate in attention. 
Default: 0.1. + initializer_range (float): Initial range. + dropout_prob (float): Dropout rate. + has_attention_mask (bool): Whether has attention mask. + compute_type (mstype): Mindspore data type. Default: mstype.float32. + + Returns: + Tensor, shape (N, T, D). + """ + + def __init__(self, + attn_embed_dim, + num_attn_heads, + attn_dropout_prob=0.1, + initializer_range=0.02, + dropout_prob=0.1, + has_attention_mask=True, + compute_type=mstype.float32): + super(SelfAttention, self).__init__() + self.multi_head_self_attention = MultiHeadAttention( + src_dim=attn_embed_dim, + tgt_dim=attn_embed_dim, + attn_embed_dim=attn_embed_dim, + num_attn_heads=num_attn_heads, + attention_dropout_prob=attn_dropout_prob, + initializer_range=initializer_range, + has_attention_mask=has_attention_mask, + do_return_2d_tensor=False, + compute_type=compute_type) + + self.layer_norm = LayerNorm(in_channels=attn_embed_dim) + self.residual = ResidualConnection(dropout_prob=dropout_prob) + + def construct(self, queries, keys, values, attention_mask): + """ + Construct self-attention block. + + Layer norm -> Multi-Head Self-Attention -> Add & Dropout. + + Args: + queries (Tensor): Shape (N, T, D). + keys (Tensor): Shape (N, T', D). + values (Tensor): Shape (N, T', D). + attention_mask (Tensor): Shape (N, T, T'). + + Returns: + Tensor, shape (N, T, D). + """ + q = self.layer_norm(queries) # (N, T, D) + attention_output = self.multi_head_self_attention( + q, keys, values, attention_mask + ) # (N, T, D) + q = self.residual(attention_output, queries) + return q diff --git a/model_zoo/mass/src/transformer/transformer.py b/model_zoo/mass/src/transformer/transformer.py new file mode 100644 index 0000000000..97d682f29b --- /dev/null +++ b/model_zoo/mass/src/transformer/transformer.py @@ -0,0 +1,166 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Transformer model addressed by Vaswani et al., 2017.""" +import copy +import math + +from mindspore import nn, Tensor +from mindspore.ops import operations as P +from mindspore.common import dtype as mstype + +from config.config import TransformerConfig + +from .encoder import TransformerEncoder +from .decoder import TransformerDecoder +from .create_attn_mask import CreateAttentionMaskFromInputMask +from .embedding import EmbeddingLookup +from .positional_embedding import PositionalEmbedding +from .components import SaturateCast + + +class Transformer(nn.Cell): + """ + Transformer with encoder and decoder. + + In Transformer, we define T = src_max_len, T' = tgt_max_len. + + Args: + config (TransformerConfig): Model config. + is_training (bool): Whether is training. + use_one_hot_embeddings (bool): Whether use one-hot embedding. + + Returns: + Tuple[Tensor], network outputs. 
+ """ + + def __init__(self, + config: TransformerConfig, + is_training: bool, + use_one_hot_embeddings: bool = False, + use_positional_embedding: bool = True): + super(Transformer, self).__init__() + + self.use_positional_embedding = use_positional_embedding + config = copy.deepcopy(config) + self.is_training = is_training + if not is_training: + config.hidden_dropout_prob = 0.0 + config.attention_dropout_prob = 0.0 + + self.input_mask_from_dataset = config.input_mask_from_dataset + self.batch_size = config.batch_size + self.max_positions = config.seq_length + self.attn_embed_dim = config.hidden_size + self.num_layers = config.num_hidden_layers + self.word_embed_dim = config.hidden_size + + self.last_idx = self.num_layers - 1 + + self.embedding_lookup = EmbeddingLookup( + vocab_size=config.vocab_size, + embed_dim=self.word_embed_dim, + use_one_hot_embeddings=use_one_hot_embeddings) + + if self.use_positional_embedding: + self.positional_embedding = PositionalEmbedding( + embedding_size=self.word_embed_dim, + max_position_embeddings=config.max_position_embeddings) + + self.encoder = TransformerEncoder( + attn_embed_dim=self.attn_embed_dim, + encoder_layers=self.num_layers, + num_attn_heads=config.num_attention_heads, + intermediate_size=config.intermediate_size, + attention_dropout_prob=config.attention_dropout_prob, + initializer_range=config.initializer_range, + hidden_dropout_prob=config.hidden_dropout_prob, + hidden_act=config.hidden_act, + compute_type=config.compute_type) + + self.decoder = TransformerDecoder( + attn_embed_dim=self.attn_embed_dim, + decoder_layers=self.num_layers, + num_attn_heads=config.num_attention_heads, + intermediate_size=config.intermediate_size, + attn_dropout_prob=config.attention_dropout_prob, + initializer_range=config.initializer_range, + dropout_prob=config.hidden_dropout_prob, + hidden_act=config.hidden_act, + compute_type=config.compute_type) + + self.cast = P.Cast() + self.dtype = config.dtype + self.cast_compute_type = 
SaturateCast(dst_type=config.compute_type) + self.slice = P.StridedSlice() + self.dropout = nn.Dropout(keep_prob=1 - config.hidden_dropout_prob) + + self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config) + + self.scale = Tensor([math.sqrt(float(self.word_embed_dim))], + dtype=mstype.float32) + self.multiply = P.Mul() + + def construct(self, source_ids, source_mask, target_ids, target_mask): + """ + Construct network. + + In this method, T = src_max_len, T' = tgt_max_len. + + Args: + source_ids (Tensor): Source sentences with shape (N, T). + source_mask (Tensor): Source sentences padding mask with shape (N, T), + where 0 indicates padding position. + target_ids (Tensor): Target sentences with shape (N, T'). + target_mask (Tensor): Target sentences padding mask with shape (N, T'), + where 0 indicates padding position. + + Returns: + Tuple[Tensor], network outputs. + """ + # Process source sentences. + src_embeddings, embedding_tables = self.embedding_lookup(source_ids) + src_embeddings = self.multiply(src_embeddings, self.scale) + if self.use_positional_embedding: + src_embeddings = self.positional_embedding(src_embeddings) + src_embeddings = self.dropout(src_embeddings) + + # Attention mask with shape (N, T, T). + enc_attention_mask = self._create_attention_mask_from_input_mask(source_mask) + # Transformer encoder. + encoder_output = self.encoder( + self.cast_compute_type(src_embeddings), # (N, T, D). + self.cast_compute_type(enc_attention_mask) # (N, T, T). + ) + + # Process target sentences. + tgt_embeddings, _ = self.embedding_lookup(target_ids) + tgt_embeddings = self.multiply(tgt_embeddings, self.scale) + if self.use_positional_embedding: + tgt_embeddings = self.positional_embedding(tgt_embeddings) + tgt_embeddings = self.dropout(tgt_embeddings) + + # Attention mask with shape (N, T', T'). + tgt_attention_mask = self._create_attention_mask_from_input_mask( + target_mask, True + ) + # Transformer decoder. 
+ decoder_output = self.decoder( + self.cast_compute_type(tgt_embeddings), # (N, T', D) + self.cast_compute_type(tgt_attention_mask), # (N, T', T') + encoder_output, # (N, T, D) + enc_attention_mask # (N, T, T) + ) + + return encoder_output, decoder_output, embedding_tables diff --git a/model_zoo/mass/src/transformer/transformer_for_infer.py b/model_zoo/mass/src/transformer/transformer_for_infer.py new file mode 100644 index 0000000000..8b1a1c4667 --- /dev/null +++ b/model_zoo/mass/src/transformer/transformer_for_infer.py @@ -0,0 +1,331 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Transformer for infer.""" +import math +import copy +import numpy as np +import mindspore.common.dtype as mstype +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor + +from .beam_search import BeamSearchDecoder, TileBeam +from .embedding import EmbeddingLookup +from .positional_embedding import PositionalEmbedding +from .components import SaturateCast +from .create_attn_mask import CreateAttentionMaskFromInputMask +from .decoder import TransformerDecoder +from .encoder import TransformerEncoder + + +class PredLogProbs(nn.Cell): + """ + Get log probs. + + Args: + batch_size (int): Batch size of input dataset. + seq_length (int): The length of sequences. 
+ width (int): Number of parameters of a layer. + compute_type (int): Type of input type. + dtype (int): Type of MindSpore output type. + """ + + def __init__(self, + batch_size, + seq_length, + width, + compute_type=mstype.float32, + dtype=mstype.float32): + super(PredLogProbs, self).__init__() + self.batch_size = batch_size + self.seq_length = seq_length + self.width = width + self.compute_type = compute_type + self.dtype = dtype + + self.reshape = P.Reshape() + self.matmul = P.MatMul(transpose_b=True) + self.log_softmax = nn.LogSoftmax(axis=-1) + self.shape_flat_sequence_tensor = (self.batch_size * self.seq_length, self.width) + self.cast = P.Cast() + + def construct(self, input_tensor, output_weights): + """ + Calculate the log_softmax. + + Inputs: + input_tensor (Tensor): A batch of sentences with shape (N, T). + output_weights (Tensor): A batch of masks with shape (N, T). + + Returns: + Tensor, the prediction probability with shape (N, T'). + """ + input_tensor = self.reshape(input_tensor, self.shape_flat_sequence_tensor) + input_tensor = self.cast(input_tensor, self.compute_type) + output_weights = self.cast(output_weights, self.compute_type) + + logits = self.matmul(input_tensor, output_weights) + logits = self.cast(logits, self.dtype) + + log_probs = self.log_softmax(logits) + return log_probs + + +class TransformerDecoderStep(nn.Cell): + """ + Multi-layer transformer decoder step. + + Args: + config (TransformerConfig): The config of Transformer. + num_hidden_layers (int): The numbers of hidden layers. + attn_embed_dim (int): Dimensions of attention weights. + num_attn_heads (int): Heads number. Default: 12. + seq_length (int): The length of a sequence. + intermediate_size: Hidden size in FFN. + attn_dropout_prob (float): Dropout rate in attention. Default: 0.1. + initializer_range (float): Initial range. + hidden_dropout_prob (float): Dropout rate in FFN. + hidden_act (str): Activation function in FFN. + compute_type (mstype): Mindspore data type. 
Default: mstype.float32. + embedding_lookup (function): Embeddings lookup operation. Default: None. + positional_embedding (function): Position Embedding operation. Default: None. + projection (function): Function to get log probs. Default: None. + """ + + def __init__(self, + config, + num_hidden_layers, + attn_embed_dim, + num_attn_heads=12, + seq_length=64, + intermediate_size=3072, + attn_dropout_prob=0.1, + initializer_range=0.02, + hidden_dropout_prob=0.1, + hidden_act="relu", + compute_type=mstype.float32, + embedding_lookup=None, + positional_embedding=None, + projection=None): + super(TransformerDecoderStep, self).__init__(auto_prefix=False) + self.embedding_lookup = embedding_lookup + self.positional_embedding = positional_embedding + self.projection = projection + self.seq_length = seq_length + self.decoder = TransformerDecoder( + attn_embed_dim=attn_embed_dim, + num_attn_heads=num_attn_heads, + decoder_layers=num_hidden_layers, + intermediate_size=intermediate_size, + attn_dropout_prob=attn_dropout_prob, + initializer_range=initializer_range, + dropout_prob=hidden_dropout_prob, + hidden_act=hidden_act, + compute_type=compute_type) + + self.ones_like = P.OnesLike() + self.shape = P.Shape() + + self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config) + self.expand = P.ExpandDims() + self.multiply = P.Mul() + + ones = np.ones(shape=(seq_length, seq_length)) + self.future_mask = Tensor(np.tril(ones), dtype=mstype.float32) + + self.cast_compute_type = SaturateCast(dst_type=compute_type) + self.scale = Tensor([math.sqrt(float(attn_embed_dim))], dtype=mstype.float32) + + def construct(self, input_ids, enc_states, enc_attention_mask): + """ + Get log probs. + + Args: + input_ids: [batch_size * beam_width, m] + enc_states: [batch_size * beam_width, T, D] + enc_attention_mask: [batch_size * beam_width, T, D] + + Returns: + Tensor, the log_probs. [batch_size * beam_width, 1, Vocabulary_Dimension] + """ + + # process embedding. 
input_embedding: [batch_size * beam_width, m, D], embedding_tables: [V, D] + input_embedding, embedding_tables = self.embedding_lookup(input_ids) + input_embedding = self.multiply(input_embedding, self.scale) + input_embedding = self.positional_embedding(input_embedding) + input_embedding = self.cast_compute_type(input_embedding) + + input_shape = self.shape(input_ids) + input_len = input_shape[1] + # [m,m] + future_mask = self.future_mask[0:input_len:1, 0:input_len:1] + # [batch_size * beam_width, m] + input_mask = self.ones_like(input_ids) + # [batch_size * beam_width, m, m] + input_mask = self._create_attention_mask_from_input_mask(input_mask) + # [batch_size * beam_width, m, m] + input_mask = self.multiply(input_mask, self.expand(future_mask, 0)) + input_mask = self.cast_compute_type(input_mask) + + # [batch_size * beam_width, m, D] + enc_attention_mask = enc_attention_mask[::, 0:input_len:1, ::] + + # call TransformerDecoder: [batch_size * beam_width, m, D] + decoder_output = self.decoder(input_embedding, input_mask, enc_states, enc_attention_mask) + + # take the last step, [batch_size * beam_width, 1, D] + decoder_output = decoder_output[::, input_len - 1:input_len:1, ::] + + # projection and log_prob + log_probs = self.projection(decoder_output, embedding_tables) + + # [batch_size * beam_width, 1, vocabulary_size] + return log_probs + + +class TransformerInferModel(nn.Cell): + """ + Transformer Infer. + + Args: + config (TransformerConfig): The config of Transformer. + use_one_hot_embeddings (bool): Specifies whether to use one hot encoding form. Default: False. 
+ """ + + def __init__(self, + config, + use_one_hot_embeddings=False): + super(TransformerInferModel, self).__init__() + config = copy.deepcopy(config) + config.hidden_dropout_prob = 0.0 + config.attention_dropout_prob = 0.0 + + self.input_mask_from_dataset = config.input_mask_from_dataset + self.batch_size = config.batch_size + self.seq_length = config.seq_length + self.hidden_size = config.hidden_size + self.num_hidden_layers = config.num_hidden_layers + self.embedding_size = config.hidden_size + self.attn_embed_dim = config.hidden_size + self.num_layers = config.num_hidden_layers + self.last_idx = self.num_hidden_layers - 1 + + self.embedding_lookup = EmbeddingLookup( + vocab_size=config.vocab_size, + embed_dim=self.embedding_size, + use_one_hot_embeddings=use_one_hot_embeddings) + + self.positional_embedding = PositionalEmbedding( + embedding_size=self.embedding_size, + max_position_embeddings=config.max_position_embeddings) + # use for infer + self.projection = PredLogProbs( + batch_size=config.batch_size * config.beam_width, + seq_length=1, + width=self.hidden_size, + compute_type=config.compute_type) + + self.encoder = TransformerEncoder( + attn_embed_dim=self.attn_embed_dim, + encoder_layers=self.num_layers, + num_attn_heads=config.num_attention_heads, + intermediate_size=config.intermediate_size, + attention_dropout_prob=config.attention_dropout_prob, + initializer_range=config.initializer_range, + hidden_dropout_prob=config.hidden_dropout_prob, + hidden_act=config.hidden_act, + compute_type=config.compute_type) + + decoder_cell = TransformerDecoderStep( + config=config, + num_hidden_layers=config.num_hidden_layers, + attn_embed_dim=self.attn_embed_dim, + seq_length=config.seq_length, + num_attn_heads=config.num_attention_heads, + intermediate_size=config.intermediate_size, + hidden_dropout_prob=config.hidden_dropout_prob, + compute_type=config.compute_type, + initializer_range=config.initializer_range, + hidden_act="relu", + 
embedding_lookup=self.embedding_lookup, + positional_embedding=self.positional_embedding, + attn_dropout_prob=config.attention_dropout_prob, + projection=self.projection + ) + + # link beam_search after decoder + self.decoder = BeamSearchDecoder( + batch_size=config.batch_size, + seq_length=config.seq_length, + vocab_size=config.vocab_size, + decoder=decoder_cell, + beam_width=config.beam_width, + length_penalty_weight=config.length_penalty_weight, + max_decode_length=config.max_decode_length) + + self.decoder.add_flags(loop_can_unroll=True) + + self.cast = P.Cast() + self.dtype = config.dtype + self.cast_compute_type = SaturateCast(dst_type=config.compute_type) + self.expand = P.ExpandDims() + self.multiply = P.Mul() + + self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config) + + # use for infer + self.tile_beam = TileBeam(beam_width=config.beam_width) + ones = np.ones(shape=(config.batch_size, config.max_decode_length)) + self.encode_mask = Tensor(ones, dtype=mstype.float32) + + self.scale = Tensor([math.sqrt(float(self.embedding_size))], + dtype=mstype.float32) + self.reshape = P.Reshape() + + def construct(self, source_ids, source_mask, target_ids=None, target_mask=None): + """ + Process source sentence + + Inputs: + source_ids (Tensor): Source sentences with shape (N, T). + source_mask (Tensor): Source sentences padding mask with shape (N, T), + where 0 indicates padding position. + + Returns: + Tensor, Predictions with shape (N, T'). 
+ """ + # word_embeddings + src_embeddings, _ = self.embedding_lookup(source_ids) + src_embeddings = self.multiply(src_embeddings, self.scale) + # position_embeddings + src_embeddings = self.positional_embedding(src_embeddings) + # attention mask, [batch_size, seq_length, seq_length] + enc_attention_mask = self._create_attention_mask_from_input_mask(source_mask) + # encode + encoder_output = self.encoder(self.cast_compute_type(src_embeddings), + self.cast_compute_type(enc_attention_mask)) + + # bean search for encoder output + beam_encoder_output = self.tile_beam(encoder_output) + # [batch_size, T, D] + enc_attention_mask = self.multiply( + enc_attention_mask[::, 0:1:1, ::], + self.expand(self.encode_mask, -1)) + # [N*batch_size, T, D] + beam_enc_attention_mask = self.tile_beam(enc_attention_mask) + beam_enc_attention_mask = self.cast_compute_type(beam_enc_attention_mask) + predicted_ids, predicted_probs = self.decoder(beam_encoder_output, beam_enc_attention_mask) + predicted_ids = self.reshape(predicted_ids, (self.batch_size, -1)) + return predicted_ids, predicted_probs diff --git a/model_zoo/mass/src/transformer/transformer_for_train.py b/model_zoo/mass/src/transformer/transformer_for_train.py new file mode 100644 index 0000000000..eb75e2d7b9 --- /dev/null +++ b/model_zoo/mass/src/transformer/transformer_for_train.py @@ -0,0 +1,348 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Transformer for training.""" +from mindspore import nn +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore.ops import composite as C +from mindspore.common.tensor import Tensor +from mindspore.common.parameter import Parameter, ParameterTuple +from mindspore.common import dtype as mstype +from mindspore.nn.wrap.grad_reducer import DistributedGradReducer +from mindspore.train.parallel_utils import ParallelMode +from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean + +from .transformer import Transformer +from .grad_clip import GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE, ClipGradients + + +class PredLogProbs(nn.Cell): + """ + Get log probs. + + Args: + config (TransformerConfig): The config of Transformer. + + Returns: + Tensor, masked lm output. + """ + + def __init__(self, config): + super(PredLogProbs, self).__init__() + self.width = config.hidden_size + self.reshape = P.Reshape() + + self.matmul = P.MatMul(transpose_b=True) + self.log_softmax = nn.LogSoftmax(axis=-1) + self.shape_flat_sequence_tensor = (config.batch_size * config.seq_length, self.width) + self.cast = P.Cast() + self.compute_type = config.compute_type + self.dtype = config.dtype + self.get_shape = P.Shape() + + def construct(self, input_tensor, output_weights): + """ + Construct network. + + Args: + input_tensor (Tensor): Tensor. + output_weights (Tensor): Tensor. + + Returns: + Tensor, masked lm output. 
+ """ + shape = self.get_shape(input_tensor) + + input_tensor = self.reshape(input_tensor, (shape[0] * shape[1], shape[2])) + input_tensor = self.cast(input_tensor, self.compute_type) + output_weights = self.cast(output_weights, self.compute_type) + + logits = self.matmul(input_tensor, output_weights) + logits = self.cast(logits, self.dtype) + + log_probs = self.log_softmax(logits) + return log_probs + + +class TransformerTraining(nn.Cell): + """ + Transformer training network. + + Args: + config (TransformerConfig): The config of Transformer. + is_training (bool): Specifies whether to use the training mode. + use_one_hot_embeddings (bool): Specifies whether to use one-hot for embeddings. + + Returns: + Tensor, prediction_scores, seq_relationship_score. + """ + + def __init__(self, config, is_training, use_one_hot_embeddings): + super(TransformerTraining, self).__init__() + self.transformer = Transformer(config, is_training, use_one_hot_embeddings) + self.projection = PredLogProbs(config) + + def construct(self, source_ids, source_mask, target_ids, target_mask): + """ + Construct network. + + Args: + source_ids (Tensor): Source sentence. + source_mask (Tensor): Source padding mask. + target_ids (Tensor): Target sentence. + target_mask (Tensor): Target padding mask. + + Returns: + Tensor, prediction_scores, seq_relationship_score. + """ + _, decoder_outputs, embedding_table = \ + self.transformer(source_ids, source_mask, target_ids, target_mask) + prediction_scores = self.projection(decoder_outputs, + embedding_table) + return prediction_scores + + +class LabelSmoothedCrossEntropyCriterion(nn.Cell): + """ + Label Smoothed Cross-Entropy Criterion. + + Args: + config (TransformerConfig): The config of Transformer. + + Returns: + Tensor, final loss. 
+ """ + + def __init__(self, config): + super(LabelSmoothedCrossEntropyCriterion, self).__init__() + self.vocab_size = config.vocab_size + self.onehot = P.OneHot() + self.on_value = Tensor(float(1 - config.label_smoothing), mstype.float32) + self.off_value = Tensor(config.label_smoothing / float(self.vocab_size - 1), mstype.float32) + self.reduce_sum = P.ReduceSum() + self.reduce_mean = P.ReduceMean() + self.reshape = P.Reshape() + self.last_idx = (-1,) + self.flatten = P.Flatten() + self.neg = P.Neg() + self.cast = P.Cast() + self.flat_shape = (config.batch_size * config.seq_length,) + self.get_shape = P.Shape() + + def construct(self, prediction_scores, label_ids, label_weights): + """ + Construct network to calculate loss. + + Args: + prediction_scores (Tensor): Prediction scores. + label_ids (Tensor): Labels. + label_weights (Tensor): Mask tensor. + + Returns: + Tensor, final loss. + """ + label_shape = self.get_shape(label_ids) + + label_ids = self.reshape(label_ids, (label_shape[0] * label_shape[1],)) + label_weights = self.cast( + self.reshape(label_weights, (label_shape[0] * label_shape[1],)), + mstype.float32 + ) + one_hot_labels = self.onehot(label_ids, self.vocab_size, self.on_value, self.off_value) + + per_example_loss = self.neg(self.reduce_sum(prediction_scores * one_hot_labels, self.last_idx)) + numerator = self.reduce_sum(label_weights * per_example_loss, ()) + denominator = self.reduce_sum(label_weights, ()) + self.cast(F.tuple_to_array((1e-5,)), mstype.float32) + loss = numerator / denominator + + return loss + + +class TransformerNetworkWithLoss(nn.Cell): + """ + Provide transformer training loss through network. + + Args: + config (BertConfig): The config of Transformer. + is_training (bool): Specifies whether to use the training mode. + use_one_hot_embeddings (bool): Specifies whether to use one-hot for embeddings. Default: False. + + Returns: + Tensor, the loss of the network. 
+ """ + + def __init__(self, config, is_training, use_one_hot_embeddings=False): + super(TransformerNetworkWithLoss, self).__init__() + self.transformer = TransformerTraining(config, is_training, use_one_hot_embeddings) + self.loss = LabelSmoothedCrossEntropyCriterion(config) + self.cast = P.Cast() + + def construct(self, + source_ids, + source_mask, + target_ids, + target_mask, + label_ids, + label_weights): + prediction_scores = self.transformer(source_ids, source_mask, target_ids, target_mask) + total_loss = self.loss(prediction_scores, label_ids, label_weights) + return self.cast(total_loss, mstype.float32) + + +grad_scale = C.MultitypeFuncGraph("grad_scale") +reciprocal = P.Reciprocal() + + +@grad_scale.register("Tensor", "Tensor") +def tensor_grad_scale(scale, grad): + return grad * F.cast(reciprocal(scale), F.dtype(grad)) + + +class TransformerTrainOneStepWithLossScaleCell(nn.Cell): + """ + Encapsulation class of Transformer network training. + + Append an optimizer to the training network after that the construct + function can be called to create the backward graph. + + Args: + network: Cell. The training network. Note that loss function should have + been added. + optimizer: Optimizer. Optimizer for updating the weights. + + Returns: + Tuple[Tensor, Tensor, Tensor], loss, overflow, sen. 
+ """ + + def __init__(self, network, optimizer, scale_update_cell=None): + + super(TransformerTrainOneStepWithLossScaleCell, self).__init__(auto_prefix=False) + self.network = network + self.network.add_flags(defer_inline=True) + self.weights = ParameterTuple(network.trainable_params()) + self.optimizer = optimizer + self.grad = C.GradOperation('grad', get_by_list=True, + sens_param=True) + self.reducer_flag = False + self.all_reduce = P.AllReduce() + + self.parallel_mode = _get_parallel_mode() + if self.parallel_mode not in ParallelMode.MODE_LIST: + raise ValueError("Parallel mode does not support: ", self.parallel_mode) + if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]: + self.reducer_flag = True + self.grad_reducer = None + if self.reducer_flag: + mean = _get_mirror_mean() + degree = _get_device_num() + self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) + self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE) + self.clip_gradients = ClipGradients() + self.cast = P.Cast() + self.alloc_status = P.NPUAllocFloatStatus() + self.get_status = P.NPUGetFloatStatus() + self.clear_before_grad = P.NPUClearFloatStatus() + self.reduce_sum = P.ReduceSum(keep_dims=False) + self.depend_parameter_use = P.ControlDepend(depend_mode=1) + self.base = Tensor(1, mstype.float32) + self.less_equal = P.LessEqual() + self.hyper_map = C.HyperMap() + + self.loss_scale = None + self.loss_scaling_manager = scale_update_cell + if scale_update_cell: + self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32), + name="loss_scale") + self.add_flags(has_effect=True) + + def construct(self, + source_eos_ids, + source_eos_mask, + target_sos_ids, + target_sos_mask, + target_eos_ids, + target_eos_mask, + sens=None): + """ + Construct network. + + Args: + source_eos_ids (Tensor): Source sentence. + source_eos_mask (Tensor): Source padding mask. + target_sos_ids (Tensor): Target sentence. 
+ target_sos_mask (Tensor): Target padding mask. + target_eos_ids (Tensor): Prediction sentence. + target_eos_mask (Tensor): Prediction padding mask. + sens (Tensor): Loss sen. + + Returns: + Tuple[Tensor, Tensor, Tensor], loss, overflow, sen. + """ + source_ids = source_eos_ids + source_mask = source_eos_mask + target_ids = target_sos_ids + target_mask = target_sos_mask + label_ids = target_eos_ids + label_weights = target_eos_mask + + weights = self.weights + loss = self.network(source_ids, + source_mask, + target_ids, + target_mask, + label_ids, + label_weights) + # Alloc status. + init = self.alloc_status() + # Clear overflow buffer. + self.clear_before_grad(init) + if sens is None: + scaling_sens = self.loss_scale + else: + scaling_sens = sens + grads = self.grad(self.network, weights)(source_ids, + source_mask, + target_ids, + target_mask, + label_ids, + label_weights, + self.cast(scaling_sens, + mstype.float32)) + + grads = self.hyper_map(F.partial(grad_scale, scaling_sens), grads) + grads = self.clip_gradients(grads, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE) + if self.reducer_flag: + # Apply grad reducer on grads. + grads = self.grad_reducer(grads) + self.get_status(init) + flag_sum = self.reduce_sum(init, (0,)) + + if self.is_distributed: + # Sum overflow flag over devices. 
+ flag_reduce = self.all_reduce(flag_sum) + cond = self.less_equal(self.base, flag_reduce) + else: + cond = self.less_equal(self.base, flag_sum) + + overflow = cond + if sens is None: + overflow = self.loss_scaling_manager(self.loss_scale, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) diff --git a/model_zoo/mass/src/utils/__init__.py b/model_zoo/mass/src/utils/__init__.py new file mode 100644 index 0000000000..f78be57b22 --- /dev/null +++ b/model_zoo/mass/src/utils/__init__.py @@ -0,0 +1,35 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Utils for mass model.""" +from .dictionary import Dictionary +from .ppl_score import ngram_ppl +from .lr_scheduler import square_root_schedule +from .loss_monitor import LossCallBack +from .byte_pair_encoding import bpe_encode +from .initializer import zero_weight, one_weight, normal_weight, weight_variable +from .rouge_score import rouge + +__all__ = [ + "Dictionary", + "rouge", + "bpe_encode", + "ngram_ppl", + "square_root_schedule", + "LossCallBack", + "one_weight", + "zero_weight", + "normal_weight", + "weight_variable" +] diff --git a/model_zoo/mass/src/utils/byte_pair_encoding.py b/model_zoo/mass/src/utils/byte_pair_encoding.py new file mode 100644 index 0000000000..fb0e34a30d --- /dev/null +++ b/model_zoo/mass/src/utils/byte_pair_encoding.py @@ -0,0 +1,52 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""BPE.""" +import os +import subprocess + +ENCODER = "subword-nmt apply-bpe -c {codes} -i {input} -o {output}" +LEARN_DICT = "subword-nmt get-vocab -i {input} -o {dict_path}" + + +def bpe_encode(codes_path, src_path, output_path, dict_path): + """ + Do bpe. + + Args: + codes_path (str): BPE codes file. + src_path (str): Source text file path. + output_path (str): Output path. + dict_path (str): Dict path. 
+ """ + if not (os.path.isabs(codes_path) + and os.path.isabs(src_path) + and os.path.isabs(output_path) + and os.path.isabs(dict_path)): + raise ValueError("Absolute path is required.") + + if not (os.path.exists(os.path.dirname(codes_path)) + and os.path.exists(os.path.dirname(src_path)) + and os.path.exists(os.path.dirname(output_path)) + and os.path.exists(os.path.dirname(dict_path))): + raise FileNotFoundError("Dir not found.") + + # Encoding. + print(f" | Applying BPE encoding.") + subprocess.call(ENCODER.format(codes=codes_path, input=src_path, output=output_path), + shell=True) + print(f" | Fetching vocabulary from single file.") + # Learn vocab. + subprocess.call(LEARN_DICT.format(input=output_path, dict_path=dict_path), + shell=True) diff --git a/model_zoo/mass/src/utils/dictionary.py b/model_zoo/mass/src/utils/dictionary.py new file mode 100644 index 0000000000..5ccfbd4ea2 --- /dev/null +++ b/model_zoo/mass/src/utils/dictionary.py @@ -0,0 +1,276 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Vocabulary.""" +from typing import List +import numpy as np + +CUBE_SIZE = 16 +REPLACE_THRESHOLD = 200 + + +class Dictionary: + """Dictionary for mono lingual dataset.""" + + def __init__(self, max_size=46000, bos="", eos="", unk="", + mask="", padding=""): + self._bos = bos + self._eos = eos + self._unk = unk + self._mask = mask + self._padding = padding + self._symbols = [] + self._frequency = [] + self._mapping = {} + self._init_symbols() + self.is_learning = False + self.max_vocab_size = max_size + + def shrink(self, threshold=50): + """ + Shrink dataset into a small one. + + Args: + threshold (int): Threshold that determinate whether to + drop the word. + + Returns: + Dictionary, a new dict. + """ + _new_dict = Dictionary() + + freq_idx = [(f, i) for i, f in enumerate(self._frequency)] + freq_idx = sorted(freq_idx, key=lambda x: x[0], reverse=True) + + freqs = np.array(self._frequency, dtype=np.int) + filtered_count = np.where(freqs <= threshold)[0].shape[0] + + left_count = self.size - filtered_count + if left_count % CUBE_SIZE != 0: + supplement = CUBE_SIZE - left_count % CUBE_SIZE + if supplement <= filtered_count: + filtered_count -= supplement + + for f, i in freq_idx: + if f <= threshold and filtered_count > 0: + filtered_count -= 1 + continue + _new_dict.add_symbol(self._symbols[i], f) + + return _new_dict + + def set_to_learn(self, learn: bool): + self.is_learning = learn + + def is_empty(self): + if self.size <= 4: + if sum(self._frequency) == 0: + return True + return False + + @property + def symbols(self): + return self._symbols + + @property + def frequency(self): + return self._frequency + + @property + def size(self): + return len(self._symbols) + + @property + def mask(self): + return self._mask + + @property + def eos(self): + return self._eos + + @property + def bos(self): + return self._bos + + @property + def unk(self): + return self._unk + + @property + def 
padding(self): + return self._padding + + @property + def padding_index(self): + return self._padding_index + + @property + def mask_index(self): + return self._mask_index + + @property + def eos_index(self): + return self._eos_index + + @property + def bos_index(self): + return self._bos_index + + @property + def unk_index(self): + return self._unk_index + + def _init_symbols(self): + self._padding_index = self.add_symbol(self._padding, 0) # 0 + self._bos_index = self.add_symbol(self._bos, 0) # 1 + self._eos_index = self.add_symbol(self._eos, 0) # 2 + self._unk_index = self.add_symbol(self._unk, 0) # 3 + self._mask_index = self.add_symbol(self._mask, 0) # 4 + + def __contains__(self, symbol): + return symbol in self._mapping + + def __getitem__(self, idx): + if 0 <= idx < self.size: + return self._symbols[idx] + return self._unk + + def __len__(self): + return self.size + + def index(self, symbol: str): + """ + Return id according to symbol. + + Args: + symbol (str): Symbol. + + Returns: + int, id. + """ + idx = self._mapping.get(symbol) + if idx is None: + if self.is_learning and symbol.isalpha(): + if self.max_vocab_size <= self.size: + return self.add_symbol(symbol) + + if symbol.lower() in self._mapping: + return self._mapping.get(symbol.lower()) + + idx = self._mapping.get(symbol.lower()) + if idx is not None: + freq = self._frequency[idx] + # If lower symbol in vocabulary and + # its frequency larger than `REPLACE_THRESHOLD`, + # then replace symbol by lower symbol. + if freq >= REPLACE_THRESHOLD: + return idx + return self.unk_index + return idx + + def add_symbol(self, symbol, times=1): + """ + Add symbol to dict. + + Args: + symbol (str): Symbol. + times (int): Frequency. + + Returns: + int, token id. 
+ """ + if symbol in self._mapping: + idx = self._mapping[symbol] + self._frequency[idx] = self._frequency[idx] + times + return idx + + idx = len(self._symbols) + self._mapping[symbol] = idx + self._symbols.append(symbol) + self._frequency.append(times) + return idx + + @classmethod + def load_from_text(cls, filepaths: List[str]): + """ + Load dict from text which is in format of [word, freq]. + + Args: + filepaths (str): Dict list. + + Returns: + Dictionary, dict instance. + """ + _dict = cls() + for filepath in filepaths: + with open(filepath, "r", encoding="utf-8") as f: + for _, line in enumerate(f): + line = line.strip() + if line is None: + continue + try: + word, freq = line.split(" ") + _dict.add_symbol(word, times=int(freq)) + except ValueError: + continue + + return _dict + + @classmethod + def load_from_persisted_dict(cls, filepath): + """ + Load dict from binary file. + + Args: + filepath (str): File path. + + Returns: + Dictionary, dict instance. + """ + import pickle + with open(filepath, "rb") as f: + return pickle.load(f) + + def persistence(self, path): + """Save dict to binary file.""" + import pickle + with open(path, "wb") as _dict: + pickle.dump(self, _dict, protocol=1) + + def merge_dict(self, other, new_dict=False): + """Merge two dict.""" + if other.is_empty(): + return self + + if new_dict: + _dict = Dictionary() + + for s, f in zip(self.symbols, self.frequency): + _dict.add_symbol(s, times=f) + for s, f in zip(other.symbols, other.frequency): + _dict.add_symbol(s, times=f) + return _dict + + for s, f in zip(other.symbols, other.frequency): + self.add_symbol(s, times=f) + + return self + + def export(self, path): + """Save text-like vocabulary.""" + _lines = [] + for token, freq in zip(self._symbols, self._frequency): + _lines.append(f"{token} {freq}") + with open(path, "w") as f: + f.writelines(_lines) diff --git a/model_zoo/mass/src/utils/initializer.py b/model_zoo/mass/src/utils/initializer.py new file mode 100644 index 
0000000000..d1b5ba92ba --- /dev/null +++ b/model_zoo/mass/src/utils/initializer.py @@ -0,0 +1,108 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Initializer.""" +import math +import numpy as np + +from mindspore import Tensor + + +def _compute_fans(shape): + """ + Computes the number of input and output units for a weight shape. + + Args: + shape (tuple): Integer shape tuple or TF tensor shape. + + Returns: + tuple, integer scalars (fan_in, fan_out). + """ + if not shape: + fan_in = fan_out = 1 + elif len(shape) == 1: + fan_in = fan_out = shape[0] + elif len(shape) == 2: + fan_in = shape[0] + fan_out = shape[1] + else: + # Assuming convolution kernels (2D, 3D, or more). + # kernel shape: (..., input_depth, depth) + receptive_field_size = 1 + for dim in shape[:-2]: + receptive_field_size *= dim + fan_in = shape[-2] * receptive_field_size + fan_out = shape[-1] * receptive_field_size + return int(fan_in), int(fan_out) + + +def weight_variable(shape): + """ + Generate weight var. + + Args: + shape (tuple): Shape. + + Returns: + Tensor, var. + """ + scale_shape = shape + fan_in, fan_out = _compute_fans(scale_shape) + scale = 1.0 / max(1., (fan_in + fan_out) / 2.) 
+ limit = math.sqrt(3.0 * scale) + values = np.random.uniform(-limit, limit, shape).astype(np.float32) + return Tensor(values) + + +def one_weight(shape): + """ + Generate weight with ones. + + Args: + shape (tuple): Shape. + + Returns: + Tensor, var. + """ + ones = np.ones(shape).astype(np.float32) + return Tensor(ones) + + +def zero_weight(shape): + """ + Generate weight with zeros. + + Args: + shape (tuple): Shape. + + Returns: + Tensor, var. + """ + zeros = np.zeros(shape).astype(np.float32) + return Tensor(zeros) + + +def normal_weight(shape, num_units): + """ + Generate weight with normal dist. + + Args: + shape (tuple): Shape. + num_units (int): Dimension. + + Returns: + Tensor, var. + """ + norm = np.random.normal(0.0, num_units ** -0.5, shape).astype(np.float32) + return Tensor(norm) diff --git a/model_zoo/mass/src/utils/load_weights.py b/model_zoo/mass/src/utils/load_weights.py new file mode 100644 index 0000000000..c5b30fefe6 --- /dev/null +++ b/model_zoo/mass/src/utils/load_weights.py @@ -0,0 +1,52 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Weight loader.""" +import numpy as np +from mindspore.train.serialization import load_checkpoint + + +def load_infer_weights(config): + """ + Load weights from ckpt or npz. + + Args: + config (TransformerConfig): Config. + + Returns: + dict, weights. 
+ """ + model_path = config.existed_ckpt + if model_path.endswith(".npz"): + ms_ckpt = np.load(model_path) + is_npz = True + else: + ms_ckpt = load_checkpoint(model_path) + is_npz = False + weights = {} + with open("variable_after_deal.txt", "a") as f: + for param_name in ms_ckpt: + infer_name = param_name.replace("transformer.transformer.", "") + if not infer_name.startswith("encoder"): + if infer_name.startswith("decoder.layers."): + infer_name = infer_name.replace("decoder.layers.", "decoder.layer") + infer_name = "decoder.decoder." + infer_name + if is_npz: + weights[infer_name] = ms_ckpt[param_name] + else: + weights[infer_name] = ms_ckpt[param_name].data.asnumpy() + f.write(infer_name) + f.write("\n") + f.close() + return weights diff --git a/model_zoo/mass/src/utils/loss_monitor.py b/model_zoo/mass/src/utils/loss_monitor.py new file mode 100644 index 0000000000..80b95c0c12 --- /dev/null +++ b/model_zoo/mass/src/utils/loss_monitor.py @@ -0,0 +1,62 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Loss monitor.""" +import time +from mindspore.train.callback import Callback +from config import TransformerConfig + + +class LossCallBack(Callback): + """ + Monitor the loss in training. + + If the loss is NAN or INF terminating training. + + Note: + If per_print_times is 0 do not print loss. 
+ + Args: + per_print_times (int): Print loss every times. Default: 1. + """ + time_stamp_init = False + time_stamp_first = 0 + + def __init__(self, config: TransformerConfig, per_print_times: int = 1): + super(LossCallBack, self).__init__() + if not isinstance(per_print_times, int) or per_print_times < 0: + raise ValueError("print_step must be int and >= 0.") + self.config = config + self._per_print_times = per_print_times + + if not self.time_stamp_init: + self.time_stamp_first = self._get_ms_timestamp() + self.time_stamp_init = True + + def step_end(self, run_context): + cb_params = run_context.original_args() + file_name = "./loss.log" + with open(file_name, "a+") as f: + time_stamp_current = self._get_ms_timestamp() + f.write("time: {}, epoch: {}, step: {}, outputs are {}.\n".format( + time_stamp_current - self.time_stamp_first, + cb_params.cur_epoch_num, + cb_params.cur_step_num, + str(cb_params.net_outputs) + )) + + @staticmethod + def _get_ms_timestamp(): + t = time.time() + return int(round(t * 1000)) diff --git a/model_zoo/mass/src/utils/lr_scheduler.py b/model_zoo/mass/src/utils/lr_scheduler.py new file mode 100644 index 0000000000..44ef397fdd --- /dev/null +++ b/model_zoo/mass/src/utils/lr_scheduler.py @@ -0,0 +1,107 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Learning scheduler.""" +from math import ceil + +import numpy as np + + +def square_root_schedule(lr, update_num, decay_start_step, + warmup_steps=2000, + min_lr=1e-5): + """ + Decay the LR based on the ISR(inverse square root). + + During warm-up:: + lrs = np.linspace(0, lr, warmup_steps) + + After warm-up: + decay_factor = lr * sqrt(warmup_steps) + lr = decay_factor / sqrt(step) if step >= decay_start_step else lr + + Args: + lr (float): Init learning rate. + update_num (int): Total steps. + decay_start_step (int): Decay begins after `decay_start_step` steps. + warmup_steps (int): Warm up steps. + min_lr (float): Min learning rate. + + Returns: + np.ndarray, learning rate array. + """ + warmup_end_lr = lr + warmup_init_lr = 0 if warmup_steps > 0 else warmup_end_lr + + # If warmup_init_lr > lr, then lr_step is negative. + # Otherwise, it's positive. + lr_step = (warmup_end_lr - warmup_init_lr) / warmup_steps + decay_factor = lr * warmup_steps ** 0.5 + + lrs = np.empty(shape=update_num, dtype=np.float32) + _start_step = 0 + if 0 < warmup_steps < update_num: + lrs[:warmup_steps] = np.linspace(warmup_init_lr, warmup_end_lr, warmup_steps) + _start_step = warmup_steps + + for step in range(_start_step, update_num): + if step < warmup_steps: + _lr = warmup_init_lr + step * lr_step + elif step < decay_start_step: + _lr = lr + else: + _lr = decay_factor * step ** -0.5 + if _lr < min_lr: + _lr = min_lr + lrs[step] = _lr + + return lrs + + +def polynomial_decay_scheduler(lr, min_lr, decay_steps, total_update_num, warmup_steps=1000, power=1.0): + """ + Implements of polynomial decay learning rate scheduler which cycles by default. + + Args: + lr (float): Initial learning rate. + warmup_steps (int): Warmup steps. + decay_steps (int): Decay steps. + total_update_num (int): Total update steps. + min_lr (float): Min learning. + power (float): Power factor. 
+ + Returns: + np.ndarray, learning rate of each step. + """ + lrs = np.zeros(shape=total_update_num, dtype=np.float32) + + if decay_steps <= 0: + raise ValueError("`decay_steps` must larger than 1.") + + _start_step = 0 + if 0 < warmup_steps < total_update_num: + warmup_end_lr = lr + warmup_init_lr = 0 if warmup_steps > 0 else warmup_end_lr + lrs[:warmup_steps] = np.linspace(warmup_init_lr, warmup_end_lr, warmup_steps) + _start_step = warmup_steps + + decay_steps = decay_steps + for step in range(_start_step, total_update_num): + _step = step - _start_step # 2999 + ratio = ceil(_step / decay_steps) # 3 + ratio = 1 if ratio < 1 else ratio + _decay_steps = decay_steps * ratio # 3000 + lrs[step] = (lr - min_lr) * pow(1 - _step / _decay_steps, power) + min_lr + + return lrs diff --git a/model_zoo/mass/src/utils/ppl_score.py b/model_zoo/mass/src/utils/ppl_score.py new file mode 100644 index 0000000000..2e5d6e6642 --- /dev/null +++ b/model_zoo/mass/src/utils/ppl_score.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Calculate Perplexity score under N-gram language model.""" +from typing import Union + +import numpy as np + +NINF = -1.0 * 1e9 + + +def ngram_ppl(prob: Union[np.ndarray, list], log_softmax=False, index: float = np.e): + """ + Calculate Perplexity(PPL) score under N-gram language model. 
+ + Please make sure the sum of `prob` is 1. + Otherwise, assign `normalize=True`. + + The number of N is depended by model. + + Args: + prob (Union[list, np.ndarray]): Prediction probability + of the sentence. + log_softmax (bool): If sum of `prob` is not 1, please + set normalize=True. + index (float): Base number of log softmax. + + Returns: + float, ppl score. + """ + eps = 1e-8 + if not isinstance(prob, (np.ndarray, list)): + raise TypeError("`prob` must be type of list or np.ndarray.") + if not isinstance(prob, np.ndarray): + prob = np.array(prob) + if prob.shape[0] == 0: + raise ValueError("`prob` length must greater than 0.") + + p = 1.0 + sen_len = 0 + for t in range(prob.shape[0]): + s = prob[t] + if s <= NINF: + break + if log_softmax: + s = np.power(index, s) + p *= (1 / (s + eps)) + sen_len += 1 + + if sen_len == 0: + return np.inf + + return pow(p, 1 / sen_len) diff --git a/model_zoo/mass/src/utils/preprocess.py b/model_zoo/mass/src/utils/preprocess.py new file mode 100644 index 0000000000..04f7eeaf5c --- /dev/null +++ b/model_zoo/mass/src/utils/preprocess.py @@ -0,0 +1,127 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Create pre-training dataset.""" +import os +from multiprocessing import Pool, cpu_count + +from src.dataset import MonoLingualDataLoader +from src.language_model import LooseMaskedLanguageModel + + +def _create_pre_train(text_file, vocabulary, output_folder_path, + mask_ratio, + mask_all_prob, + min_sen_len, + max_sen_len, + suffix, + dataset_type): + """ + Create pre-training dataset. + + Args: + text_file (str): Text file path. + vocabulary (Dictionary): Vocab instance. + output_folder_path (str): Output folder path. + mask_ratio (float): Mask ratio. + mask_all_prob (float): Mask all ratio. + min_sen_len (int): Minimum sentence length. + max_sen_len (int): Maximum sentence length. + suffix (str): Suffix of output file. + dataset_type (str): Tfrecord or mindrecord. + """ + suffix = suffix if not suffix else "_" + suffix + loader = MonoLingualDataLoader( + src_filepath=text_file, + lang="en", dictionary=vocabulary, + language_model=LooseMaskedLanguageModel(mask_ratio=mask_ratio, mask_all_prob=mask_all_prob), + max_sen_len=max_sen_len, min_sen_len=min_sen_len + ) + src_file_name = os.path.basename(text_file) + if dataset_type.lower() == "tfrecord": + file_name = os.path.join( + output_folder_path, + src_file_name.replace('.txt', f'_len_{max_sen_len}{suffix}.tfrecord') + ) + loader.write_to_tfrecord(path=file_name) + else: + file_name = os.path.join( + output_folder_path, + src_file_name.replace('.txt', f'_len_{max_sen_len}{suffix}.mindrecord') + ) + loader.write_to_mindrecord(path=file_name) + + +def create_pre_training_dataset(folder_path, + output_folder_path, + vocabulary, + prefix, suffix="", + mask_ratio=0.3, + mask_all_prob=None, + min_sen_len=7, + max_sen_len=82, + dataset_type="tfrecord", + cores=2): + """ + Create pre-training dataset. + + Args: + folder_path (str): Text file folder path. + vocabulary (Dictionary): Vocab instance. 
+ output_folder_path (str): Output folder path. + mask_ratio (float): Mask ratio. + mask_all_prob (float): Mask all ratio. + min_sen_len (int): Minimum sentence length. + max_sen_len (int): Maximum sentence length. + prefix (str): Prefix of text file. + suffix (str): Suffix of output file. + dataset_type (str): Tfrecord or mindrecord. + cores (int): Cores to use. + """ + # Second step of data preparation. + # Create mono zh-zh train MindRecord. + if not os.path.exists(output_folder_path): + raise NotADirectoryError(f"`output_folder_path` is not existed.") + if not os.path.isdir(output_folder_path): + raise NotADirectoryError(f"`output_folder_path` must be a dir.") + + data_file = [] + dirs = os.listdir(folder_path) + for file in dirs: + if file.startswith(prefix) and file.endswith(".txt"): + data_file.append(os.path.join(folder_path, file)) + + if not data_file: + raise FileNotFoundError("No available text file found.") + + args_groups = [] + for text_file in data_file: + args_groups.append((text_file, + vocabulary, + output_folder_path, + mask_ratio, + mask_all_prob, + min_sen_len, + max_sen_len, + suffix, + dataset_type)) + + cores = min(cores, cpu_count()) + pool = Pool(cores) + for arg in args_groups: + pool.apply_async(_create_pre_train, args=arg) + pool.close() + pool.join() + + print(f" | Generate Dataset for Pre-training is done.") diff --git a/model_zoo/mass/src/utils/rouge_score.py b/model_zoo/mass/src/utils/rouge_score.py new file mode 100644 index 0000000000..f453b5d2e1 --- /dev/null +++ b/model_zoo/mass/src/utils/rouge_score.py @@ -0,0 +1,60 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Calculate ROUGE score.""" +from typing import List +from rouge import Rouge + +H_PATH = "summaries.txt" +R_PATH = "references.txt" + + +def rouge(hypothesis: List[str], target: List[str]): + """ + Calculate ROUGE score. + + Args: + hypothesis (List[str]): Inference result. + target (List[str]): Reference. + """ + + def cut(s): + idx = s.find("") + if idx != -1: + s = s[:idx] + return s + + if not hypothesis or not target: + raise ValueError(f"`hypothesis` and `target` can not be None.") + + edited_hyp = [] + edited_ref = [] + for h, r in zip(hypothesis, target): + h = cut(h).replace("", "").strip() + r = cut(r).replace("", "").strip() + edited_hyp.append(h + "\n") + edited_ref.append(r + "\n") + + _rouge = Rouge() + scores = _rouge.get_scores(edited_hyp, target, avg=True) + print(" | ROUGE Score:") + print(f" | RG-1(F): {scores['rouge-1']['f'] * 100:8.2f}") + print(f" | RG-2(F): {scores['rouge-2']['f'] * 100:8.2f}") + print(f" | RG-L(F): {scores['rouge-l']['f'] * 100:8.2f}") + + with open(H_PATH, "w") as f: + f.writelines(edited_hyp) + + with open(R_PATH, "w") as f: + f.writelines(edited_ref) diff --git a/model_zoo/mass/tokenize_corpus.py b/model_zoo/mass/tokenize_corpus.py new file mode 100644 index 0000000000..4717cfdd12 --- /dev/null +++ b/model_zoo/mass/tokenize_corpus.py @@ -0,0 +1,97 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Tokenizer.""" +import os +import argparse +from typing import Callable +from multiprocessing import Pool + +parser = argparse.ArgumentParser(description='Corpus tokenizer which text file must end with `.txt`.') +parser.add_argument("--corpus_folder", type=str, default="", required=True, + help="Corpus folder path, if multi-folders are provided, use ',' split folders.") +parser.add_argument("--output_folder", type=str, default="", required=True, + help="Output folder path.") +parser.add_argument("--tokenizer", type=str, default="nltk", required=False, + help="Tokenizer to be used, nltk or jieba, if nltk is not installed fully, " + "use jieba instead.") +parser.add_argument("--pool_size", type=int, default=2, required=False, + help="Processes pool size.") + +TOKENIZER = Callable + + +def create_tokenized_sentences(file_path, tokenized_file): + """ + Create tokenized sentences. + + Args: + file_path (str): Text file. + tokenized_file (str): Output file. 
+ """ + global TOKENIZER + + print(f" | Processing {file_path}.") + tokenized_sen = [] + with open(file_path, "r") as file: + for sen in file: + tokens = TOKENIZER(sen) + tokens = [t for t in tokens if t != " "] + if len(tokens) > 175: + continue + tokenized_sen.append(" ".join(tokens) + "\n") + + with open(tokenized_file, "w") as file: + file.writelines(tokenized_sen) + print(f" | Wrote to {tokenized_file}.") + + +def tokenize(): + """Tokenizer.""" + global TOKENIZER + + args, _ = parser.parse_known_args() + src_folder = args.corpus_folder.split(",") + + try: + from nltk.tokenize import word_tokenize + + TOKENIZER = word_tokenize + except (ImportError, ModuleNotFoundError, LookupError): + try: + import jieba + except Exception as e: + raise e + + print(" | NLTK is not found, use jieba instead.") + TOKENIZER = jieba.cut + + if args.tokenizer == "jieba": + import jieba + TOKENIZER = jieba.cut + + pool = Pool(args.pool_size) + for folder in src_folder: + for file in os.listdir(folder): + if not file.endswith(".txt"): + continue + file_path = os.path.join(folder, file) + out_path = os.path.join(args.output_folder, file.replace(".txt", "_tokenized.txt")) + pool.apply_async(create_tokenized_sentences, (file_path, out_path,)) + pool.close() + pool.join() + + +if __name__ == '__main__': + tokenize() diff --git a/model_zoo/mass/train.py b/model_zoo/mass/train.py new file mode 100644 index 0000000000..05b96ddae3 --- /dev/null +++ b/model_zoo/mass/train.py @@ -0,0 +1,330 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Train api.""" +import os +import argparse +import pickle + +import numpy as np + +import mindspore.common.dtype as mstype +from mindspore.common.tensor import Tensor +from mindspore.nn import Momentum +from mindspore.nn.optim import Adam, Lamb +from mindspore.train.model import Model +from mindspore.train.loss_scale_manager import DynamicLossScaleManager +from mindspore.train.callback import CheckpointConfig, ModelCheckpoint +from mindspore import context, ParallelMode, Parameter +from mindspore.communication import management as MultiAscend +from mindspore.train.serialization import load_checkpoint + +from config import TransformerConfig +from src.dataset import load_dataset +from src.transformer import TransformerNetworkWithLoss, TransformerTrainOneStepWithLossScaleCell +from src.transformer.infer_mass import infer +from src.utils import LossCallBack +from src.utils import one_weight, zero_weight, weight_variable +from src.utils import square_root_schedule +from src.utils.lr_scheduler import polynomial_decay_scheduler + +parser = argparse.ArgumentParser(description='MASS train entry point.') +parser.add_argument("--config", type=str, required=True, help="model config json file path.") + +device_id = os.getenv('DEVICE_ID', None) +if device_id is None: + raise RuntimeError("`DEVICE_ID` can not be None.") + +device_id = int(device_id) +context.set_context( + mode=context.GRAPH_MODE, + device_target="Ascend", + reserve_class_name_in_scope=False, + device_id=device_id) + + +def get_config(config): + config = TransformerConfig.from_json_file(config) + config.compute_type = mstype.float16 + config.dtype = mstype.float32 + return config + + +def _train(model, config: TransformerConfig, + pre_training_dataset=None, fine_tune_dataset=None, test_dataset=None, + callbacks: list = None): + 
""" + Train model. + + Args: + model (Model): MindSpore model instance. + config (TransformerConfig): Config of mass model. + pre_training_dataset (Dataset): Pre-training dataset. + fine_tune_dataset (Dataset): Fine-tune dataset. + test_dataset (Dataset): Test dataset. + callbacks (list): A list of callbacks. + """ + callbacks = callbacks if callbacks else [] + + if pre_training_dataset is not None: + print(" | Start pre-training job.") + epoch_size = pre_training_dataset.get_repeat_count() + if os.getenv("RANK_SIZE") is not None and int(os.getenv("RANK_SIZE")) > 1: + print(f" | Rank {MultiAscend.get_rank()} Call model train.") + model.train(epoch_size, pre_training_dataset, + callbacks=callbacks, dataset_sink_mode=config.dataset_sink_mode) + # Test the accuracy of the model. + if test_dataset is not None: + print(" | Start test job.") + result = infer(_config) + with open("validation_res_after_pre_training.bin", "wb") as f: + pickle.dump(result, f, 1) + + if fine_tune_dataset is not None: + print(" | Start fine-tuning job.") + epoch_size = fine_tune_dataset.get_repeat_count() + + model.train(epoch_size, fine_tune_dataset, + callbacks=callbacks, dataset_sink_mode=config.dataset_sink_mode) + + # Test the accuracy of the model. + if test_dataset is not None: + print(" | Start test job.") + result = infer(_config) + with open("validation_res_after_pre_training.bin", "wb") as f: + pickle.dump(result, f, 1) + + +def _build_training_pipeline(config: TransformerConfig, + pre_training_dataset=None, + fine_tune_dataset=None, + test_dataset=None): + """ + Build training pipeline. + + Args: + config (TransformerConfig): Config of mass model. + pre_training_dataset (Dataset): Pre-training dataset. + fine_tune_dataset (Dataset): Fine-tune dataset. + test_dataset (Dataset): Test dataset. 
+ """ + net_with_loss = TransformerNetworkWithLoss(config, is_training=True) + + if config.existed_ckpt: + if config.existed_ckpt.endswith(".npz"): + weights = np.load(config.existed_ckpt) + else: + weights = load_checkpoint(config.existed_ckpt) + for param in net_with_loss.trainable_params(): + weights_name = param.name + if weights_name not in weights: + raise ValueError(f"Param {weights_name} is not found in ckpt file.") + + if isinstance(weights[weights_name], Parameter): + param.default_input = weights[weights_name].default_input + elif isinstance(weights[weights_name], Tensor): + param.default_input = Tensor(weights[weights_name].asnumpy(), config.dtype) + elif isinstance(weights[weights_name], np.ndarray): + param.default_input = Tensor(weights[weights_name], config.dtype) + else: + param.default_input = weights[weights_name] + else: + for param in net_with_loss.trainable_params(): + name = param.name + value = param.default_input + if isinstance(value, Tensor): + if name.endswith(".gamma"): + param.default_input = one_weight(value.asnumpy().shape) + elif name.endswith(".beta") or name.endswith(".bias"): + param.default_input = zero_weight(value.asnumpy().shape) + else: + param.default_input = weight_variable(value.asnumpy().shape) + + dataset = pre_training_dataset if pre_training_dataset is not None \ + else fine_tune_dataset + + if dataset is None: + raise ValueError("pre-training dataset or fine-tuning dataset must be provided one.") + + update_steps = dataset.get_repeat_count() * dataset.get_dataset_size() + if config.lr_scheduler == "isr": + lr = Tensor(square_root_schedule(lr=config.lr, + update_num=update_steps, + decay_start_step=config.decay_start_step, + warmup_steps=config.warmup_steps, + min_lr=config.min_lr), dtype=mstype.float32) + elif config.lr_scheduler == "poly": + lr = Tensor(polynomial_decay_scheduler(lr=config.lr, + min_lr=config.min_lr, + decay_steps=config.decay_steps, + total_update_num=update_steps, + 
warmup_steps=config.warmup_steps, + power=config.poly_lr_scheduler_power), dtype=mstype.float32) + else: + lr = config.lr + + if config.optimizer.lower() == "adam": + optimizer = Adam(net_with_loss.trainable_params(), lr, beta1=0.9, beta2=0.98) + elif config.optimizer.lower() == "lamb": + optimizer = Lamb(net_with_loss.trainable_params(), decay_steps=12000, + start_learning_rate=config.lr, end_learning_rate=config.min_lr, + power=10.0, warmup_steps=config.warmup_steps, weight_decay=0.01, + eps=1e-6) + elif config.optimizer.lower() == "momentum": + optimizer = Momentum(net_with_loss.trainable_params(), lr, momentum=0.9) + else: + raise ValueError(f"optimizer only support `adam` and `momentum` now.") + + # Dynamic loss scale. + scale_manager = DynamicLossScaleManager(init_loss_scale=config.init_loss_scale, + scale_factor=config.loss_scale_factor, + scale_window=config.scale_window) + net_with_grads = TransformerTrainOneStepWithLossScaleCell( + network=net_with_loss, optimizer=optimizer, + scale_update_cell=scale_manager.get_update_cell() + ) + net_with_grads.set_train(True) + model = Model(net_with_grads) + loss_monitor = LossCallBack(config) + ckpt_config = CheckpointConfig(save_checkpoint_steps=config.save_ckpt_steps, + keep_checkpoint_max=config.keep_ckpt_max) + + rank_size = os.getenv('RANK_SIZE') + callbacks = [loss_monitor] + if rank_size is not None and int(rank_size) > 1 and MultiAscend.get_rank() % 8 == 0: + ckpt_callback = ModelCheckpoint( + prefix=config.ckpt_prefix, + directory=os.path.join(config.ckpt_path, 'ckpt_{}'.format(os.getenv('DEVICE_ID'))), + config=ckpt_config) + callbacks.append(ckpt_callback) + + if rank_size is None or int(rank_size) == 1: + ckpt_callback = ModelCheckpoint( + prefix=config.ckpt_prefix, + directory=os.path.join(config.ckpt_path, 'ckpt_{}'.format(os.getenv('DEVICE_ID'))), + config=ckpt_config) + callbacks.append(ckpt_callback) + + print(f" | ALL SET, PREPARE TO TRAIN.") + _train(model=model, config=config, + 
pre_training_dataset=pre_training_dataset, + fine_tune_dataset=fine_tune_dataset, + test_dataset=test_dataset, + callbacks=callbacks) + + +def _setup_parallel_env(): + context.reset_auto_parallel_context() + MultiAscend.init() + context.set_auto_parallel_context( + parallel_mode=ParallelMode.DATA_PARALLEL, + device_num=MultiAscend.get_group_size(), + parameter_broadcast=True, + mirror_mean=True + ) + + +def train_parallel(config: TransformerConfig): + """ + Train model with multi ascend chips. + + Args: + config (TransformerConfig): Config for MASS model. + """ + _setup_parallel_env() + + print(f" | Starting training on {os.getenv('RANK_SIZE', None)} devices.") + + pre_train_dataset = load_dataset( + data_files=config.pre_train_dataset, + batch_size=config.batch_size, epoch_count=config.epochs, + sink_mode=config.dataset_sink_mode, + sink_step=config.dataset_sink_step, + rank_size=MultiAscend.get_group_size(), + rank_id=MultiAscend.get_rank() + ) if config.pre_train_dataset else None + fine_tune_dataset = load_dataset( + data_files=config.fine_tune_dataset, + batch_size=config.batch_size, epoch_count=config.epochs, + sink_mode=config.dataset_sink_mode, + sink_step=config.dataset_sink_step, + rank_size=MultiAscend.get_group_size(), + rank_id=MultiAscend.get_rank() + ) if config.fine_tune_dataset else None + test_dataset = load_dataset( + data_files=config.test_dataset, + batch_size=config.batch_size, epoch_count=config.epochs, + sink_mode=config.dataset_sink_mode, + sink_step=config.dataset_sink_step, + rank_size=MultiAscend.get_group_size(), + rank_id=MultiAscend.get_rank() + ) if config.test_dataset else None + + _build_training_pipeline(config=config, + pre_training_dataset=pre_train_dataset, + fine_tune_dataset=fine_tune_dataset, + test_dataset=test_dataset) + + +def train_single(config: TransformerConfig): + """ + Train model on single device. + + Args: + config (TransformerConfig): Config for model. 
+ """ + print(" | Starting training on single device.") + pre_train_dataset = load_dataset(data_files=config.pre_train_dataset, + batch_size=config.batch_size, + epoch_count=config.epochs, + sink_mode=config.dataset_sink_mode, + sink_step=config.dataset_sink_step) if config.pre_train_dataset else None + fine_tune_dataset = load_dataset(data_files=config.fine_tune_dataset, + batch_size=config.batch_size, + epoch_count=config.epochs, + sink_mode=config.dataset_sink_mode, + sink_step=config.dataset_sink_step) if config.fine_tune_dataset else None + test_dataset = load_dataset(data_files=config.test_dataset, + batch_size=config.batch_size, + epoch_count=config.epochs, + sink_mode=config.dataset_sink_mode, + sink_step=config.dataset_sink_step) if config.test_dataset else None + + _build_training_pipeline(config=config, + pre_training_dataset=pre_train_dataset, + fine_tune_dataset=fine_tune_dataset, + test_dataset=test_dataset) + + +def _check_args(config): + if not os.path.exists(config): + raise FileNotFoundError("`config` is not existed.") + if not isinstance(config, str): + raise ValueError("`config` must be type of str.") + + +if __name__ == '__main__': + _rank_size = os.getenv('RANK_SIZE') + + args, _ = parser.parse_known_args() + _check_args(args.config) + _config = get_config(args.config) + + np.random.seed(_config.random_seed) + context.set_context(save_graphs=_config.save_graphs) + + if _rank_size is not None and int(_rank_size) > 1: + train_parallel(_config) + else: + train_single(_config) diff --git a/model_zoo/mass/weights_average.py b/model_zoo/mass/weights_average.py new file mode 100644 index 0000000000..911181ba45 --- /dev/null +++ b/model_zoo/mass/weights_average.py @@ -0,0 +1,81 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Weight average.""" +import os +import argparse +import numpy as np +from mindspore.train.serialization import load_checkpoint + +parser = argparse.ArgumentParser(description='transformer') +parser.add_argument("--input_files", type=str, default=None, required=False, + help="Multi ckpt files path.") +parser.add_argument("--input_folder", type=str, default=None, required=False, + help="Ckpt files folder.") +parser.add_argument("--output_file", type=str, default=None, required=True, + help="Output model file path.") + + +def average_me_models(ckpt_list): + """ + Average multi ckpt params. + + Args: + ckpt_list (list): Ckpt paths. + + Returns: + dict, params dict. 
+ """ + avg_model = {} + # load all checkpoint + for ckpt in ckpt_list: + if not ckpt.endswith(".ckpt"): + continue + if not os.path.exists(ckpt): + raise FileNotFoundError(f"Checkpoint file is not existed.") + + print(f" | Loading ckpt from {ckpt}.") + ms_ckpt = load_checkpoint(ckpt) + for param_name in ms_ckpt: + if param_name not in avg_model: + avg_model[param_name] = [] + avg_model[param_name].append(ms_ckpt[param_name].data.asnumpy()) + + for name in avg_model: + avg_model[name] = sum(avg_model[name]) / float(len(ckpt_list)) + + return avg_model + + +def main(): + """Entry point.""" + args, _ = parser.parse_known_args() + + if not args.input_files and not args.input_folder: + raise ValueError("`--input_files` or `--input_folder` must be provided one as least.") + + ckpt_list = [] + if args.input_files: + ckpt_list.extend(args.input_files.split(",")) + + if args.input_folder and os.path.exists(args.input_folder) and os.path.isdir(args.input_folder): + for file in os.listdir(args.input_folder): + ckpt_list.append(os.path.join(args.input_folder, file)) + + avg_weights = average_me_models(ckpt_list) + np.savez(args.output_file, **avg_weights) + + +if __name__ == '__main__': + main() diff --git a/model_zoo/mobilenetv2/src/mobilenetV2.py b/model_zoo/mobilenetv2/src/mobilenetV2.py index df35c5f369..5b1b4cc5ef 100644 --- a/model_zoo/mobilenetv2/src/mobilenetV2.py +++ b/model_zoo/mobilenetv2/src/mobilenetV2.py @@ -267,21 +267,21 @@ class MobileNetV2(nn.Cell): if isinstance(m, (nn.Conv2d, DepthwiseConv)): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. 
/ n), - m.weight.data.shape()).astype("float32"))) + m.weight.data.shape).astype("float32"))) if m.bias is not None: m.bias.set_parameter_data( - Tensor(np.zeros(m.bias.data.shape(), dtype="float32"))) + Tensor(np.zeros(m.bias.data.shape, dtype="float32"))) elif isinstance(m, nn.BatchNorm2d): m.gamma.set_parameter_data( - Tensor(np.ones(m.gamma.data.shape(), dtype="float32"))) + Tensor(np.ones(m.gamma.data.shape, dtype="float32"))) m.beta.set_parameter_data( - Tensor(np.zeros(m.beta.data.shape(), dtype="float32"))) + Tensor(np.zeros(m.beta.data.shape, dtype="float32"))) elif isinstance(m, nn.Dense): m.weight.set_parameter_data(Tensor(np.random.normal( - 0, 0.01, m.weight.data.shape()).astype("float32"))) + 0, 0.01, m.weight.data.shape).astype("float32"))) if m.bias is not None: m.bias.set_parameter_data( - Tensor(np.zeros(m.bias.data.shape(), dtype="float32"))) + Tensor(np.zeros(m.bias.data.shape, dtype="float32"))) def mobilenet_v2(**kwargs): diff --git a/model_zoo/mobilenetv3/src/mobilenetV3.py b/model_zoo/mobilenetv3/src/mobilenetV3.py index 820e60493f..61b63f9ea1 100644 --- a/model_zoo/mobilenetv3/src/mobilenetV3.py +++ b/model_zoo/mobilenetv3/src/mobilenetV3.py @@ -322,21 +322,21 @@ class MobileNetV3(nn.Cell): if isinstance(m, (nn.Conv2d)): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. 
/ n), - m.weight.data.shape()).astype("float32"))) + m.weight.data.shape).astype("float32"))) if m.bias is not None: m.bias.set_parameter_data( - Tensor(np.zeros(m.bias.data.shape(), dtype="float32"))) + Tensor(np.zeros(m.bias.data.shape, dtype="float32"))) elif isinstance(m, nn.BatchNorm2d): m.gamma.set_parameter_data( - Tensor(np.ones(m.gamma.data.shape(), dtype="float32"))) + Tensor(np.ones(m.gamma.data.shape, dtype="float32"))) m.beta.set_parameter_data( - Tensor(np.zeros(m.beta.data.shape(), dtype="float32"))) + Tensor(np.zeros(m.beta.data.shape, dtype="float32"))) elif isinstance(m, nn.Dense): m.weight.set_parameter_data(Tensor(np.random.normal( - 0, 0.01, m.weight.data.shape()).astype("float32"))) + 0, 0.01, m.weight.data.shape).astype("float32"))) if m.bias is not None: m.bias.set_parameter_data( - Tensor(np.zeros(m.bias.data.shape(), dtype="float32"))) + Tensor(np.zeros(m.bias.data.shape, dtype="float32"))) def mobilenet_v3(model_name, **kwargs): diff --git a/example/resnet101_imagenet2012/README.md b/model_zoo/resnet101/README.md similarity index 77% rename from example/resnet101_imagenet2012/README.md rename to model_zoo/resnet101/README.md index 6ccaf5f6b6..86744be372 100644 --- a/example/resnet101_imagenet2012/README.md +++ b/model_zoo/resnet101/README.md @@ -20,19 +20,24 @@ This is an example of training ResNet101 with ImageNet dataset in MindSpore. └─validation_preprocess ``` -## Example structure +## Structure ```shell . 
-├── crossentropy.py # CrossEntropy loss function -├── config.py # parameter configuration -├── dataset.py # data preprocessing -├── eval.py # eval net -├── lr_generator.py # generate learning rate -├── run_distribute_train.sh # launch distributed training(8p) -├── run_infer.sh # launch evaluating -├── run_standalone_train.sh # launch standalone training(1p) -└── train.py # train net +└─resnet101 + ├─README.md + ├─scripts + ├─run_standalone_train.sh # launch standalone training(1p) + ├─run_distribute_train.sh # launch distributed training(8p) + └─run_eval.sh # launch evaluating + ├─src + ├─config.py # parameter configuration + ├─crossentropy.py # CrossEntropy loss function + ├─dataset.py # data preprocessing + ├─lr_generator.py # generate learning rate + ├─resnet101.py # resnet101 backbone + ├─eval.py # eval net + └─train.py # train net ``` ## Parameter configuration @@ -95,7 +100,7 @@ sh run_standalone_train.sh dataset/ilsvrc ./ckpt/pretrained.ckpt #### Result -Training result will be stored in the example path, whose folder name begins with "train" or "train_parallel". You can find checkpoint file together with result like the followings in log. +Training result will be stored in the scripts path, whose folder name begins with "train" or "train_parallel". You can find checkpoint file together with result like the followings in log. ``` @@ -119,14 +124,14 @@ epoch: 70 step: 5004, loss is 1.8717369 ``` ``` # infer -sh run_infer.sh [VALIDATION_DATASET_PATH] [CHECKPOINT_PATH] +sh run_eval.sh [VALIDATION_DATASET_PATH] [CHECKPOINT_PATH] ``` #### Launch ```bash # infer with checkpoint -sh run_infer.sh dataset/validation_preprocess/ train_parallel0/resnet-120_5004.ckpt +sh run_eval.sh dataset/validation_preprocess/ train_parallel0/resnet-120_5004.ckpt ``` @@ -135,7 +140,7 @@ sh run_infer.sh dataset/validation_preprocess/ train_parallel0/resnet-120_5004.c #### Result -Inference result will be stored in the example path, whose folder name is "infer". 
Under this, you can find result like the followings in log. +Inference result will be stored in the scripts path, whose folder name is "eval". Under this, you can find result like the followings in log. ``` result: {'top_5_accuracy': 0.9429417413572343, 'top_1_accuracy': 0.7853513124199744} ckpt=train_parallel0/resnet-120_5004.ckpt diff --git a/example/resnet101_imagenet2012/eval.py b/model_zoo/resnet101/eval.py similarity index 95% rename from example/resnet101_imagenet2012/eval.py rename to model_zoo/resnet101/eval.py index 88d942866b..73c0289ebd 100755 --- a/example/resnet101_imagenet2012/eval.py +++ b/model_zoo/resnet101/eval.py @@ -19,16 +19,16 @@ import os import argparse import random import numpy as np -from dataset import create_dataset -from config import config from mindspore import context -from mindspore.model_zoo.resnet import resnet101 from mindspore.parallel._auto_parallel_context import auto_parallel_context from mindspore.train.model import Model, ParallelMode from mindspore.train.serialization import load_checkpoint, load_param_into_net import mindspore.dataset.engine as de from mindspore.communication.management import init -from crossentropy import CrossEntropy +from src.resnet101 import resnet101 +from src.dataset import create_dataset +from src.config import config +from src.crossentropy import CrossEntropy random.seed(1) np.random.seed(1) diff --git a/example/resnet101_imagenet2012/run_distribute_train.sh b/model_zoo/resnet101/scripts/run_distribute_train.sh similarity index 96% rename from example/resnet101_imagenet2012/run_distribute_train.sh rename to model_zoo/resnet101/scripts/run_distribute_train.sh index 8f8021202d..65790b88c1 100755 --- a/example/resnet101_imagenet2012/run_distribute_train.sh +++ b/model_zoo/resnet101/scripts/run_distribute_train.sh @@ -67,8 +67,9 @@ do export RANK_ID=$i rm -rf ./train_parallel$i mkdir ./train_parallel$i - cp *.py ./train_parallel$i + cp ../*.py ./train_parallel$i cp *.sh ./train_parallel$i + cp -r 
../src ./train_parallel$i cd ./train_parallel$i || exit echo "start training for rank $RANK_ID, device $DEVICE_ID" env > env.log diff --git a/example/resnet101_imagenet2012/run_infer.sh b/model_zoo/resnet101/scripts/run_eval.sh similarity index 87% rename from example/resnet101_imagenet2012/run_infer.sh rename to model_zoo/resnet101/scripts/run_eval.sh index b82427e15f..88f5d364ce 100755 --- a/example/resnet101_imagenet2012/run_infer.sh +++ b/model_zoo/resnet101/scripts/run_eval.sh @@ -16,7 +16,7 @@ if [ $# != 2 ] then - echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]" + echo "Usage: sh run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH]" exit 1 fi @@ -50,14 +50,15 @@ export DEVICE_ID=0 export RANK_SIZE=$DEVICE_NUM export RANK_ID=0 -if [ -d "infer" ]; +if [ -d "eval" ]; then - rm -rf ./infer + rm -rf ./eval fi -mkdir ./infer -cp *.py ./infer -cp *.sh ./infer -cd ./infer || exit +mkdir ./eval +cp ../*.py ./eval +cp *.sh ./eval +cp -r ../src ./eval +cd ./eval || exit env > env.log echo "start infering for device $DEVICE_ID" python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log & diff --git a/example/resnet101_imagenet2012/run_standalone_train.sh b/model_zoo/resnet101/scripts/run_standalone_train.sh similarity index 97% rename from example/resnet101_imagenet2012/run_standalone_train.sh rename to model_zoo/resnet101/scripts/run_standalone_train.sh index 7db8b5d7bc..7214d114d5 100755 --- a/example/resnet101_imagenet2012/run_standalone_train.sh +++ b/model_zoo/resnet101/scripts/run_standalone_train.sh @@ -58,8 +58,9 @@ then rm -rf ./train fi mkdir ./train -cp *.py ./train +cp ../*.py ./train cp *.sh ./train +cp -r ../src ./train cd ./train || exit echo "start training for device $DEVICE_ID" env > env.log diff --git a/example/resnet101_imagenet2012/config.py b/model_zoo/resnet101/src/config.py similarity index 100% rename from example/resnet101_imagenet2012/config.py rename to model_zoo/resnet101/src/config.py diff --git 
a/example/resnet101_imagenet2012/crossentropy.py b/model_zoo/resnet101/src/crossentropy.py similarity index 100% rename from example/resnet101_imagenet2012/crossentropy.py rename to model_zoo/resnet101/src/crossentropy.py diff --git a/example/resnet101_imagenet2012/dataset.py b/model_zoo/resnet101/src/dataset.py similarity index 99% rename from example/resnet101_imagenet2012/dataset.py rename to model_zoo/resnet101/src/dataset.py index 31377cfc12..b2a074a535 100755 --- a/example/resnet101_imagenet2012/dataset.py +++ b/model_zoo/resnet101/src/dataset.py @@ -20,7 +20,7 @@ import mindspore.common.dtype as mstype import mindspore.dataset.engine as de import mindspore.dataset.transforms.vision.c_transforms as C import mindspore.dataset.transforms.c_transforms as C2 -from config import config +from src.config import config def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): """ diff --git a/example/resnet101_imagenet2012/lr_generator.py b/model_zoo/resnet101/src/lr_generator.py similarity index 100% rename from example/resnet101_imagenet2012/lr_generator.py rename to model_zoo/resnet101/src/lr_generator.py diff --git a/model_zoo/resnet101/src/resnet101.py b/model_zoo/resnet101/src/resnet101.py new file mode 100755 index 0000000000..33f10fd6cb --- /dev/null +++ b/model_zoo/resnet101/src/resnet101.py @@ -0,0 +1,261 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""ResNet101.""" +import numpy as np +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor + + +def _weight_variable(shape, factor=0.01): + init_value = np.random.randn(*shape).astype(np.float32) * factor + return Tensor(init_value) + + +def _conv3x3(in_channel, out_channel, stride=1): + weight_shape = (out_channel, in_channel, 3, 3) + weight = _weight_variable(weight_shape) + return nn.Conv2d(in_channel, out_channel, + kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _conv1x1(in_channel, out_channel, stride=1): + weight_shape = (out_channel, in_channel, 1, 1) + weight = _weight_variable(weight_shape) + return nn.Conv2d(in_channel, out_channel, + kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _conv7x7(in_channel, out_channel, stride=1): + weight_shape = (out_channel, in_channel, 7, 7) + weight = _weight_variable(weight_shape) + return nn.Conv2d(in_channel, out_channel, + kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _bn(channel): + return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, + gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1) + + +def _bn_last(channel): + return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, + gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1) + + +def _fc(in_channel, out_channel): + weight_shape = (out_channel, in_channel) + weight = _weight_variable(weight_shape) + return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0) + + +class ResidualBlock(nn.Cell): + """ + ResNet V1 residual block definition. + + Args: + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. Default: 1. + + Returns: + Tensor, output tensor. 
+ + Examples: + >>> ResidualBlock(3, 256, stride=2) + """ + expansion = 4 + + def __init__(self, + in_channel, + out_channel, + stride=1): + super(ResidualBlock, self).__init__() + + channel = out_channel // self.expansion + self.conv1 = _conv1x1(in_channel, channel, stride=1) + self.bn1 = _bn(channel) + + self.conv2 = _conv3x3(channel, channel, stride=stride) + self.bn2 = _bn(channel) + + self.conv3 = _conv1x1(channel, out_channel, stride=1) + self.bn3 = _bn_last(out_channel) + + self.relu = nn.ReLU() + + self.down_sample = False + + if stride != 1 or in_channel != out_channel: + self.down_sample = True + self.down_sample_layer = None + + if self.down_sample: + self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride), + _bn(out_channel)]) + self.add = P.TensorAdd() + + def construct(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.down_sample: + identity = self.down_sample_layer(identity) + + out = self.add(out, identity) + out = self.relu(out) + + return out + + +class ResNet(nn.Cell): + """ + ResNet architecture. + + Args: + block (Cell): Block for network. + layer_nums (list): Numbers of block in different layers. + in_channels (list): Input channel in each layer. + out_channels (list): Output channel in each layer. + strides (list): Stride size in each layer. + num_classes (int): The number of classes that the training images are belonging to. + Returns: + Tensor, output tensor. 
+ + Examples: + >>> ResNet(ResidualBlock, + >>> [3, 4, 6, 3], + >>> [64, 256, 512, 1024], + >>> [256, 512, 1024, 2048], + >>> [1, 2, 2, 2], + >>> 10) + """ + + def __init__(self, + block, + layer_nums, + in_channels, + out_channels, + strides, + num_classes): + super(ResNet, self).__init__() + + if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: + raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!") + + self.conv1 = _conv7x7(3, 64, stride=2) + self.bn1 = _bn(64) + self.relu = P.ReLU() + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") + + self.layer1 = self._make_layer(block, + layer_nums[0], + in_channel=in_channels[0], + out_channel=out_channels[0], + stride=strides[0]) + self.layer2 = self._make_layer(block, + layer_nums[1], + in_channel=in_channels[1], + out_channel=out_channels[1], + stride=strides[1]) + self.layer3 = self._make_layer(block, + layer_nums[2], + in_channel=in_channels[2], + out_channel=out_channels[2], + stride=strides[2]) + self.layer4 = self._make_layer(block, + layer_nums[3], + in_channel=in_channels[3], + out_channel=out_channels[3], + stride=strides[3]) + + self.mean = P.ReduceMean(keep_dims=True) + self.flatten = nn.Flatten() + self.end_point = _fc(out_channels[3], num_classes) + + def _make_layer(self, block, layer_num, in_channel, out_channel, stride): + """ + Make stage network of ResNet. + + Args: + block (Cell): Resnet block. + layer_num (int): Layer number. + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. + + Returns: + SequentialCell, the output layer. 
+ + Examples: + >>> _make_layer(ResidualBlock, 3, 128, 256, 2) + """ + layers = [] + + resnet_block = block(in_channel, out_channel, stride=stride) + layers.append(resnet_block) + + for _ in range(1, layer_num): + resnet_block = block(out_channel, out_channel, stride=1) + layers.append(resnet_block) + + return nn.SequentialCell(layers) + + def construct(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + c1 = self.maxpool(x) + + c2 = self.layer1(c1) + c3 = self.layer2(c2) + c4 = self.layer3(c3) + c5 = self.layer4(c4) + + out = self.mean(c5, (2, 3)) + out = self.flatten(out) + out = self.end_point(out) + + return out + +def resnet101(class_num=1001): + """ + Get ResNet101 neural network. + + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of ResNet101 neural network. + + Examples: + >>> net = resnet101(1001) + """ + return ResNet(ResidualBlock, + [3, 4, 23, 3], + [64, 256, 512, 1024], + [256, 512, 1024, 2048], + [1, 2, 2, 2], + class_num) diff --git a/example/resnet101_imagenet2012/train.py b/model_zoo/resnet101/train.py similarity index 93% rename from example/resnet101_imagenet2012/train.py rename to model_zoo/resnet101/train.py index e3d6adb267..1cd3627a11 100755 --- a/example/resnet101_imagenet2012/train.py +++ b/model_zoo/resnet101/train.py @@ -17,12 +17,8 @@ import os import argparse import random import numpy as np -from dataset import create_dataset -from lr_generator import warmup_cosine_annealing_lr -from config import config from mindspore import context from mindspore import Tensor -from mindspore.model_zoo.resnet import resnet101 from mindspore.parallel._auto_parallel_context import auto_parallel_context from mindspore.nn.optim.momentum import Momentum from mindspore.train.model import Model, ParallelMode @@ -33,7 +29,11 @@ import mindspore.dataset.engine as de from mindspore.communication.management import init import mindspore.nn as nn import mindspore.common.initializer as weight_init -from crossentropy 
import CrossEntropy +from src.resnet101 import resnet101 +from src.dataset import create_dataset +from src.lr_generator import warmup_cosine_annealing_lr +from src.config import config +from src.crossentropy import CrossEntropy random.seed(1) np.random.seed(1) @@ -66,12 +66,12 @@ if __name__ == '__main__': for _, cell in net.cells_and_names(): if isinstance(cell, nn.Conv2d): cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(), - cell.weight.default_input.shape(), - cell.weight.default_input.dtype()).to_tensor() + cell.weight.default_input.shape, + cell.weight.default_input.dtype).to_tensor() if isinstance(cell, nn.Dense): cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(), - cell.weight.default_input.shape(), - cell.weight.default_input.dtype()).to_tensor() + cell.weight.default_input.shape, + cell.weight.default_input.dtype).to_tensor() if not config.label_smooth: config.label_smooth_factor = 0.0 loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) diff --git a/model_zoo/ssd/README.md b/model_zoo/ssd/README.md new file mode 100644 index 0000000000..ded107e499 --- /dev/null +++ b/model_zoo/ssd/README.md @@ -0,0 +1,119 @@ +# SSD Example + +## Description + +SSD network based on MobileNetV2, with support for training and evaluation. + +## Requirements + +- Install [MindSpore](https://www.mindspore.cn/install/en). + +- Dataset + + We use coco2017 as training dataset in this example by default, and you can also use your own datasets. + + 1. If coco dataset is used. **Select dataset to coco when run script.** + Install Cython and pycocotool. + + ``` + pip install Cython + + pip install pycocotools + ``` + And change the coco_root and other settings you need in `config.py`. The directory structure is as follows: + + + ``` + . + └─cocodataset + ├─annotations + ├─instance_train2017.json + └─instance_val2017.json + ├─val2017 + └─train2017 + ``` + + 2. 
If your own dataset is used. **Select dataset to other when run script.** + Organize the dataset information into a TXT file, each row in the file is as follows: + + ``` + train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2 + ``` + + Each row is an image annotation which split by space, the first column is a relative path of image, the others are box and class information of the format [xmin,ymin,xmax,ymax,class]. We read image from an image path joined by the `image_dir`(dataset directory) and the relative path in `anno_path`(the TXT file path), `image_dir` and `anno_path` are set in `config.py`. + + +## Running the example + +### Training + +To train the model, run `train.py`. If the `mindrecord_dir` is empty, it will generate [mindrecord](https://www.mindspore.cn/tutorial/en/master/use/data_preparation/converting_datasets.html) files by `coco_root`(coco dataset) or `image_dir` and `anno_path`(own dataset). **Note if mindrecord_dir isn't empty, it will use mindrecord_dir instead of raw images.** + + +- Stand alone mode + + ``` + python train.py --dataset coco + + ``` + + You can run ```python train.py -h``` to get more information. + + +- Distribute mode + + ``` + sh run_distribute_train.sh 8 500 0.2 coco /data/hccl.json + ``` + + The input parameters are device numbers, epoch size, learning rate, dataset mode and [hccl json configuration file](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). **It is better to use absolute path.** + +You will get the loss value of each step as following: + +``` +epoch: 1 step: 458, loss is 3.1681802 +epoch time: 228752.4654865265, per step time: 499.4595316299705 +epoch: 2 step: 458, loss is 2.8847265 +epoch time: 38912.93382644653, per step time: 84.96273761232868 +epoch: 3 step: 458, loss is 2.8398118 +epoch time: 38769.184827804565, per step time: 84.64887516987896 +... 
+ +epoch: 498 step: 458, loss is 0.70908034 +epoch time: 38771.079778671265, per step time: 84.65301261718616 +epoch: 499 step: 458, loss is 0.7974688 +epoch time: 38787.413120269775, per step time: 84.68867493508685 +epoch: 500 step: 458, loss is 0.5548882 +epoch time: 39064.8467540741, per step time: 85.29442522723602 +``` + +### Evaluation + +for evaluation , run `eval.py` with `checkpoint_path`. `checkpoint_path` is the path of [checkpoint](https://www.mindspore.cn/tutorial/en/master/use/saving_and_loading_model_parameters.html) file. + +``` +python eval.py --checkpoint_path ssd.ckpt --dataset coco +``` + +You can run ```python eval.py -h``` to get more information. + +You will get the result as following: + +``` +Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.189 +Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.341 +Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.183 +Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.040 +Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.181 +Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.326 +Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.213 +Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.348 +Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.380 +Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.124 +Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.412 +Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.588 + +======================================== + +mAP: 0.18937438355383837 +``` diff --git a/example/ssd_coco2017/eval.py b/model_zoo/ssd/eval.py similarity index 78% rename from example/ssd_coco2017/eval.py rename to model_zoo/ssd/eval.py index d5e0d86b67..9054bf6f24 100644 --- a/example/ssd_coco2017/eval.py +++ b/model_zoo/ssd/eval.py @@ -14,49 +14,51 @@ # 
============================================================================ """Evaluation for SSD""" + import os import argparse import time +import numpy as np from mindspore import context, Tensor from mindspore.train.serialization import load_checkpoint, load_param_into_net -from mindspore.model_zoo.ssd import SSD300, ssd_mobilenet_v2 -from dataset import create_ssd_dataset, data_to_mindrecord_byte_image -from config import ConfigSSD -from util import metrics +from src.ssd import SSD300, ssd_mobilenet_v2 +from src.dataset import create_ssd_dataset, data_to_mindrecord_byte_image +from src.config import config +from src.coco_eval import metrics def ssd_eval(dataset_path, ckpt_path): """SSD evaluation.""" - - ds = create_ssd_dataset(dataset_path, batch_size=1, repeat_num=1, is_training=False) - net = SSD300(ssd_mobilenet_v2(), ConfigSSD(), is_training=False) + batch_size = 1 + ds = create_ssd_dataset(dataset_path, batch_size=batch_size, repeat_num=1, is_training=False) + net = SSD300(ssd_mobilenet_v2(), config, is_training=False) print("Load Checkpoint!") param_dict = load_checkpoint(ckpt_path) net.init_parameters_data() load_param_into_net(net, param_dict) net.set_train(False) - i = 1. 
- total = ds.get_dataset_size() + i = batch_size + total = ds.get_dataset_size() * batch_size start = time.time() pred_data = [] print("\n========================================\n") print("total images num: ", total) print("Processing, please wait a moment.") for data in ds.create_dict_iterator(): + img_id = data['img_id'] img_np = data['image'] image_shape = data['image_shape'] - annotation = data['annotation'] output = net(Tensor(img_np)) for batch_idx in range(img_np.shape[0]): pred_data.append({"boxes": output[0].asnumpy()[batch_idx], "box_scores": output[1].asnumpy()[batch_idx], - "annotation": annotation, - "image_shape": image_shape}) - percent = round(i / total * 100, 2) + "img_id": int(np.squeeze(img_id[batch_idx])), + "image_shape": image_shape[batch_idx]}) + percent = round(i / total * 100., 2) print(f' {str(percent)} [{i}/{total}]', end='\r') - i += 1 + i += batch_size cost_time = int((time.time() - start) * 1000) print(f' 100% [{total}/{total}] cost {cost_time} ms') mAP = metrics(pred_data) @@ -73,22 +75,21 @@ if __name__ == '__main__': context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) - config = ConfigSSD() prefix = "ssd_eval.mindrecord" - mindrecord_dir = config.MINDRECORD_DIR + mindrecord_dir = config.mindrecord_dir mindrecord_file = os.path.join(mindrecord_dir, prefix + "0") if not os.path.exists(mindrecord_file): if not os.path.isdir(mindrecord_dir): os.makedirs(mindrecord_dir) if args_opt.dataset == "coco": - if os.path.isdir(config.COCO_ROOT): + if os.path.isdir(config.coco_root): print("Create Mindrecord.") data_to_mindrecord_byte_image("coco", False, prefix) print("Create Mindrecord Done, at {}".format(mindrecord_dir)) else: - print("COCO_ROOT not exits.") + print("coco_root not exits.") else: - if os.path.isdir(config.IMAGE_DIR) and os.path.exists(config.ANNO_PATH): + if os.path.isdir(config.image_dir) and os.path.exists(config.anno_path): print("Create Mindrecord.") 
data_to_mindrecord_byte_image("other", False, prefix) print("Create Mindrecord Done, at {}".format(mindrecord_dir)) diff --git a/example/ssd_coco2017/run_distribute_train.sh b/model_zoo/ssd/scripts/run_distribute_train.sh similarity index 74% rename from example/ssd_coco2017/run_distribute_train.sh rename to model_zoo/ssd/scripts/run_distribute_train.sh index bd8519be41..60eccf2c40 100644 --- a/example/ssd_coco2017/run_distribute_train.sh +++ b/model_zoo/ssd/scripts/run_distribute_train.sh @@ -14,60 +14,62 @@ # limitations under the License. # ============================================================================ -echo "=================================================================================================================" +echo "==============================================================================================================" echo "Please run the scipt as: " -echo "sh run_distribute_train.sh DEVICE_NUM EPOCH_SIZE DATASET MINDSPORE_HCCL_CONFIG_PATH PRE_TRAINED PRE_TRAINED_EPOCH_SIZE" -echo "for example: sh run_distribute_train.sh 8 350 coco /data/hccl.json /opt/ssd-300.ckpt(optional) 200(optional)" +echo "sh run_distribute_train.sh DEVICE_NUM EPOCH_SIZE LR DATASET MINDSPORE_HCCL_CONFIG_PATH PRE_TRAINED PRE_TRAINED_EPOCH_SIZE" +echo "for example: sh run_distribute_train.sh 8 500 0.2 coco /data/hccl.json /opt/ssd-300.ckpt(optional) 200(optional)" echo "It is better to use absolute path." -echo "The learning rate is 0.4 as default, if you want other lr, please change the value in this script." 
echo "=================================================================================================================" -if [ $# != 4 ] && [ $# != 6 ] +if [ $# != 5 ] && [ $# != 7 ] then - echo "Usage: sh run_distribute_train.sh [DEVICE_NUM] [EPOCH_SIZE] [DATASET] \ + echo "Usage: sh run_distribute_train.sh [DEVICE_NUM] [EPOCH_SIZE] [LR] [DATASET] \ [MINDSPORE_HCCL_CONFIG_PATH] [PRE_TRAINED](optional) [PRE_TRAINED_EPOCH_SIZE](optional)" exit 1 fi # Before start distribute train, first create mindrecord files. +BASE_PATH=$(cd "`dirname $0`" || exit; pwd) +cd $BASE_PATH/../ || exit python train.py --only_create_dataset=1 echo "After running the scipt, the network runs in the background. The log will be generated in LOGx/log.txt" export RANK_SIZE=$1 EPOCH_SIZE=$2 -DATASET=$3 -PRE_TRAINED=$5 -PRE_TRAINED_EPOCH_SIZE=$6 -export MINDSPORE_HCCL_CONFIG_PATH=$4 - +LR=$3 +DATASET=$4 +PRE_TRAINED=$6 +PRE_TRAINED_EPOCH_SIZE=$7 +export MINDSPORE_HCCL_CONFIG_PATH=$5 for((i=0;i env.log - if [ $# == 4 ] + if [ $# == 5 ] then - python ../train.py \ + python train.py \ --distribute=1 \ - --lr=0.4 \ + --lr=$LR \ --dataset=$DATASET \ --device_num=$RANK_SIZE \ --device_id=$DEVICE_ID \ --epoch_size=$EPOCH_SIZE > log.txt 2>&1 & fi - if [ $# == 6 ] + if [ $# == 7 ] then - python ../train.py \ + python train.py \ --distribute=1 \ - --lr=0.4 \ + --lr=$LR \ --dataset=$DATASET \ --device_num=$RANK_SIZE \ --device_id=$DEVICE_ID \ diff --git a/model_zoo/ssd/src/__init__.py b/model_zoo/ssd/src/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/model_zoo/ssd/src/box_utils.py b/model_zoo/ssd/src/box_utils.py new file mode 100644 index 0000000000..5e75ab6a4e --- /dev/null +++ b/model_zoo/ssd/src/box_utils.py @@ -0,0 +1,165 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Bbox utils""" + +import math +import itertools as it +import numpy as np +from .config import config + + +class GeneratDefaultBoxes(): + """ + Generate Default boxes for SSD, follows the order of (W, H, archor_sizes). + `self.default_boxes` has a shape of [archor_sizes, H, W, 4], the last dimension is [y, x, h, w]. + `self.default_boxes_ltrb` has a shape as `self.default_boxes`, the last dimension is [y1, x1, y2, x2]. + """ + def __init__(self): + fk = config.img_shape[0] / np.array(config.steps) + scale_rate = (config.max_scale - config.min_scale) / (len(config.num_default) - 1) + scales = [config.min_scale + scale_rate * i for i in range(len(config.num_default))] + [1.0] + self.default_boxes = [] + for idex, feature_size in enumerate(config.feature_size): + sk1 = scales[idex] + sk2 = scales[idex + 1] + sk3 = math.sqrt(sk1 * sk2) + if idex == 0: + w, h = sk1 * math.sqrt(2), sk1 / math.sqrt(2) + all_sizes = [(0.1, 0.1), (w, h), (h, w)] + else: + all_sizes = [(sk1, sk1)] + for aspect_ratio in config.aspect_ratios[idex]: + w, h = sk1 * math.sqrt(aspect_ratio), sk1 / math.sqrt(aspect_ratio) + all_sizes.append((w, h)) + all_sizes.append((h, w)) + all_sizes.append((sk3, sk3)) + + assert len(all_sizes) == config.num_default[idex] + + for i, j in it.product(range(feature_size), repeat=2): + for w, h in all_sizes: + cx, cy = (j + 0.5) / fk[idex], (i + 0.5) / fk[idex] + self.default_boxes.append([cy, cx, h, w]) + + def to_ltrb(cy, cx, h, w): + return cy - h / 2, cx - w / 2, cy + h 
/ 2, cx + w / 2 + + # For IoU calculation + self.default_boxes_ltrb = np.array(tuple(to_ltrb(*i) for i in self.default_boxes), dtype='float32') + self.default_boxes = np.array(self.default_boxes, dtype='float32') + + +default_boxes_ltrb = GeneratDefaultBoxes().default_boxes_ltrb +default_boxes = GeneratDefaultBoxes().default_boxes +y1, x1, y2, x2 = np.split(default_boxes_ltrb[:, :4], 4, axis=-1) +vol_anchors = (x2 - x1) * (y2 - y1) +matching_threshold = config.match_thershold + + +def ssd_bboxes_encode(boxes): + """ + Labels anchors with ground truth inputs. + + Args: + boxex: ground truth with shape [N, 5], for each row, it stores [y, x, h, w, cls]. + + Returns: + gt_loc: location ground truth with shape [num_anchors, 4]. + gt_label: class ground truth with shape [num_anchors, 1]. + num_matched_boxes: number of positives in an image. + """ + + def jaccard_with_anchors(bbox): + """Compute jaccard score a box and the anchors.""" + # Intersection bbox and volume. + ymin = np.maximum(y1, bbox[0]) + xmin = np.maximum(x1, bbox[1]) + ymax = np.minimum(y2, bbox[2]) + xmax = np.minimum(x2, bbox[3]) + w = np.maximum(xmax - xmin, 0.) + h = np.maximum(ymax - ymin, 0.) + + # Volumes. + inter_vol = h * w + union_vol = vol_anchors + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) - inter_vol + jaccard = inter_vol / union_vol + return np.squeeze(jaccard) + + pre_scores = np.zeros((config.num_ssd_boxes), dtype=np.float32) + t_boxes = np.zeros((config.num_ssd_boxes, 4), dtype=np.float32) + t_label = np.zeros((config.num_ssd_boxes), dtype=np.int64) + for bbox in boxes: + label = int(bbox[4]) + scores = jaccard_with_anchors(bbox) + idx = np.argmax(scores) + scores[idx] = 2.0 + mask = (scores > matching_threshold) + mask = mask & (scores > pre_scores) + pre_scores = np.maximum(pre_scores, scores * mask) + t_label = mask * label + (1 - mask) * t_label + for i in range(4): + t_boxes[:, i] = mask * bbox[i] + (1 - mask) * t_boxes[:, i] + + index = np.nonzero(t_label) + + # Transform to ltrb. 
+ bboxes = np.zeros((config.num_ssd_boxes, 4), dtype=np.float32) + bboxes[:, [0, 1]] = (t_boxes[:, [0, 1]] + t_boxes[:, [2, 3]]) / 2 + bboxes[:, [2, 3]] = t_boxes[:, [2, 3]] - t_boxes[:, [0, 1]] + + # Encode features. + bboxes_t = bboxes[index] + default_boxes_t = default_boxes[index] + bboxes_t[:, :2] = (bboxes_t[:, :2] - default_boxes_t[:, :2]) / (default_boxes_t[:, 2:] * config.prior_scaling[0]) + bboxes_t[:, 2:4] = np.log(bboxes_t[:, 2:4] / default_boxes_t[:, 2:4]) / config.prior_scaling[1] + bboxes[index] = bboxes_t + + num_match = np.array([len(np.nonzero(t_label)[0])], dtype=np.int32) + return bboxes, t_label.astype(np.int32), num_match + + +def ssd_bboxes_decode(boxes): + """Decode predict boxes to [y, x, h, w]""" + boxes_t = boxes.copy() + default_boxes_t = default_boxes.copy() + boxes_t[:, :2] = boxes_t[:, :2] * config.prior_scaling[0] * default_boxes_t[:, 2:] + default_boxes_t[:, :2] + boxes_t[:, 2:4] = np.exp(boxes_t[:, 2:4] * config.prior_scaling[1]) * default_boxes_t[:, 2:4] + + bboxes = np.zeros((len(boxes_t), 4), dtype=np.float32) + + bboxes[:, [0, 1]] = boxes_t[:, [0, 1]] - boxes_t[:, [2, 3]] / 2 + bboxes[:, [2, 3]] = boxes_t[:, [0, 1]] + boxes_t[:, [2, 3]] / 2 + + return np.clip(bboxes, 0, 1) + + +def intersect(box_a, box_b): + """Compute the intersect of two sets of boxes.""" + max_yx = np.minimum(box_a[:, 2:4], box_b[2:4]) + min_yx = np.maximum(box_a[:, :2], box_b[:2]) + inter = np.clip((max_yx - min_yx), a_min=0, a_max=np.inf) + return inter[:, 0] * inter[:, 1] + + +def jaccard_numpy(box_a, box_b): + """Compute the jaccard overlap of two sets of boxes.""" + inter = intersect(box_a, box_b) + area_a = ((box_a[:, 2] - box_a[:, 0]) * + (box_a[:, 3] - box_a[:, 1])) + area_b = ((box_b[2] - box_b[0]) * + (box_b[3] - box_b[1])) + union = area_a + area_b - inter + return inter / union diff --git a/model_zoo/ssd/src/coco_eval.py b/model_zoo/ssd/src/coco_eval.py new file mode 100644 index 0000000000..eb36618089 --- /dev/null +++ 
b/model_zoo/ssd/src/coco_eval.py @@ -0,0 +1,127 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Coco metrics utils""" + +import os +import json +import numpy as np +from .config import config +from .box_utils import ssd_bboxes_decode + + +def apply_nms(all_boxes, all_scores, thres, max_boxes): + """Apply NMS to bboxes.""" + y1 = all_boxes[:, 0] + x1 = all_boxes[:, 1] + y2 = all_boxes[:, 2] + x2 = all_boxes[:, 3] + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + + order = all_scores.argsort()[::-1] + keep = [] + + while order.size > 0: + i = order[0] + keep.append(i) + + if len(keep) >= max_boxes: + break + + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thres)[0] + + order = order[inds + 1] + return keep + + +def metrics(pred_data): + """Calculate mAP of predicted bboxes.""" + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + num_classes = config.num_classes + + coco_root = config.coco_root + data_type = config.val_data_type + + #Classes need to train or test. 
+ val_cls = config.coco_classes + val_cls_dict = {} + for i, cls in enumerate(val_cls): + val_cls_dict[i] = cls + + anno_json = os.path.join(coco_root, config.instances_set.format(data_type)) + coco_gt = COCO(anno_json) + classs_dict = {} + cat_ids = coco_gt.loadCats(coco_gt.getCatIds()) + for cat in cat_ids: + classs_dict[cat["name"]] = cat["id"] + + predictions = [] + img_ids = [] + + for sample in pred_data: + pred_boxes = sample['boxes'] + box_scores = sample['box_scores'] + img_id = sample['img_id'] + h, w = sample['image_shape'] + + pred_boxes = ssd_bboxes_decode(pred_boxes) + final_boxes = [] + final_label = [] + final_score = [] + img_ids.append(img_id) + + for c in range(1, num_classes): + class_box_scores = box_scores[:, c] + score_mask = class_box_scores > config.min_score + class_box_scores = class_box_scores[score_mask] + class_boxes = pred_boxes[score_mask] * [h, w, h, w] + + if score_mask.any(): + nms_index = apply_nms(class_boxes, class_box_scores, config.nms_thershold, config.max_boxes) + class_boxes = class_boxes[nms_index] + class_box_scores = class_box_scores[nms_index] + + final_boxes += class_boxes.tolist() + final_score += class_box_scores.tolist() + final_label += [classs_dict[val_cls_dict[c]]] * len(class_box_scores) + + for loc, label, score in zip(final_boxes, final_label, final_score): + res = {} + res['image_id'] = img_id + res['bbox'] = [loc[1], loc[0], loc[3] - loc[1], loc[2] - loc[0]] + res['score'] = score + res['category_id'] = label + predictions.append(res) + with open('predictions.json', 'w') as f: + json.dump(predictions, f) + + coco_dt = coco_gt.loadRes('predictions.json') + E = COCOeval(coco_gt, coco_dt, iouType='bbox') + E.params.imgIds = img_ids + E.evaluate() + E.accumulate() + E.summarize() + return E.stats[0] diff --git a/model_zoo/ssd/src/config.py b/model_zoo/ssd/src/config.py new file mode 100644 index 0000000000..683b8de31f --- /dev/null +++ b/model_zoo/ssd/src/config.py @@ -0,0 +1,78 @@ +# Copyright 2020 Huawei 
Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#" ============================================================================ + +"""Config parameters for SSD models.""" + +from easydict import EasyDict as ed + +config = ed({ + "img_shape": [300, 300], + "num_ssd_boxes": 1917, + "neg_pre_positive": 3, + "match_thershold": 0.5, + "nms_thershold": 0.6, + "min_score": 0.1, + "max_boxes": 100, + + # learing rate settings + "global_step": 0, + "lr_init": 0.001, + "lr_end_rate": 0.001, + "warmup_epochs": 2, + "momentum": 0.9, + "weight_decay": 1.5e-4, + + # network + "num_default": [3, 6, 6, 6, 6, 6], + "extras_in_channels": [256, 576, 1280, 512, 256, 256], + "extras_out_channels": [576, 1280, 512, 256, 256, 128], + "extras_srides": [1, 1, 2, 2, 2, 2], + "extras_ratio": [0.2, 0.2, 0.2, 0.25, 0.5, 0.25], + "feature_size": [19, 10, 5, 3, 2, 1], + "min_scale": 0.2, + "max_scale": 0.95, + "aspect_ratios": [(2,), (2, 3), (2, 3), (2, 3), (2, 3), (2, 3)], + "steps": (16, 32, 64, 100, 150, 300), + "prior_scaling": (0.1, 0.2), + "gamma": 2.0, + "alpha": 0.75, + + # `mindrecord_dir` and `coco_root` are better to use absolute path. 
+ "mindrecord_dir": "/data/MindRecord_COCO", + "coco_root": "/data/coco2017", + "train_data_type": "train2017", + "val_data_type": "val2017", + "instances_set": "annotations/instances_{}.json", + "coco_classes": ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', + 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', + 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', + 'kite', 'baseball bat', 'baseball glove', 'skateboard', + 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', + 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', + 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', + 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', + 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', + 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', + 'refrigerator', 'book', 'clock', 'vase', 'scissors', + 'teddy bear', 'hair drier', 'toothbrush'), + "num_classes": 81, + + # if coco used, `image_dir` and `anno_path` are useless. + "image_dir": "", + "anno_path": "", +}) diff --git a/model_zoo/ssd/src/dataset.py b/model_zoo/ssd/src/dataset.py new file mode 100644 index 0000000000..19c66fc598 --- /dev/null +++ b/model_zoo/ssd/src/dataset.py @@ -0,0 +1,289 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""SSD dataset""" + +from __future__ import division + +import os +import cv2 +import numpy as np + +import mindspore.dataset as de +import mindspore.dataset.transforms.vision.c_transforms as C +from mindspore.mindrecord import FileWriter +from .config import config +from .box_utils import jaccard_numpy, ssd_bboxes_encode + + +def _rand(a=0., b=1.): + """Generate random.""" + return np.random.rand() * (b - a) + a + + +def random_sample_crop(image, boxes): + """Random Crop the image and boxes""" + height, width, _ = image.shape + min_iou = np.random.choice([None, 0.1, 0.3, 0.5, 0.7, 0.9]) + + if min_iou is None: + return image, boxes + + # max trails (50) + for _ in range(50): + image_t = image + + w = _rand(0.3, 1.0) * width + h = _rand(0.3, 1.0) * height + + # aspect ratio constraint b/t .5 & 2 + if h / w < 0.5 or h / w > 2: + continue + + left = _rand() * (width - w) + top = _rand() * (height - h) + + rect = np.array([int(top), int(left), int(top+h), int(left+w)]) + overlap = jaccard_numpy(boxes, rect) + + # dropout some boxes + drop_mask = overlap > 0 + if not drop_mask.any(): + continue + + if overlap[drop_mask].min() < min_iou: + continue + + image_t = image_t[rect[0]:rect[2], rect[1]:rect[3], :] + + centers = (boxes[:, :2] + boxes[:, 2:4]) / 2.0 + + m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) + m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) + + # mask in that both m1 and m2 are true + mask = m1 * m2 * drop_mask + + # have any valid boxes? 
try again if not + if not mask.any(): + continue + + # take only matching gt boxes + boxes_t = boxes[mask, :].copy() + + boxes_t[:, :2] = np.maximum(boxes_t[:, :2], rect[:2]) + boxes_t[:, :2] -= rect[:2] + boxes_t[:, 2:4] = np.minimum(boxes_t[:, 2:4], rect[2:4]) + boxes_t[:, 2:4] -= rect[:2] + + return image_t, boxes_t + return image, boxes + + +def preprocess_fn(img_id, image, box, is_training): + """Preprocess function for dataset.""" + def _infer_data(image, input_shape): + img_h, img_w, _ = image.shape + input_h, input_w = input_shape + + image = cv2.resize(image, (input_w, input_h)) + + #When the channels of image is 1 + if len(image.shape) == 2: + image = np.expand_dims(image, axis=-1) + image = np.concatenate([image, image, image], axis=-1) + + return img_id, image, np.array((img_h, img_w), np.float32) + + def _data_aug(image, box, is_training, image_size=(300, 300)): + """Data augmentation function.""" + ih, iw, _ = image.shape + w, h = image_size + + if not is_training: + return _infer_data(image, image_size) + + # Random crop + box = box.astype(np.float32) + image, box = random_sample_crop(image, box) + ih, iw, _ = image.shape + + # Resize image + image = cv2.resize(image, (w, h)) + + # Flip image or not + flip = _rand() < .5 + if flip: + image = cv2.flip(image, 1, dst=None) + + # When the channels of image is 1 + if len(image.shape) == 2: + image = np.expand_dims(image, axis=-1) + image = np.concatenate([image, image, image], axis=-1) + + box[:, [0, 2]] = box[:, [0, 2]] / ih + box[:, [1, 3]] = box[:, [1, 3]] / iw + + if flip: + box[:, [1, 3]] = 1 - box[:, [3, 1]] + + box, label, num_match = ssd_bboxes_encode(box) + return image, box, label, num_match + return _data_aug(image, box, is_training, image_size=config.img_shape) + + +def create_coco_label(is_training): + """Get image path and annotation from COCO.""" + from pycocotools.coco import COCO + + coco_root = config.coco_root + data_type = config.val_data_type + if is_training: + data_type = 
config.train_data_type + + #Classes need to train or test. + train_cls = config.coco_classes + train_cls_dict = {} + for i, cls in enumerate(train_cls): + train_cls_dict[cls] = i + + anno_json = os.path.join(coco_root, config.instances_set.format(data_type)) + + coco = COCO(anno_json) + classs_dict = {} + cat_ids = coco.loadCats(coco.getCatIds()) + for cat in cat_ids: + classs_dict[cat["id"]] = cat["name"] + + image_ids = coco.getImgIds() + images = [] + image_path_dict = {} + image_anno_dict = {} + + for img_id in image_ids: + image_info = coco.loadImgs(img_id) + file_name = image_info[0]["file_name"] + anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None) + anno = coco.loadAnns(anno_ids) + image_path = os.path.join(coco_root, data_type, file_name) + annos = [] + iscrowd = False + for label in anno: + bbox = label["bbox"] + class_name = classs_dict[label["category_id"]] + iscrowd = iscrowd or label["iscrowd"] + if class_name in train_cls: + x_min, x_max = bbox[0], bbox[0] + bbox[2] + y_min, y_max = bbox[1], bbox[1] + bbox[3] + annos.append(list(map(round, [y_min, x_min, y_max, x_max])) + [train_cls_dict[class_name]]) + + if not is_training and iscrowd: + continue + if len(annos) >= 1: + images.append(img_id) + image_path_dict[img_id] = image_path + image_anno_dict[img_id] = np.array(annos) + + return images, image_path_dict, image_anno_dict + + +def anno_parser(annos_str): + """Parse annotation from string to list.""" + annos = [] + for anno_str in annos_str: + anno = list(map(int, anno_str.strip().split(','))) + annos.append(anno) + return annos + + +def filter_valid_data(image_dir, anno_path): + """Filter valid image file, which both in image_dir and anno_path.""" + images = [] + image_path_dict = {} + image_anno_dict = {} + if not os.path.isdir(image_dir): + raise RuntimeError("Path given is not valid.") + if not os.path.isfile(anno_path): + raise RuntimeError("Annotation file is not valid.") + + with open(anno_path, "rb") as f: + lines = f.readlines() + for 
img_id, line in enumerate(lines): + line_str = line.decode("utf-8").strip() + line_split = str(line_str).split(' ') + file_name = line_split[0] + image_path = os.path.join(image_dir, file_name) + if os.path.isfile(image_path): + images.append(img_id) + image_path_dict[img_id] = image_path + image_anno_dict[img_id] = anno_parser(line_split[1:]) + + return images, image_path_dict, image_anno_dict + + +def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="ssd.mindrecord", file_num=8): + """Create MindRecord file.""" + mindrecord_dir = config.mindrecord_dir + mindrecord_path = os.path.join(mindrecord_dir, prefix) + writer = FileWriter(mindrecord_path, file_num) + if dataset == "coco": + images, image_path_dict, image_anno_dict = create_coco_label(is_training) + else: + images, image_path_dict, image_anno_dict = filter_valid_data(config.image_dir, config.anno_path) + + ssd_json = { + "img_id": {"type": "int32", "shape": [1]}, + "image": {"type": "bytes"}, + "annotation": {"type": "int32", "shape": [-1, 5]}, + } + writer.add_schema(ssd_json, "ssd_json") + + for img_id in images: + image_path = image_path_dict[img_id] + with open(image_path, 'rb') as f: + img = f.read() + annos = np.array(image_anno_dict[img_id], dtype=np.int32) + img_id = np.array([img_id], dtype=np.int32) + row = {"img_id": img_id, "image": img, "annotation": annos} + writer.write_raw_data([row]) + writer.commit() + + +def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num=1, rank=0, + is_training=True, num_parallel_workers=4): + """Creatr SSD dataset with MindDataset.""" + ds = de.MindDataset(mindrecord_file, columns_list=["img_id", "image", "annotation"], num_shards=device_num, + shard_id=rank, num_parallel_workers=num_parallel_workers, shuffle=is_training) + decode = C.Decode() + ds = ds.map(input_columns=["image"], operations=decode) + change_swap_op = C.HWC2CHW() + normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 
0.224*255, 0.225*255]) + color_adjust_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4) + compose_map_func = (lambda img_id, image, annotation: preprocess_fn(img_id, image, annotation, is_training)) + if is_training: + output_columns = ["image", "box", "label", "num_match"] + trans = [color_adjust_op, normalize_op, change_swap_op] + else: + output_columns = ["img_id", "image", "image_shape"] + trans = [normalize_op, change_swap_op] + ds = ds.map(input_columns=["img_id", "image", "annotation"], + output_columns=output_columns, columns_order=output_columns, + operations=compose_map_func, python_multiprocessing=is_training, + num_parallel_workers=num_parallel_workers) + ds = ds.map(input_columns=["image"], operations=trans, python_multiprocessing=is_training, + num_parallel_workers=num_parallel_workers) + ds = ds.batch(batch_size, drop_remainder=True) + ds = ds.repeat(repeat_num) + return ds diff --git a/model_zoo/ssd/src/init_params.py b/model_zoo/ssd/src/init_params.py new file mode 100644 index 0000000000..6e1f8869b3 --- /dev/null +++ b/model_zoo/ssd/src/init_params.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Parameters utils""" + +from mindspore import Tensor +from mindspore.common.initializer import initializer, TruncatedNormal + +def init_net_param(network, initialize_mode='TruncatedNormal'): + """Init the parameters in net.""" + params = network.trainable_params() + for p in params: + if isinstance(p.data, Tensor) and 'beta' not in p.name and 'gamma' not in p.name and 'bias' not in p.name: + if initialize_mode == 'TruncatedNormal': + p.set_parameter_data(initializer(TruncatedNormal(0.03), p.data.shape, p.data.dtype)) + else: + p.set_parameter_data(initialize_mode, p.data.shape, p.data.dtype) + + +def load_backbone_params(network, param_dict): + """Init the parameters from pre-train model, default is mobilenetv2.""" + for _, param in network.parameters_and_names(): + param_name = param.name.replace('network.backbone.', '') + name_split = param_name.split('.') + if 'features_1' in param_name: + param_name = param_name.replace('features_1', 'features') + if 'features_2' in param_name: + param_name = '.'.join(['features', str(int(name_split[1]) + 14)] + name_split[2:]) + if param_name in param_dict: + param.set_parameter_data(param_dict[param_name].data) diff --git a/model_zoo/ssd/src/lr_schedule.py b/model_zoo/ssd/src/lr_schedule.py new file mode 100644 index 0000000000..4df26b3905 --- /dev/null +++ b/model_zoo/ssd/src/lr_schedule.py @@ -0,0 +1,56 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Learning rate schedule""" + +import math +import numpy as np + + +def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch): + """ + generate learning rate array + + Args: + global_step(int): total steps of the training + lr_init(float): init learning rate + lr_end(float): end learning rate + lr_max(float): max learning rate + warmup_epochs(float): number of warmup epochs + total_epochs(int): total epoch of training + steps_per_epoch(int): steps of one epoch + + Returns: + np.array, learning rate array + """ + lr_each_step = [] + total_steps = steps_per_epoch * total_epochs + warmup_steps = steps_per_epoch * warmup_epochs + for i in range(total_steps): + if i < warmup_steps: + lr = lr_init + (lr_max - lr_init) * i / warmup_steps + else: + lr = lr_end + \ + (lr_max - lr_end) * \ + (1. + math.cos(math.pi * (i - warmup_steps) / (total_steps - warmup_steps))) / 2. 
+ if lr < 0.0: + lr = 0.0 + lr_each_step.append(lr) + + current_step = global_step + lr_each_step = np.array(lr_each_step).astype(np.float32) + learning_rate = lr_each_step[current_step:] + + return learning_rate diff --git a/mindspore/model_zoo/ssd.py b/model_zoo/ssd/src/ssd.py similarity index 74% rename from mindspore/model_zoo/ssd.py rename to model_zoo/ssd/src/ssd.py index b69942cd5c..d2fb64531e 100644 --- a/mindspore/model_zoo/ssd.py +++ b/model_zoo/ssd/src/ssd.py @@ -14,25 +14,17 @@ # ============================================================================ """SSD net based MobilenetV2.""" + import mindspore.common.dtype as mstype import mindspore as ms import mindspore.nn as nn -from mindspore import context +from mindspore import Parameter, context, Tensor from mindspore.parallel._auto_parallel_context import auto_parallel_context from mindspore.communication.management import get_group_size from mindspore.ops import operations as P from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore.common.initializer import initializer -from mindspore.ops.operations import TensorAdd -from mindspore import Parameter - - -def _conv2d(in_channel, out_channel, kernel_size=3, stride=1, pad_mod='same'): - weight_shape = (out_channel, in_channel, kernel_size, kernel_size) - weight = initializer('XavierUniform', shape=weight_shape, dtype=mstype.float32).to_tensor() - return nn.Conv2d(in_channel, out_channel, kernel_size=kernel_size, stride=stride, - padding=0, pad_mode=pad_mod, weight_init=weight) def _make_divisible(v, divisor, min_value=None): @@ -46,6 +38,55 @@ def _make_divisible(v, divisor, min_value=None): return new_v +def _conv2d(in_channel, out_channel, kernel_size=3, stride=1, pad_mod='same'): + return nn.Conv2d(in_channel, out_channel, kernel_size=kernel_size, stride=stride, + padding=0, pad_mode=pad_mod, has_bias=True) + + +def _bn(channel): + return nn.BatchNorm2d(channel, eps=1e-3, momentum=0.97, + gamma_init=1, 
beta_init=0, moving_mean_init=0, moving_var_init=1) + + +def _last_conv2d(in_channel, out_channel, kernel_size=3, stride=1, pad_mod='same', pad=0): + depthwise_conv = DepthwiseConv(in_channel, kernel_size, stride, pad_mode='same', pad=pad) + conv = _conv2d(in_channel, out_channel, kernel_size=1) + return nn.SequentialCell([depthwise_conv, _bn(in_channel), nn.ReLU6(), conv]) + + +class ConvBNReLU(nn.Cell): + """ + Convolution/Depthwise fused with Batchnorm and ReLU block definition. + + Args: + in_planes (int): Input channel. + out_planes (int): Output channel. + kernel_size (int): Input kernel size. + stride (int): Stride size for the first convolutional layer. Default: 1. + groups (int): channel group. Convolution is 1 while Depthiwse is input channel. Default: 1. + + Returns: + Tensor, output tensor. + + Examples: + >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1) + """ + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + super(ConvBNReLU, self).__init__() + padding = 0 + if groups == 1: + conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad_mode='same', + padding=padding) + else: + conv = DepthwiseConv(in_planes, kernel_size, stride, pad_mode='same', pad=padding) + layers = [conv, _bn(out_planes), nn.ReLU6()] + self.features = nn.SequentialCell(layers) + + def construct(self, x): + output = self.features(x) + return output + + class DepthwiseConv(nn.Cell): """ Depthwise Convolution warpper definition. @@ -64,6 +105,7 @@ class DepthwiseConv(nn.Cell): Examples: >>> DepthwiseConv(16, 3, 1, 'pad', 1, channel_multiplier=1) """ + def __init__(self, in_planes, kernel_size, stride, pad_mode, pad, channel_multiplier=1, has_bias=False): super(DepthwiseConv, self).__init__() self.has_bias = has_bias @@ -91,42 +133,9 @@ class DepthwiseConv(nn.Cell): return output -class ConvBNReLU(nn.Cell): - """ - Convolution/Depthwise fused with Batchnorm and ReLU block definition. - - Args: - in_planes (int): Input channel. 
- out_planes (int): Output channel. - kernel_size (int): Input kernel size. - stride (int): Stride size for the first convolutional layer. Default: 1. - groups (int): channel group. Convolution is 1 while Depthiwse is input channel. Default: 1. - - Returns: - Tensor, output tensor. - - Examples: - >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1) - """ - def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): - super(ConvBNReLU, self).__init__() - padding = (kernel_size - 1) // 2 - if groups == 1: - conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad_mode='pad', - padding=padding) - else: - conv = DepthwiseConv(in_planes, kernel_size, stride, pad_mode='pad', pad=padding) - layers = [conv, nn.BatchNorm2d(out_planes), nn.ReLU6()] - self.features = nn.SequentialCell(layers) - - def construct(self, x): - output = self.features(x) - return output - - class InvertedResidual(nn.Cell): """ - Mobilenetv2 residual block definition. + Residual block definition. Args: inp (int): Input channel. 
@@ -140,7 +149,7 @@ class InvertedResidual(nn.Cell): Examples: >>> ResidualBlock(3, 256, 1, 1) """ - def __init__(self, inp, oup, stride, expand_ratio): + def __init__(self, inp, oup, stride, expand_ratio, last_relu=False): super(InvertedResidual, self).__init__() assert stride in [1, 2] @@ -155,17 +164,21 @@ class InvertedResidual(nn.Cell): ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), # pw-linear nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, has_bias=False), - nn.BatchNorm2d(oup), + _bn(oup), ]) self.conv = nn.SequentialCell(layers) - self.add = TensorAdd() + self.add = P.TensorAdd() self.cast = P.Cast() + self.last_relu = last_relu + self.relu = nn.ReLU6() def construct(self, x): identity = x x = self.conv(x) if self.use_res_connect: - return self.add(identity, x) + x = self.add(identity, x) + if self.last_relu: + x = self.relu(x) return x @@ -174,14 +187,14 @@ class FlattenConcat(nn.Cell): Concatenate predictions into a single tensor. Args: - config (Class): The default config of SSD. + config (dict): The default config of SSD. Returns: Tensor, flatten predictions. """ def __init__(self, config): super(FlattenConcat, self).__init__() - self.num_ssd_boxes = config.NUM_SSD_BOXES + self.num_ssd_boxes = config.num_ssd_boxes self.concat = P.Concat(axis=1) self.transpose = P.Transpose() def construct(self, inputs): @@ -199,7 +212,7 @@ class MultiBox(nn.Cell): Multibox conv layers. Each multibox layer contains class conf scores and localization predictions. Args: - config (Class): The default config of SSD. + config (dict): The default config of SSD. Returns: Tensor, localization predictions. 
@@ -207,17 +220,17 @@ class MultiBox(nn.Cell): """ def __init__(self, config): super(MultiBox, self).__init__() - num_classes = config.NUM_CLASSES - out_channels = config.EXTRAS_OUT_CHANNELS - num_default = config.NUM_DEFAULT + num_classes = config.num_classes + out_channels = config.extras_out_channels + num_default = config.num_default loc_layers = [] cls_layers = [] for k, out_channel in enumerate(out_channels): - loc_layers += [_conv2d(out_channel, 4 * num_default[k], - kernel_size=3, stride=1, pad_mod='same')] - cls_layers += [_conv2d(out_channel, num_classes * num_default[k], - kernel_size=3, stride=1, pad_mod='same')] + loc_layers += [_last_conv2d(out_channel, 4 * num_default[k], + kernel_size=3, stride=1, pad_mod='same', pad=0)] + cls_layers += [_last_conv2d(out_channel, num_classes * num_default[k], + kernel_size=3, stride=1, pad_mod='same', pad=0)] self.multi_loc_layers = nn.layer.CellList(loc_layers) self.multi_cls_layers = nn.layer.CellList(cls_layers) @@ -238,7 +251,7 @@ class SSD300(nn.Cell): Args: backbone (Cell): Backbone Network. - config (Class): The default config of SSD. + config (dict): The default config of SSD. Returns: Tensor, localization predictions. @@ -246,25 +259,26 @@ class SSD300(nn.Cell): Examples:backbone SSD300(backbone=resnet34(num_classes=None), - config=ConfigSSDResNet34()). + config=config). 
""" def __init__(self, backbone, config, is_training=True): super(SSD300, self).__init__() self.backbone = backbone - in_channels = config.EXTRAS_IN_CHANNELS - out_channels = config.EXTRAS_OUT_CHANNELS - ratios = config.EXTRAS_RATIO - strides = config.EXTRAS_STRIDES + in_channels = config.extras_in_channels + out_channels = config.extras_out_channels + ratios = config.extras_ratio + strides = config.extras_srides residual_list = [] for i in range(2, len(in_channels)): - residual = InvertedResidual(in_channels[i], out_channels[i], stride=strides[i], expand_ratio=ratios[i]) + residual = InvertedResidual(in_channels[i], out_channels[i], stride=strides[i], + expand_ratio=ratios[i], last_relu=True) residual_list.append(residual) self.multi_residual = nn.layer.CellList(residual_list) self.multi_box = MultiBox(config) self.is_training = is_training if not is_training: - self.softmax = P.Softmax() + self.activation = P.Sigmoid() def construct(self, x): layer_out_13, output = self.backbone(x) @@ -275,77 +289,42 @@ class SSD300(nn.Cell): multi_feature += (feature,) pred_loc, pred_label = self.multi_box(multi_feature) if not self.is_training: - pred_label = self.softmax(pred_label) + pred_label = self.activation(pred_label) return pred_loc, pred_label -class LocalizationLoss(nn.Cell): +class SigmoidFocalClassificationLoss(nn.Cell): """" - Computes the localization loss with SmoothL1Loss. - - Returns: - Tensor, box regression loss. 
- """ - def __init__(self): - super(LocalizationLoss, self).__init__() - self.reduce_sum = P.ReduceSum() - self.reduce_mean = P.ReduceMean() - self.loss = nn.SmoothL1Loss() - self.expand_dims = P.ExpandDims() - self.less = P.Less() - - def construct(self, pred_loc, gt_loc, gt_label, num_matched_boxes): - mask = F.cast(self.less(0, gt_label), mstype.float32) - mask = self.expand_dims(mask, -1) - smooth_l1 = self.loss(gt_loc, pred_loc) * mask - box_loss = self.reduce_sum(smooth_l1, 1) - return self.reduce_mean(box_loss / F.cast(num_matched_boxes, mstype.float32), (0, 1)) - - -class ClassificationLoss(nn.Cell): - """" - Computes the classification loss with hard example mining. + Sigmoid focal-loss for classification. Args: - config (Class): The default config of SSD. + gamma (float): Hyper-parameter to balance the easy and hard examples. Default: 2.0 + alpha (float): Hyper-parameter to balance the positive and negative example. Default: 0.25 Returns: - Tensor, classification loss. + Tensor, the focal loss. 
""" - def __init__(self, config): - super(ClassificationLoss, self).__init__() - self.num_classes = config.NUM_CLASSES - self.num_boxes = config.NUM_SSD_BOXES - self.neg_pre_positive = config.NEG_PRE_POSITIVE - self.minimum = P.Minimum() - self.less = P.Less() - self.sort = P.TopK() - self.tile = P.Tile() - self.reduce_sum = P.ReduceSum() - self.reduce_mean = P.ReduceMean() - self.expand_dims = P.ExpandDims() - self.sort_descend = P.TopK(True) - self.cross_entropy = nn.SoftmaxCrossEntropyWithLogits(sparse=True) - - def construct(self, pred_label, gt_label, num_matched_boxes): - gt_label = F.cast(gt_label, mstype.int32) - mask = F.cast(self.less(0, gt_label), mstype.float32) - gt_label_shape = F.shape(gt_label) - pred_label = F.reshape(pred_label, (-1, self.num_classes)) - gt_label = F.reshape(gt_label, (-1,)) - cross_entropy = self.cross_entropy(pred_label, gt_label) - cross_entropy = F.reshape(cross_entropy, gt_label_shape) - - # Hard example mining - num_matched_boxes = F.reshape(num_matched_boxes, (-1,)) - neg_masked_cross_entropy = F.cast(cross_entropy * (1- mask), mstype.float16) - _, loss_idx = self.sort_descend(neg_masked_cross_entropy, self.num_boxes) - _, relative_position = self.sort(F.cast(loss_idx, mstype.float16), self.num_boxes) - num_neg_boxes = self.minimum(num_matched_boxes * self.neg_pre_positive, self.num_boxes) - tile_num_neg_boxes = self.tile(self.expand_dims(num_neg_boxes, -1), (1, self.num_boxes)) - top_k_neg_mask = F.cast(self.less(relative_position, tile_num_neg_boxes), mstype.float32) - class_loss = self.reduce_sum(cross_entropy * (mask + top_k_neg_mask), 1) - return self.reduce_mean(class_loss / F.cast(num_matched_boxes, mstype.float32), 0) + def __init__(self, gamma=2.0, alpha=0.25): + super(SigmoidFocalClassificationLoss, self).__init__() + self.sigmiod_cross_entropy = P.SigmoidCrossEntropyWithLogits() + self.sigmoid = P.Sigmoid() + self.pow = P.Pow() + self.onehot = P.OneHot() + self.on_value = Tensor(1.0, mstype.float32) + 
self.off_value = Tensor(0.0, mstype.float32) + self.gamma = gamma + self.alpha = alpha + + def construct(self, logits, label): + label = self.onehot(label, F.shape(logits)[-1], self.on_value, self.off_value) + sigmiod_cross_entropy = self.sigmiod_cross_entropy(logits, label) + sigmoid = self.sigmoid(logits) + label = F.cast(label, mstype.float32) + p_t = label * sigmoid + (1 - label) * (1 - sigmoid) + modulating_factor = self.pow(1 - p_t, self.gamma) + alpha_weight_factor = label * self.alpha + (1 - label) * (1 - self.alpha) + focal_loss = modulating_factor * alpha_weight_factor * sigmiod_cross_entropy + return focal_loss class SSDWithLossCell(nn.Cell): @@ -354,7 +333,7 @@ class SSDWithLossCell(nn.Cell): Args: network (Cell): The training network. - config (Class): SSD config. + config (dict): SSD config. Returns: Tensor, the loss of the network. @@ -362,14 +341,29 @@ class SSDWithLossCell(nn.Cell): def __init__(self, network, config): super(SSDWithLossCell, self).__init__() self.network = network - self.class_loss = ClassificationLoss(config) - self.box_loss = LocalizationLoss() + self.less = P.Less() + self.tile = P.Tile() + self.reduce_sum = P.ReduceSum() + self.reduce_mean = P.ReduceMean() + self.expand_dims = P.ExpandDims() + self.class_loss = SigmoidFocalClassificationLoss(config.gamma, config.alpha) + self.loc_loss = nn.SmoothL1Loss() def construct(self, x, gt_loc, gt_label, num_matched_boxes): pred_loc, pred_label = self.network(x) - loss_cls = self.class_loss(pred_label, gt_label, num_matched_boxes) - loss_loc = self.box_loss(pred_loc, gt_loc, gt_label, num_matched_boxes) - return loss_cls + loss_loc + mask = F.cast(self.less(0, gt_label), mstype.float32) + num_matched_boxes = self.reduce_sum(F.cast(num_matched_boxes, mstype.float32)) + + # Localization Loss + mask_loc = self.tile(self.expand_dims(mask, -1), (1, 1, 4)) + smooth_l1 = self.loc_loss(pred_loc, gt_loc) * mask_loc + loss_loc = self.reduce_sum(self.reduce_mean(smooth_l1, -1), -1) + + # 
Classification Loss + loss_cls = self.class_loss(pred_label, gt_label) + loss_cls = self.reduce_sum(loss_cls, (1, 2)) + + return self.reduce_sum((loss_cls + loss_loc) / num_matched_boxes) class TrainingWrapper(nn.Cell): @@ -415,7 +409,6 @@ class TrainingWrapper(nn.Cell): return F.depend(loss, self.optimizer(grads)) - class SSDWithMobileNetV2(nn.Cell): """ MobileNetV2 architecture for SSD backbone. diff --git a/example/ssd_coco2017/train.py b/model_zoo/ssd/train.py similarity index 64% rename from example/ssd_coco2017/train.py rename to model_zoo/ssd/train.py index 9347bf61c8..27f0e7ad0f 100644 --- a/example/ssd_coco2017/train.py +++ b/model_zoo/ssd/train.py @@ -13,83 +13,38 @@ # limitations under the License. # ============================================================================ -"""train SSD and get checkpoint files.""" +"""Train SSD and get checkpoint files.""" import os -import math import argparse -import numpy as np import mindspore.nn as nn from mindspore import context, Tensor from mindspore.communication.management import init from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, LossMonitor, TimeMonitor from mindspore.train import Model, ParallelMode from mindspore.train.serialization import load_checkpoint, load_param_into_net -from mindspore.common.initializer import initializer - -from mindspore.model_zoo.ssd import SSD300, SSDWithLossCell, TrainingWrapper, ssd_mobilenet_v2 -from config import ConfigSSD -from dataset import create_ssd_dataset, data_to_mindrecord_byte_image - - -def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch): - """ - generate learning rate array - - Args: - global_step(int): total steps of the training - lr_init(float): init learning rate - lr_end(float): end learning rate - lr_max(float): max learning rate - warmup_epochs(int): number of warmup epochs - total_epochs(int): total epoch of training - steps_per_epoch(int): steps of one epoch - - Returns: - np.array, 
learning rate array - """ - lr_each_step = [] - total_steps = steps_per_epoch * total_epochs - warmup_steps = steps_per_epoch * warmup_epochs - for i in range(total_steps): - if i < warmup_steps: - lr = lr_init + (lr_max - lr_init) * i / warmup_steps - else: - lr = lr_end + (lr_max - lr_end) * \ - (1. + math.cos(math.pi * (i - warmup_steps) / (total_steps - warmup_steps))) / 2. - if lr < 0.0: - lr = 0.0 - lr_each_step.append(lr) - - current_step = global_step - lr_each_step = np.array(lr_each_step).astype(np.float32) - learning_rate = lr_each_step[current_step:] - - return learning_rate - +from src.ssd import SSD300, SSDWithLossCell, TrainingWrapper, ssd_mobilenet_v2 +from src.config import config +from src.dataset import create_ssd_dataset, data_to_mindrecord_byte_image +from src.lr_schedule import get_lr +from src.init_params import init_net_param -def init_net_param(network, initialize_mode='XavierUniform'): - """Init the parameters in net.""" - params = network.trainable_params() - for p in params: - if isinstance(p.data, Tensor) and 'beta' not in p.name and 'gamma' not in p.name and 'bias' not in p.name: - p.set_parameter_data(initializer(initialize_mode, p.data.shape(), p.data.dtype())) def main(): parser = argparse.ArgumentParser(description="SSD training") parser.add_argument("--only_create_dataset", type=bool, default=False, help="If set it true, only create " - "Mindrecord, default is false.") - parser.add_argument("--distribute", type=bool, default=False, help="Run distribute, default is false.") + "Mindrecord, default is False.") + parser.add_argument("--distribute", type=bool, default=False, help="Run distribute, default is False.") parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") - parser.add_argument("--lr", type=float, default=0.25, help="Learning rate, default is 0.25.") + parser.add_argument("--lr", type=float, 
default=0.05, help="Learning rate, default is 0.05.") parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or not, default is sink.") parser.add_argument("--dataset", type=str, default="coco", help="Dataset, defalut is coco.") - parser.add_argument("--epoch_size", type=int, default=70, help="Epoch size, default is 70.") + parser.add_argument("--epoch_size", type=int, default=250, help="Epoch size, default is 250.") parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.") parser.add_argument("--pre_trained", type=str, default=None, help="Pretrained Checkpoint file path.") parser.add_argument("--pre_trained_epoch_size", type=int, default=0, help="Pretrained epoch size.") - parser.add_argument("--save_checkpoint_epochs", type=int, default=5, help="Save checkpoint epochs, default is 5.") + parser.add_argument("--save_checkpoint_epochs", type=int, default=10, help="Save checkpoint epochs, default is 5.") parser.add_argument("--loss_scale", type=int, default=1024, help="Loss scale, default is 1024.") args_opt = parser.parse_args() @@ -111,27 +66,26 @@ def main(): # It will generate mindrecord file in args_opt.mindrecord_dir, # and the file name is ssd.mindrecord0, 1, ... file_num. 
- config = ConfigSSD() prefix = "ssd.mindrecord" - mindrecord_dir = config.MINDRECORD_DIR + mindrecord_dir = config.mindrecord_dir mindrecord_file = os.path.join(mindrecord_dir, prefix + "0") if not os.path.exists(mindrecord_file): if not os.path.isdir(mindrecord_dir): os.makedirs(mindrecord_dir) if args_opt.dataset == "coco": - if os.path.isdir(config.COCO_ROOT): + if os.path.isdir(config.coco_root): print("Create Mindrecord.") data_to_mindrecord_byte_image("coco", True, prefix) print("Create Mindrecord Done, at {}".format(mindrecord_dir)) else: - print("COCO_ROOT not exits.") + print("coco_root not exits.") else: - if os.path.isdir(config.IMAGE_DIR) and os.path.exists(config.ANNO_PATH): + if os.path.isdir(config.image_dir) and os.path.exists(config.anno_path): print("Create Mindrecord.") data_to_mindrecord_byte_image("other", True, prefix) print("Create Mindrecord Done, at {}".format(mindrecord_dir)) else: - print("IMAGE_DIR or ANNO_PATH not exits.") + print("image_dir or anno_path not exits.") if not args_opt.only_create_dataset: loss_scale = float(args_opt.loss_scale) @@ -143,7 +97,8 @@ def main(): dataset_size = dataset.get_dataset_size() print("Create dataset done!") - ssd = SSD300(backbone=ssd_mobilenet_v2(), config=config) + backbone = ssd_mobilenet_v2() + ssd = SSD300(backbone=backbone, config=config) net = SSDWithLossCell(ssd, config) init_net_param(net) @@ -157,12 +112,13 @@ def main(): param_dict = load_checkpoint(args_opt.pre_trained) load_param_into_net(net, param_dict) - lr = Tensor(get_lr(global_step=args_opt.pre_trained_epoch_size * dataset_size, - lr_init=0, lr_end=0, lr_max=args_opt.lr, - warmup_epochs=max(350 // 20, 1), - total_epochs=350, + lr = Tensor(get_lr(global_step=config.global_step, + lr_init=config.lr_init, lr_end=config.lr_end_rate * args_opt.lr, lr_max=args_opt.lr, + warmup_epochs=config.warmup_epochs, + total_epochs=args_opt.epoch_size, steps_per_epoch=dataset_size)) - opt = nn.Momentum(filter(lambda x: x.requires_grad, 
net.get_parameters()), lr, 0.9, 0.0001, loss_scale) + opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, + config.momentum, config.weight_decay, loss_scale) net = TrainingWrapper(net, opt, loss_scale) callback = [TimeMonitor(data_size=dataset_size), LossMonitor(), ckpoint_cb] diff --git a/example/vgg16_cifar10/README.md b/model_zoo/vgg16/README.md similarity index 93% rename from example/vgg16_cifar10/README.md rename to model_zoo/vgg16/README.md index 2c3de2eed9..53eb05f66d 100644 --- a/example/vgg16_cifar10/README.md +++ b/model_zoo/vgg16/README.md @@ -73,12 +73,13 @@ train_parallel1/log:epcoh: 2 step: 97, loss is 1.7133579 ### Training ``` usage: train.py [--device_target TARGET][--data_path DATA_PATH] - [--device_id DEVICE_ID] + [--device_id DEVICE_ID][--pre_trained PRE_TRAINED] parameters/options: --device_target the training backend type, default is Ascend. --data_path the storage path of dataset --device_id the device which used to train model. + --pre_trained the pretrained checkpoint file path. ``` @@ -98,7 +99,7 @@ parameters/options: ### Distribute Training ``` -Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH] +Usage: sh script/run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH] parameters/options: MINDSPORE_HCCL_CONFIG_PATH HCCL configuration file path. 
diff --git a/example/vgg16_cifar10/eval.py b/model_zoo/vgg16/eval.py similarity index 93% rename from example/vgg16_cifar10/eval.py rename to model_zoo/vgg16/eval.py index ec9fc607c2..8cdcc86031 100644 --- a/example/vgg16_cifar10/eval.py +++ b/model_zoo/vgg16/eval.py @@ -17,14 +17,15 @@ python eval.py --data_path=$DATA_HOME --device_id=$DEVICE_ID """ import argparse + import mindspore.nn as nn +from mindspore import context from mindspore.nn.optim.momentum import Momentum from mindspore.train.model import Model -from mindspore import context from mindspore.train.serialization import load_checkpoint, load_param_into_net -from mindspore.model_zoo.vgg import vgg16 -from config import cifar_cfg as cfg -import dataset +from src.config import cifar_cfg as cfg +from src.dataset import vgg_create_dataset +from src.vgg import vgg16 if __name__ == '__main__': parser = argparse.ArgumentParser(description='Cifar10 classification') @@ -47,6 +48,6 @@ if __name__ == '__main__': param_dict = load_checkpoint(args_opt.checkpoint_path) load_param_into_net(net, param_dict) net.set_train(False) - dataset = dataset.create_dataset(args_opt.data_path, 1, False) + dataset = vgg_create_dataset(args_opt.data_path, 1, False) res = model.eval(dataset) print("result: ", res) diff --git a/example/vgg16_cifar10/run_distribute_train.sh b/model_zoo/vgg16/scripts/run_distribute_train.sh similarity index 92% rename from example/vgg16_cifar10/run_distribute_train.sh rename to model_zoo/vgg16/scripts/run_distribute_train.sh index c9b8dfc48f..ca4c993ded 100755 --- a/example/vgg16_cifar10/run_distribute_train.sh +++ b/model_zoo/vgg16/scripts/run_distribute_train.sh @@ -15,39 +15,38 @@ # ============================================================================ if [ $# != 2 ] -then +then echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]" exit 1 fi if [ ! -f $1 ] -then +then echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" exit 1 -fi +fi if [ ! 
-d $2 ] -then +then echo "error: DATA_PATH=$2 is not a directory" exit 1 -fi +fi -ulimit -u unlimited export DEVICE_NUM=8 export RANK_SIZE=8 export MINDSPORE_HCCL_CONFIG_PATH=$1 -for((i=0; i<${DEVICE_NUM}; i++)) +for((i=0;i env.log python train.py --data_path=$2 --device_id=$i &> log & cd .. -done +done \ No newline at end of file diff --git a/model_zoo/vgg16/src/__init__.py b/model_zoo/vgg16/src/__init__.py new file mode 100644 index 0000000000..301ef9dcb7 --- /dev/null +++ b/model_zoo/vgg16/src/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the License); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# httpwww.apache.orglicensesLICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ diff --git a/example/vgg16_cifar10/config.py b/model_zoo/vgg16/src/config.py similarity index 100% rename from example/vgg16_cifar10/config.py rename to model_zoo/vgg16/src/config.py diff --git a/example/vgg16_cifar10/dataset.py b/model_zoo/vgg16/src/dataset.py similarity index 96% rename from example/vgg16_cifar10/dataset.py rename to model_zoo/vgg16/src/dataset.py index e8dfd777e6..b08659fb5e 100644 --- a/example/vgg16_cifar10/dataset.py +++ b/model_zoo/vgg16/src/dataset.py @@ -16,13 +16,15 @@ Data operations, will be used in train.py and eval.py """ import os + +import mindspore.common.dtype as mstype import mindspore.dataset as ds import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.vision.c_transforms as vision -import mindspore.common.dtype as mstype -from config import cifar_cfg as cfg +from .config import cifar_cfg as cfg + -def create_dataset(data_home, repeat_num=1, training=True): +def vgg_create_dataset(data_home, repeat_num=1, training=True): """Data operations.""" ds.config.set_seed(1) data_dir = os.path.join(data_home, "cifar-10-batches-bin") diff --git a/mindspore/model_zoo/vgg.py b/model_zoo/vgg16/src/vgg.py similarity index 100% rename from mindspore/model_zoo/vgg.py rename to model_zoo/vgg16/src/vgg.py diff --git a/example/vgg16_cifar10/train.py b/model_zoo/vgg16/train.py similarity index 87% rename from example/vgg16_cifar10/train.py rename to model_zoo/vgg16/train.py index 9993db706a..c582cdd679 100644 --- a/example/vgg16_cifar10/train.py +++ b/model_zoo/vgg16/train.py @@ -19,20 +19,25 @@ python train.py --data_path=$DATA_HOME --device_id=$DEVICE_ID import argparse import os import random + import numpy as np + import mindspore.nn as nn from mindspore import Tensor +from mindspore import context from mindspore.communication.management import init from mindspore.nn.optim.momentum import Momentum -from mindspore.train.model import 
Model, ParallelMode -from mindspore import context from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor -from mindspore.model_zoo.vgg import vgg16 -from dataset import create_dataset -from config import cifar_cfg as cfg +from mindspore.train.model import Model, ParallelMode +from mindspore.train.serialization import load_param_into_net, load_checkpoint +from src.config import cifar_cfg as cfg +from src.dataset import vgg_create_dataset +from src.vgg import vgg16 + random.seed(1) np.random.seed(1) + def lr_steps(global_step, lr_max=None, total_epochs=None, steps_per_epoch=None): """Set learning rate.""" lr_each_step = [] @@ -60,6 +65,7 @@ if __name__ == '__main__': help='device where the code will be implemented. (Default: Ascend)') parser.add_argument('--data_path', type=str, default='./cifar', help='path where the dataset is saved') parser.add_argument('--device_id', type=int, default=None, help='device id of GPU or Ascend. (Default: None)') + parser.add_argument('--pre_trained', type=str, default=None, help='the pretrained checkpoint file path.') args_opt = parser.parse_args() context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) @@ -72,12 +78,17 @@ if __name__ == '__main__': mirror_mean=True) init() - dataset = create_dataset(args_opt.data_path, cfg.epoch_size) + dataset = vgg_create_dataset(args_opt.data_path, cfg.epoch_size) batch_num = dataset.get_dataset_size() net = vgg16(num_classes=cfg.num_classes) + # pre_trained + if args_opt.pre_trained: + load_param_into_net(net, load_checkpoint(args_opt.pre_trained)) + lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num) - opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay) + opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, + weight_decay=cfg.weight_decay) loss = 
nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}, amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None) diff --git a/model_zoo/wide_and_deep/README.md b/model_zoo/wide_and_deep/README.md index 48e979815e..54367ef173 100644 --- a/model_zoo/wide_and_deep/README.md +++ b/model_zoo/wide_and_deep/README.md @@ -13,24 +13,28 @@ The Criteo datasets are used for model training and evaluation. The entire code structure is as following: ``` |--- wide_and_deep/ - train_and_test.py "Entrance of Wide&Deep model training and evaluation" - test.py "Entrance of Wide&Deep model evaluation" - train.py "Entrance of Wide&Deep model training" - train_and_test_multinpu.py "Entrance of Wide&Deep model data parallel training and evaluation" - |--- src/ "entrance of training and evaluation" - config.py "parameters configuration" - dataset.py "Dataset loader class" - process_data.py "process dataset" - preprocess_data.py "pre_process dataset" - WideDeep.py "Model structure" - callbacks.py "Callback class for training and evaluation" - metrics.py "Metric class" + train_and_eval.py "Entrance of Wide&Deep model training and evaluation" + eval.py "Entrance of Wide&Deep model evaluation" + train.py "Entrance of Wide&Deep model training" + train_and_eval_multinpu.py "Entrance of Wide&Deep model data parallel training and evaluation" + train_and_eval_auto_parallel.py + |--- src/ "Entrance of training and evaluation" + config.py "Parameters configuration" + dataset.py "Dataset loader class" + process_data.py "Process dataset" + preprocess_data.py "Pre_process dataset" + wide_and_deep.py "Model structure" + callbacks.py "Callback class for training and evaluation" + metrics.py "Metric class" + |--- script/ "Run shell dir" + run_multinpu_train.sh "Run data parallel" + run_auto_parallel_train.sh "Run auto parallel" ``` ### Train and evaluate model To train and evaluate the model, command as 
follows: ``` -python train_and_test.py +python train_and_eval.py ``` Arguments: * `--data_path`: This should be set to the same directory given to the data_download's data_dir argument. @@ -42,6 +46,7 @@ Arguments: * `--emb_dim`: The dense embedding dimension of sparse feature. * `--deep_layers_dim`: The dimension of all deep layers. * `--deep_layers_act`: The activation of all deep layers. + * `--dropout_flag`: Whether do dropout. * `--keep_prob`: The rate to keep in dropout layer. * `--ckpt_path`:The location of the checkpoint file. * `--eval_file_name` : Eval output file. @@ -61,6 +66,7 @@ Arguments: * `--emb_dim`: The dense embedding dimension of sparse feature. * `--deep_layers_dim`: The dimension of all deep layers. * `--deep_layers_act`: The activation of all deep layers. + * `--dropout_flag`: Whether do dropout. * `--keep_prob`: The rate to keep in dropout layer. * `--ckpt_path`:The location of the checkpoint file. * `--eval_file_name` : Eval output file. @@ -68,13 +74,17 @@ Arguments: To train the model in distributed, command as follows: ``` -# configure environment path, RANK_TABLE_FILE, RANK_SIZE, MINDSPORE_HCCL_CONFIG_PATH before training -bash run_multinpu_train.sh +# configure environment path before training +bash run_multinpu_train.sh RANK_SIZE EPOCHS DATASET RANK_TABLE_FILE +``` +``` +# configure environment path before training +bash run_auto_parallel_train.sh RANK_SIZE EPOCHS DATASET RANK_TABLE_FILE ``` To evaluate the model, command as follows: ``` -python test.py +python eval.py ``` Arguments: * `--data_path`: This should be set to the same directory given to the data_download's data_dir argument. 
diff --git a/model_zoo/wide_and_deep/test.py b/model_zoo/wide_and_deep/eval.py similarity index 100% rename from model_zoo/wide_and_deep/test.py rename to model_zoo/wide_and_deep/eval.py diff --git a/model_zoo/wide_and_deep/run_multinpu_train.sh b/model_zoo/wide_and_deep/run_multinpu_train.sh deleted file mode 100644 index db7823eed7..0000000000 --- a/model_zoo/wide_and_deep/run_multinpu_train.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -# bash run_multinpu_train.sh -execute_path=$(pwd) - -export RANK_TABLE_FILE=${execute_path}/rank_table_8p.json -export RANK_SIZE=8 -export MINDSPORE_HCCL_CONFIG_PATH=${execute_path}/rank_table_8p.json - -for((i=0;i<=7;i++)); -do - rm -rf ${execute_path}/device_$i/ - mkdir ${execute_path}/device_$i/ - cd ${execute_path}/device_$i/ || exit - export RANK_ID=$i - export DEVICE_ID=$i - pytest -s ${execute_path}/train_and_test_multinpu.py >train_deep$i.log 2>&1 & -done diff --git a/model_zoo/wide_and_deep/script/run_auto_parallel_train.sh b/model_zoo/wide_and_deep/script/run_auto_parallel_train.sh new file mode 100644 index 0000000000..9e9226a23a --- /dev/null +++ b/model_zoo/wide_and_deep/script/run_auto_parallel_train.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +# bash run_multinpu_train.sh +execute_path=$(pwd) +script_self=$(readlink -f "$0") +self_path=$(dirname "${script_self}") +export RANK_SIZE=$1 +export EPOCH_SIZE=$2 +export DATASET=$3 +export RANK_TABLE_FILE=$4 +export MINDSPORE_HCCL_CONFIG_PATH=$4 + +for((i=0;i<$RANK_SIZE;i++)); +do + rm -rf ${execute_path}/device_$i/ + mkdir ${execute_path}/device_$i/ + cd ${execute_path}/device_$i/ || exit + export RANK_ID=$i + export DEVICE_ID=$i + python -s ${self_path}/../train_and_eval_auto_parallel.py --data_path=$DATASET --epochs=$EPOCH_SIZE >train_deep$i.log 2>&1 & +done diff --git a/model_zoo/wide_and_deep/script/run_multigpu_train.sh b/model_zoo/wide_and_deep/script/run_multigpu_train.sh new file mode 100644 index 0000000000..987eeaa65e --- /dev/null +++ b/model_zoo/wide_and_deep/script/run_multigpu_train.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +# bash run_multigpu_train.sh +script_self=$(readlink -f "$0") +self_path=$(dirname "${script_self}") +RANK_SIZE=$1 +EPOCH_SIZE=$2 +DATASET=$3 + +mpirun --allow-run-as-root -n $RANK_SIZE \ + python -s ${self_path}/../train_and_eval_distribute.py \ + --device_target="GPU" \ + --data_path=$DATASET \ + --epochs=$EPOCH_SIZE > log.txt 2>&1 & diff --git a/model_zoo/wide_and_deep/script/run_multinpu_train.sh b/model_zoo/wide_and_deep/script/run_multinpu_train.sh new file mode 100644 index 0000000000..4b642bc196 --- /dev/null +++ b/model_zoo/wide_and_deep/script/run_multinpu_train.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +# bash run_multinpu_train.sh +execute_path=$(pwd) +script_self=$(readlink -f "$0") +self_path=$(dirname "${script_self}") +export RANK_SIZE=$1 +export EPOCH_SIZE=$2 +export DATASET=$3 +export RANK_TABLE_FILE=$4 +export MINDSPORE_HCCL_CONFIG_PATH=$4 + +for((i=0;i<$RANK_SIZE;i++)); +do + rm -rf ${execute_path}/device_$i/ + mkdir ${execute_path}/device_$i/ + cd ${execute_path}/device_$i/ || exit + export RANK_ID=$i + export DEVICE_ID=$i + python -s ${self_path}/../train_and_eval_distribute.py --data_path=$DATASET --epochs=$EPOCH_SIZE >train_deep$i.log 2>&1 & +done diff --git a/model_zoo/wide_and_deep/src/callbacks.py b/model_zoo/wide_and_deep/src/callbacks.py index 6e3bb75aae..4c2f9c700e 100644 --- a/model_zoo/wide_and_deep/src/callbacks.py +++ b/model_zoo/wide_and_deep/src/callbacks.py @@ -17,6 +17,7 @@ callbacks import time from mindspore.train.callback import Callback from mindspore import context +from mindspore.train import ParallelMode def add_write(file_path, out_str): """ @@ -85,14 +86,17 @@ class EvalCallBack(Callback): self.aucMetric = auc_metric self.aucMetric.clear() self.eval_file_name = config.eval_file_name + self.eval_values = [] - def epoch_name(self, run_context): + def epoch_end(self, run_context): """ - epoch name + epoch end """ self.aucMetric.clear() - context.set_auto_parallel_context(strategy_ckpt_save_file="", - strategy_ckpt_load_file="./strategy_train.ckpt") + parallel_mode = context.get_auto_parallel_context("parallel_mode") + if parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL): + context.set_auto_parallel_context(strategy_ckpt_save_file="", + strategy_ckpt_load_file="./strategy_train.ckpt") start_time = time.time() out = self.model.eval(self.eval_dataset) end_time = time.time() @@ -101,4 +105,5 @@ class EvalCallBack(Callback): time_str = time.strftime("%Y-%m-%d %H:%M%S", time.localtime()) out_str = "{}==== EvalCallBack 
model.eval(): {}; eval_time: {}s".format(time_str, out.values(), eval_time) print(out_str) + self.eval_values = out.values() add_write(self.eval_file_name, out_str) diff --git a/model_zoo/wide_and_deep/src/config.py b/model_zoo/wide_and_deep/src/config.py index 3559e8bf23..f8a2c84743 100644 --- a/model_zoo/wide_and_deep/src/config.py +++ b/model_zoo/wide_and_deep/src/config.py @@ -20,17 +20,20 @@ def argparse_init(): argparse_init """ parser = argparse.ArgumentParser(description='WideDeep') + parser.add_argument("--device_target", type=str, default="Ascend", choices=["Ascend", "GPU"], + help="device where the code will be implemented. (Default: Ascend)") parser.add_argument("--data_path", type=str, default="./test_raw_data/") parser.add_argument("--epochs", type=int, default=15) + parser.add_argument("--full_batch", type=bool, default=False) parser.add_argument("--batch_size", type=int, default=16000) parser.add_argument("--eval_batch_size", type=int, default=16000) parser.add_argument("--field_size", type=int, default=39) - parser.add_argument("--vocab_size", type=int, default=184965) + parser.add_argument("--vocab_size", type=int, default=200000) parser.add_argument("--emb_dim", type=int, default=80) parser.add_argument("--deep_layer_dim", type=int, nargs='+', default=[1024, 512, 256, 128]) parser.add_argument("--deep_layer_act", type=str, default='relu') parser.add_argument("--keep_prob", type=float, default=1.0) - + parser.add_argument("--dropout_flag", type=int, default=0) parser.add_argument("--output_path", type=str, default="./output/") parser.add_argument("--ckpt_path", type=str, default="./checkpoints/") parser.add_argument("--eval_file_name", type=str, default="eval.log") @@ -43,12 +46,14 @@ class WideDeepConfig(): WideDeepConfig """ def __init__(self): + self.device_target = "Ascend" self.data_path = "./test_raw_data/" + self.full_batch = False self.epochs = 15 self.batch_size = 16000 self.eval_batch_size = 16000 self.field_size = 39 - self.vocab_size = 
184965 + self.vocab_size = 200000 self.emb_dim = 80 self.deep_layer_dim = [1024, 512, 256, 128] self.deep_layer_act = 'relu' @@ -70,8 +75,10 @@ class WideDeepConfig(): """ parser = argparse_init() args, _ = parser.parse_known_args() + self.device_target = args.device_target self.data_path = args.data_path self.epochs = args.epochs + self.full_batch = args.full_batch self.batch_size = args.batch_size self.eval_batch_size = args.eval_batch_size self.field_size = args.field_size @@ -83,7 +90,7 @@ class WideDeepConfig(): self.weight_bias_init = ['normal', 'normal'] self.emb_init = 'normal' self.init_args = [-0.01, 0.01] - self.dropout_flag = False + self.dropout_flag = bool(args.dropout_flag) self.l2_coef = 8e-5 self.output_path = args.output_path diff --git a/model_zoo/wide_and_deep/src/datasets.py b/model_zoo/wide_and_deep/src/datasets.py index 775dd7ca54..0ec4f327dd 100644 --- a/model_zoo/wide_and_deep/src/datasets.py +++ b/model_zoo/wide_and_deep/src/datasets.py @@ -17,11 +17,20 @@ import os import math +from enum import Enum import numpy as np import pandas as pd import mindspore.dataset.engine as de import mindspore.common.dtype as mstype +class DataType(Enum): + """ + Enumerate supported dataset format. + """ + MINDRECORD = 1 + TFRECORD = 2 + H5 = 3 + class H5Dataset(): """ @@ -193,15 +202,60 @@ def _get_tf_dataset(data_dir, train_mode=True, epochs=1, batch_size=1000, ds = ds.repeat(epochs) return ds +def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=1000, + line_per_sample=1000, rank_size=None, rank_id=None): + """ + Get dataset with mindrecord format. + + Args: + directory (str): Dataset directory. + train_mode (bool): Whether dataset is use for train or eval (default=True). + epochs (int): Dataset epoch size (default=1). + batch_size (int): Dataset batch size (default=1000). + line_per_sample (int): The number of sample per line (default=1000). + rank_size (int): The number of device, not necessary for single device (default=None). 
+ rank_id (int): Id of device, not necessary for single device (default=None). + + Returns: + Dataset. + """ + file_prefix_name = 'train_input_part.mindrecord' if train_mode else 'test_input_part.mindrecord' + file_suffix_name = '00' if train_mode else '0' + shuffle = train_mode + + if rank_size is not None and rank_id is not None: + ds = de.MindDataset(os.path.join(directory, file_prefix_name + file_suffix_name), + columns_list=['feat_ids', 'feat_vals', 'label'], + num_shards=rank_size, shard_id=rank_id, shuffle=shuffle, + num_parallel_workers=8) + else: + ds = de.MindDataset(os.path.join(directory, file_prefix_name + file_suffix_name), + columns_list=['feat_ids', 'feat_vals', 'label'], + shuffle=shuffle, num_parallel_workers=8) + ds = ds.batch(int(batch_size / line_per_sample), drop_remainder=True) + ds = ds.map(operations=(lambda x, y, z: (np.array(x).flatten().reshape(batch_size, 39), + np.array(y).flatten().reshape(batch_size, 39), + np.array(z).flatten().reshape(batch_size, 1))), + input_columns=['feat_ids', 'feat_vals', 'label'], + columns_order=['feat_ids', 'feat_vals', 'label'], + num_parallel_workers=8) + ds = ds.repeat(epochs) + return ds + def create_dataset(data_dir, train_mode=True, epochs=1, batch_size=1000, - is_tf_dataset=True, line_per_sample=1000, rank_size=None, rank_id=None): + data_type=DataType.TFRECORD, line_per_sample=1000, rank_size=None, rank_id=None): """ create_dataset """ - if is_tf_dataset: + if data_type == DataType.TFRECORD: return _get_tf_dataset(data_dir, train_mode, epochs, batch_size, line_per_sample, rank_size=rank_size, rank_id=rank_id) + if data_type == DataType.MINDRECORD: + return _get_mindrecord_dataset(data_dir, train_mode, epochs, + batch_size, line_per_sample, + rank_size, rank_id) + if rank_size > 1: raise RuntimeError("please use tfrecord dataset.") return _get_h5_dataset(data_dir, train_mode, epochs, batch_size) diff --git a/model_zoo/wide_and_deep/src/metrics.py b/model_zoo/wide_and_deep/src/metrics.py index 
277d6744dc..c89e948405 100644 --- a/model_zoo/wide_and_deep/src/metrics.py +++ b/model_zoo/wide_and_deep/src/metrics.py @@ -17,8 +17,10 @@ Area under cure metric """ -from mindspore.nn.metrics import Metric from sklearn.metrics import roc_auc_score +from mindspore import context +from mindspore.nn.metrics import Metric +from mindspore.communication.management import get_rank, get_group_size class AUCMetric(Metric): """ @@ -28,6 +30,7 @@ class AUCMetric(Metric): def __init__(self): super(AUCMetric, self).__init__() self.clear() + self.full_batch = context.get_auto_parallel_context("full_batch") def clear(self): """Clear the internal evaluation result.""" @@ -35,10 +38,17 @@ class AUCMetric(Metric): self.pred_probs = [] def update(self, *inputs): # inputs - all_predict = inputs[1].asnumpy() # predict - all_label = inputs[2].asnumpy() # label - self.true_labels.extend(all_label.flatten().tolist()) - self.pred_probs.extend(all_predict.flatten().tolist()) + """Update list of predicts and labels.""" + all_predict = inputs[1].asnumpy().flatten().tolist() # predict + all_label = inputs[2].asnumpy().flatten().tolist() # label + self.pred_probs.extend(all_predict) + if self.full_batch: + rank_id = get_rank() + group_size = get_group_size() + gap = len(all_label) // group_size + self.true_labels.extend(all_label[rank_id*gap: (rank_id+1)*gap]) + else: + self.true_labels.extend(all_label) def eval(self): if len(self.true_labels) != len(self.pred_probs): diff --git a/model_zoo/wide_and_deep/src/process_data.py b/model_zoo/wide_and_deep/src/process_data.py index 37b38b0bbb..acf618297f 100644 --- a/model_zoo/wide_and_deep/src/process_data.py +++ b/model_zoo/wide_and_deep/src/process_data.py @@ -248,8 +248,8 @@ def random_split_trans2h5(in_file_path, output_path, criteo_stats, part_rows=200 if __name__ == "__main__": parser = argparse.ArgumentParser(description="Get and Process datasets") - parser.add_argument("--raw_data_path", default="/opt/npu/data/origin_criteo_data/", 
help="The path to save dataset") - parser.add_argument("--output_path", default="/opt/npu/data/origin_criteo_data/h5_data/", + parser.add_argument("--raw_data_path", default="./raw_data", help="The path to save dataset") + parser.add_argument("--output_path", default="./output", help="The path to save dataset") args, _ = parser.parse_known_args() base_path = args.raw_data_path diff --git a/model_zoo/wide_and_deep/src/wide_and_deep.py b/model_zoo/wide_and_deep/src/wide_and_deep.py index 7772431ab3..16102039a8 100644 --- a/model_zoo/wide_and_deep/src/wide_and_deep.py +++ b/model_zoo/wide_and_deep/src/wide_and_deep.py @@ -14,16 +14,20 @@ # ============================================================================ """wide and deep model""" from mindspore import nn -from mindspore import Tensor, Parameter, ParameterTuple +from mindspore import Parameter, ParameterTuple import mindspore.common.dtype as mstype from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore.ops import operations as P -# from mindspore.nn import Dropout +from mindspore.nn import Dropout from mindspore.nn.optim import Adam, FTRL # from mindspore.nn.metrics import Metric from mindspore.common.initializer import Uniform, initializer # from mindspore.train.callback import ModelCheckpoint, CheckpointConfig +from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean +from mindspore.train.parallel_utils import ParallelMode +from mindspore.nn.wrap.grad_reducer import DistributedGradReducer +from mindspore.communication.management import get_group_size import numpy as np np_type = np.float32 @@ -42,8 +46,7 @@ def init_method(method, shape, name, max_val=1.0): elif method == 'zero': params = Parameter(initializer("zeros", shape, ms_type), name=name) elif method == "normal": - params = Parameter(Tensor(np.random.normal( - loc=0.0, scale=0.01, size=shape).astype(dtype=np_type)), name=name) + params = Parameter(initializer("normal", 
shape, ms_type), name=name) return params @@ -66,8 +69,8 @@ def init_var_dict(init_args, in_vars): var_map[key] = Parameter(initializer( "zeros", shape, ms_type), name=key) elif method == 'normal': - var_map[key] = Parameter(Tensor(np.random.normal( - loc=0.0, scale=0.01, size=shape).astype(dtype=np_type)), name=key) + var_map[key] = Parameter(initializer( + "normal", shape, ms_type), name=key) return var_map @@ -79,7 +82,7 @@ class DenseLayer(nn.Cell): """ def __init__(self, input_dim, output_dim, weight_bias_init, act_str, - keep_prob=0.7, scale_coef=1.0, convert_dtype=True): + keep_prob=0.7, use_activation=True, convert_dtype=True, drop_out=False): super(DenseLayer, self).__init__() weight_init, bias_init = weight_bias_init self.weight = init_method( @@ -89,11 +92,10 @@ class DenseLayer(nn.Cell): self.matmul = P.MatMul(transpose_b=False) self.bias_add = P.BiasAdd() self.cast = P.Cast() - #self.dropout = Dropout(keep_prob=keep_prob) - self.mul = P.Mul() - self.realDiv = P.RealDiv() - self.scale_coef = scale_coef + self.dropout = Dropout(keep_prob=keep_prob) + self.use_activation = use_activation self.convert_dtype = convert_dtype + self.drop_out = drop_out def _init_activation(self, act_str): act_str = act_str.lower() @@ -106,20 +108,23 @@ class DenseLayer(nn.Cell): return act_func def construct(self, x): - x = self.act_func(x) - # if self.training: - # x = self.dropout(x) - x = self.mul(x, self.scale_coef) + if self.training and self.drop_out: + x = self.dropout(x) if self.convert_dtype: x = self.cast(x, mstype.float16) weight = self.cast(self.weight, mstype.float16) + bias = self.cast(self.bias, mstype.float16) wx = self.matmul(x, weight) + wx = self.bias_add(wx, bias) + if self.use_activation: + wx = self.act_func(wx) wx = self.cast(wx, mstype.float32) else: wx = self.matmul(x, self.weight) - wx = self.realDiv(wx, self.scale_coef) - output = self.bias_add(wx, self.bias) - return output + wx = self.bias_add(wx, self.bias) + if self.use_activation: + wx = 
self.act_func(wx) + return wx class WideDeepModel(nn.Cell): @@ -132,6 +137,9 @@ class WideDeepModel(nn.Cell): def __init__(self, config): super(WideDeepModel, self).__init__() self.batch_size = config.batch_size + parallel_mode = _get_parallel_mode() + if parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL): + self.batch_size = self.batch_size * get_group_size() self.field_size = config.field_size self.vocab_size = config.vocab_size self.emb_dim = config.emb_dim @@ -157,23 +165,28 @@ class WideDeepModel(nn.Cell): self.dense_layer_1 = DenseLayer(self.all_dim_list[0], self.all_dim_list[1], self.weight_bias_init, - self.deep_layer_act, convert_dtype=True) + self.deep_layer_act, + convert_dtype=True, drop_out=config.dropout_flag) self.dense_layer_2 = DenseLayer(self.all_dim_list[1], self.all_dim_list[2], self.weight_bias_init, - self.deep_layer_act, convert_dtype=True) + self.deep_layer_act, + convert_dtype=True, drop_out=config.dropout_flag) self.dense_layer_3 = DenseLayer(self.all_dim_list[2], self.all_dim_list[3], self.weight_bias_init, - self.deep_layer_act, convert_dtype=True) + self.deep_layer_act, + convert_dtype=True, drop_out=config.dropout_flag) self.dense_layer_4 = DenseLayer(self.all_dim_list[3], self.all_dim_list[4], self.weight_bias_init, - self.deep_layer_act, convert_dtype=True) + self.deep_layer_act, + convert_dtype=True, drop_out=config.dropout_flag) self.dense_layer_5 = DenseLayer(self.all_dim_list[4], self.all_dim_list[5], self.weight_bias_init, - self.deep_layer_act, convert_dtype=True) + self.deep_layer_act, + use_activation=False, convert_dtype=True, drop_out=config.dropout_flag) self.gather_v2 = P.GatherV2() self.mul = P.Mul() @@ -258,7 +271,7 @@ class TrainStepWrap(nn.Cell): sens (Number): The adjust parameter. 
Default: 1000.0 """ - def __init__(self, network, sens=1000.0): + def __init__(self, network, sens=1024.0): super(TrainStepWrap, self).__init__() self.network = network self.network.set_train() @@ -285,6 +298,18 @@ class TrainStepWrap(nn.Cell): self.loss_net_w = IthOutputCell(network, output_index=0) self.loss_net_d = IthOutputCell(network, output_index=1) + self.reducer_flag = False + self.grad_reducer_w = None + self.grad_reducer_d = None + parallel_mode = _get_parallel_mode() + self.reducer_flag = parallel_mode in (ParallelMode.DATA_PARALLEL, + ParallelMode.HYBRID_PARALLEL) + if self.reducer_flag: + mean = _get_mirror_mean() + degree = _get_device_num() + self.grad_reducer_w = DistributedGradReducer(self.optimizer_w.parameters, mean, degree) + self.grad_reducer_d = DistributedGradReducer(self.optimizer_d.parameters, mean, degree) + def construct(self, batch_ids, batch_wts, label): weights_w = self.weights_w weights_d = self.weights_d @@ -295,6 +320,9 @@ class TrainStepWrap(nn.Cell): label, sens_w) grads_d = self.grad_d(self.loss_net_d, weights_d)(batch_ids, batch_wts, label, sens_d) + if self.reducer_flag: + grads_w = self.grad_reducer_w(grads_w) + grads_d = self.grad_reducer_d(grads_d) return F.depend(loss_w, self.optimizer_w(grads_w)), F.depend(loss_d, self.optimizer_d(grads_d)) diff --git a/model_zoo/wide_and_deep/train.py b/model_zoo/wide_and_deep/train.py index b3996e01cb..ac9750c547 100644 --- a/model_zoo/wide_and_deep/train.py +++ b/model_zoo/wide_and_deep/train.py @@ -14,7 +14,7 @@ """ test_training """ import os from mindspore import Model, context -from mindspore.train.callback import ModelCheckpoint, CheckpointConfig +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel from src.callbacks import LossCallBack @@ -75,7 +75,7 @@ def test_train(configure): ckptconfig = CheckpointConfig(save_checkpoint_steps=1, 
keep_checkpoint_max=5) ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', directory=configure.ckpt_path, config=ckptconfig) - model.train(epochs, ds_train, callbacks=[callback, ckpoint_cb]) + model.train(epochs, ds_train, callbacks=[TimeMonitor(ds_train.get_dataset_size()), callback, ckpoint_cb]) if __name__ == "__main__": diff --git a/model_zoo/wide_and_deep/train_and_test.py b/model_zoo/wide_and_deep/train_and_eval.py similarity index 100% rename from model_zoo/wide_and_deep/train_and_test.py rename to model_zoo/wide_and_deep/train_and_eval.py diff --git a/model_zoo/wide_and_deep/train_and_eval_auto_parallel.py b/model_zoo/wide_and_deep/train_and_eval_auto_parallel.py new file mode 100644 index 0000000000..780c95540c --- /dev/null +++ b/model_zoo/wide_and_deep/train_and_eval_auto_parallel.py @@ -0,0 +1,119 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""train_multinpu.""" + + +import os +import sys +import mindspore.dataset.engine as de +from mindspore import Model, context +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor +from mindspore.train import ParallelMode +from mindspore.communication.management import get_rank, get_group_size, init +from mindspore.parallel import _cost_model_context as cost_model_context +from mindspore.nn.wrap.cell_wrapper import VirtualDatasetCellTriple + +from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel +from src.callbacks import LossCallBack, EvalCallBack +from src.datasets import create_dataset +from src.metrics import AUCMetric +from src.config import WideDeepConfig + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) +context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, mirror_mean=True) +cost_model_context.set_cost_model_context(multi_subgraphs=True) +init() + + + +def get_WideDeep_net(config): + WideDeep_net = WideDeepModel(config) + loss_net = NetWithLossClass(WideDeep_net, config) + loss_net = VirtualDatasetCellTriple(loss_net) + train_net = TrainStepWrap(loss_net) + eval_net = PredictWithSigmoid(WideDeep_net) + eval_net = VirtualDatasetCellTriple(eval_net) + return train_net, eval_net + + +class ModelBuilder(): + """ + ModelBuilder + """ + def __init__(self): + pass + + def get_hook(self): + pass + + def get_train_hook(self): + hooks = [] + callback = LossCallBack() + hooks.append(callback) + if int(os.getenv('DEVICE_ID')) == 0: + pass + return hooks + + def get_net(self, config): + return get_WideDeep_net(config) + + +def train_and_eval(config): + """ + test_train_eval + """ + data_path = config.data_path + batch_size = config.batch_size + epochs = config.epochs + 
print("epochs is {}".format(epochs)) + if config.full_batch: + context.set_auto_parallel_context(full_batch=True) + de.config.set_seed(1) + ds_train = create_dataset(data_path, train_mode=True, epochs=epochs, + batch_size=batch_size*get_group_size()) + ds_eval = create_dataset(data_path, train_mode=False, epochs=epochs + 1, + batch_size=batch_size*get_group_size()) + else: + ds_train = create_dataset(data_path, train_mode=True, epochs=epochs, + batch_size=batch_size, rank_id=get_rank(), rank_size=get_group_size()) + ds_eval = create_dataset(data_path, train_mode=False, epochs=epochs + 1, + batch_size=batch_size, rank_id=get_rank(), rank_size=get_group_size()) + print("ds_train.size: {}".format(ds_train.get_dataset_size())) + print("ds_eval.size: {}".format(ds_eval.get_dataset_size())) + + net_builder = ModelBuilder() + + train_net, eval_net = net_builder.get_net(config) + train_net.set_train() + auc_metric = AUCMetric() + + model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric}) + + eval_callback = EvalCallBack(model, ds_eval, auc_metric, config) + + callback = LossCallBack(config=config) + ckptconfig = CheckpointConfig(save_checkpoint_steps=ds_train.get_dataset_size(), keep_checkpoint_max=5) + ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', + directory=config.ckpt_path, config=ckptconfig) + context.set_auto_parallel_context(strategy_ckpt_save_file="./strategy_train.ckpt") + model.train(epochs, ds_train, + callbacks=[TimeMonitor(ds_train.get_dataset_size()), eval_callback, callback, ckpoint_cb]) + + +if __name__ == "__main__": + wide_deep_config = WideDeepConfig() + wide_deep_config.argparse_init() + train_and_eval(wide_deep_config) diff --git a/model_zoo/wide_and_deep/train_and_eval_distribute.py b/model_zoo/wide_and_deep/train_and_eval_distribute.py new file mode 100644 index 0000000000..db98bacfec --- /dev/null +++ b/model_zoo/wide_and_deep/train_and_eval_distribute.py @@ -0,0 +1,113 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""train_multinpu.""" + + +import os +import sys +import numpy as np +from mindspore import Model, context +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor +from mindspore.train import ParallelMode +from mindspore.communication.management import get_rank, get_group_size, init + +from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel +from src.callbacks import LossCallBack, EvalCallBack +from src.datasets import create_dataset +from src.metrics import AUCMetric +from src.config import WideDeepConfig + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +def get_WideDeep_net(config): + WideDeep_net = WideDeepModel(config) + loss_net = NetWithLossClass(WideDeep_net, config) + train_net = TrainStepWrap(loss_net) + eval_net = PredictWithSigmoid(WideDeep_net) + return train_net, eval_net + + +class ModelBuilder(): + """ + ModelBuilder + """ + def __init__(self): + pass + + def get_hook(self): + pass + + def get_train_hook(self): + hooks = [] + callback = LossCallBack() + hooks.append(callback) + if int(os.getenv('DEVICE_ID')) == 0: + pass + return hooks + + def get_net(self, config): + return get_WideDeep_net(config) + + +def train_and_eval(config): + """ + test_train_eval + """ + np.random.seed(1000) + data_path = config.data_path + batch_size 
= config.batch_size + epochs = config.epochs + print("epochs is {}".format(epochs)) + ds_train = create_dataset(data_path, train_mode=True, epochs=epochs, + batch_size=batch_size, rank_id=get_rank(), rank_size=get_group_size()) + ds_eval = create_dataset(data_path, train_mode=False, epochs=epochs + 1, + batch_size=batch_size, rank_id=get_rank(), rank_size=get_group_size()) + print("ds_train.size: {}".format(ds_train.get_dataset_size())) + print("ds_eval.size: {}".format(ds_eval.get_dataset_size())) + + net_builder = ModelBuilder() + + train_net, eval_net = net_builder.get_net(config) + train_net.set_train() + auc_metric = AUCMetric() + + model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric}) + + eval_callback = EvalCallBack(model, ds_eval, auc_metric, config) + + callback = LossCallBack(config=config) + ckptconfig = CheckpointConfig(save_checkpoint_steps=ds_train.get_dataset_size(), keep_checkpoint_max=5) + ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', + directory=config.ckpt_path, config=ckptconfig) + out = model.eval(ds_eval) + print("=====" * 5 + "model.eval() initialized: {}".format(out)) + model.train(epochs, ds_train, + callbacks=[TimeMonitor(ds_train.get_dataset_size()), eval_callback, callback, ckpoint_cb]) + + +if __name__ == "__main__": + wide_deep_config = WideDeepConfig() + wide_deep_config.argparse_init() + + context.set_context(mode=context.GRAPH_MODE, device_target=wide_deep_config.device_target, save_graphs=True) + if wide_deep_config.device_target == "Ascend": + init("hccl") + elif wide_deep_config.device_target == "GPU": + init("nccl") + context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, + device_num=get_group_size()) + + train_and_eval(wide_deep_config) diff --git a/example/yolov3_coco2017/README.md b/model_zoo/yolov3/README.md similarity index 100% rename from example/yolov3_coco2017/README.md rename to model_zoo/yolov3/README.md diff --git a/example/yolov3_coco2017/eval.py 
b/model_zoo/yolov3/eval.py similarity index 92% rename from example/yolov3_coco2017/eval.py rename to model_zoo/yolov3/eval.py index 6e6d358248..65dc408a15 100644 --- a/example/yolov3_coco2017/eval.py +++ b/model_zoo/yolov3/eval.py @@ -19,10 +19,10 @@ import argparse import time from mindspore import context, Tensor from mindspore.train.serialization import load_checkpoint, load_param_into_net -from mindspore.model_zoo.yolov3 import yolov3_resnet18, YoloWithEval -from dataset import create_yolo_dataset, data_to_mindrecord_byte_image -from config import ConfigYOLOV3ResNet18 -from util import metrics +from src.yolov3 import yolov3_resnet18, YoloWithEval +from src.dataset import create_yolo_dataset, data_to_mindrecord_byte_image +from src.config import ConfigYOLOV3ResNet18 +from src.utils import metrics def yolo_eval(dataset_path, ckpt_path): """Yolov3 evaluation.""" @@ -88,15 +88,15 @@ if __name__ == '__main__': if not os.path.isdir(args_opt.mindrecord_dir): os.makedirs(args_opt.mindrecord_dir) - prefix = "yolo.mindrecord" - mindrecord_file = os.path.join(args_opt.mindrecord_dir, prefix + "0") + yolo_prefix = "yolo.mindrecord" + mindrecord_file = os.path.join(args_opt.mindrecord_dir, yolo_prefix + "0") if not os.path.exists(mindrecord_file): if os.path.isdir(args_opt.image_dir) and os.path.exists(args_opt.anno_path): print("Create Mindrecord") data_to_mindrecord_byte_image(args_opt.image_dir, args_opt.anno_path, args_opt.mindrecord_dir, - prefix=prefix, + prefix=yolo_prefix, file_num=8) print("Create Mindrecord Done, at {}".format(args_opt.mindrecord_dir)) else: diff --git a/example/yolov3_coco2017/run_distribute_train.sh b/model_zoo/yolov3/scripts/run_distribute_train.sh similarity index 94% rename from example/yolov3_coco2017/run_distribute_train.sh rename to model_zoo/yolov3/scripts/run_distribute_train.sh index 0b764419d2..eeda5077e9 100644 --- a/example/yolov3_coco2017/run_distribute_train.sh +++ b/model_zoo/yolov3/scripts/run_distribute_train.sh @@ -45,6 +45,9 
@@ echo "After running the scipt, the network runs in the background. The log will export MINDSPORE_HCCL_CONFIG_PATH=$6 export RANK_SIZE=$1 +BASE_PATH=$(cd "`dirname $0`" || exit; pwd) +cd $BASE_PATH/../ || exit + for((i=0;i= 1.17.0 +numpy >= 1.17.0, <= 1.17.5 protobuf >= 3.8.0 asttokens >= 1.1.13 pillow >= 6.2.0 @@ -10,4 +10,6 @@ wheel >= 0.32.0 decorator >= 4.4.0 setuptools >= 40.8.0 matplotlib >= 3.1.3 # for ut test -opencv-python >= 4.2.0.32 # for ut test +opencv-python >= 4.1.2.30 # for ut test +sklearn >= 0.0 # for st test +pandas >= 1.0.2 # for ut test \ No newline at end of file diff --git a/serving/CMakeLists.txt b/serving/CMakeLists.txt new file mode 100644 index 0000000000..3c1c08ece0 --- /dev/null +++ b/serving/CMakeLists.txt @@ -0,0 +1,69 @@ +find_package(Threads REQUIRED) + +# This branch assumes that gRPC and all its dependencies are already installed +# on this system, so they can be located by find_package(). + +# Find Protobuf installation +# Looks for protobuf-config.cmake file installed by Protobuf's cmake installation. + +#set(protobuf_MODULE_COMPATIBLE TRUE) +#find_package(Protobuf CONFIG REQUIRED) +#message(STATUS "Using protobuf ${protobuf_VERSION}") +add_library(protobuf::libprotobuf ALIAS protobuf::protobuf) +add_executable(protobuf::libprotoc ALIAS protobuf::protoc) + +set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf) +set(_REFLECTION gRPC::grpc++_reflection) +if(CMAKE_CROSSCOMPILING) + find_program(_PROTOBUF_PROTOC protoc) +else() + set(_PROTOBUF_PROTOC $) +endif() + +# Find gRPC installation +# Looks for gRPCConfig.cmake file installed by gRPC's cmake installation. 
+find_package(gRPC CONFIG REQUIRED) +message(STATUS "Using gRPC ${gRPC_VERSION}") + +set(_GRPC_GRPCPP gRPC::grpc++) +if(CMAKE_CROSSCOMPILING) + find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) +else() + set(_GRPC_CPP_PLUGIN_EXECUTABLE $) +endif() + +# Proto file +get_filename_component(hw_proto "ms_service.proto" ABSOLUTE) +get_filename_component(hw_proto_path "${hw_proto}" PATH) + +# Generated sources +set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/ms_service.pb.cc") +set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/ms_service.pb.h") +set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/ms_service.grpc.pb.cc") +set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/ms_service.grpc.pb.h") +add_custom_command( + OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}" + COMMAND ${_PROTOBUF_PROTOC} + ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" + --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" + -I "${hw_proto_path}" + --plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}" + "${hw_proto}" + DEPENDS "${hw_proto}") + +# Include generated *.pb.h files +include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/core" + "${PROJECT_SOURCE_DIR}/mindspore/ccsrc") +file(GLOB_RECURSE CORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "core/*.cc" "core/util/*.cc" "core/version_control/*.cc") + +list(APPEND SERVING_SRC "main.cc" ${hw_proto_srcs} ${hw_grpc_srcs} ${CORE_SRC_LIST}) + +include_directories(${CMAKE_BINARY_DIR}) +add_executable(ms_serving ${SERVING_SRC}) +target_link_libraries(ms_serving inference mindspore_gvar) +target_link_libraries(ms_serving ${_REFLECTION} ${_GRPC_GRPCPP} ${_PROTOBUF_LIBPROTOBUF} pthread) +if (ENABLE_D) + add_compile_definitions(ENABLE_D) + target_link_libraries(ms_serving ${RUNTIME_LIB}) +endif() diff --git a/serving/README.en.md b/serving/README.en.md new file mode 100644 index 0000000000..830b94537a --- /dev/null +++ b/serving/README.en.md @@ -0,0 +1,36 @@ +# serving + +#### 
Description +A flexible, high-performance serving system for deep learning models + +#### Software Architecture +Software architecture description + +#### Installation + +1. xxxx +2. xxxx +3. xxxx + +#### Instructions + +1. xxxx +2. xxxx +3. xxxx + +#### Contribution + +1. Fork the repository +2. Create Feat_xxx branch +3. Commit your code +4. Create Pull Request + + +#### Gitee Feature + +1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md +2. Gitee blog [blog.gitee.com](https://blog.gitee.com) +3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) +4. The most valuable open source project [GVP](https://gitee.com/gvp) +5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) +6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/serving/README.md b/serving/README.md new file mode 100644 index 0000000000..b26b9a6887 --- /dev/null +++ b/serving/README.md @@ -0,0 +1,37 @@ +# serving + +#### 介绍 +A flexible, high-performance serving system for deep learning models + +#### 软件架构 +软件架构说明 + + +#### 安装教程 + +1. xxxx +2. xxxx +3. xxxx + +#### 使用说明 + +1. xxxx +2. xxxx +3. xxxx + +#### 参与贡献 + +1. Fork 本仓库 +2. 新建 Feat_xxx 分支 +3. 提交代码 +4. 新建 Pull Request + + +#### 码云特技 + +1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md +2. 码云官方博客 [blog.gitee.com](https://blog.gitee.com) +3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解码云上的优秀开源项目 +4. [GVP](https://gitee.com/gvp) 全称是码云最有价值开源项目,是码云综合评定出的优秀开源项目 +5. 码云官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) +6. 
码云封面人物是一档用来展示码云会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/serving/core/server.cc b/serving/core/server.cc new file mode 100644 index 0000000000..add9d16bee --- /dev/null +++ b/serving/core/server.cc @@ -0,0 +1,277 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "core/server.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mindspore/ccsrc/utils/log_adapter.h" +#include "serving/ms_service.grpc.pb.h" +#include "core/util/option_parser.h" +#include "core/version_control/version_controller.h" +#include "mindspore/ccsrc/utils/context/ms_context.h" +#include "core/util/file_system_operation.h" +#include "graphengine/third_party/fwkacllib/inc/runtime/context.h" + +using ms_serving::MSService; +using ms_serving::PredictReply; +using ms_serving::PredictRequest; + +namespace mindspore { +namespace serving { +using MSTensorPtr = std::shared_ptr; + +Status Session::CreatDeviceSession(const std::string &device, uint32_t device_id) { + session_ = inference::MSSession::CreateSession(device + "Inference", device_id); + if (session_ == nullptr) { + MS_LOG(ERROR) << "Creat Session Failed"; + return FAILED; + } + device_type_ = device; + return SUCCESS; +} + +Session &Session::Instance() { + static Session instance; + return instance; +} + +Status Session::Predict(const std::vector &inputs, inference::MultiTensor 
*outputs) { + if (last_graph_ == nullptr) { + MS_LOG(ERROR) << "the model has not loaded"; + return FAILED; + } + if (session_ == nullptr) { + MS_LOG(ERROR) << "the inference session has not be initialized"; + return FAILED; + } + std::lock_guard lock(mutex_); + MS_LOG(INFO) << "run Predict"; + + *outputs = session_->RunGraph(graph_id_, inputs); + return SUCCESS; +} + +Status Session::Warmup(const MindSporeModelPtr model) { + if (session_ == nullptr) { + MS_LOG(ERROR) << "The CreatDeviceSession should be called, before warmup"; + return FAILED; + } + std::lock_guard lock(mutex_); + size_t size = 0; + std::string file_name = model->GetModelPath() + '/' + model->GetModelName(); + char *graphBuf = ReadFile(file_name.c_str(), &size); + if (graphBuf == nullptr) { + MS_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str(); + return FAILED; + } + last_graph_ = inference::LoadModel(graphBuf, size, device_type_); + graph_id_ = session_->CompileGraph(last_graph_); + MS_LOG(INFO) << "Session Warmup"; + return SUCCESS; +} + +Status Session::Clear() { + session_ = nullptr; + return SUCCESS; +} + +namespace { +const std::map type2id_map{ + {ms_serving::MS_UNKNOWN, TypeId::kNumberTypeBegin}, {ms_serving::MS_BOOL, TypeId::kNumberTypeBool}, + {ms_serving::MS_INT8, TypeId::kNumberTypeInt8}, {ms_serving::MS_UINT8, TypeId::kNumberTypeUInt8}, + {ms_serving::MS_INT16, TypeId::kNumberTypeInt16}, {ms_serving::MS_UINT16, TypeId::kNumberTypeUInt16}, + {ms_serving::MS_INT32, TypeId::kNumberTypeInt32}, {ms_serving::MS_UINT32, TypeId::kNumberTypeUInt32}, + {ms_serving::MS_INT64, TypeId::kNumberTypeInt64}, {ms_serving::MS_UINT64, TypeId::kNumberTypeUInt64}, + {ms_serving::MS_FLOAT16, TypeId::kNumberTypeFloat16}, {ms_serving::MS_FLOAT32, TypeId::kNumberTypeFloat32}, + {ms_serving::MS_FLOAT64, TypeId::kNumberTypeFloat64}, +}; + +const std::map id2type_map{ + {TypeId::kNumberTypeBegin, ms_serving::MS_UNKNOWN}, {TypeId::kNumberTypeBool, ms_serving::MS_BOOL}, + 
{TypeId::kNumberTypeInt8, ms_serving::MS_INT8}, {TypeId::kNumberTypeUInt8, ms_serving::MS_UINT8}, + {TypeId::kNumberTypeInt16, ms_serving::MS_INT16}, {TypeId::kNumberTypeUInt16, ms_serving::MS_UINT16}, + {TypeId::kNumberTypeInt32, ms_serving::MS_INT32}, {TypeId::kNumberTypeUInt32, ms_serving::MS_UINT32}, + {TypeId::kNumberTypeInt64, ms_serving::MS_INT64}, {TypeId::kNumberTypeUInt64, ms_serving::MS_UINT64}, + {TypeId::kNumberTypeFloat16, ms_serving::MS_FLOAT16}, {TypeId::kNumberTypeFloat32, ms_serving::MS_FLOAT32}, + {TypeId::kNumberTypeFloat64, ms_serving::MS_FLOAT64}, +}; +const std::map length_map{ + {ms_serving::MS_UNKNOWN, 0}, + {ms_serving::MS_BOOL, sizeof(bool)}, + {ms_serving::MS_INT8, sizeof(int8_t)}, + {ms_serving::MS_UINT8, sizeof(uint8_t)}, + {ms_serving::MS_INT16, sizeof(int16_t)}, + {ms_serving::MS_UINT16, sizeof(uint16_t)}, + {ms_serving::MS_INT32, sizeof(int32_t)}, + {ms_serving::MS_UINT32, sizeof(uint32_t)}, + {ms_serving::MS_INT64, sizeof(int64_t)}, + {ms_serving::MS_UINT64, sizeof(uint64_t)}, + {ms_serving::MS_FLOAT16, 2}, + {ms_serving::MS_FLOAT32, 4}, + {ms_serving::MS_FLOAT64, 8}, +}; +MSTensorPtr ServingTensor2MSTensor(const ms_serving::Tensor &tensor) { + std::vector shape; + for (auto dim : tensor.tensor_shape().dims()) { + shape.push_back(static_cast(dim)); + } + auto iter = type2id_map.find(tensor.tensor_type()); + if (iter == type2id_map.end()) { + MS_LOG(ERROR) << "input tensor type is wrong, type is " << tensor.tensor_type(); + return nullptr; + } + TypeId type = iter->second; + auto ms_tensor = std::shared_ptr(inference::MSTensor::CreateTensor(type, shape)); + memcpy_s(ms_tensor->MutableData(), tensor.data().size(), tensor.data().data(), tensor.data().size()); + return ms_tensor; +} + +ms_serving::Tensor MSTensor2ServingTensor(MSTensorPtr ms_tensor) { + ms_serving::Tensor tensor; + ms_serving::TensorShape shape; + for (auto dim : ms_tensor->shape()) { + shape.add_dims(dim); + } + *tensor.mutable_tensor_shape() = shape; + auto iter = 
id2type_map.find(ms_tensor->data_type()); + if (iter == id2type_map.end()) { + MS_LOG(ERROR) << "input tensor type is wrong, type is " << tensor.tensor_type(); + return tensor; + } + tensor.set_tensor_type(iter->second); + tensor.set_data(ms_tensor->MutableData(), ms_tensor->Size()); + return tensor; +} + +void ClearEnv() { + Session::Instance().Clear(); + inference::ExitInference(); +} +void HandleSignal(int sig) { + ClearEnv(); + exit(0); +} + +#ifdef ENABLE_D +static rtContext_t g_ctx = nullptr; +#endif +} // namespace + +// Service Implement +class MSServiceImpl final : public MSService::Service { + grpc::Status Predict(grpc::ServerContext *context, const PredictRequest *request, PredictReply *reply) override { + std::lock_guard lock(mutex_); +#ifdef ENABLE_D + if (g_ctx == nullptr) { + MS_LOG(ERROR) << "rtCtx is nullptr"; + return grpc::Status::CANCELLED; + } + rtError_t rt_ret = rtCtxSetCurrent(g_ctx); + if (rt_ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "set Ascend rtCtx failed"; + } +#endif + std::vector inputs; + inference::MultiTensor outputs; + for (int i = 0; i < request->data_size(); i++) { + auto input = ServingTensor2MSTensor(request->data(i)); + if (input == nullptr) { + MS_LOG(ERROR) << "Tensor convert failed"; + return grpc::Status::CANCELLED; + } + inputs.push_back(input); + } + auto res = Session::Instance().Predict(inputs, &outputs); + if (res != SUCCESS) { + return grpc::Status::CANCELLED; + } + for (const auto &tensor : outputs) { + *reply->add_result() = MSTensor2ServingTensor(tensor); + } + MS_LOG(INFO) << "Finish call service Eval"; + return grpc::Status::OK; + } + + grpc::Status Test(grpc::ServerContext *context, const PredictRequest *request, PredictReply *reply) override { + MS_LOG(INFO) << "TestService call"; + return grpc::Status::OK; + } + std::mutex mutex_; +}; + +Status Server::BuildAndStart() { + // handle exit signal + signal(SIGINT, HandleSignal); + Status res; + auto option_args = Options::Instance().GetArgs(); + std::string 
server_address = "0.0.0.0:" + std::to_string(option_args->grpc_port); + std::string model_path = option_args->model_path; + std::string model_name = option_args->model_name; + std::string device_type = option_args->device_type; + auto device_id = option_args->device_id; + res = Session::Instance().CreatDeviceSession(device_type, device_id); + if (res != SUCCESS) { + MS_LOG(ERROR) << "creat session failed"; + ClearEnv(); + return res; + } + VersionController version_controller(option_args->poll_model_wait_seconds, model_path, model_name); + res = version_controller.Run(); + if (res != SUCCESS) { + MS_LOG(ERROR) << "load model failed"; + ClearEnv(); + return res; + } +#ifdef ENABLE_D + // set d context + rtContext_t ctx = nullptr; + rtError_t rt_ret = rtCtxGetCurrent(&ctx); + if (rt_ret != RT_ERROR_NONE || ctx == nullptr) { + MS_LOG(ERROR) << "the ascend device context is null"; + return FAILED; + } + g_ctx = ctx; +#endif + MSServiceImpl service; + grpc::EnableDefaultHealthCheckService(true); + grpc::reflection::InitProtoReflectionServerBuilderPlugin(); + // Set the port is not reuseable + auto option = grpc::MakeChannelArgumentOption(GRPC_ARG_ALLOW_REUSEPORT, 0); + grpc::ServerBuilder builder; + builder.SetOption(std::move(option)); + // Listen on the given address without any authentication mechanism. + builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); + // Register "service" as the instance through which we'll communicate with + // clients. In this case it corresponds to an *synchronous* service. + builder.RegisterService(&service); + // Finally assemble the server. + std::unique_ptr server(builder.BuildAndStart()); + MS_LOG(INFO) << "Server listening on " << server_address << std::endl; + + // Wait for the server to shutdown. Note that some other thread must be + // responsible for shutting down the server for this call to ever return. 
+ server->Wait(); + return SUCCESS; +} + +} // namespace serving +} // namespace mindspore diff --git a/serving/core/server.h b/serving/core/server.h new file mode 100644 index 0000000000..f1927e9946 --- /dev/null +++ b/serving/core/server.h @@ -0,0 +1,56 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_SERVER_H +#define MINDSPORE_SERVER_H + +#include +#include +#include +#include +#include "util/status.h" +#include "version_control/model.h" +#include "include/inference.h" +#include "mindspore/ccsrc/debug/info.h" +namespace mindspore { +namespace serving { +class Session { + public: + static Session &Instance(); + Status CreatDeviceSession(const std::string &device, uint32_t device_id); + Status Predict(const std::vector> &inputs, inference::MultiTensor *output); + Status Warmup(const MindSporeModelPtr model); + Status Clear(); + + private: + Session() = default; + ~Session() = default; + int sesseion_id_{0}; + std::shared_ptr session_{nullptr}; + FuncGraphPtr last_graph_{nullptr}; + uint32_t graph_id_{0}; + std::mutex mutex_; + std::string device_type_; +}; + +class Server { + public: + Server() = default; + ~Server() = default; + Status BuildAndStart(); +}; +} // namespace serving +} // namespace mindspore +#endif // MINDSPORE_SERVER_H diff --git a/serving/core/util/file_system_operation.cc b/serving/core/util/file_system_operation.cc new file mode 100644 index 
0000000000..a5143995de --- /dev/null +++ b/serving/core/util/file_system_operation.cc @@ -0,0 +1,102 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "core/util/file_system_operation.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mindspore/ccsrc/utils/log_adapter.h" + +namespace mindspore { +namespace serving { + +char *ReadFile(const char *file, size_t *size) { + if (file == nullptr) { + MS_LOG(ERROR) << "file is nullptr"; + return nullptr; + } + MS_ASSERT(size != nullptr); + std::string realPath = file; + std::ifstream ifs(realPath); + if (!ifs.good()) { + MS_LOG(ERROR) << "file: " << realPath << " is not exist"; + return nullptr; + } + + if (!ifs.is_open()) { + MS_LOG(ERROR) << "file: " << realPath << "open failed"; + return nullptr; + } + + ifs.seekg(0, std::ios::end); + *size = ifs.tellg(); + std::unique_ptr buf(new (std::nothrow) char[*size]); + if (buf == nullptr) { + MS_LOG(ERROR) << "malloc buf failed, file: " << realPath; + ifs.close(); + return nullptr; + } + + ifs.seekg(0, std::ios::beg); + ifs.read(buf.get(), *size); + ifs.close(); + + return buf.release(); +} + +bool DirOrFileExist(const std::string &file_path) { + int ret = access(file_path.c_str(), 0); + return (ret == -1) ? 
false : true; +} + +std::vector GetAllSubDirs(const std::string &dir_path) { + DIR *dir; + struct dirent *ptr; + std::vector SubDirs; + + if ((dir = opendir(dir_path.c_str())) == NULL) { + MS_LOG(ERROR) << "Open " << dir_path << " error!"; + return std::vector(); + } + + while ((ptr = readdir(dir)) != NULL) { + std::string name = ptr->d_name; + if (name == "." || name == "..") { + continue; + } + if (ptr->d_type == DT_DIR) { + SubDirs.push_back(dir_path + "/" + name); + } + } + closedir(dir); + std::sort(SubDirs.begin(), SubDirs.end()); + return SubDirs; +} + +time_t GetModifyTime(const std::string &file_path) { + struct stat info; + (void)stat(file_path.c_str(), &info); + return info.st_mtime; +} +} // namespace serving +} // namespace mindspore diff --git a/serving/core/util/file_system_operation.h b/serving/core/util/file_system_operation.h new file mode 100644 index 0000000000..e03883b812 --- /dev/null +++ b/serving/core/util/file_system_operation.h @@ -0,0 +1,32 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_SERVING_FILE_SYSTEM_OPERATION_H_ +#define MINDSPORE_SERVING_FILE_SYSTEM_OPERATION_H_ + +#include +#include +#include + +namespace mindspore { +namespace serving { +char *ReadFile(const char *file, size_t *size); +bool DirOrFileExist(const std::string &file_path); +std::vector GetAllSubDirs(const std::string &dir_path); +time_t GetModifyTime(const std::string &file_path); +} // namespace serving +} // namespace mindspore + +#endif // !MINDSPORE_SERVING_FILE_SYSTEM_OPERATION_H_ diff --git a/serving/core/util/option_parser.cc b/serving/core/util/option_parser.cc new file mode 100644 index 0000000000..9cbd7eaee8 --- /dev/null +++ b/serving/core/util/option_parser.cc @@ -0,0 +1,243 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "core/util/option_parser.h" +#include +#include +#include +#include +#include +#include "mindspore/ccsrc/utils/log_adapter.h" + +namespace mindspore { +namespace serving { +bool StartWith(const std::string &str, const std::string &expected) { + return expected.empty() || + (str.size() >= expected.size() && memcmp(str.data(), expected.data(), expected.size()) == 0); +} + +bool RemovePrefix(std::string *str, const std::string &prefix) { + if (!StartWith(*str, prefix)) return false; + str->replace(str->begin(), str->begin() + prefix.size(), ""); + return true; +} + +bool Option::ParseInt32(std::string *arg) { + if (RemovePrefix(arg, "--") && RemovePrefix(arg, name_) && RemovePrefix(arg, "=")) { + char extra; + int32_t parsed_value; + if (sscanf(arg->data(), "%d%c", &parsed_value, &extra) != 1) { + std::cout << "Parse " << name_ << "Error for option " << *arg << std::endl; + return false; + } else { + *int32_default_ = parsed_value; + } + return true; + } + + return false; +} + +bool Option::ParseBool(std::string *arg) { + if (RemovePrefix(arg, "--") && RemovePrefix(arg, name_) && RemovePrefix(arg, "=")) { + if (*arg == "true") { + *bool_default_ = true; + } else if (*arg == "false") { + *bool_default_ = false; + } else { + std::cout << "Parse " << name_ << " Error for option " << *arg << std::endl; + return false; + } + return true; + } + + return false; +} + +bool Option::ParseString(std::string *arg) { + if (RemovePrefix(arg, "--") && RemovePrefix(arg, name_) && RemovePrefix(arg, "=")) { + *string_default_ = *arg; + return true; + } + return false; +} + +bool Option::ParseFloat(std::string *arg) { + if (RemovePrefix(arg, "--") && RemovePrefix(arg, name_) && RemovePrefix(arg, "=")) { + char extra; + float parsed_value; + if (sscanf(arg->data(), "%f%c", &parsed_value, &extra) != 1) { + std::cout << "Parse " << name_ << "Error for option " << *arg << std::endl; + return false; + } else { + *float_default_ = parsed_value; + } + return true; + } + + return 
false; +} + +Option::Option(const std::string &name, int32_t *default_point, const std::string &usage) + : name_(name), + type_(MS_TYPE_INT32), + int32_default_(default_point), + bool_default_(nullptr), + string_default_(nullptr), + float_default_(nullptr), + usage_(usage) {} + +Option::Option(const std::string &name, bool *default_point, const std::string &usage) + : name_(name), + type_(MS_TYPE_BOOL), + int32_default_(nullptr), + bool_default_(default_point), + string_default_(nullptr), + float_default_(nullptr), + usage_(usage) {} + +Option::Option(const std::string &name, std::string *default_point, const std::string &usage) + : name_(name), + type_(MS_TYPE_STRING), + int32_default_(nullptr), + bool_default_(nullptr), + string_default_(default_point), + float_default_(nullptr), + usage_(usage) {} + +Option::Option(const std::string &name, float *default_point, const std::string &usage) + : name_(name), + type_(MS_TYPE_FLOAT), + int32_default_(nullptr), + bool_default_(nullptr), + string_default_(nullptr), + float_default_(default_point), + usage_(usage) {} + +bool Option::Parse(std::string *arg) { + bool result = false; + switch (type_) { + case MS_TYPE_BOOL: + result = ParseBool(arg); + break; + case MS_TYPE_FLOAT: + result = ParseFloat(arg); + break; + case MS_TYPE_INT32: + result = ParseInt32(arg); + break; + case MS_TYPE_STRING: + result = ParseString(arg); + break; + default: + break; + } + return result; +} + +std::shared_ptr Options::inst_ = nullptr; + +Options &Options::Instance() { + static Options instance; + return instance; +} + +Options::Options() : args_(nullptr) { CreateOptions(); } + +void Options::CreateOptions() { + args_ = std::make_shared(); + std::vector