diff --git a/CMakeLists.txt b/CMakeLists.txt index 7dceca7ad7..dc07ccae8b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,10 @@ cmake_minimum_required(VERSION 3.14) project (MindSpore) + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0) + message(FATAL_ERROR "GCC vesion ${CMAKE_CXX_COMPILER_VERSION} must not be less than 7.3.0") +endif () + include(${CMAKE_SOURCE_DIR}/cmake/options.cmake) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/modules/") @@ -18,7 +23,16 @@ set(PYBIND11_CPP_STANDARD -std=c++17) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPTION_CXX_FLAGS}") find_package(Threads) -find_package(Patch) +if (DEFINED ENV{MS_PATCH_PATH}) + find_program(Patch_EXECUTABLE patch PATHS $ENV{MS_PATCH_PATH}) + set(Patch_FOUND ${Patch_EXECUTABLE}) +else () + find_package(Patch) +endif () +if (NOT Patch_FOUND) + message(FATAL_ERROR "Patch not found, please set env variable MS_PATCH_PATH, " + "usually locate in GIT_PATH/usr/bin in windows") +endif () message(PATCH_EXECUTABLE = ${Patch_EXECUTABLE}) include(${CMAKE_SOURCE_DIR}/cmake/mind_expression.cmake) diff --git a/README.md b/README.md index 3de87d3fec..24108cf3fe 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ![MindSpore Logo](docs/MindSpore-logo.png "MindSpore logo") ============================================================ -- [What is MindSpore?](#what-is-mindspore) +- [What Is MindSpore?](#what-is-mindspore) - [Automatic Differentiation](#automatic-differentiation) - [Automatic Parallel](#automatic-parallel) - [Installation](#installation) @@ -29,7 +29,7 @@ enrichment of the AI software/hardware application ecosystem. MindSpore Architecture -For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.1.0-alpha/architecture.html). +For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.2.0-alpha/architecture.html). 
### Automatic Differentiation @@ -76,13 +76,36 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex 1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package. ``` - pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl + pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl ``` 2. Run the following command to verify the install. + ```python + import numpy as np + import mindspore.context as context + import mindspore.nn as nn + from mindspore import Tensor + from mindspore.ops import operations as P + + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + + class Mul(nn.Cell): + def __init__(self): + super(Mul, self).__init__() + self.mul = P.Mul() + + def construct(self, x, y): + return self.mul(x, y) + + x = Tensor(np.array([1.0, 2.0, 3.0]).astype(np.float32)) + y = Tensor(np.array([4.0, 5.0, 6.0]).astype(np.float32)) + + mul = Mul() + print(mul(x, y)) ``` - python -c 'import mindspore' + ``` + [ 4. 10. 18.] ``` ### From Source @@ -96,20 +119,22 @@ currently the containerized build options are supported as follows: | Hardware Platform | Docker Image Repository | Tag | Description | | :---------------- | :---------------------- | :-- | :---------- | -| CPU | `mindspore/mindspore-cpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` CPU release. | +| CPU | `mindspore/mindspore-cpu` | `x.y.z` | Production environment with pre-installed MindSpore `x.y.z` CPU release. | | | | `devel` | Development environment provided to build MindSpore (with `CPU` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. | | | | `runtime` | Runtime environment provided to install MindSpore binary package with `CPU` backend. 
| -| GPU | `mindspore/mindspore-gpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` GPU release. | +| GPU | `mindspore/mindspore-gpu` | `x.y.z` | Production environment with pre-installed MindSpore `x.y.z` GPU release. | | | | `devel` | Development environment provided to build MindSpore (with `GPU CUDA10.1` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. | -| | | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU` backend. | +| | | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU CUDA10.1` backend. | | Ascend |
|
| Coming soon. | +> **NOTICE:** For GPU `devel` docker image, it's NOT suggested to directly install the whl package after building from the source, instead we strongly RECOMMEND you transfer and install the whl package inside GPU `runtime` docker image. + * CPU - For `CPU` backend, you can directly pull and run the image using the below command: + For `CPU` backend, you can directly pull and run the latest stable image using the below command: ``` - docker pull mindspore/mindspore-cpu:0.1.0-alpha - docker run -it mindspore/mindspore-cpu:0.1.0-alpha python -c 'import mindspore' + docker pull mindspore/mindspore-cpu:0.2.0-alpha + docker run -it mindspore/mindspore-cpu:0.2.0-alpha /bin/bash ``` * GPU @@ -124,20 +149,21 @@ currently the containerized build options are supported as follows: sudo systemctl restart docker ``` - Then you can pull and run the image using the below command: + Then you can pull and run the latest stable image using the below command: ``` - docker pull mindspore/mindspore-gpu:0.1.0-alpha - docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.1.0-alpha /bin/bash + docker pull mindspore/mindspore-gpu:0.2.0-alpha + docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.2.0-alpha /bin/bash ``` To test if the docker image works, please execute the python code below and check the output: ```python import numpy as np + import mindspore.context as context from mindspore import Tensor from mindspore.ops import functional as F - import mindspore.context as context context.set_context(device_target="GPU") + x = Tensor(np.ones([1,3,3,4]).astype(np.float32)) y = Tensor(np.ones([1,3,3,4]).astype(np.float32)) print(F.tensor_add(x, y)) @@ -157,11 +183,11 @@ currently the containerized build options are supported as follows: ``` If you want to learn more about the building process of MindSpore docker images, -please check out `docker` folder for the details. 
+please check out [docker](docker/README.md) repo for the details. ## Quickstart -See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.1.0-alpha/quick_start/quick_start.html) +See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.2.0-alpha/quick_start/quick_start.html) to implement the image classification. ## Docs diff --git a/RELEASE.md b/RELEASE.md index ce9064e4b1..6857ede676 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,75 @@ +# Release 0.2.0-alpha + +## Major Features and Improvements + +### Ascend 910 Training and Inference Framework +* New models + * MobileNetV2: Inverted Residuals and Linear Bottlenecks. + * ResNet101: Deep Residual Learning for Image Recognition. + +* Frontend and User Interface + * Support for all python comparison operators. + * Support for math operators **,//,%. Support for other python operators like and/or/not/is/is not/ in/ not in. + * Support for the gradients of function with variable arguments. + * Support for tensor indexing assignment for certain indexing type. + * Support for dynamic learning rate. 
+ * User interfaces change log + * DepthwiseConv2dNative, DepthwiseConv2dNativeBackpropFilter, DepthwiseConv2dNativeBackpropInput([!424](https://gitee.com/mindspore/mindspore/pulls/424)) + * ReLU6, ReLU6Grad([!224](https://gitee.com/mindspore/mindspore/pulls/224)) + * GeneratorDataset([!183](https://gitee.com/mindspore/mindspore/pulls/183)) + * VOCDataset([!477](https://gitee.com/mindspore/mindspore/pulls/477)) + * MindDataset, PKSampler([!514](https://gitee.com/mindspore/mindspore/pulls/514)) + * map([!506](https://gitee.com/mindspore/mindspore/pulls/506)) + * Conv([!226](https://gitee.com/mindspore/mindspore/pulls/226)) + * Adam([!253](https://gitee.com/mindspore/mindspore/pulls/253)) + * _set_fusion_strategy_by_idx, _set_fusion_strategy_by_size([!189](https://gitee.com/mindspore/mindspore/pulls/189)) + * CheckpointConfig([!122](https://gitee.com/mindspore/mindspore/pulls/122)) + * Constant([!54](https://gitee.com/mindspore/mindspore/pulls/54)) +* Executor and Performance Optimization + * Support parallel execution of data prefetching and forward/backward computing. + * Support parallel execution of gradient aggregation and forward/backward computing in distributed training scenarios. + * Support operator fusion optimization. + * Optimize compilation process and improve the performance. +* Data processing, augmentation, and save format + * Support multi-process of GeneratorDataset/PyFunc for high performance + * Support variable batchsize + * Support new Dataset operators, such as filter,skip,take,TextLineDataset + +### Other Hardware Support +* GPU platform + * Use dynamic memory pool by default on GPU. + * Support parallel execution of computation and communication. + * Support continuous address allocation by memory pool. +* CPU platform + * Support for windows 10 OS. + +## Bugfixes +* Models + * Fix mixed precision bug for VGG16 model ([!629](https://gitee.com/mindspore/mindspore/pulls/629)). 
+* Python API + * Fix ControlDepend operator bugs on CPU and GPU ([!396](https://gitee.com/mindspore/mindspore/pulls/396)). + * Fix ArgMinWithValue operator bugs ([!338](https://gitee.com/mindspore/mindspore/pulls/338)). + * Fix Dense operator bugs on PyNative mode ([!276](https://gitee.com/mindspore/mindspore/pulls/276)). + * Fix MatMul operator bugs on PyNative mode ([!288](https://gitee.com/mindspore/mindspore/pulls/288)). +* Executor + * Fix operator selection bugs and make it general ([!300](https://gitee.com/mindspore/mindspore/pulls/300)). + * Fix memory reuse bug for GetNext op ([!291](https://gitee.com/mindspore/mindspore/pulls/291)). +* GPU platform + * Fix memory allocation in multi-graph scenarios ([!444](https://gitee.com/mindspore/mindspore/pulls/444)). + * Fix bias_add_grad under fp16 precision ([!598](https://gitee.com/mindspore/mindspore/pulls/598)). + * Fix support for fp16 kernels on nvidia 1080Ti([!571](https://gitee.com/mindspore/mindspore/pulls/571)). + * Fix parsing of tuple type parameters ([!316](https://gitee.com/mindspore/mindspore/pulls/316)). +* Data processing + * Fix TypeErrors about can't pickle mindspore._c_dataengine.DEPipeline objects([!434](https://gitee.com/mindspore/mindspore/pulls/434)). + * Add TFRecord file verification([!406](https://gitee.com/mindspore/mindspore/pulls/406)). 
+ +## Contributors +Thanks goes to these wonderful people: + +Alexey_Shevlyakov, Cathy, Chong, Hoai, Jonathan, Junhan, JunhanHu, Peilin, SanjayChan, StrawNoBerry, VectorSL, Wei, WeibiaoYu, Xiaoda, Yanjun, YuJianfeng, ZPaC, Zhang, ZhangQinghua, ZiruiWu, amongo, anthonyaje, anzhengqi, biffex, caifubi, candanzg, caojian05, casgj, cathwong, ch-l, chang, changzherui, chenfei, chengang, chenhaozhe, chenjianping, chentingting, chenzomi, chujinjin, dengwentao, dinghao, fanglei, fary86, flywind, gaojing, geekun, gengdongjie, ghzl, gong, gongchen, gukecai, guohongzilong, guozhijian, gziyan, h.farahat, hesham, huangdongrun, huanghui, jiangzhiwen, jinyaohui, jjfeing, jojobugfree, jonathan_yan, jonyguo, jzw, kingfo, kisnwang, laiyongqiang, leonwanghui, lianliguang, lichen, lichenever, limingqi107, liubuyu, liuxiao, liyong, liyong126, lizhenyu, lupengcheng, lvliang, maoweiyong, ms_yan, mxm, ougongchang, panfengfeng, panyifeng, pengyanjun, penn, qianlong, seatea, simson, suteng, thlinh, vlne-v1, wangchengke, wanghua, wangnan39, wangqiuliang, wenchunjiang, wenkai, wukesong, xiefangqi, xulei, yanghaitao, yanghaoran, yangjie159, yangzhenzhang, yankai10, yanzhenxiang2020, yao_yf, yoonlee666, zhangbuxue, zhangz0911gm, zhangzheng, zhaojichen, zhaoting, zhaozhenlong, zhongligeng, zhoufeng, zhousiyi, zjun, zyli2020, yuhuijun, limingqi107, lizhenyu, chenweifeng. + +Contributions of any kind are welcome! 
+ # Release 0.1.0-alpha ## Main Features diff --git a/build.bat b/build.bat index ddb2e8affe..4e875fa11a 100644 --- a/build.bat +++ b/build.bat @@ -14,27 +14,27 @@ @rem ============================================================================ @echo off @title mindspore_build - + SET BASEPATH=%CD% IF NOT EXIST %BASEPATH%/build ( md "build" ) - + cd %BASEPATH%/build SET BUILD_PATH=%CD% - + IF NOT EXIST %BUILD_PATH%/mindspore ( md "mindspore" ) - + cd %CD%/mindspore - + cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_CPU=ON -DENABLE_MINDDATA=ON -DUSE_GLOG=ON -G "CodeBlocks - MinGW Makefiles" ../.. IF NOT %errorlevel% == 0 ( echo "cmake fail." goto run_fail ) - + IF "%1%" == "" ( cmake --build . --target package -- -j6 ) ELSE ( diff --git a/build.sh b/build.sh index b48014ed93..0b60344980 100755 --- a/build.sh +++ b/build.sh @@ -433,9 +433,9 @@ build_predict() cd "${BASEPATH}/predict/output/" if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then - tar -cf MSPredict-0.1.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed + tar -cf MSPredict-0.2.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then - tar -cf MSPredict-0.1.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed + tar -cf MSPredict-0.2.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed fi echo "success to build predict project!" 
} diff --git a/cmake/external_libs/mkl_dnn.cmake b/cmake/external_libs/mkl_dnn.cmake index 4b2c46670a..85a3132ba1 100644 --- a/cmake/external_libs/mkl_dnn.cmake +++ b/cmake/external_libs/mkl_dnn.cmake @@ -4,7 +4,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Windows") mindspore_add_pkg(onednn VER 1.1.1 LIBS dnnl mkldnn - HEAD_ONLY ./ + HEAD_ONLY ./include RELEASE on URL https://github.com/oneapi-src/oneDNN/releases/download/v1.1.1/dnnl_win_1.1.1_cpu_vcomp.zip MD5 ecaab9ed549643067699c80e5cea1c23) diff --git a/cmake/external_libs/protobuf.cmake b/cmake/external_libs/protobuf.cmake index a574e789db..6fe34577af 100644 --- a/cmake/external_libs/protobuf.cmake +++ b/cmake/external_libs/protobuf.cmake @@ -38,17 +38,17 @@ function(ms_protobuf_generate c_var h_var) get_filename_component(file_dir ${abs_file} PATH) file(RELATIVE_PATH rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${file_dir}) - list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc") - list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h") + list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc") + list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h") add_custom_command( - OUTPUT "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc" - "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h" + OUTPUT "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc" + "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h" WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/${rel_path}" - COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} + COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto" + COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto ${abs_file} DEPENDS protobuf::protoc ${abs_file} - COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) + COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM) endforeach() 
set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) @@ -71,40 +71,38 @@ function(ms_protobuf_generate_py c_var h_var py_var) get_filename_component(abs_file ${file} ABSOLUTE) get_filename_component(file_name ${file} NAME_WE) get_filename_component(file_dir ${abs_file} PATH) - file(RELATIVE_PATH rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${file_dir}) - - list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc") - list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h") - list(APPEND ${py_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py") + list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc") + list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h") + list(APPEND ${py_var} "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py") if (WIN32) add_custom_command( - OUTPUT "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc" - "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h" - "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" + OUTPUT "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc" + "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h" + "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/${rel_path}" - COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} - COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} - COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} - COMMAND perl -pi.bak -e "s/import (.+_pb2.*)/from . 
import \\1/" "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" - COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/" + COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto" + COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto ${abs_file} + COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/proto ${abs_file} + COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/proto ${abs_file} + COMMAND perl -pi.bak -e "s/import (.+_pb2.*)/from . import \\1/" "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/" DEPENDS protobuf::protoc ${abs_file} COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) else() add_custom_command( - OUTPUT "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc" - "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h" - "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" + OUTPUT "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc" + "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h" + "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/${rel_path}" - COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} - COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} - COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} - COMMAND perl -pi -e "s/import (.+_pb2.*)/from . 
import \\1/" "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" - COMMAND cp "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/" + COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto" + COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto ${abs_file} + COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/proto ${abs_file} + COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/proto ${abs_file} + COMMAND perl -pi -e "s/import (.+_pb2.*)/from . import \\1/" "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" + COMMAND cp "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/" DEPENDS protobuf::protoc ${abs_file} - COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) + COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM) endif() endforeach() set_source_files_properties(${${c_var}} ${${h_var}} ${${py_var}} PROPERTIES GENERATED TRUE) diff --git a/cmake/package.cmake b/cmake/package.cmake index d35ce0463b..08919eb0e7 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -153,10 +153,14 @@ endif () if (CMAKE_SYSTEM_NAME MATCHES "Windows") get_filename_component(CXX_DIR ${CMAKE_CXX_COMPILER} PATH) file(GLOB CXX_LIB_LIST ${CXX_DIR}/*.dll) + + string(REPLACE "\\" "/" SystemRoot $ENV{SystemRoot}) + file(GLOB VC_LIB_LIST ${SystemRoot}/System32/msvcp140.dll ${SystemRoot}/System32/vcomp140.dll) + file(GLOB JPEG_LIB_LIST ${jpeg_turbo_LIBPATH}/*.dll) file(GLOB SQLITE_LIB_LIST ${sqlite_LIBPATH}/*.dll) install( - FILES ${CXX_LIB_LIST} ${JPEG_LIB_LIST} ${SQLITE_LIB_LIST} + FILES ${CXX_LIB_LIST} ${JPEG_LIB_LIST} ${SQLITE_LIB_LIST} ${VC_LIB_LIST} DESTINATION ${INSTALL_LIB_DIR} COMPONENT mindspore ) diff --git a/docker/README.md b/docker/README.md index c6851fe531..bceeef0cae 100644 --- a/docker/README.md +++ b/docker/README.md @@ -4,14 +4,13 @@ This folder hosts all the `Dockerfile` to build 
MindSpore container images with ### MindSpore docker build command -* CPU +| Hardware Platform | Version | Build Command | +| :---------------- | :------ | :------------ | +| CPU | `x.y.z` | cd mindspore-cpu/x.y.z && docker build . -t mindspore/mindspore-cpu:x.y.z | +| | `devel` | cd mindspore-cpu/devel && docker build . -t mindspore/mindspore-cpu:devel | +| | `runtime` | cd mindspore-cpu/runtime && docker build . -t mindspore/mindspore-cpu:runtime | +| GPU | `x.y.z` | cd mindspore-gpu/x.y.z && docker build . -t mindspore/mindspore-gpu:x.y.z | +| | `devel` | cd mindspore-gpu/devel && docker build . -t mindspore/mindspore-gpu:devel | +| | `runtime` | cd mindspore-gpu/runtime && docker build . -t mindspore/mindspore-gpu:runtime | - ``` - cd mindspore-cpu/0.1.0-alpha && docker build . -t mindspore/mindspore-cpu:0.1.0-alpha - ``` - -* GPU - - ``` - cd mindspore-gpu/0.1.0-alpha && docker build . -t mindspore/mindspore-gpu:0.1.0-alpha - ``` +> **NOTICE:** The `x.y.z` version shown above should be replaced with the real version number. 
diff --git a/docker/mindspore-cpu/0.2.0-alpha/Dockerfile b/docker/mindspore-cpu/0.2.0-alpha/Dockerfile new file mode 100644 index 0000000000..dc69d21326 --- /dev/null +++ b/docker/mindspore-cpu/0.2.0-alpha/Dockerfile @@ -0,0 +1,67 @@ +FROM ubuntu:18.04 + +MAINTAINER leonwanghui + +# Set env +ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5 +ENV PATH /usr/local/bin:$PATH + +# Install base tools +RUN apt update \ + && DEBIAN_FRONTEND=noninteractive apt install -y \ + vim \ + wget \ + curl \ + xz-utils \ + net-tools \ + openssh-client \ + git \ + ntpdate \ + tzdata \ + tcl \ + sudo \ + bash-completion + +# Install compile tools +RUN DEBIAN_FRONTEND=noninteractive apt install -y \ + gcc \ + g++ \ + zlibc \ + make \ + libgmp-dev \ + patch \ + autoconf \ + libtool \ + automake \ + flex + +# Set bash +RUN echo "dash dash/sh boolean false" | debconf-set-selections +RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash + +# Install python (v3.7.5) +RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ + libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \ + && cd /tmp \ + && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \ + && tar -xvf v3.7.5.tar.gz \ + && cd /tmp/cpython-3.7.5 \ + && mkdir -p ${PYTHON_ROOT_PATH} \ + && ./configure --prefix=${PYTHON_ROOT_PATH} \ + && make -j4 \ + && make install -j4 \ + && rm -f /usr/local/bin/python \ + && rm -f /usr/local/bin/pip \ + && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ + && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ + && rm -rf /tmp/cpython-3.7.5 \ + && rm -f /tmp/v3.7.5.tar.gz + +# Set pip source +RUN mkdir -pv /root/.pip \ + && echo "[global]" > /root/.pip/pip.conf \ + && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \ + && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf + +# Install MindSpore cpu whl package +RUN pip install --no-cache-dir 
https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl diff --git a/docker/mindspore-gpu/0.2.0-alpha/Dockerfile b/docker/mindspore-gpu/0.2.0-alpha/Dockerfile new file mode 100644 index 0000000000..a6eaf8382a --- /dev/null +++ b/docker/mindspore-gpu/0.2.0-alpha/Dockerfile @@ -0,0 +1,83 @@ +FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 + +MAINTAINER leonwanghui + +# Set env +ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5 +ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5 +ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH +ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH + +# Install base tools +RUN apt update \ + && DEBIAN_FRONTEND=noninteractive apt install -y \ + vim \ + wget \ + curl \ + xz-utils \ + net-tools \ + openssh-client \ + git \ + ntpdate \ + tzdata \ + tcl \ + sudo \ + bash-completion + +# Install compile tools +RUN DEBIAN_FRONTEND=noninteractive apt install -y \ + gcc \ + g++ \ + zlibc \ + make \ + libgmp-dev \ + patch \ + autoconf \ + libtool \ + automake \ + flex \ + libnccl2=2.4.8-1+cuda10.1 \ + libnccl-dev=2.4.8-1+cuda10.1 + +# Set bash +RUN echo "dash dash/sh boolean false" | debconf-set-selections +RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash + +# Install python (v3.7.5) +RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ + libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \ + && cd /tmp \ + && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \ + && tar -xvf v3.7.5.tar.gz \ + && cd /tmp/cpython-3.7.5 \ + && mkdir -p ${PYTHON_ROOT_PATH} \ + && ./configure --prefix=${PYTHON_ROOT_PATH} \ + && make -j4 \ + && make install -j4 \ + && rm -f /usr/local/bin/python \ + && rm -f /usr/local/bin/pip \ + && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ + && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ + && rm -rf /tmp/cpython-3.7.5 \ + && rm -f /tmp/v3.7.5.tar.gz + +# 
Set pip source +RUN mkdir -pv /root/.pip \ + && echo "[global]" > /root/.pip/pip.conf \ + && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \ + && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf + +# Install openmpi (v3.1.5) +RUN cd /tmp \ + && wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \ + && tar -xvf openmpi-3.1.5.tar.gz \ + && cd /tmp/openmpi-3.1.5 \ + && mkdir -p ${OMPI_ROOT_PATH} \ + && ./configure --prefix=${OMPI_ROOT_PATH} \ + && make -j4 \ + && make install -j4 \ + && rm -rf /tmp/openmpi-3.1.5 \ + && rm -f /tmp/openmpi-3.1.5.tar.gz + +# Install MindSpore cuda-10.1 whl package +RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore_gpu-0.2.0-cp37-cp37m-linux_x86_64.whl diff --git a/example/Bert_NEZHA_cnwiki/config.py b/example/Bert_NEZHA_cnwiki/config.py deleted file mode 100644 index a704d9a264..0000000000 --- a/example/Bert_NEZHA_cnwiki/config.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -""" -network config setting, will be used in train.py -""" - -from easydict import EasyDict as edict -import mindspore.common.dtype as mstype -from mindspore.model_zoo.Bert_NEZHA import BertConfig -bert_train_cfg = edict({ - 'epoch_size': 10, - 'num_warmup_steps': 0, - 'start_learning_rate': 1e-4, - 'end_learning_rate': 0.0, - 'decay_steps': 1000, - 'power': 10.0, - 'save_checkpoint_steps': 2000, - 'keep_checkpoint_max': 10, - 'checkpoint_prefix': "checkpoint_bert", - # please add your own dataset path - 'DATA_DIR': "/your/path/examples.tfrecord", - # please add your own dataset schema path - 'SCHEMA_DIR': "/your/path/datasetSchema.json" -}) -bert_net_cfg = BertConfig( - batch_size=16, - seq_length=128, - vocab_size=21136, - hidden_size=1024, - num_hidden_layers=24, - num_attention_heads=16, - intermediate_size=4096, - hidden_act="gelu", - hidden_dropout_prob=0.0, - attention_probs_dropout_prob=0.0, - max_position_embeddings=512, - type_vocab_size=2, - initializer_range=0.02, - use_relative_positions=True, - input_mask_from_dataset=True, - token_type_ids_from_dataset=True, - dtype=mstype.float32, - compute_type=mstype.float16, -) diff --git a/example/Bert_NEZHA_cnwiki/train.py b/example/Bert_NEZHA_cnwiki/train.py deleted file mode 100644 index 2610542a9a..0000000000 --- a/example/Bert_NEZHA_cnwiki/train.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -NEZHA (NEural contextualiZed representation for CHinese lAnguage understanding) is the Chinese pretrained language -model currently based on BERT developed by Huawei. -1. Prepare data -Following the data preparation as in BERT, run command as below to get dataset for training: - python ./create_pretraining_data.py \ - --input_file=./sample_text.txt \ - --output_file=./examples.tfrecord \ - --vocab_file=./your/path/vocab.txt \ - --do_lower_case=True \ - --max_seq_length=128 \ - --max_predictions_per_seq=20 \ - --masked_lm_prob=0.15 \ - --random_seed=12345 \ - --dupe_factor=5 -2. Pretrain -First, prepare the distributed training environment, then adjust configurations in config.py, finally run train.py. -""" - -import os -import numpy as np -from config import bert_train_cfg, bert_net_cfg -import mindspore.dataset.engine.datasets as de -import mindspore.dataset.transforms.c_transforms as C -from mindspore import context -from mindspore.common.tensor import Tensor -import mindspore.common.dtype as mstype -from mindspore.train.model import Model -from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor -from mindspore.model_zoo.Bert_NEZHA import BertNetworkWithLoss, BertTrainOneStepCell -from mindspore.nn.optim import Lamb -_current_dir = os.path.dirname(os.path.realpath(__file__)) - -def create_train_dataset(batch_size): - """create train dataset""" - # apply repeat operations - repeat_count = bert_train_cfg.epoch_size - ds = de.TFRecordDataset([bert_train_cfg.DATA_DIR], bert_train_cfg.SCHEMA_DIR, - columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels", - "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"]) - type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) - 
ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) - ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) - # apply batch operations - ds = ds.batch(batch_size, drop_remainder=True) - ds = ds.repeat(repeat_count) - return ds - -def weight_variable(shape): - """weight variable""" - np.random.seed(1) - ones = np.random.uniform(-0.1, 0.1, size=shape).astype(np.float32) - return Tensor(ones) - -def train_bert(): - """train bert""" - context.set_context(mode=context.GRAPH_MODE) - context.set_context(device_target="Ascend") - context.set_context(enable_task_sink=True) - context.set_context(enable_loop_sink=True) - context.set_context(enable_mem_reuse=True) - ds = create_train_dataset(bert_net_cfg.batch_size) - netwithloss = BertNetworkWithLoss(bert_net_cfg, True) - optimizer = Lamb(netwithloss.trainable_params(), decay_steps=bert_train_cfg.decay_steps, - start_learning_rate=bert_train_cfg.start_learning_rate, - end_learning_rate=bert_train_cfg.end_learning_rate, power=bert_train_cfg.power, - warmup_steps=bert_train_cfg.num_warmup_steps, decay_filter=lambda x: False) - netwithgrads = BertTrainOneStepCell(netwithloss, optimizer=optimizer) - netwithgrads.set_train(True) - model = Model(netwithgrads) - config_ck = CheckpointConfig(save_checkpoint_steps=bert_train_cfg.save_checkpoint_steps, - keep_checkpoint_max=bert_train_cfg.keep_checkpoint_max) - ckpoint_cb = ModelCheckpoint(prefix=bert_train_cfg.checkpoint_prefix, config=config_ck) - model.train(ds.get_repeat_count(), ds, callbacks=[LossMonitor(), ckpoint_cb], dataset_sink_mode=False) - -if __name__ == '__main__': - train_bert() diff --git a/example/alexnet_cifar10/README.md b/example/alexnet_cifar10/README.md index 0efd3ca1bf..99245dfe1e 100644 --- 
a/example/alexnet_cifar10/README.md +++ b/example/alexnet_cifar10/README.md @@ -25,7 +25,7 @@ This is the simple tutorial for training AlexNet in MindSpore. python train.py --data_path cifar-10-batches-bin ``` -You can get loss with each step similar to this: +You will get the loss value of each step as follows: ```bash epoch: 1 step: 1, loss is 2.2791853 @@ -36,17 +36,16 @@ epoch: 1 step: 1538, loss is 1.0221305 ... ``` -Then, test AlexNet according to network model +Then, evaluate AlexNet according to network model ```python -# test AlexNet, 1 epoch training accuracy is up to 51.1%; 10 epoch training accuracy is up to 81.2% +# evaluate AlexNet, 1 epoch training accuracy is up to 51.1%; 10 epoch training accuracy is up to 81.2% python eval.py --data_path cifar-10-verify-bin --mode test --ckpt_path checkpoint_alexnet-1_1562.ckpt ``` ## Note -There are some optional arguments: +Here are some optional parameters: ```bash --h, --help show this help message and exit --device_target {Ascend,GPU} device where the code will be implemented (default: Ascend) --data_path DATA_PATH diff --git a/example/bert_clue/README.md b/example/bert_clue/README.md new file mode 100644 index 0000000000..3c66816ff3 --- /dev/null +++ b/example/bert_clue/README.md @@ -0,0 +1,149 @@ +# BERT Example +## Description +This example implements pre-training, fine-tuning and evaluation of [BERT-base](https://github.com/google-research/bert)(the base version of BERT model) and [BERT-NEZHA](https://github.com/huawei-noah/Pretrained-Language-Model)(a Chinese pretrained language model developed by Huawei, which introduced an improvement of Functional Relative Positional Encoding as an effective positional encoding scheme). + +## Requirements +- Install [MindSpore](https://www.mindspore.cn/install/en). +- Download the zhwiki dataset from <https://dumps.wikimedia.org/zhwiki> for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wil +kiextractor).
Convert the dataset to TFRecord format and move the files to a specified path. +- Download the CLUE dataset from <https://www.cluebenchmarks.com> for fine-tuning and evaluation. +> Notes: + If you are running a fine-tuning or evaluation task, prepare the corresponding checkpoint file. + +## Running the Example +### Pre-Training +- Set options in `config.py`, including lossscale, optimizer and network. Click [here](https://www.mindspore.cn/tutorial/zh-CN/master/use/data_preparation/loading_the_datasets.html#tfrecord) for more information about dataset and the json schema file. + +- Run `run_standalone_pretrain.sh` for non-distributed pre-training of BERT-base and BERT-NEZHA model. + + ``` bash + sh run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR + ``` +- Run `run_distribute_pretrain.sh` for distributed pre-training of BERT-base and BERT-NEZHA model. + + ``` bash + sh run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_HCCL_CONFIG_PATH + ``` + +### Fine-Tuning +- Set options in `finetune_config.py`. Make sure the 'data_file', 'schema_file' and 'pre_training_ckpt' are set to your own path, and set 'ckpt_dir' to the directory where the generated checkpoint files are saved. + +- Run `finetune.py` for fine-tuning of BERT-base and BERT-NEZHA model. + + ```bash + python finetune.py --backend=ms + ``` + +### Evaluation +- Set options in `evaluation_config.py`. Make sure the 'data_file', 'schema_file' and 'finetune_ckpt' are set to your own path. + +- Run `evaluation.py` for evaluation of BERT-base and BERT-NEZHA model.
+ + ```bash + python evaluation.py --backend=ms + ``` + +## Usage +### Pre-Training +``` +usage: run_pretrain.py [--distribute DISTRIBUTE] [--epoch_size N] [--device_num N] [--device_id N] + [--enable_task_sink ENABLE_TASK_SINK] [--enable_loop_sink ENABLE_LOOP_SINK] + [--enable_mem_reuse ENABLE_MEM_REUSE] [--enable_save_ckpt ENABLE_SAVE_CKPT] + [--enable_lossscale ENABLE_LOSSSCALE] [--do_shuffle DO_SHUFFLE] + [--enable_data_sink ENABLE_DATA_SINK] [--data_sink_steps N] [--checkpoint_path CHECKPOINT_PATH] + [--save_checkpoint_steps N] [--save_checkpoint_num N] + [--data_dir DATA_DIR] [--schema_dir SCHEMA_DIR] + +options: + --distribute pre_training by several devices: "true"(training by more than 1 device) | "false", default is "false" + --epoch_size epoch size: N, default is 1 + --device_num number of used devices: N, default is 1 + --device_id device id: N, default is 0 + --enable_task_sink enable task sink: "true" | "false", default is "true" + --enable_loop_sink enable loop sink: "true" | "false", default is "true" + --enable_mem_reuse enable memory reuse: "true" | "false", default is "true" + --enable_save_ckpt enable save checkpoint: "true" | "false", default is "true" + --enable_lossscale enable lossscale: "true" | "false", default is "true" + --do_shuffle enable shuffle: "true" | "false", default is "true" + --enable_data_sink enable data sink: "true" | "false", default is "true" + --data_sink_steps set data sink steps: N, default is 1 + --checkpoint_path path to save checkpoint files: PATH, default is "" + --save_checkpoint_steps steps for saving checkpoint files: N, default is 1000 + --save_checkpoint_num number for saving checkpoint files: N, default is 1 + --data_dir path to dataset directory: PATH, default is "" + --schema_dir path to schema.json file: PATH, default is "" +``` +## Options and Parameters +This section describes the parameters of the BERT model and the options for training, which are set in the files `config.py`, `finetune_config.py` and `evaluation_config.py`
respectively. +### Options: +``` +Pre-Training: + bert_network version of BERT model: base | nezha, default is base + loss_scale_value initial value of loss scale: N, default is 2^32 + scale_factor factor used to update loss scale: N, default is 2 + scale_window steps between updates of the loss scale: N, default is 1000 + optimizer optimizer used in the network: AdamWeightDecayDynamicLR | Lamb | Momentum, default is "Lamb" + +Fine-Tuning: + task task type: NER | XNLI | LCQMC | SENTI + data_file dataset file to load: PATH, default is "/your/path/cn-wiki-128" + schema_file dataset schema file to load: PATH, default is "/your/path/datasetSchema.json" + epoch_num repeat counts of training: N, default is 40 + ckpt_prefix prefix used to save checkpoint files: PREFIX, default is "bert" + ckpt_dir path to save checkpoint files: PATH, default is None + pre_training_ckpt checkpoint file to load: PATH, default is "/your/path/pre_training.ckpt" + optimizer optimizer used in the network: AdamWeightDecayDynamicLR | Lamb | Momentum, default is "Lamb" + +Evaluation: + task task type: NER | XNLI | LCQMC | SENTI + data_file dataset file to load: PATH, default is "/your/path/evaluation.tfrecord" + schema_file dataset schema file to load: PATH, default is "/your/path/schema.json" + finetune_ckpt checkpoint file to load: PATH, default is "/your/path/your.ckpt" +``` + +### Parameters: +``` +Parameters for dataset and network (Pre-Training/Fine-Tuning/Evaluation): + batch_size batch size of input dataset: N, default is 16 + seq_length length of input sequence: N, default is 128 + vocab_size size of each embedding vector: N, default is 21136 + hidden_size size of bert encoder layers: N, default is 768 + num_hidden_layers number of hidden layers: N, default is 12 + num_attention_heads number of attention heads: N, default is 12 + intermediate_size size of intermediate layer: N, default is 3072 + hidden_act activation function used: ACTIVATION, default is "gelu" + hidden_dropout_prob
dropout probability for BertOutput: Q, default is 0.1 + attention_probs_dropout_prob dropout probability for BertAttention: Q, default is 0.1 + max_position_embeddings maximum length of sequences: N, default is 512 + type_vocab_size size of token type vocab: N, default is 16 + initializer_range initialization value of TruncatedNormal: Q, default is 0.02 + use_relative_positions use relative positions or not: True | False, default is False + input_mask_from_dataset use the input mask loaded from dataset or not: True | False, default is True + token_type_ids_from_dataset use the token type ids loaded from dataset or not: True | False, default is True + dtype data type of input: mstype.float16 | mstype.float32, default is mstype.float32 + compute_type compute type in BertTransformer: mstype.float16 | mstype.float32, default is mstype.float16 + +Parameters for optimizer: + AdamWeightDecayDynamicLR: + decay_steps steps of the learning rate decay: N, default is 12276*3 + learning_rate value of learning rate: Q, default is 1e-5 + end_learning_rate value of end learning rate: Q, default is 0.0 + power power: Q, default is 10.0 + warmup_steps steps of the learning rate warm up: N, default is 2100 + weight_decay weight decay: Q, default is 1e-5 + eps term added to the denominator to improve numerical stability: Q, default is 1e-6 + + Lamb: + decay_steps steps of the learning rate decay: N, default is 12276*3 + learning_rate value of learning rate: Q, default is 1e-5 + end_learning_rate value of end learning rate: Q, default is 0.0 + power power: Q, default is 5.0 + warmup_steps steps of the learning rate warm up: N, default is 2100 + weight_decay weight decay: Q, default is 1e-5 + decay_filter function to determine whether to apply weight decay on parameters: FUNCTION, default is lambda x: False + + Momentum: + learning_rate value of learning rate: Q, default is 2e-5 + momentum momentum for the moving average: Q, default is 0.9 +``` + diff --git a/example/bert_clue/config.py
b/example/bert_clue/config.py new file mode 100644 index 0000000000..a16dba83c7 --- /dev/null +++ b/example/bert_clue/config.py @@ -0,0 +1,95 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +network config setting, will be used in dataset.py, run_pretrain.py +""" +from easydict import EasyDict as edict +import mindspore.common.dtype as mstype +from mindspore.model_zoo.Bert_NEZHA import BertConfig +cfg = edict({ + 'bert_network': 'base', + 'loss_scale_value': 2**32, + 'scale_factor': 2, + 'scale_window': 1000, + 'optimizer': 'Lamb', + 'AdamWeightDecayDynamicLR': edict({ + 'learning_rate': 3e-5, + 'end_learning_rate': 1e-7, + 'power': 5.0, + 'weight_decay': 1e-5, + 'eps': 1e-6, + }), + 'Lamb': edict({ + 'start_learning_rate': 3e-5, + 'end_learning_rate': 1e-7, + 'power': 10.0, + 'warmup_steps': 10000, + 'weight_decay': 0.01, + 'eps': 1e-6, + }), + 'Momentum': edict({ + 'learning_rate': 2e-5, + 'momentum': 0.9, + }), +}) + +''' +Including two kinds of network: \ +base: Goole BERT-base(the base version of BERT model). +large: BERT-NEZHA(a Chinese pretrained language model developed by Huawei, which introduced a improvement of \ + Functional Relative Posetional Encoding as an effective positional encoding scheme). 
+''' +if cfg.bert_network == 'base': + bert_net_cfg = BertConfig( + batch_size=32, + seq_length=128, + vocab_size=21128, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02, + use_relative_positions=False, + input_mask_from_dataset=True, + token_type_ids_from_dataset=True, + dtype=mstype.float32, + compute_type=mstype.float16, + ) +if cfg.bert_network == 'nezha': + bert_net_cfg = BertConfig( + batch_size=32, + seq_length=128, + vocab_size=21128, + hidden_size=1024, + num_hidden_layers=24, + num_attention_heads=16, + intermediate_size=4096, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02, + use_relative_positions=True, + input_mask_from_dataset=True, + token_type_ids_from_dataset=True, + dtype=mstype.float32, + compute_type=mstype.float16, + ) diff --git a/example/bert_clue/dataset.py b/example/bert_clue/dataset.py new file mode 100644 index 0000000000..d54f2a6660 --- /dev/null +++ b/example/bert_clue/dataset.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +Data operations, will be used in run_pretrain.py +""" +import os +import mindspore.common.dtype as mstype +import mindspore.dataset.engine.datasets as de +import mindspore.dataset.transforms.c_transforms as C +from mindspore import log as logger +from config import bert_net_cfg + + +def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", enable_data_sink="true", + data_sink_steps=1, data_dir=None, schema_dir=None): + """create train dataset""" + # apply repeat operations + repeat_count = epoch_size + files = os.listdir(data_dir) + data_files = [] + for file_name in files: + data_files.append(os.path.join(data_dir, file_name)) + ds = de.TFRecordDataset(data_files, schema_dir, + columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels", + "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"], + shuffle=(do_shuffle == "true"), num_shards=device_num, shard_id=rank, + shard_equal_rows=True) + ori_dataset_size = ds.get_dataset_size() + new_size = ori_dataset_size + if enable_data_sink == "true": + new_size = data_sink_steps * bert_net_cfg.batch_size + ds.set_dataset_size(new_size) + repeat_count = int(repeat_count * ori_dataset_size // ds.get_dataset_size()) + type_cast_op = C.TypeCast(mstype.int32) + ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) + ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) + ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) + ds = ds.map(input_columns="segment_ids", operations=type_cast_op) + ds = ds.map(input_columns="input_mask", operations=type_cast_op) + ds = ds.map(input_columns="input_ids", operations=type_cast_op) + # apply batch operations + ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True) + ds = ds.repeat(repeat_count) + logger.info("data size: {}".format(ds.get_dataset_size())) + logger.info("repeatcount: 
{}".format(ds.get_repeat_count())) + return ds diff --git a/example/bert_clue/run_distribute_pretrain.sh b/example/bert_clue/run_distribute_pretrain.sh new file mode 100644 index 0000000000..86d3747e0b --- /dev/null +++ b/example/bert_clue/run_distribute_pretrain.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the scipt as: " +echo "sh run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_HCCL_CONFIG_PATH" +echo "for example: sh run_distribute_pretrain.sh 8 40 /path/zh-wiki/ /path/Schema.json /path/hccl.json" +echo "It is better to use absolute path." 
+echo "==============================================================================================================" + +EPOCH_SIZE=$2 +DATA_DIR=$3 +SCHEMA_DIR=$4 + +export MINDSPORE_HCCL_CONFIG_PATH=$5 +export RANK_SIZE=$1 + +for((i=0;i env.log + taskset -c $cmdopt python ../run_pretrain.py \ + --distribute="true" \ + --epoch_size=$EPOCH_SIZE \ + --device_id=$DEVICE_ID \ + --device_num=$RANK_SIZE \ + --enable_task_sink="true" \ + --enable_loop_sink="true" \ + --enable_mem_reuse="true" \ + --enable_save_ckpt="true" \ + --enable_lossscale="true" \ + --do_shuffle="true" \ + --enable_data_sink="true" \ + --data_sink_steps=1 \ + --checkpoint_path="" \ + --save_checkpoint_steps=1000 \ + --save_checkpoint_num=1 \ + --data_dir=$DATA_DIR \ + --schema_dir=$SCHEMA_DIR > log.txt 2>&1 & + cd ../ +done diff --git a/example/bert_clue/run_pretrain.py b/example/bert_clue/run_pretrain.py new file mode 100644 index 0000000000..25c78e08d8 --- /dev/null +++ b/example/bert_clue/run_pretrain.py @@ -0,0 +1,144 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +#################pre_train bert example on zh-wiki######################## +python run_pretrain.py +""" + +import os +import argparse +import mindspore.communication.management as D +from mindspore import context +from mindspore.train.model import Model +from mindspore.train.parallel_utils import ParallelMode +from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell +from mindspore.train.callback import Callback, ModelCheckpoint, CheckpointConfig +from mindspore.model_zoo.Bert_NEZHA import BertNetworkWithLoss, BertTrainOneStepCell, BertTrainOneStepWithLossScaleCell +from mindspore.nn.optim import Lamb, Momentum, AdamWeightDecayDynamicLR +from dataset import create_bert_dataset +from config import cfg, bert_net_cfg +_current_dir = os.path.dirname(os.path.realpath(__file__)) + +class LossCallBack(Callback): + """ + Monitor the loss in training. + If the loss in NAN or INF terminating training. + Note: + if per_print_times is 0 do not print loss. + Args: + per_print_times (int): Print loss every times. Default: 1. 
+ """ + def __init__(self, per_print_times=1): + super(LossCallBack, self).__init__() + if not isinstance(per_print_times, int) or per_print_times < 0: + raise ValueError("print_step must be int and >= 0") + self._per_print_times = per_print_times + def step_end(self, run_context): + cb_params = run_context.original_args() + with open("./loss.log", "a+") as f: + f.write("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num, + str(cb_params.net_outputs))) + f.write('\n') + +def run_pretrain(): + """pre-train bert_clue""" + parser = argparse.ArgumentParser(description='bert pre_training') + parser.add_argument("--distribute", type=str, default="false", help="Run distribute, default is false.") + parser.add_argument("--epoch_size", type=int, default="1", help="Epoch size, default is 1.") + parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") + parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") + parser.add_argument("--enable_task_sink", type=str, default="true", help="Enable task sink, default is true.") + parser.add_argument("--enable_loop_sink", type=str, default="true", help="Enable loop sink, default is true.") + parser.add_argument("--enable_mem_reuse", type=str, default="true", help="Enable mem reuse, default is true.") + parser.add_argument("--enable_save_ckpt", type=str, default="true", help="Enable save checkpoint, default is true.") + parser.add_argument("--enable_lossscale", type=str, default="true", help="Use lossscale or not, default is not.") + parser.add_argument("--do_shuffle", type=str, default="true", help="Enable shuffle for dataset, default is true.") + parser.add_argument("--enable_data_sink", type=str, default="true", help="Enable data sink, default is true.") + parser.add_argument("--data_sink_steps", type=int, default="1", help="Sink steps for each epoch, default is 1.") + parser.add_argument("--checkpoint_path", type=str, 
default="", help="Checkpoint file path") + parser.add_argument("--save_checkpoint_steps", type=int, default=1000, help="Save checkpoint steps, " + "default is 1000.") + parser.add_argument("--save_checkpoint_num", type=int, default=1, help="Save checkpoint numbers, default is 1.") + parser.add_argument("--data_dir", type=str, default="", help="Data path, it is better to use absolute path") + parser.add_argument("--schema_dir", type=str, default="", help="Schema path, it is better to use absolute path") + + args_opt = parser.parse_args() + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) + context.set_context(enable_task_sink=(args_opt.enable_task_sink == "true"), + enable_loop_sink=(args_opt.enable_loop_sink == "true"), + enable_mem_reuse=(args_opt.enable_mem_reuse == "true")) + context.set_context(reserve_class_name_in_scope=False) + + if args_opt.distribute == "true": + device_num = args_opt.device_num + context.reset_auto_parallel_context() + context.set_context(enable_hccl=True) + context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, + device_num=device_num) + D.init() + rank = args_opt.device_id % device_num + else: + context.set_context(enable_hccl=False) + rank = 0 + device_num = 1 + + ds = create_bert_dataset(args_opt.epoch_size, device_num, rank, args_opt.do_shuffle, args_opt.enable_data_sink, + args_opt.data_sink_steps, args_opt.data_dir, args_opt.schema_dir) + + netwithloss = BertNetworkWithLoss(bert_net_cfg, True) + + if cfg.optimizer == 'Lamb': + optimizer = Lamb(netwithloss.trainable_params(), decay_steps=ds.get_dataset_size() * ds.get_repeat_count(), + start_learning_rate=cfg.Lamb.start_learning_rate, end_learning_rate=cfg.Lamb.end_learning_rate, + power=cfg.Lamb.power, warmup_steps=cfg.Lamb.warmup_steps, weight_decay=cfg.Lamb.weight_decay, + eps=cfg.Lamb.eps, decay_filter=cfg.Lamb.decay_filter) + elif cfg.optimizer == 'Momentum': + optimizer = 
Momentum(netwithloss.trainable_params(), learning_rate=cfg.Momentum.learning_rate, + momentum=cfg.Momentum.momentum) + elif cfg.optimizer == 'AdamWeightDecayDynamicLR': + optimizer = AdamWeightDecayDynamicLR(netwithloss.trainable_params(), + decay_steps=ds.get_dataset_size() * ds.get_repeat_count(), + learning_rate=cfg.AdamWeightDecayDynamicLR.learning_rate, + end_learning_rate=cfg.AdamWeightDecayDynamicLR.end_learning_rate, + power=cfg.AdamWeightDecayDynamicLR.power, + weight_decay=cfg.AdamWeightDecayDynamicLR.weight_decay, + eps=cfg.AdamWeightDecayDynamicLR.eps) + else: + raise ValueError("Don't support optimizer {}, only support [Lamb, Momentum, AdamWeightDecayDynamicLR]". + format(cfg.optimizer)) + callback = [LossCallBack()] + if args_opt.enable_save_ckpt == "true": + config_ck = CheckpointConfig(save_checkpoint_steps=args_opt.save_checkpoint_steps, + keep_checkpoint_max=args_opt.save_checkpoint_num) + ckpoint_cb = ModelCheckpoint(prefix='checkpoint_bert', config=config_ck) + callback.append(ckpoint_cb) + + if args_opt.checkpoint_path: + param_dict = load_checkpoint(args_opt.checkpoint_path) + load_param_into_net(netwithloss, param_dict) + + if args_opt.enable_lossscale == "true": + update_cell = DynamicLossScaleUpdateCell(loss_scale_value=cfg.loss_scale_value, + scale_factor=cfg.scale_factor, + scale_window=cfg.scale_window) + netwithgrads = BertTrainOneStepWithLossScaleCell(netwithloss, optimizer=optimizer, + scale_update_cell=update_cell) + else: + netwithgrads = BertTrainOneStepCell(netwithloss, optimizer=optimizer) + + model = Model(netwithgrads) + model.train(ds.get_repeat_count(), ds, callbacks=callback, dataset_sink_mode=(args_opt.enable_data_sink == "true")) +if __name__ == '__main__': + run_pretrain() diff --git a/example/bert_clue/run_standalone_pretrain.sh b/example/bert_clue/run_standalone_pretrain.sh new file mode 100644 index 0000000000..bc4bcb5420 --- /dev/null +++ b/example/bert_clue/run_standalone_pretrain.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# 
Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the scipt as: " +echo "sh run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR" +echo "for example: sh run_standalone_pretrain.sh 0 40 /path/zh-wiki/ /path/Schema.json" +echo "==============================================================================================================" + +DEVICE_ID=$1 +EPOCH_SIZE=$2 +DATA_DIR=$3 +SCHEMA_DIR=$4 + +python run_pretrain.py \ + --distribute="false" \ + --epoch_size=$EPOCH_SIZE \ + --device_id=$DEVICE_ID \ + --enable_task_sink="true" \ + --enable_loop_sink="true" \ + --enable_mem_reuse="true" \ + --enable_save_ckpt="true" \ + --enable_lossscale="true" \ + --do_shuffle="true" \ + --enable_data_sink="true" \ + --data_sink_steps=1 \ + --checkpoint_path="" \ + --save_checkpoint_steps=1000 \ + --save_checkpoint_num=1 \ + --data_dir=$DATA_DIR \ + --schema_dir=$SCHEMA_DIR > log.txt 2>&1 & diff --git a/example/convert_to_mindrecord/README.md b/example/convert_to_mindrecord/README.md deleted file mode 100644 index 8d3b25e311..0000000000 --- a/example/convert_to_mindrecord/README.md +++ /dev/null @@ -1,46 +0,0 @@ -# MindRecord generating guidelines - - - -- [MindRecord generating 
guidelines](#mindrecord-generating-guidelines) - - [Create work space](#create-work-space) - - [Implement data generator](#implement-data-generator) - - [Run data generator](#run-data-generator) - - - -## Create work space - -Assume the dataset name is 'xyz' -* Create work space from template - ```shell - cd ${your_mindspore_home}/example/convert_to_mindrecord - cp -r template xyz - ``` - -## Implement data generator - -Edit dictionary data generator -* Edit file - ```shell - cd ${your_mindspore_home}/example/convert_to_mindrecord - vi xyz/mr_api.py - ``` - - Two API, 'mindrecord_task_number' and 'mindrecord_dict_data', must be implemented -- 'mindrecord_task_number()' returns number of tasks. Return 1 if data row is generated serially. Return N if generator can be split into N parallel-run tasks. -- 'mindrecord_dict_data(task_id)' yields dictionary data row by row. 'task_id' is 0..N-1, if N is return value of mindrecord_task_number() - - -Tricky for parallel run -- For imagenet, one directory can be a task. -- For TFRecord with multiple files, each file can be a task. -- For TFRecord with 1 file only, it could also be split into N tasks. Task_id=K means: data row is picked only if (count % N == K) - - -## Run data generator -* run python script - ```shell - cd ${your_mindspore_home}/example/convert_to_mindrecord - python writer.py --mindrecord_script imagenet [...] 
- ``` diff --git a/example/cv_to_mindrecord/ImageNet_Similar_Perf/README.md b/example/cv_to_mindrecord/ImageNet_Similar_Perf/README.md new file mode 100644 index 0000000000..8bdcb9e25d --- /dev/null +++ b/example/cv_to_mindrecord/ImageNet_Similar_Perf/README.md @@ -0,0 +1,95 @@ +# Guideline to Efficiently Generating MindRecord + + + +- [What does the example do](#what-does-the-example-do) +- [Example test for ImageNet](#example-test-for-imagenet) +- [How to use the example for other dataset](#how-to-use-the-example-for-other-dataset) + - [Create work space](#create-work-space) + - [Implement data generator](#implement-data-generator) + - [Run data generator](#run-data-generator) + + + + +## What does the example do + +This example provides an efficient way to generate MindRecord. Users only need to define the parallel granularity of training data reading and the data reading function of a single task. That is, they can efficiently convert the user's training data into MindRecord. + +1. run_template.sh: entry script, users need to modify parameters according to their own training data. +2. writer.py: main script, called by run_template.sh, it mainly reads user training data in parallel and generates MindRecord. +3. template/mr_api.py: users define their own parallel granularity of training data reading and single task reading function through the template. + +## Example test for ImageNet + +1. Download and prepare the ImageNet dataset as required. + + > [ImageNet dataset download address](http://image-net.org/download) + + Store the downloaded ImageNet dataset in a folder. The folder contains all images and a mapping file that records labels of the images. + + In the mapping file, there are three columns, which are separated by spaces. They indicate image classes, label IDs, and label names. The following is an example of the mapping file: + ``` + n02119789 1 pen + n02100735 2 notbook + n02110185 3 mouse + n02096294 4 orange + ``` + +2.
Edit run_imagenet.sh and modify the parameters + ``` + --mindrecord_file: output MindRecord file. + --mindrecord_partitions: the partitions for MindRecord. + --label_file: ImageNet label map file. + --image_dir: ImageNet dir which contain sub dir. + ``` + +3. Run the bash script + ```bash + bash run_imagenet.sh + ``` + +4. Performance result + + | Training Data | General API | Current Example | Env | + | ---- | ---- | ---- | ---- | + |ImageNet(140G)| 2h40m | 50m | CPU: Intel Xeon Gold 6130 x 64, Memory: 256G, Storage: HDD | + +## How to use the example for other dataset + +### Create work space + +Assume the dataset name is 'xyz' +* Create work space from template + ```shell + cd ${your_mindspore_home}/example/cv_to_mindrecord/ImageNet_Similar_Perf + cp -r template xyz + ``` + +### Implement data generator + +Edit dictionary data generator. +* Edit file + ```shell + cd ${your_mindspore_home}/example/cv_to_mindrecord/ImageNet_Similar_Perf + vi xyz/mr_api.py + ``` + +Two API, 'mindrecord_task_number' and 'mindrecord_dict_data', must be implemented. +- 'mindrecord_task_number()' returns number of tasks. Return 1 if data row is generated serially. Return N if generator can be split into N parallel-run tasks. +- 'mindrecord_dict_data(task_id)' yields dictionary data row by row. 'task_id' is 0..N-1, if N is return value of mindrecord_task_number() + +Tricky for parallel run. +- For ImageNet, one directory can be a task. +- For TFRecord with multiple files, each file can be a task. +- For TFRecord with 1 file only, it could also be split into N tasks. Task_id=K means: data row is picked only if (count % N == K) + +### Run data generator + +* run python script + ```shell + cd ${your_mindspore_home}/example/cv_to_mindrecord/ImageNet_Similar_Perf + python writer.py --mindrecord_script xyz [...] 
+ ``` + > You can put this command in script **run_xyz.sh** for easy execution + diff --git a/example/convert_to_mindrecord/imagenet/__init__.py b/example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/__init__.py similarity index 100% rename from example/convert_to_mindrecord/imagenet/__init__.py rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/__init__.py diff --git a/example/convert_to_mindrecord/imagenet/mr_api.py b/example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/mr_api.py similarity index 97% rename from example/convert_to_mindrecord/imagenet/mr_api.py rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/mr_api.py index e569b489b5..c8129ec9ff 100644 --- a/example/convert_to_mindrecord/imagenet/mr_api.py +++ b/example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/mr_api.py @@ -118,5 +118,8 @@ def mindrecord_dict_data(task_id): image_file = open(file_name, "rb") image_bytes = image_file.read() image_file.close() + if not image_bytes: + print("The image file: {} is invalid.".format(file_name)) + continue data["data"] = image_bytes yield data diff --git a/example/convert_to_mindrecord/run_imagenet.sh b/example/cv_to_mindrecord/ImageNet_Similar_Perf/run_imagenet.sh similarity index 100% rename from example/convert_to_mindrecord/run_imagenet.sh rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/run_imagenet.sh diff --git a/example/convert_to_mindrecord/run_template.sh b/example/cv_to_mindrecord/ImageNet_Similar_Perf/run_template.sh similarity index 100% rename from example/convert_to_mindrecord/run_template.sh rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/run_template.sh diff --git a/example/convert_to_mindrecord/template/__init__.py b/example/cv_to_mindrecord/ImageNet_Similar_Perf/template/__init__.py similarity index 100% rename from example/convert_to_mindrecord/template/__init__.py rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/template/__init__.py diff --git 
a/example/convert_to_mindrecord/template/mr_api.py b/example/cv_to_mindrecord/ImageNet_Similar_Perf/template/mr_api.py similarity index 100% rename from example/convert_to_mindrecord/template/mr_api.py rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/template/mr_api.py diff --git a/example/convert_to_mindrecord/writer.py b/example/cv_to_mindrecord/ImageNet_Similar_Perf/writer.py similarity index 100% rename from example/convert_to_mindrecord/writer.py rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/writer.py diff --git a/example/lenet_mnist/README.md b/example/lenet_mnist/README.md index fea92883c6..72f3681e30 100644 --- a/example/lenet_mnist/README.md +++ b/example/lenet_mnist/README.md @@ -19,8 +19,8 @@ This is the simple and basic tutorial for constructing a network in MindSpore. │ t10k-labels.idx1-ubyte │ └─train - train-images.idx3-ubyte - train-labels.idx1-ubyte + train-images.idx3-ubyte + train-labels.idx1-ubyte ``` ## Running the example @@ -30,7 +30,7 @@ This is the simple and basic tutorial for constructing a network in MindSpore. python train.py --data_path MNIST_Data ``` -You can get loss with each step similar to this: +You will get the loss value of each step as following: ```bash epoch: 1 step: 1, loss is 2.3040335 @@ -41,17 +41,16 @@ epoch: 1 step: 1741, loss is 0.05018193 ... 
``` -Then, test LeNet according to network model +Then, evaluate LeNet according to network model ```python -# test LeNet, after 1 epoch training, the accuracy is up to 96.5% +# evaluate LeNet, after 1 epoch training, the accuracy is up to 96.5% python eval.py --data_path MNIST_Data --mode test --ckpt_path checkpoint_lenet-1_1875.ckpt ``` ## Note -There are some optional arguments: +Here are some optional parameters: ```bash --h, --help show this help message and exit --device_target {Ascend,GPU,CPU} device where the code will be implemented (default: Ascend) --data_path DATA_PATH diff --git a/example/mobilenetv2_imagenet2012/README.md b/example/mobilenetv2_imagenet2012/README.md new file mode 100644 index 0000000000..bb5288908d --- /dev/null +++ b/example/mobilenetv2_imagenet2012/README.md @@ -0,0 +1,101 @@ +# MobileNetV2 Example + +## Description + +This is an example of training MobileNetV2 with ImageNet2012 dataset in MindSpore. + +## Requirements + +* Install [MindSpore](https://www.mindspore.cn/install/en). + +* Download the dataset [ImageNet2012](http://www.image-net.org/). + +> Unzip the ImageNet2012 dataset to any path you want and the folder structure should be as follows: +> ``` +> . +> ├── train # train dataset +> └── val # infer dataset +> ``` + +## Example structure + +``` shell +. +├── config.py # parameter configuration +├── dataset.py # data preprocessing +├── eval.py # infer script +├── launch.py # launcher for distributed training +├── lr_generator.py # generate learning rate for each step +├── run_infer.sh # launch infering +├── run_train.sh # launch training +└── train.py # train script +``` + +## Parameter configuration + +Parameters for both training and inference can be set in 'config.py'. 
+ +``` +"num_classes": 1000, # dataset class num +"image_height": 224, # image height +"image_width": 224, # image width +"batch_size": 256, # training or inferring batch size +"epoch_size": 200, # total training epochs, including warmup_epochs +"warmup_epochs": 4, # warmup epochs +"lr": 0.4, # base learning rate +"momentum": 0.9, # momentum +"weight_decay": 4e-5, # weight decay +"loss_scale": 1024, # loss scale +"save_checkpoint": True, # whether save checkpoint +"save_checkpoint_epochs": 1, # the epoch interval between two checkpoints +"keep_checkpoint_max": 200, # only keep the last keep_checkpoint_max checkpoint +"save_checkpoint_path": "./checkpoint" # path to save checkpoint +``` + +## Running the example + +### Train + +#### Usage +Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] + +#### Launch + +``` +# training example +sh run_train.sh 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet +``` + +#### Result + +Training result will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and training log will be redirected to `./train/train.log` like the following. + +``` +epoch: [ 0/200], step:[ 624/ 625], loss:[5.258/5.258], time:[140412.236], lr:[0.100] +epoch time: 140522.500, per step time: 224.836, avg loss: 5.258 +epoch: [ 1/200], step:[ 624/ 625], loss:[3.917/3.917], time:[138221.250], lr:[0.200] +epoch time: 138331.250, per step time: 221.330, avg loss: 3.917 +``` + +### Infer + +#### Usage + +Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH] + +#### Launch + +``` +# infer example +sh run_infer.sh ~/imagenet ~/train/mobilenet-200_625.ckpt +``` + +> checkpoint can be produced in training process. + +#### Result + +Inference result will be stored in the example path, you can find result like the following in `./eval/infer.log`.
+ +``` +result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt +``` diff --git a/example/mobilenetv2_imagenet2012/config.py b/example/mobilenetv2_imagenet2012/config.py new file mode 100644 index 0000000000..2a8d37b6fc --- /dev/null +++ b/example/mobilenetv2_imagenet2012/config.py @@ -0,0 +1,36 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +network config setting, will be used in train.py and eval.py +""" +from easydict import EasyDict as ed + +config = ed({ + "num_classes": 1000, + "image_height": 224, + "image_width": 224, + "batch_size": 256, + "epoch_size": 200, + "warmup_epochs": 4, + "lr": 0.4, + "momentum": 0.9, + "weight_decay": 4e-5, + "label_smooth": 0.1, + "loss_scale": 1024, + "save_checkpoint": True, + "save_checkpoint_epochs": 1, + "keep_checkpoint_max": 200, + "save_checkpoint_path": "./checkpoint", +}) diff --git a/example/mobilenetv2_imagenet2012/dataset.py b/example/mobilenetv2_imagenet2012/dataset.py new file mode 100644 index 0000000000..46f5a1770c --- /dev/null +++ b/example/mobilenetv2_imagenet2012/dataset.py @@ -0,0 +1,84 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +create train or eval dataset. +""" +import os +import mindspore.common.dtype as mstype +import mindspore.dataset.engine as de +import mindspore.dataset.transforms.vision.c_transforms as C +import mindspore.dataset.transforms.c_transforms as C2 +from config import config + + +def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): + """ + create a train or eval dataset + + Args: + dataset_path(string): the path of dataset. + do_train(bool): whether dataset is used for train or eval. + repeat_num(int): the repeat times of dataset. Default: 1 + batch_size(int): the batch size of dataset. 
Default: 32 + + Returns: + dataset + """ + rank_size = int(os.getenv("RANK_SIZE")) + rank_id = int(os.getenv("RANK_ID")) + + if rank_size == 1: + ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=16, shuffle=True) + else: + ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=16, shuffle=True, + num_shards=rank_size, shard_id=rank_id) + + resize_height = config.image_height + resize_width = config.image_width + rescale = 1.0 / 255.0 + shift = 0.0 + buffer_size = 1000 + + # define map operations + decode_op = C.Decode() + resize_crop_op = C.RandomResizedCrop(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333)) + horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5) + + resize_op = C.Resize((256, 256)) + center_crop = C.CenterCrop(resize_width) + rescale_op = C.Rescale(rescale, shift) + normalize_op = C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + change_swap_op = C.HWC2CHW() + + if do_train: + trans = [decode_op, resize_crop_op, horizontal_flip_op, rescale_op, normalize_op, change_swap_op] + else: + trans = [decode_op, resize_op, center_crop, rescale_op, normalize_op, change_swap_op] + + type_cast_op = C2.TypeCast(mstype.int32) + + ds = ds.map(input_columns="image", operations=trans) + ds = ds.map(input_columns="label", operations=type_cast_op) + + # apply shuffle operations + ds = ds.shuffle(buffer_size=buffer_size) + + # apply batch operations + ds = ds.batch(batch_size, drop_remainder=True) + + # apply dataset repeat operation + ds = ds.repeat(repeat_num) + + return ds diff --git a/example/mobilenetv2_imagenet2012/eval.py b/example/mobilenetv2_imagenet2012/eval.py new file mode 100644 index 0000000000..397b3a37c3 --- /dev/null +++ b/example/mobilenetv2_imagenet2012/eval.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +eval. +""" +import os +import argparse +from dataset import create_dataset +from config import config +from mindspore import context +from mindspore.model_zoo.mobilenet import mobilenet_v2 +from mindspore.train.model import Model +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits + +parser = argparse.ArgumentParser(description='Image classification') +parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') +parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') +args_opt = parser.parse_args() + +device_id = int(os.getenv('DEVICE_ID')) + +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False) +context.set_context(enable_task_sink=True) +context.set_context(enable_loop_sink=True) +context.set_context(enable_mem_reuse=True) + +if __name__ == '__main__': + loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean') + net = mobilenet_v2() + + dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size) + step_size = dataset.get_dataset_size() + + if args_opt.checkpoint_path: + param_dict = load_checkpoint(args_opt.checkpoint_path) + load_param_into_net(net, param_dict) + net.set_train(False) + + model = Model(net, loss_fn=loss, metrics={'acc'}) + res = model.eval(dataset) + print("result:", res, "ckpt=", 
args_opt.checkpoint_path) diff --git a/example/mobilenetv2_imagenet2012/launch.py b/example/mobilenetv2_imagenet2012/launch.py new file mode 100644 index 0000000000..bd28e20149 --- /dev/null +++ b/example/mobilenetv2_imagenet2012/launch.py @@ -0,0 +1,143 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""launch train script""" +import os +import sys +import json +from argparse import ArgumentParser + + +def parse_args(): + """ + parse args . + + Args: + + Returns: + args. 
+ + Examples: + >>> parse_args() + """ + parser = ArgumentParser(description="mindspore distributed training launch " + "helper utilty that will spawn up " + "multiple distributed processes") + parser.add_argument("--nproc_per_node", type=int, default=1, + help="The number of processes to launch on each node, " + "for D training, this is recommended to be set " + "to the number of D in your system so that " + "each process can be bound to a single D.") + parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7", + help="will use the visible devices sequentially") + parser.add_argument("--server_id", type=str, default="", + help="server ip") + parser.add_argument("--training_script", type=str, + help="The full path to the single D training " + "program/script to be launched in parallel, " + "followed by all the arguments for the " + "training script") + # rest from the training program + args, unknown = parser.parse_known_args() + args.training_script_args = unknown + return args + + +def main(): + print("start", __file__) + args = parse_args() + print(args) + visible_devices = args.visible_devices.split(',') + assert os.path.isfile(args.training_script) + assert len(visible_devices) >= args.nproc_per_node + print('visible_devices:{}'.format(visible_devices)) + if not args.server_id: + print('pleaser input server ip!!!') + exit(0) + print('server_id:{}'.format(args.server_id)) + + # construct hccn_table + hccn_configs = open('/etc/hccn.conf', 'r').readlines() + device_ips = {} + for hccn_item in hccn_configs: + hccn_item = hccn_item.strip() + if hccn_item.startswith('address_'): + device_id, device_ip = hccn_item.split('=') + device_id = device_id.split('_')[1] + device_ips[device_id] = device_ip + print('device_id:{}, device_ip:{}'.format(device_id, device_ip)) + hccn_table = {} + hccn_table['board_id'] = '0x0000' + hccn_table['chip_info'] = '910' + hccn_table['deploy_mode'] = 'lab' + hccn_table['group_count'] = '1' + hccn_table['group_list'] = 
[] + instance_list = [] + usable_dev = '' + for instance_id in range(args.nproc_per_node): + instance = {} + instance['devices'] = [] + device_id = visible_devices[instance_id] + device_ip = device_ips[device_id] + usable_dev += str(device_id) + instance['devices'].append({ + 'device_id': device_id, + 'device_ip': device_ip, + }) + instance['rank_id'] = str(instance_id) + instance['server_id'] = args.server_id + instance_list.append(instance) + hccn_table['group_list'].append({ + 'device_num': str(args.nproc_per_node), + 'server_num': '1', + 'group_name': '', + 'instance_count': str(args.nproc_per_node), + 'instance_list': instance_list, + }) + hccn_table['para_plane_nic_location'] = 'device' + hccn_table['para_plane_nic_name'] = [] + for instance_id in range(args.nproc_per_node): + eth_id = visible_devices[instance_id] + hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id)) + hccn_table['para_plane_nic_num'] = str(args.nproc_per_node) + hccn_table['status'] = 'completed' + + # save hccn_table to file + table_path = os.getcwd() + if not os.path.exists(table_path): + os.mkdir(table_path) + table_fn = os.path.join(table_path, + 'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id)) + with open(table_fn, 'w') as table_fp: + json.dump(hccn_table, table_fp, indent=4) + sys.stdout.flush() + + # spawn the processes + for rank_id in range(0, args.nproc_per_node): + device_id = visible_devices[rank_id] + device_dir = os.path.join(os.getcwd(), 'device{}'.format(rank_id)) + rank_process = 'export RANK_SIZE={} && export RANK_ID={} && export DEVICE_ID={} && '.format(args.nproc_per_node, + rank_id, device_id) + if args.nproc_per_node > 1: + rank_process += 'export MINDSPORE_HCCL_CONFIG_PATH={} && '.format(table_fn) + rank_process += 'export RANK_TABLE_FILE={} && '.format(table_fn) + rank_process += 'rm -rf {dir} && mkdir {dir} && cd {dir} && python {script} '.format(dir=device_dir, + script=args.training_script + ) + rank_process += ' 
'.join(args.training_script_args) + ' > log{}.log 2>&1 &'.format(rank_id) + os.system(rank_process) + + +if __name__ == "__main__": + main() diff --git a/example/mobilenetv2_imagenet2012/lr_generator.py b/example/mobilenetv2_imagenet2012/lr_generator.py new file mode 100644 index 0000000000..68bbfe3158 --- /dev/null +++ b/example/mobilenetv2_imagenet2012/lr_generator.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""learning rate generator""" +import math +import numpy as np + + +def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch): + """ + generate learning rate array + + Args: + global_step(int): total steps of the training + lr_init(float): init learning rate + lr_end(float): end learning rate + lr_max(float): max learning rate + warmup_epochs(int): number of warmup epochs + total_epochs(int): total epoch of training + steps_per_epoch(int): steps of one epoch + + Returns: + np.array, learning rate array + """ + lr_each_step = [] + total_steps = steps_per_epoch * total_epochs + warmup_steps = steps_per_epoch * warmup_epochs + for i in range(total_steps): + if i < warmup_steps: + lr = lr_init + (lr_max - lr_init) * i / warmup_steps + else: + lr = lr_end + \ + (lr_max - lr_end) * \ + (1. + math.cos(math.pi * (i - warmup_steps) / (total_steps - warmup_steps))) / 2. 
+ if lr < 0.0: + lr = 0.0 + lr_each_step.append(lr) + + current_step = global_step + lr_each_step = np.array(lr_each_step).astype(np.float32) + learning_rate = lr_each_step[current_step:] + + return learning_rate diff --git a/example/mobilenetv2_imagenet2012/run_infer.sh b/example/mobilenetv2_imagenet2012/run_infer.sh new file mode 100644 index 0000000000..dc1e4d0b5d --- /dev/null +++ b/example/mobilenetv2_imagenet2012/run_infer.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +if [ $# != 2 ] +then + echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]" +exit 1 +fi + +if [ ! -d $1 ] +then + echo "error: DATASET_PATH=$1 is not a directory" +exit 1 +fi + +if [ ! -f $2 ] +then + echo "error: CHECKPOINT_PATH=$2 is not a file" +exit 1 +fi + +BASEPATH=$(cd "`dirname $0`" || exit; pwd) +export PYTHONPATH=${BASEPATH}:$PYTHONPATH +export DEVICE_ID=0 +export RANK_ID=0 +export RANK_SIZE=1 +if [ -d "eval" ]; +then + rm -rf ./eval +fi +mkdir ./eval +cd ./eval || exit +python ${BASEPATH}/eval.py \ + --checkpoint_path=$2 \ + --dataset_path=$1 &> infer.log & # dataset val folder path diff --git a/example/mobilenetv2_imagenet2012/run_train.sh b/example/mobilenetv2_imagenet2012/run_train.sh new file mode 100644 index 0000000000..3f92b4f172 --- /dev/null +++ b/example/mobilenetv2_imagenet2012/run_train.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +if [ $# != 4 ] +then + echo "Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]" +exit 1 +fi + +if [ $1 -lt 1 ] || [ $1 -gt 8 ] +then + echo "error: DEVICE_NUM=$1 is not in (1-8)" +exit 1 +fi + +if [ !
-d $4 ] +then + echo "error: DATASET_PATH=$4 is not a directory" +exit 1 +fi + +BASEPATH=$(cd "`dirname $0`" || exit; pwd) +export PYTHONPATH=${BASEPATH}:$PYTHONPATH +if [ -d "train" ]; +then + rm -rf ./train +fi +mkdir ./train +cd ./train || exit +python ${BASEPATH}/launch.py \ + --nproc_per_node=$1 \ + --visible_devices=$3 \ + --server_id=$2 \ + --training_script=${BASEPATH}/train.py \ + --dataset_path=$4 &> train.log & # dataset train folder diff --git a/example/mobilenetv2_imagenet2012/train.py b/example/mobilenetv2_imagenet2012/train.py new file mode 100644 index 0000000000..c12f2ef9c0 --- /dev/null +++ b/example/mobilenetv2_imagenet2012/train.py @@ -0,0 +1,186 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""train_imagenet.""" +import os +import time +import argparse +import random +import numpy as np +from dataset import create_dataset +from lr_generator import get_lr +from config import config +from mindspore import context +from mindspore import Tensor +from mindspore import nn +from mindspore.model_zoo.mobilenet import mobilenet_v2 +from mindspore.parallel._auto_parallel_context import auto_parallel_context +from mindspore.nn.optim.momentum import Momentum +from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits +from mindspore.nn.loss.loss import _Loss +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore.common import dtype as mstype + +from mindspore.train.model import Model, ParallelMode + +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback +from mindspore.train.loss_scale_manager import FixedLossScaleManager +import mindspore.dataset.engine as de +from mindspore.communication.management import init + +random.seed(1) +np.random.seed(1) +de.config.set_seed(1) + +parser = argparse.ArgumentParser(description='Image classification') +parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') +args_opt = parser.parse_args() + +device_id = int(os.getenv('DEVICE_ID')) +rank_id = int(os.getenv('RANK_ID')) +rank_size = int(os.getenv('RANK_SIZE')) +run_distribute = rank_size > 1 + +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False) +context.set_context(enable_task_sink=True) +context.set_context(enable_loop_sink=True) +context.set_context(enable_mem_reuse=True) + +class CrossEntropyWithLabelSmooth(_Loss): + """ + CrossEntropyWith LabelSmooth. + + Args: + smooth_factor (float): smooth factor, default=0. + num_classes (int): num classes + + Returns: + None. 
+ + Examples: + >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000) + """ + + def __init__(self, smooth_factor=0., num_classes=1000): + super(CrossEntropyWithLabelSmooth, self).__init__() + self.onehot = P.OneHot() + self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) + self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32) + self.ce = nn.SoftmaxCrossEntropyWithLogits() + self.mean = P.ReduceMean(False) + self.cast = P.Cast() + + def construct(self, logit, label): + one_hot_label = self.onehot(self.cast(label, mstype.int32), F.shape(logit)[1], self.on_value, self.off_value) + out_loss = self.ce(logit, one_hot_label) + out_loss = self.mean(out_loss, 0) + return out_loss + +class Monitor(Callback): + """ + Monitor loss and time. + + Args: + lr_init (numpy array): train lr + + Returns: + None + + Examples: + >>> Monitor(100,lr_init=Tensor([0.05]*100).asnumpy()) + """ + + def __init__(self, lr_init=None): + super(Monitor, self).__init__() + self.lr_init = lr_init + self.lr_init_len = len(lr_init) + + def epoch_begin(self, run_context): + self.losses = [] + self.epoch_time = time.time() + + def epoch_end(self, run_context): + cb_params = run_context.original_args() + + epoch_mseconds = (time.time() - self.epoch_time) * 1000 + per_step_mseconds = epoch_mseconds / cb_params.batch_num + print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds, + per_step_mseconds, + np.mean(self.losses) + ), flush=True) + + def step_begin(self, run_context): + self.step_time = time.time() + + def step_end(self, run_context): + cb_params = run_context.original_args() + step_mseconds = (time.time() - self.step_time) * 1000 + step_loss = cb_params.net_outputs + + if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor): + step_loss = step_loss[0] + if isinstance(step_loss, Tensor): + step_loss = np.mean(step_loss.asnumpy()) + + self.losses.append(step_loss) + cur_step_in_epoch = 
(cb_params.cur_step_num - 1) % cb_params.batch_num + + print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.3f}]".format( + cb_params.cur_epoch_num - 1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss, + np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]), flush=True) + + +if __name__ == '__main__': + if run_distribute: + context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL, + parameter_broadcast=True, mirror_mean=True) + auto_parallel_context().set_all_reduce_fusion_split_indices([140]) + init() + + epoch_size = config.epoch_size + net = mobilenet_v2(num_classes=config.num_classes) + net.add_flags_recursive(fp16=True) + for _, cell in net.cells_and_names(): + if isinstance(cell, nn.Dense): + cell.add_flags_recursive(fp32=True) + if config.label_smooth > 0: + loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth, num_classes=config.num_classes) + else: + loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean') + + print("train args: ", args_opt, "\ncfg: ", config, + "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size)) + + dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, + repeat_num=epoch_size, batch_size=config.batch_size) + step_size = dataset.get_dataset_size() + + loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) + lr = Tensor(get_lr(global_step=0, lr_init=0, lr_end=0, lr_max=config.lr, + warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size)) + opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, + config.weight_decay, config.loss_scale) + + model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale) + + cb = None + if rank_id == 0: + cb = [Monitor(lr_init=lr.asnumpy())] + if 
config.save_checkpoint: + config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * step_size, + keep_checkpoint_max=config.keep_checkpoint_max) + ckpt_cb = ModelCheckpoint(prefix="mobilenet", directory=config.save_checkpoint_path, config=config_ck) + cb += [ckpt_cb] + model.train(epoch_size, dataset, callbacks=cb) diff --git a/example/nlp_to_mindrecord/CLUERNER2020/README.md b/example/nlp_to_mindrecord/CLUERNER2020/README.md new file mode 100644 index 0000000000..7511b4ff88 --- /dev/null +++ b/example/nlp_to_mindrecord/CLUERNER2020/README.md @@ -0,0 +1,82 @@ +# Guideline to Convert Training Data CLUERNER2020 to MindRecord For Bert Fine Tuning + + + +- [What does the example do](#what-does-the-example-do) +- [How to use the example to process CLUERNER2020](#how-to-use-the-example-to-process-cluerner2020) + - [Download CLUERNER2020 and unzip](#download-cluerner2020-and-unzip) + - [Generate MindRecord](#generate-mindrecord) + - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord) + + + + +## What does the example do + +This example is based on [CLUERNER2020](https://www.cluebenchmarks.com/introduce.html) training data, generating MindRecord file, and finally used for Bert Fine Tuning progress. + +1. run.sh: generate MindRecord entry script + - data_processor_seq.py: the script from [CLUEbenchmark/CLUENER2020](https://github.com/CLUEbenchmark/CLUENER2020/tree/master/tf_version), we just change the part of the generated tfrecord to MindRecord. + - label2id.json: the file from [CLUEbenchmark/CLUENER2020](https://github.com/CLUEbenchmark/CLUENER2020/tree/master/tf_version). + - tokenization.py: the script from [CLUEbenchmark/CLUENER2020](https://github.com/CLUEbenchmark/CLUENER2020/tree/master/tf_version). + - vocab.txt: the file from [CLUEbenchmark/CLUENER2020](https://github.com/CLUEbenchmark/CLUENER2020/tree/master/tf_version). +2. run_read.py: create MindDataset by MindRecord entry script. 
+ - create_dataset.py: use MindDataset to read MindRecord to generate dataset. +3. data: the output directory for MindRecord. +4. cluener_public: the CLUENER2020 training data. + +## How to use the example to process CLUERNER2020 + +Download CLUERNER2020, convert it to MindRecord, use MindDataset to read MindRecord. + +### Download CLUERNER2020 and unzip + +1. Download the training data zip. + > [CLUERNER2020 dataset download address](https://www.cluebenchmarks.com/introduce.html) **-> 任务介绍 -> CLUENER 细粒度命名实体识别 -> cluener下载链接** + +2. Unzip the training data to dir example/nlp_to_mindrecord/CLUERNER2020/cluener_public. + ``` + unzip -d {your-mindspore}/example/nlp_to_mindrecord/CLUERNER2020/cluener_public cluener_public.zip + ``` + +### Generate MindRecord + +1. Run the run.sh script. + ```bash + bash run.sh + ``` + +2. Output like this: + ``` + ... + [INFO] ME(17603:139620983514944,MainProcess):2020-04-28-16:56:12.498.235 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['data/train.mindrecord'], and the list of index files are: ['data/train.mindrecord.db'] + ... + [INFO] ME(17603,python):2020-04-28-16:56:13.400.175 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + [INFO] ME(17603,python):2020-04-28-16:56:13.400.863 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + [INFO] ME(17603,python):2020-04-28-16:56:13.401.534 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + [INFO] ME(17603,python):2020-04-28-16:56:13.402.179 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + [INFO] ME(17603,python):2020-04-28-16:56:13.402.702 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + ... 
+ [INFO] ME(17603:139620983514944,MainProcess):2020-04-28-16:56:13.431.208 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['data/dev.mindrecord'], and the list of index files are: ['data/dev.mindrecord.db'] + ``` + +### Create MindDataset By MindRecord + +1. Run the run_read.sh script. + ```bash + bash run_read.sh + ``` + +2. Output like this: + ``` + ... + example 1340: input_ids: [ 101 3173 1290 4852 7676 3949 122 3299 123 126 3189 4510 8020 6381 5442 7357 2590 3636 8021 7676 3949 4294 1166 6121 3124 1277 6121 3124 7270 2135 3295 5789 3326 123 126 3189 1355 6134 1093 1325 3173 2399 6590 6791 8024 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1340: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1340: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1340: label_ids: [ 0 18 19 20 2 4 0 0 0 0 0 0 0 34 36 26 27 28 0 34 35 35 35 35 35 35 35 35 35 36 26 27 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1341: input_ids: [ 101 1728 711 4293 3868 1168 2190 2150 3791 934 3633 3428 4638 6237 7025 8024 3297 1400 5310 3362 6206 5023 5401 1744 3297 7770 3791 7368 976 1139 1104 2137 511 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1341: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1341: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + example 1341: label_ids: [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 19 19 19 19 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + ... 
+    ```
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py b/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py
new file mode 100644
index 0000000000..22914e985d
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py
@@ -0,0 +1,47 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""create MindDataset by MindRecord"""
+import mindspore.dataset as ds
+
+def create_dataset(data_file):
+    """Read a MindRecord file through MindDataset and print every row.
+
+    Prints the input_ids, input_mask, segment_ids and label_ids columns of each
+    row, a progress line after every 1000 rows and the total row count at the end.
+
+    Args:
+        data_file: path of the MindRecord file to read.
+    """
+    reader_count = 4
+    records = ds.MindDataset(dataset_file=data_file, num_parallel_workers=reader_count, shuffle=True)
+    # (message template, column name) for each column printed per row
+    row_messages = (
+        ("example {}: input_ids: {}", 'input_ids'),
+        ("example {}: input_mask: {}", 'input_mask'),
+        ("example {}: segment_ids: {}", 'segment_ids'),
+        ("example {}: label_ids: {}", 'label_ids'),
+    )
+    rows_read = 0
+    for record in records.create_dict_iterator():
+        for template, column in row_messages:
+            print(template.format(rows_read, record[column]))
+        rows_read += 1
+        if rows_read % 1000 == 0:
+            print("read rows: {}".format(rows_read))
+    print("total rows: {}".format(rows_read))
+
+if __name__ == '__main__':
+    for mindrecord_file in ('data/train.mindrecord', 'data/dev.mindrecord'):
+        create_dataset(mindrecord_file)
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/data/README.md b/example/nlp_to_mindrecord/CLUERNER2020/data/README.md
new file mode 100644
index
0000000000..7904933f43
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/data/README.md
@@ -0,0 +1 @@
+## output dir
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/data_processor_seq.py b/example/nlp_to_mindrecord/CLUERNER2020/data_processor_seq.py
new file mode 100644
index 0000000000..e0b5ff6ac1
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/data_processor_seq.py
@@ -0,0 +1,188 @@
+#!/usr/bin/python
+# coding:utf8
+"""
+@author: Cong Yu
+@time: 2019-12-07 17:03
+"""
+import json
+import tokenization
+import collections
+
+import numpy as np
+from mindspore.mindrecord import FileWriter
+
+# pylint: skip-file
+
+def _truncate_seq_pair(tokens_a, tokens_b, max_length):
+    """Truncates a sequence pair in place to the maximum length."""
+
+    # This is a simple heuristic which will always truncate the longer sequence
+    # one token at a time. This makes more sense than truncating an equal percent
+    # of tokens from each, since if one sequence is very short then each token
+    # that's truncated likely contains more information than a longer sequence.
+    while True:
+        total_length = len(tokens_a) + len(tokens_b)
+        if total_length <= max_length:
+            break
+        if len(tokens_a) > len(tokens_b):
+            tokens_a.pop()
+        else:
+            tokens_b.pop()
+
+
+def process_one_example(tokenizer, label2id, text, label, max_seq_len=128):
+    """Convert one labelled sentence into fixed-length model input lists.
+
+    Args:
+        tokenizer: object with tokenize() and convert_tokens_to_ids() methods
+            (the __main__ block passes a tokenization.FullTokenizer).
+        label2id: mapping from label string to integer label id.
+        text: sequence of text units; each unit is tokenized on its own.
+        label: sequence of label strings, one per element of `text`.
+        max_seq_len: length of every returned list, [CLS] and [SEP] included.
+
+    Returns:
+        Tuple (input_ids, input_mask, segment_ids, label_ids) of four lists, each
+        exactly `max_seq_len` long; positions past the real tokens are filled with 0.
+    """
+    textlist = list(text)
+    labellist = list(label)
+    tokens = []
+    labels = []
+    for i, word in enumerate(textlist):
+        token = tokenizer.tokenize(word)
+        tokens.extend(token)
+        label_1 = labellist[i]
+        for m in range(len(token)):
+            if m == 0:
+                labels.append(label_1)
+            else:
+                print("some unknown token...")
+                # NOTE(review): this reuses the label of the first token of the whole
+                # sentence (labels[0]), not label_1 -- confirm that this is intended.
+                labels.append(labels[0])
+    # Truncate to max_seq_len - 2 because a sentence-start and a sentence-end
+    # marker still have to be added.
+    if len(tokens) >= max_seq_len - 1:
+        tokens = tokens[0:(max_seq_len - 2)]
+        labels = labels[0:(max_seq_len - 2)]
+    ntokens = []
+    segment_ids = []
+    label_ids = []
+    ntokens.append("[CLS]")  # sentence start marker
+    segment_ids.append(0)
+    # [CLS] and [SEP] could get labels of their own or share an existing label;
+    # they never change and hardly take part in training, so id 0 is used.
+    label_ids.append(0)  # label2id["[CLS]"]
+    for i, token in enumerate(tokens):
+        ntokens.append(token)
+        segment_ids.append(0)
+        label_ids.append(label2id[labels[i]])
+    ntokens.append("[SEP]")
+    segment_ids.append(0)
+    # append("O") or append("[SEP]") not sure!
+    label_ids.append(0)  # label2id["[SEP]"]
+    input_ids = tokenizer.convert_tokens_to_ids(ntokens)
+    input_mask = [1] * len(input_ids)
+    while len(input_ids) < max_seq_len:
+        input_ids.append(0)
+        input_mask.append(0)
+        segment_ids.append(0)
+        label_ids.append(0)
+        ntokens.append("**NULL**")
+    assert len(input_ids) == max_seq_len
+    assert len(input_mask) == max_seq_len
+    assert len(segment_ids) == max_seq_len
+    assert len(label_ids) == max_seq_len
+
+    feature = (input_ids, input_mask, segment_ids, label_ids)
+    return feature
+
+
+def prepare_mindrecord_data(tokenizer, max_seq_len, label2id, path, out_path):
+    """Convert a CLUENER2020 JSON-lines file into a MindRecord file.
+
+    Every non-blank line of `path` is a JSON object with a "text" string and a
+    "label" dict shaped {entity type: {key: [[start, end], ...]}} where `end` is
+    inclusive. Each span is turned into per-character S_/B_/M_/E_ tags ("O"
+    elsewhere), the sentence is converted with process_one_example() and the
+    four resulting arrays are written to `out_path` as one record.
+
+    Args:
+        tokenizer: tokenizer passed through to process_one_example().
+        max_seq_len: length of every stored array.
+        label2id: mapping from tag string (e.g. "B_address") to integer id.
+        path: input JSON-lines file.
+        out_path: MindRecord file to create.
+    """
+    writer = FileWriter(out_path)
+
+    data_schema = {"input_ids": {"type": "int64", "shape": [-1]},
+                   "input_mask": {"type": "int64", "shape": [-1]},
+                   "segment_ids": {"type": "int64", "shape": [-1]},
+                   "label_ids": {"type": "int64", "shape": [-1]}}
+    writer.add_schema(data_schema, "CLUENER2020 schema")
+
+    example_count = 0
+
+    # Read as UTF-8 explicitly (JSON text is UTF-8) rather than with the platform's
+    # default encoding, and close the file as soon as the loop is done.
+    with open(path, encoding="utf-8") as reader:
+        for line in reader:
+            if not line.strip():
+                continue
+            _ = json.loads(line.strip())
+            len_ = len(_["text"])
+            labels = ["O"] * len_
+            for k, v in _["label"].items():
+                for kk, vv in v.items():
+                    for vvv in vv:
+                        span = vvv
+                        s = span[0]
+                        e = span[1] + 1  # exclusive end
+                        if e - s == 1:
+                            labels[s] = "S_" + k
+                        else:
+                            labels[s] = "B_" + k
+                            for i in range(s + 1, e - 1):
+                                labels[i] = "M_" + k
+                            labels[e - 1] = "E_" + k
+            feature = process_one_example(tokenizer, label2id, list(_["text"]), labels,
+                                          max_seq_len=max_seq_len)
+
+            features = collections.OrderedDict()
+            # sequence labeling task
+            features["input_ids"] = np.asarray(feature[0])
+            features["input_mask"] = np.asarray(feature[1])
+            features["segment_ids"] = np.asarray(feature[2])
+            features["label_ids"] = np.asarray(feature[3])
+            if example_count < 5:
+                print("*** Example ***")
+                print(_["text"])
+                print(_["label"])
+                print("input_ids: %s" % " ".join([str(x) for x in feature[0]]))
+                print("input_mask: %s" % " ".join([str(x) for x in feature[1]]))
+                print("segment_ids: %s" % " ".join([str(x) for x in feature[2]]))
+                print("label: %s " % " ".join([str(x) for x in feature[3]]))
+
+            writer.write_raw_data([features])
+            example_count += 1
+
+            if example_count % 3000 == 0:
+                print(example_count)
+    print("total example:", example_count)
+    writer.commit()
+
+
+if __name__ == "__main__":
+    vocab_file = "./vocab.txt"
+    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file)
+    with open("label2id.json", encoding="utf-8") as label_file:
+        label2id = json.load(label_file)
+
+    max_seq_len = 64
+
+    prepare_mindrecord_data(tokenizer, max_seq_len, label2id, path="cluener_public/train.json",
+                            out_path="data/train.mindrecord")
+    prepare_mindrecord_data(tokenizer, max_seq_len, label2id, path="cluener_public/dev.json",
+                            out_path="data/dev.mindrecord")
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/label2id.json b/example/nlp_to_mindrecord/CLUERNER2020/label2id.json
new file mode 100644
index 0000000000..f296bcb28f
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/label2id.json
@@ -0,0 +1,43 @@
+{
+  "O": 0,
+  "S_address": 1,
+  "B_address": 2,
+  "M_address": 3,
+  "E_address": 4,
+  "S_book": 5,
+  "B_book": 6,
+  "M_book": 7,
+  "E_book": 8,
+  "S_company": 9,
+  "B_company": 10,
+  "M_company": 11,
+  "E_company": 12,
+  "S_game": 13,
+  "B_game": 14,
+  "M_game": 15,
+  "E_game": 16,
+  "S_government": 17,
+  "B_government": 18,
+  "M_government": 19,
+  "E_government": 20,
+  "S_movie": 21,
+  "B_movie": 22,
+  "M_movie": 23,
+  "E_movie": 24,
+  "S_name": 25,
+  "B_name": 26,
+  "M_name": 27,
+  "E_name": 28,
+  "S_organization": 29,
+  "B_organization": 30,
+  "M_organization": 31,
+  "E_organization": 32,
+  "S_position": 33,
+  "B_position": 34,
+  "M_position": 35,
+  "E_position": 36,
+  "S_scene": 37,
+  "B_scene": 38,
+  "M_scene": 39,
+  "E_scene": 40
+}
\ No newline at end of file
diff
--git a/example/nlp_to_mindrecord/CLUERNER2020/run.sh b/example/nlp_to_mindrecord/CLUERNER2020/run.sh
new file mode 100644
index 0000000000..0200b2e9d7
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/run.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Remove the output of a previous run; -f keeps the first run (nothing to delete yet) quiet.
+rm -f data/train.mindrecord*
+rm -f data/dev.mindrecord*
+
+# Convert cluener_public/*.json into data/*.mindrecord.
+python data_processor_seq.py
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/run_read.sh b/example/nlp_to_mindrecord/CLUERNER2020/run_read.sh
new file mode 100644
index 0000000000..1ffe4de1cf
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/run_read.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+python create_dataset.py
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/tokenization.py b/example/nlp_to_mindrecord/CLUERNER2020/tokenization.py
new file mode 100644
index 0000000000..856021d6a9
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/tokenization.py
@@ -0,0 +1,407 @@
+"""Tokenization classes."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import re
+import unicodedata
+import six
+
+# pylint: skip-file
+
+def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
+    """Checks whether the casing config is consistent with the checkpoint name."""
+
+    # The casing has to be passed in by the user and there is no explicit check
+    # as to whether it matches the checkpoint. The casing information probably
+    # should have been stored in the bert_config.json file, but it's not, so
+    # we have to heuristically detect it to validate.
+
+    if not init_checkpoint:
+        return
+
+    m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
+    if m is None:
+        return
+
+    model_name = m.group(1)
+
+    lower_models = [
+        "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12",
+        "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12"
+    ]
+
+    cased_models = [
+        "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16",
+        "multi_cased_L-12_H-768_A-12"
+    ]
+
+    is_bad_config = False
+    if model_name in lower_models and not do_lower_case:
+        is_bad_config = True
+        actual_flag = "False"
+        case_name = "lowercased"
+        opposite_flag = "True"
+
+    if model_name in cased_models and do_lower_case:
+        is_bad_config = True
+        actual_flag = "True"
+        case_name = "cased"
+        opposite_flag = "False"
+
+    if is_bad_config:
+        raise ValueError(
+            "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. "
+            "However, `%s` seems to be a %s model, so you "
+            "should pass in `--do_lower_case=%s` so that the fine-tuning matches "
+            "how the model was pre-training. If this error is wrong, please "
+            "just comment out this check." % (actual_flag, init_checkpoint,
+                                              model_name, case_name, opposite_flag))
+
+
+def convert_to_unicode(text):
+    """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
+    if six.PY3:
+        if isinstance(text, str):
+            return text
+        elif isinstance(text, bytes):
+            return text.decode("utf-8", "ignore")
+        else:
+            raise ValueError("Unsupported string type: %s" % (type(text)))
+    elif six.PY2:
+        if isinstance(text, str):
+            return text.decode("utf-8", "ignore")
+        elif isinstance(text, unicode):
+            return text
+        else:
+            raise ValueError("Unsupported string type: %s" % (type(text)))
+    else:
+        raise ValueError("Not running on Python2 or Python 3?")
+
+
+def printable_text(text):
+    """Returns text encoded in a way suitable for print or `tf.logging`."""
+
+    # These functions want `str` for both Python2 and Python3, but in one case
+    # it's a Unicode string and in the other it's a byte string.
+    if six.PY3:
+        if isinstance(text, str):
+            return text
+        elif isinstance(text, bytes):
+            return text.decode("utf-8", "ignore")
+        else:
+            raise ValueError("Unsupported string type: %s" % (type(text)))
+    elif six.PY2:
+        if isinstance(text, str):
+            return text
+        elif isinstance(text, unicode):
+            return text.encode("utf-8")
+        else:
+            raise ValueError("Unsupported string type: %s" % (type(text)))
+    else:
+        raise ValueError("Not running on Python2 or Python 3?")
+
+
+def load_vocab(vocab_file):
+    """Loads a vocabulary file into a dictionary.
+
+    The file is read as UTF-8. Each line holds one token; after stripping
+    surrounding whitespace the token is mapped to its 0-based line number.
+
+    Returns:
+      An OrderedDict of token -> id in file order.
+    """
+    vocab = collections.OrderedDict()
+    index = 0
+    # Read raw bytes and let convert_to_unicode() decode them as UTF-8; text mode
+    # ("r") would decode with the platform's default encoding on Python 3.
+    with open(vocab_file, "rb") as reader:
+        while True:
+            token = convert_to_unicode(reader.readline())
+            if not token:
+                break
+            token = token.strip()
+            vocab[token] = index
+            index += 1
+    return vocab
+
+
+def convert_by_vocab(vocab, items):
+    """Converts a sequence of [tokens|ids] using the vocab."""
+    output = []
+    for item in items:
+        if item in vocab:
+            output.append(vocab[item])
+        else:
+            # NOTE(review): this fallback only works for a token -> id vocab. With an
+            # id -> token mapping (e.g. FullTokenizer.inv_vocab) there is no '[UNK]'
+            # key, so an unknown id raises KeyError here.
+            output.append(vocab['[UNK]'])
+    return output
+
+
+def convert_tokens_to_ids(vocab, tokens):
+    """Converts a sequence of tokens into ids using `vocab`."""
+    return convert_by_vocab(vocab, tokens)
+
+
+def convert_ids_to_tokens(inv_vocab, ids):
+    """Converts a sequence of ids into tokens using `inv_vocab`."""
+    return convert_by_vocab(inv_vocab, ids)
+
+
+def whitespace_tokenize(text):
+    """Runs basic whitespace cleaning and splitting on a piece of text."""
+    text = text.strip()
+    if not text:
+        return []
+    tokens = text.split()
+    return tokens
+
+
+class FullTokenizer(object):
+    """Runs end-to-end tokenization."""
+
+    def __init__(self, vocab_file, do_lower_case=True):
+        """Loads the vocabulary and builds the basic and wordpiece tokenizers."""
+        self.vocab = load_vocab(vocab_file)
+        self.inv_vocab = {v: k for k, v in self.vocab.items()}
+        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
+        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
+
+    def tokenize(self, text):
+        """Splits `text` with the basic tokenizer, then each piece into wordpieces."""
+        split_tokens = []
+        for token in self.basic_tokenizer.tokenize(text):
+            for sub_token in self.wordpiece_tokenizer.tokenize(token):
+                split_tokens.append(sub_token)
+
+        return split_tokens
+
+    def convert_tokens_to_ids(self, tokens):
+        """Maps tokens to ids; tokens missing from the vocab map to the '[UNK]' id."""
+        return convert_by_vocab(self.vocab, tokens)
+
+    def convert_ids_to_tokens(self, ids):
+        """Maps ids back to tokens through the inverted vocabulary."""
+        return convert_by_vocab(self.inv_vocab, ids)
+
+
+class BasicTokenizer(object):
+    """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
+
+    def __init__(self, do_lower_case=True):
+        """Constructs a BasicTokenizer.
+
+        Args:
+          do_lower_case: Whether to lower case the input.
+        """
+        self.do_lower_case = do_lower_case
+
+    def tokenize(self, text):
+        """Tokenizes a piece of text."""
+        text = convert_to_unicode(text)
+        text = self._clean_text(text)
+
+        # This was added on November 1st, 2018 for the multilingual and Chinese
+        # models. This is also applied to the English models now, but it doesn't
+        # matter since the English models were not trained on any Chinese data
+        # and generally don't have any Chinese data in them (there are Chinese
+        # characters in the vocabulary because Wikipedia does have some Chinese
+        # words in the English Wikipedia.).
+        text = self._tokenize_chinese_chars(text)
+
+        orig_tokens = whitespace_tokenize(text)
+        split_tokens = []
+        for token in orig_tokens:
+            if self.do_lower_case:
+                token = token.lower()
+                token = self._run_strip_accents(token)
+            split_tokens.extend(self._run_split_on_punc(token))
+
+        output_tokens = whitespace_tokenize(" ".join(split_tokens))
+        return output_tokens
+
+    def _run_strip_accents(self, text):
+        """Strips accents from a piece of text."""
+        text = unicodedata.normalize("NFD", text)
+        output = []
+        for char in text:
+            cat = unicodedata.category(char)
+            if cat == "Mn":
+                continue
+            output.append(char)
+        return "".join(output)
+
+    def _run_split_on_punc(self, text):
+        """Splits punctuation on a piece of text."""
+        chars = list(text)
+        i = 0
+        start_new_word = True
+        output = []
+        while i < len(chars):
+            char = chars[i]
+            if _is_punctuation(char):
+                output.append([char])
+                start_new_word = True
+            else:
+                if start_new_word:
+                    output.append([])
+                start_new_word = False
+                output[-1].append(char)
+            i += 1
+
+        return ["".join(x) for x in output]
+
+    def _tokenize_chinese_chars(self, text):
+        """Adds whitespace around any CJK character."""
+        output = []
+        for char in text:
+            cp = ord(char)
+            if self._is_chinese_char(cp):
+                output.append(" ")
+                output.append(char)
+                output.append(" ")
+            else:
+                output.append(char)
+        return "".join(output)
+
+    def _is_chinese_char(self, cp):
+        """Checks whether CP is the codepoint of a CJK character."""
+        # This defines a "chinese character" as anything in the CJK Unicode block:
+        #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+        #
+        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
+        # despite its name. The modern Korean Hangul alphabet is a different block,
+        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
+        # space-separated words, so they are not treated specially and handled
+        # like the all of the other languages.
+        if ((cp >= 0x4E00 and cp <= 0x9FFF) or  #
+                (cp >= 0x3400 and cp <= 0x4DBF) or  #
+                (cp >= 0x20000 and cp <= 0x2A6DF) or  #
+                (cp >= 0x2A700 and cp <= 0x2B73F) or  #
+                (cp >= 0x2B740 and cp <= 0x2B81F) or  #
+                (cp >= 0x2B820 and cp <= 0x2CEAF) or
+                (cp >= 0xF900 and cp <= 0xFAFF) or  #
+                (cp >= 0x2F800 and cp <= 0x2FA1F)):  #
+            return True
+
+        return False
+
+    def _clean_text(self, text):
+        """Performs invalid character removal and whitespace cleanup on text."""
+        output = []
+        for char in text:
+            cp = ord(char)
+            if cp == 0 or cp == 0xfffd or _is_control(char):
+                continue
+            if _is_whitespace(char):
+                output.append(" ")
+            else:
+                output.append(char)
+        return "".join(output)
+
+
+class WordpieceTokenizer(object):
+    """Runs WordPiece tokenization."""
+
+    def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200):
+        """Stores the vocab, the unknown-token string and the per-word length limit."""
+        self.vocab = vocab
+        self.unk_token = unk_token
+        self.max_input_chars_per_word = max_input_chars_per_word
+
+    def tokenize(self, text):
+        """Tokenizes a piece of text into its word pieces.
+
+        This uses a greedy longest-match-first algorithm to perform tokenization
+        using the given vocabulary.
+
+        For example:
+          input = "unaffable"
+          output = ["un", "##aff", "##able"]
+
+        Args:
+          text: A single token or whitespace separated tokens. This should have
+            already been passed through `BasicTokenizer.
+
+        Returns:
+          A list of wordpiece tokens.
+        """
+
+        text = convert_to_unicode(text)
+
+        output_tokens = []
+        for token in whitespace_tokenize(text):
+            chars = list(token)
+            if len(chars) > self.max_input_chars_per_word:
+                output_tokens.append(self.unk_token)
+                continue
+
+            is_bad = False
+            start = 0
+            sub_tokens = []
+            while start < len(chars):
+                end = len(chars)
+                cur_substr = None
+                while start < end:
+                    substr = "".join(chars[start:end])
+                    if start > 0:
+                        substr = "##" + substr
+                    if substr in self.vocab:
+                        cur_substr = substr
+                        break
+                    end -= 1
+                if cur_substr is None:
+                    is_bad = True
+                    break
+                sub_tokens.append(cur_substr)
+                start = end
+
+            if is_bad:
+                output_tokens.append(self.unk_token)
+            else:
+                output_tokens.extend(sub_tokens)
+        return output_tokens
+
+
+def _is_whitespace(char):
+    """Checks whether `chars` is a whitespace character."""
+    # \t, \n, and \r are technically control characters but we treat them
+    # as whitespace since they are generally considered as such.
+    if char == " " or char == "\t" or char == "\n" or char == "\r":
+        return True
+    cat = unicodedata.category(char)
+    if cat == "Zs":
+        return True
+    return False
+
+
+def _is_control(char):
+    """Checks whether `chars` is a control character."""
+    # These are technically control characters but we count them as whitespace
+    # characters.
+    if char == "\t" or char == "\n" or char == "\r":
+        return False
+    cat = unicodedata.category(char)
+    if cat.startswith("C"):
+        return True
+    return False
+
+
+def _is_punctuation(char):
+    """Checks whether `chars` is a punctuation character."""
+    cp = ord(char)
+    # We treat all non-letter/number ASCII as punctuation.
+    # Characters such as "^", "$", and "`" are not in the Unicode
+    # Punctuation class but we treat them as punctuation anyways, for
+    # consistency.
+    if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
+            (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
+        return True
+    cat = unicodedata.category(char)
+    if cat.startswith("P"):
+        return True
+    return False
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/vocab.txt b/example/nlp_to_mindrecord/CLUERNER2020/vocab.txt
new file mode 100644
index 0000000000..ca4f978103
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/vocab.txt
@@ -0,0 +1,21128 @@
+[PAD]
+[unused1]
+[unused2]
+[unused3]
+[unused4]
+[unused5]
+[unused6]
+[unused7]
+[unused8]
+[unused9]
+[unused10]
+[unused11]
+[unused12]
+[unused13]
+[unused14]
+[unused15]
+[unused16]
+[unused17]
+[unused18]
+[unused19]
+[unused20]
+[unused21]
+[unused22]
+[unused23]
+[unused24]
+[unused25]
+[unused26]
+[unused27]
+[unused28]
+[unused29]
+[unused30]
+[unused31]
+[unused32]
+[unused33]
+[unused34]
+[unused35]
+[unused36]
+[unused37]
+[unused38]
+[unused39]
+[unused40]
+[unused41]
+[unused42]
+[unused43]
+[unused44]
+[unused45]
+[unused46]
+[unused47]
+[unused48]
+[unused49]
+[unused50]
+[unused51]
+[unused52]
+[unused53]
+[unused54]
+[unused55]
+[unused56]
+[unused57]
+[unused58]
+[unused59]
+[unused60]
+[unused61]
+[unused62]
+[unused63]
+[unused64]
+[unused65]
+[unused66]
+[unused67]
+[unused68]
+[unused69]
+[unused70]
+[unused71]
+[unused72]
+[unused73]
+[unused74]
+[unused75]
+[unused76]
+[unused77]
+[unused78]
+[unused79]
+[unused80]
+[unused81]
+[unused82]
+[unused83]
+[unused84]
+[unused85]
+[unused86]
+[unused87]
+[unused88]
+[unused89]
+[unused90]
+[unused91]
+[unused92]
+[unused93]
+[unused94]
+[unused95]
+[unused96]
+[unused97]
+[unused98]
+[unused99]
+[UNK]
+[CLS]
+[SEP]
+[MASK]
+
+
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@ +[ +\ +] +^ +_ +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +£ +¤ +¥ +§ +© +« +® +° +± +² +³ +µ +· +¹ +º +» +¼ +× +ß +æ +÷ +ø +đ +ŋ +ɔ +ə +ɡ +ʰ +ˇ +ˈ +ˊ +ˋ +ˍ +ː +˙ +˚ +ˢ +α +β +γ +δ +ε +η +θ +ι +κ +λ +μ +ν +ο +π +ρ +ς +σ +τ +υ +φ +χ +ψ +ω +а +б +в +г +д +е +ж +з +и +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +ы +ь +я +і +ا +ب +ة +ت +د +ر +س +ع +ل +م +ن +ه +و +ي +۩ +ก +ง +น +ม +ย +ร +อ +า +เ +๑ +་ +ღ +ᄀ +ᄁ +ᄂ +ᄃ +ᄅ +ᄆ +ᄇ +ᄈ +ᄉ +ᄋ +ᄌ +ᄎ +ᄏ +ᄐ +ᄑ +ᄒ +ᅡ +ᅢ +ᅣ +ᅥ +ᅦ +ᅧ +ᅨ +ᅩ +ᅪ +ᅬ +ᅭ +ᅮ +ᅯ +ᅲ +ᅳ +ᅴ +ᅵ +ᆨ +ᆫ +ᆯ +ᆷ +ᆸ +ᆺ +ᆻ +ᆼ +ᗜ +ᵃ +ᵉ +ᵍ +ᵏ +ᵐ +ᵒ +ᵘ +‖ +„ +† +• +‥ +‧ +
 +‰ +′ +″ +‹ +› +※ +‿ +⁄ +ⁱ +⁺ +ⁿ +₁ +₂ +₃ +₄ +€ +℃ +№ +™ +ⅰ +ⅱ +ⅲ +ⅳ +ⅴ +← +↑ +→ +↓ +↔ +↗ +↘ +⇒ +∀ +− +∕ +∙ +√ +∞ +∟ +∠ +∣ +∥ +∩ +∮ +∶ +∼ +∽ +≈ +≒ +≡ +≤ +≥ +≦ +≧ +≪ +≫ +⊙ +⋅ +⋈ +⋯ +⌒ +① +② +③ +④ +⑤ +⑥ +⑦ +⑧ +⑨ +⑩ +⑴ +⑵ +⑶ +⑷ +⑸ +⒈ +⒉ +⒊ +⒋ +ⓒ +ⓔ +ⓘ +─ +━ +│ +┃ +┅ +┆ +┊ +┌ +└ +├ +┣ +═ +║ +╚ +╞ +╠ +╭ +╮ +╯ +╰ +╱ +╳ +▂ +▃ +▅ +▇ +█ +▉ +▋ +▌ +▍ +▎ +■ +□ +▪ +▫ +▬ +▲ +△ +▶ +► +▼ +▽ +◆ +◇ +○ +◎ +● +◕ +◠ +◢ +◤ +☀ +★ +☆ +☕ +☞ +☺ +☼ +♀ +♂ +♠ +♡ +♣ +♥ +♦ +♪ +♫ +♬ +✈ +✔ +✕ +✖ +✦ +✨ +✪ +✰ +✿ +❀ +❤ +➜ +➤ +⦿ +、 +。 +〃 +々 +〇 +〈 +〉 +《 +》 +「 +」 +『 +』 +【 +】 +〓 +〔 +〕 +〖 +〗 +〜 +〝 +〞 +ぁ +あ +ぃ +い +う +ぇ +え +お +か +き +く +け +こ +さ +し +す +せ +そ +た +ち +っ +つ +て +と +な +に +ぬ +ね +の +は +ひ +ふ +へ +ほ +ま +み +む +め +も +ゃ +や +ゅ +ゆ +ょ +よ +ら +り +る +れ +ろ +わ +を +ん +゜ +ゝ +ァ +ア +ィ +イ +ゥ +ウ +ェ +エ +ォ +オ +カ +キ +ク +ケ +コ +サ +シ +ス +セ +ソ +タ +チ +ッ +ツ +テ +ト +ナ +ニ +ヌ +ネ +ノ +ハ +ヒ +フ +ヘ +ホ +マ +ミ +ム +メ +モ +ャ +ヤ +ュ +ユ +ョ +ヨ +ラ +リ +ル +レ +ロ +ワ +ヲ +ン +ヶ +・ +ー +ヽ +ㄅ +ㄆ +ㄇ +ㄉ +ㄋ +ㄌ +ㄍ +ㄎ +ㄏ +ㄒ +ㄚ +ㄛ +ㄞ +ㄟ +ㄢ +ㄤ +ㄥ +ㄧ +ㄨ +ㆍ +㈦ +㊣ +㎡ +㗎 +一 +丁 +七 +万 +丈 +三 +上 +下 +不 +与 +丐 +丑 +专 +且 +丕 +世 +丘 +丙 +业 +丛 +东 +丝 +丞 +丟 +両 +丢 +两 +严 +並 +丧 +丨 +个 +丫 +中 +丰 +串 +临 +丶 +丸 +丹 +为 +主 +丼 +丽 +举 +丿 +乂 +乃 +久 +么 +义 +之 +乌 +乍 +乎 +乏 +乐 +乒 +乓 +乔 +乖 +乗 +乘 +乙 +乜 +九 +乞 +也 +习 +乡 +书 +乩 +买 +乱 +乳 +乾 +亀 +亂 +了 +予 +争 +事 +二 +于 +亏 +云 +互 +五 +井 +亘 +亙 +亚 +些 +亜 +亞 +亟 +亡 +亢 +交 +亥 +亦 +产 +亨 +亩 +享 +京 +亭 +亮 +亲 +亳 +亵 +人 +亿 +什 +仁 +仃 +仄 +仅 +仆 +仇 +今 +介 +仍 +从 +仏 +仑 +仓 +仔 +仕 +他 +仗 +付 +仙 +仝 +仞 +仟 +代 +令 +以 +仨 +仪 +们 +仮 +仰 +仲 +件 +价 +任 +份 +仿 +企 +伉 +伊 +伍 +伎 +伏 +伐 +休 +伕 +众 +优 +伙 +会 +伝 +伞 +伟 +传 +伢 +伤 +伦 +伪 +伫 +伯 +估 +伴 +伶 +伸 +伺 +似 +伽 +佃 +但 +佇 +佈 +位 +低 +住 +佐 +佑 +体 +佔 +何 +佗 +佘 +余 +佚 +佛 +作 +佝 +佞 +佟 +你 +佢 +佣 +佤 +佥 +佩 +佬 +佯 +佰 +佳 +併 +佶 +佻 +佼 +使 +侃 +侄 +來 +侈 +例 +侍 +侏 +侑 +侖 +侗 +供 +依 +侠 +価 +侣 +侥 +侦 +侧 +侨 +侬 +侮 +侯 +侵 +侶 +侷 +便 +係 +促 +俄 +俊 +俎 +俏 +俐 +俑 +俗 +俘 +俚 +保 +俞 +俟 +俠 +信 +俨 +俩 +俪 +俬 +俭 +修 +俯 +俱 +俳 +俸 +俺 +俾 +倆 +倉 +個 +倌 +倍 +倏 +們 +倒 +倔 +倖 +倘 +候 +倚 +倜 +借 +倡 +値 +倦 +倩 +倪 +倫 +倬 +倭 +倶 +债 +值 +倾 +偃 +假 +偈 +偉 +偌 +偎 +偏 +偕 +做 +停 +健 +側 +偵 +偶 +偷 +偻 +偽 +偿 +傀 +傅 +傍 +傑 +傘 +備 +傚 +傢 +傣 +傥 +储 +傩 +催 +傭 +傲 +傳 +債 +傷 +傻 +傾 +僅 +働 +像 +僑 
+僕 +僖 +僚 +僥 +僧 +僭 +僮 +僱 +僵 +價 +僻 +儀 +儂 +億 +儆 +儉 +儋 +儒 +儕 +儘 +償 +儡 +優 +儲 +儷 +儼 +儿 +兀 +允 +元 +兄 +充 +兆 +兇 +先 +光 +克 +兌 +免 +児 +兑 +兒 +兔 +兖 +党 +兜 +兢 +入 +內 +全 +兩 +八 +公 +六 +兮 +兰 +共 +兲 +关 +兴 +兵 +其 +具 +典 +兹 +养 +兼 +兽 +冀 +内 +円 +冇 +冈 +冉 +冊 +册 +再 +冏 +冒 +冕 +冗 +写 +军 +农 +冠 +冢 +冤 +冥 +冨 +冪 +冬 +冯 +冰 +冲 +决 +况 +冶 +冷 +冻 +冼 +冽 +冾 +净 +凄 +准 +凇 +凈 +凉 +凋 +凌 +凍 +减 +凑 +凛 +凜 +凝 +几 +凡 +凤 +処 +凪 +凭 +凯 +凰 +凱 +凳 +凶 +凸 +凹 +出 +击 +函 +凿 +刀 +刁 +刃 +分 +切 +刈 +刊 +刍 +刎 +刑 +划 +列 +刘 +则 +刚 +创 +初 +删 +判 +別 +刨 +利 +刪 +别 +刮 +到 +制 +刷 +券 +刹 +刺 +刻 +刽 +剁 +剂 +剃 +則 +剉 +削 +剋 +剌 +前 +剎 +剐 +剑 +剔 +剖 +剛 +剜 +剝 +剣 +剤 +剥 +剧 +剩 +剪 +副 +割 +創 +剷 +剽 +剿 +劃 +劇 +劈 +劉 +劊 +劍 +劏 +劑 +力 +劝 +办 +功 +加 +务 +劣 +动 +助 +努 +劫 +劭 +励 +劲 +劳 +労 +劵 +効 +劾 +势 +勁 +勃 +勇 +勉 +勋 +勐 +勒 +動 +勖 +勘 +務 +勛 +勝 +勞 +募 +勢 +勤 +勧 +勳 +勵 +勸 +勺 +勻 +勾 +勿 +匀 +包 +匆 +匈 +匍 +匐 +匕 +化 +北 +匙 +匝 +匠 +匡 +匣 +匪 +匮 +匯 +匱 +匹 +区 +医 +匾 +匿 +區 +十 +千 +卅 +升 +午 +卉 +半 +卍 +华 +协 +卑 +卒 +卓 +協 +单 +卖 +南 +単 +博 +卜 +卞 +卟 +占 +卡 +卢 +卤 +卦 +卧 +卫 +卮 +卯 +印 +危 +即 +却 +卵 +卷 +卸 +卻 +卿 +厂 +厄 +厅 +历 +厉 +压 +厌 +厕 +厘 +厚 +厝 +原 +厢 +厥 +厦 +厨 +厩 +厭 +厮 +厲 +厳 +去 +县 +叁 +参 +參 +又 +叉 +及 +友 +双 +反 +収 +发 +叔 +取 +受 +变 +叙 +叛 +叟 +叠 +叡 +叢 +口 +古 +句 +另 +叨 +叩 +只 +叫 +召 +叭 +叮 +可 +台 +叱 +史 +右 +叵 +叶 +号 +司 +叹 +叻 +叼 +叽 +吁 +吃 +各 +吆 +合 +吉 +吊 +吋 +同 +名 +后 +吏 +吐 +向 +吒 +吓 +吕 +吖 +吗 +君 +吝 +吞 +吟 +吠 +吡 +否 +吧 +吨 +吩 +含 +听 +吭 +吮 +启 +吱 +吳 +吴 +吵 +吶 +吸 +吹 +吻 +吼 +吽 +吾 +呀 +呂 +呃 +呆 +呈 +告 +呋 +呎 +呐 +呓 +呕 +呗 +员 +呛 +呜 +呢 +呤 +呦 +周 +呱 +呲 +味 +呵 +呷 +呸 +呻 +呼 +命 +咀 +咁 +咂 +咄 +咆 +咋 +和 +咎 +咏 +咐 +咒 +咔 +咕 +咖 +咗 +咘 +咙 +咚 +咛 +咣 +咤 +咦 +咧 +咨 +咩 +咪 +咫 +咬 +咭 +咯 +咱 +咲 +咳 +咸 +咻 +咽 +咿 +哀 +品 +哂 +哄 +哆 +哇 +哈 +哉 +哋 +哌 +响 +哎 +哏 +哐 +哑 +哒 +哔 +哗 +哟 +員 +哥 +哦 +哧 +哨 +哩 +哪 +哭 +哮 +哲 +哺 +哼 +哽 +唁 +唄 +唆 +唇 +唉 +唏 +唐 +唑 +唔 +唠 +唤 +唧 +唬 +售 +唯 +唰 +唱 +唳 +唷 +唸 +唾 +啃 +啄 +商 +啉 +啊 +問 +啓 +啕 +啖 +啜 +啞 +啟 +啡 +啤 +啥 +啦 +啧 +啪 +啫 +啬 +啮 +啰 +啱 +啲 +啵 +啶 +啷 +啸 +啻 +啼 +啾 +喀 +喂 +喃 +善 +喆 +喇 +喉 +喊 +喋 +喎 +喏 +喔 +喘 +喙 +喚 +喜 +喝 +喟 +喧 +喪 +喫 +喬 +單 +喰 +喱 +喲 +喳 +喵 +営 +喷 +喹 +喺 +喻 +喽 +嗅 +嗆 +嗇 +嗎 +嗑 +嗒 +嗓 +嗔 +嗖 +嗚 +嗜 +嗝 +嗟 +嗡 +嗣 +嗤 +嗦 +嗨 +嗪 +嗬 +嗯 +嗰 +嗲 +嗳 +嗶 +嗷 +嗽 +嘀 +嘅 +嘆 +嘈 +嘉 +嘌 +嘍 +嘎 +嘔 +嘖 +嘗 +嘘 +嘚 +嘛 +嘜 +嘞 +嘟 +嘢 +嘣 +嘤 +嘧 +嘩 +嘭 +嘮 +嘯 +嘰 +嘱 +嘲 +嘴 +嘶 +嘸 
+嘹 +嘻 +嘿 +噁 +噌 +噎 +噓 +噔 +噗 +噙 +噜 +噠 +噢 +噤 +器 +噩 +噪 +噬 +噱 +噴 +噶 +噸 +噹 +噻 +噼 +嚀 +嚇 +嚎 +嚏 +嚐 +嚓 +嚕 +嚟 +嚣 +嚥 +嚨 +嚮 +嚴 +嚷 +嚼 +囂 +囉 +囊 +囍 +囑 +囔 +囗 +囚 +四 +囝 +回 +囟 +因 +囡 +团 +団 +囤 +囧 +囪 +囫 +园 +困 +囱 +囲 +図 +围 +囹 +固 +国 +图 +囿 +圃 +圄 +圆 +圈 +國 +圍 +圏 +園 +圓 +圖 +團 +圜 +土 +圣 +圧 +在 +圩 +圭 +地 +圳 +场 +圻 +圾 +址 +坂 +均 +坊 +坍 +坎 +坏 +坐 +坑 +块 +坚 +坛 +坝 +坞 +坟 +坠 +坡 +坤 +坦 +坨 +坪 +坯 +坳 +坵 +坷 +垂 +垃 +垄 +型 +垒 +垚 +垛 +垠 +垢 +垣 +垦 +垩 +垫 +垭 +垮 +垵 +埂 +埃 +埋 +城 +埔 +埕 +埗 +域 +埠 +埤 +埵 +執 +埸 +培 +基 +埼 +堀 +堂 +堃 +堅 +堆 +堇 +堑 +堕 +堙 +堡 +堤 +堪 +堯 +堰 +報 +場 +堵 +堺 +堿 +塊 +塌 +塑 +塔 +塗 +塘 +塚 +塞 +塢 +塩 +填 +塬 +塭 +塵 +塾 +墀 +境 +墅 +墉 +墊 +墒 +墓 +増 +墘 +墙 +墜 +增 +墟 +墨 +墩 +墮 +墳 +墻 +墾 +壁 +壅 +壆 +壇 +壊 +壑 +壓 +壕 +壘 +壞 +壟 +壢 +壤 +壩 +士 +壬 +壮 +壯 +声 +売 +壳 +壶 +壹 +壺 +壽 +处 +备 +変 +复 +夏 +夔 +夕 +外 +夙 +多 +夜 +够 +夠 +夢 +夥 +大 +天 +太 +夫 +夭 +央 +夯 +失 +头 +夷 +夸 +夹 +夺 +夾 +奂 +奄 +奇 +奈 +奉 +奋 +奎 +奏 +奐 +契 +奔 +奕 +奖 +套 +奘 +奚 +奠 +奢 +奥 +奧 +奪 +奬 +奮 +女 +奴 +奶 +奸 +她 +好 +如 +妃 +妄 +妆 +妇 +妈 +妊 +妍 +妒 +妓 +妖 +妘 +妙 +妝 +妞 +妣 +妤 +妥 +妨 +妩 +妪 +妮 +妲 +妳 +妹 +妻 +妾 +姆 +姉 +姊 +始 +姍 +姐 +姑 +姒 +姓 +委 +姗 +姚 +姜 +姝 +姣 +姥 +姦 +姨 +姪 +姫 +姬 +姹 +姻 +姿 +威 +娃 +娄 +娅 +娆 +娇 +娉 +娑 +娓 +娘 +娛 +娜 +娟 +娠 +娣 +娥 +娩 +娱 +娲 +娴 +娶 +娼 +婀 +婁 +婆 +婉 +婊 +婕 +婚 +婢 +婦 +婧 +婪 +婭 +婴 +婵 +婶 +婷 +婺 +婿 +媒 +媚 +媛 +媞 +媧 +媲 +媳 +媽 +媾 +嫁 +嫂 +嫉 +嫌 +嫑 +嫔 +嫖 +嫘 +嫚 +嫡 +嫣 +嫦 +嫩 +嫲 +嫵 +嫻 +嬅 +嬉 +嬌 +嬗 +嬛 +嬢 +嬤 +嬪 +嬰 +嬴 +嬷 +嬸 +嬿 +孀 +孃 +子 +孑 +孔 +孕 +孖 +字 +存 +孙 +孚 +孛 +孜 +孝 +孟 +孢 +季 +孤 +学 +孩 +孪 +孫 +孬 +孰 +孱 +孳 +孵 +學 +孺 +孽 +孿 +宁 +它 +宅 +宇 +守 +安 +宋 +完 +宏 +宓 +宕 +宗 +官 +宙 +定 +宛 +宜 +宝 +实 +実 +宠 +审 +客 +宣 +室 +宥 +宦 +宪 +宫 +宮 +宰 +害 +宴 +宵 +家 +宸 +容 +宽 +宾 +宿 +寂 +寄 +寅 +密 +寇 +富 +寐 +寒 +寓 +寛 +寝 +寞 +察 +寡 +寢 +寥 +實 +寧 +寨 +審 +寫 +寬 +寮 +寰 +寵 +寶 +寸 +对 +寺 +寻 +导 +対 +寿 +封 +専 +射 +将 +將 +專 +尉 +尊 +尋 +對 +導 +小 +少 +尔 +尕 +尖 +尘 +尚 +尝 +尤 +尧 +尬 +就 +尴 +尷 +尸 +尹 +尺 +尻 +尼 +尽 +尾 +尿 +局 +屁 +层 +屄 +居 +屆 +屈 +屉 +届 +屋 +屌 +屍 +屎 +屏 +屐 +屑 +展 +屜 +属 +屠 +屡 +屢 +層 +履 +屬 +屯 +山 +屹 +屿 +岀 +岁 +岂 +岌 +岐 +岑 +岔 +岖 +岗 +岘 +岙 +岚 +岛 +岡 +岩 +岫 +岬 +岭 +岱 +岳 +岷 +岸 +峇 +峋 +峒 +峙 +峡 +峤 +峥 +峦 +峨 +峪 +峭 +峯 +峰 +峴 +島 +峻 +峽 +崁 +崂 +崆 +崇 +崎 +崑 +崔 +崖 +崗 +崙 +崛 +崧 +崩 +崭 +崴 +崽 +嵇 +嵊 +嵋 +嵌 +嵐 +嵘 +嵩 +嵬 +嵯 +嶂 +嶄 +嶇 +嶋 +嶙 +嶺 +嶼 +嶽 +巅 +巍 +巒 +巔 +巖 +川 +州 +巡 +巢 +工 +左 +巧 
+巨 +巩 +巫 +差 +己 +已 +巳 +巴 +巷 +巻 +巽 +巾 +巿 +币 +市 +布 +帅 +帆 +师 +希 +帐 +帑 +帕 +帖 +帘 +帚 +帛 +帜 +帝 +帥 +带 +帧 +師 +席 +帮 +帯 +帰 +帳 +帶 +帷 +常 +帼 +帽 +幀 +幂 +幄 +幅 +幌 +幔 +幕 +幟 +幡 +幢 +幣 +幫 +干 +平 +年 +并 +幸 +幹 +幺 +幻 +幼 +幽 +幾 +广 +庁 +広 +庄 +庆 +庇 +床 +序 +庐 +库 +应 +底 +庖 +店 +庙 +庚 +府 +庞 +废 +庠 +度 +座 +庫 +庭 +庵 +庶 +康 +庸 +庹 +庾 +廁 +廂 +廃 +廈 +廉 +廊 +廓 +廖 +廚 +廝 +廟 +廠 +廢 +廣 +廬 +廳 +延 +廷 +建 +廿 +开 +弁 +异 +弃 +弄 +弈 +弊 +弋 +式 +弑 +弒 +弓 +弔 +引 +弗 +弘 +弛 +弟 +张 +弥 +弦 +弧 +弩 +弭 +弯 +弱 +張 +強 +弹 +强 +弼 +弾 +彅 +彆 +彈 +彌 +彎 +归 +当 +录 +彗 +彙 +彝 +形 +彤 +彥 +彦 +彧 +彩 +彪 +彫 +彬 +彭 +彰 +影 +彷 +役 +彻 +彼 +彿 +往 +征 +径 +待 +徇 +很 +徉 +徊 +律 +後 +徐 +徑 +徒 +従 +徕 +得 +徘 +徙 +徜 +從 +徠 +御 +徨 +復 +循 +徬 +微 +徳 +徴 +徵 +德 +徹 +徼 +徽 +心 +必 +忆 +忌 +忍 +忏 +忐 +忑 +忒 +忖 +志 +忘 +忙 +応 +忠 +忡 +忤 +忧 +忪 +快 +忱 +念 +忻 +忽 +忿 +怀 +态 +怂 +怅 +怆 +怎 +怏 +怒 +怔 +怕 +怖 +怙 +怜 +思 +怠 +怡 +急 +怦 +性 +怨 +怪 +怯 +怵 +总 +怼 +恁 +恃 +恆 +恋 +恍 +恐 +恒 +恕 +恙 +恚 +恢 +恣 +恤 +恥 +恨 +恩 +恪 +恫 +恬 +恭 +息 +恰 +恳 +恵 +恶 +恸 +恺 +恻 +恼 +恿 +悄 +悅 +悉 +悌 +悍 +悔 +悖 +悚 +悟 +悠 +患 +悦 +您 +悩 +悪 +悬 +悯 +悱 +悲 +悴 +悵 +悶 +悸 +悻 +悼 +悽 +情 +惆 +惇 +惊 +惋 +惑 +惕 +惘 +惚 +惜 +惟 +惠 +惡 +惦 +惧 +惨 +惩 +惫 +惬 +惭 +惮 +惯 +惰 +惱 +想 +惴 +惶 +惹 +惺 +愁 +愆 +愈 +愉 +愍 +意 +愕 +愚 +愛 +愜 +感 +愣 +愤 +愧 +愫 +愷 +愿 +慄 +慈 +態 +慌 +慎 +慑 +慕 +慘 +慚 +慟 +慢 +慣 +慧 +慨 +慫 +慮 +慰 +慳 +慵 +慶 +慷 +慾 +憂 +憊 +憋 +憎 +憐 +憑 +憔 +憚 +憤 +憧 +憨 +憩 +憫 +憬 +憲 +憶 +憾 +懂 +懇 +懈 +應 +懊 +懋 +懑 +懒 +懦 +懲 +懵 +懶 +懷 +懸 +懺 +懼 +懾 +懿 +戀 +戈 +戊 +戌 +戍 +戎 +戏 +成 +我 +戒 +戕 +或 +战 +戚 +戛 +戟 +戡 +戦 +截 +戬 +戮 +戰 +戲 +戳 +戴 +戶 +户 +戸 +戻 +戾 +房 +所 +扁 +扇 +扈 +扉 +手 +才 +扎 +扑 +扒 +打 +扔 +払 +托 +扛 +扣 +扦 +执 +扩 +扪 +扫 +扬 +扭 +扮 +扯 +扰 +扱 +扳 +扶 +批 +扼 +找 +承 +技 +抄 +抉 +把 +抑 +抒 +抓 +投 +抖 +抗 +折 +抚 +抛 +抜 +択 +抟 +抠 +抡 +抢 +护 +报 +抨 +披 +抬 +抱 +抵 +抹 +押 +抽 +抿 +拂 +拄 +担 +拆 +拇 +拈 +拉 +拋 +拌 +拍 +拎 +拐 +拒 +拓 +拔 +拖 +拗 +拘 +拙 +拚 +招 +拜 +拟 +拡 +拢 +拣 +拥 +拦 +拧 +拨 +择 +括 +拭 +拮 +拯 +拱 +拳 +拴 +拷 +拼 +拽 +拾 +拿 +持 +挂 +指 +挈 +按 +挎 +挑 +挖 +挙 +挚 +挛 +挝 +挞 +挟 +挠 +挡 +挣 +挤 +挥 +挨 +挪 +挫 +振 +挲 +挹 +挺 +挽 +挾 +捂 +捅 +捆 +捉 +捋 +捌 +捍 +捎 +捏 +捐 +捕 +捞 +损 +捡 +换 +捣 +捧 +捨 +捩 +据 +捱 +捲 +捶 +捷 +捺 +捻 +掀 +掂 +掃 +掇 +授 +掉 +掌 +掏 +掐 +排 +掖 +掘 +掙 +掛 +掠 +採 +探 +掣 +接 +控 +推 +掩 +措 +掬 +掰 +掲 +掳 +掴 +掷 +掸 +掺 +揀 +揃 +揄 +揆 +揉 +揍 +描 +提 +插 +揖 +揚 +換 +握 +揣 +揩 +揪 +揭 +揮 +援 +揶 +揸 +揹 +揽 +搀 +搁 
+搂 +搅 +損 +搏 +搐 +搓 +搔 +搖 +搗 +搜 +搞 +搡 +搪 +搬 +搭 +搵 +搶 +携 +搽 +摀 +摁 +摄 +摆 +摇 +摈 +摊 +摒 +摔 +摘 +摞 +摟 +摧 +摩 +摯 +摳 +摸 +摹 +摺 +摻 +撂 +撃 +撅 +撇 +撈 +撐 +撑 +撒 +撓 +撕 +撚 +撞 +撤 +撥 +撩 +撫 +撬 +播 +撮 +撰 +撲 +撵 +撷 +撸 +撻 +撼 +撿 +擀 +擁 +擂 +擄 +擅 +擇 +擊 +擋 +操 +擎 +擒 +擔 +擘 +據 +擞 +擠 +擡 +擢 +擦 +擬 +擰 +擱 +擲 +擴 +擷 +擺 +擼 +擾 +攀 +攏 +攒 +攔 +攘 +攙 +攜 +攝 +攞 +攢 +攣 +攤 +攥 +攪 +攫 +攬 +支 +收 +攸 +改 +攻 +放 +政 +故 +效 +敌 +敍 +敎 +敏 +救 +敕 +敖 +敗 +敘 +教 +敛 +敝 +敞 +敢 +散 +敦 +敬 +数 +敲 +整 +敵 +敷 +數 +斂 +斃 +文 +斋 +斌 +斎 +斐 +斑 +斓 +斗 +料 +斛 +斜 +斟 +斡 +斤 +斥 +斧 +斩 +斫 +斬 +断 +斯 +新 +斷 +方 +於 +施 +旁 +旃 +旅 +旋 +旌 +旎 +族 +旖 +旗 +无 +既 +日 +旦 +旧 +旨 +早 +旬 +旭 +旮 +旱 +时 +旷 +旺 +旻 +昀 +昂 +昆 +昇 +昉 +昊 +昌 +明 +昏 +易 +昔 +昕 +昙 +星 +映 +春 +昧 +昨 +昭 +是 +昱 +昴 +昵 +昶 +昼 +显 +晁 +時 +晃 +晉 +晋 +晌 +晏 +晒 +晓 +晔 +晕 +晖 +晗 +晚 +晝 +晞 +晟 +晤 +晦 +晨 +晩 +普 +景 +晰 +晴 +晶 +晷 +智 +晾 +暂 +暄 +暇 +暈 +暉 +暌 +暐 +暑 +暖 +暗 +暝 +暢 +暧 +暨 +暫 +暮 +暱 +暴 +暸 +暹 +曄 +曆 +曇 +曉 +曖 +曙 +曜 +曝 +曠 +曦 +曬 +曰 +曲 +曳 +更 +書 +曹 +曼 +曾 +替 +最 +會 +月 +有 +朋 +服 +朐 +朔 +朕 +朗 +望 +朝 +期 +朦 +朧 +木 +未 +末 +本 +札 +朮 +术 +朱 +朴 +朵 +机 +朽 +杀 +杂 +权 +杆 +杈 +杉 +李 +杏 +材 +村 +杓 +杖 +杜 +杞 +束 +杠 +条 +来 +杨 +杭 +杯 +杰 +東 +杳 +杵 +杷 +杼 +松 +板 +极 +构 +枇 +枉 +枋 +析 +枕 +林 +枚 +果 +枝 +枢 +枣 +枪 +枫 +枭 +枯 +枰 +枱 +枳 +架 +枷 +枸 +柄 +柏 +某 +柑 +柒 +染 +柔 +柘 +柚 +柜 +柞 +柠 +柢 +查 +柩 +柬 +柯 +柱 +柳 +柴 +柵 +査 +柿 +栀 +栃 +栄 +栅 +标 +栈 +栉 +栋 +栎 +栏 +树 +栓 +栖 +栗 +校 +栩 +株 +样 +核 +根 +格 +栽 +栾 +桀 +桁 +桂 +桃 +桅 +框 +案 +桉 +桌 +桎 +桐 +桑 +桓 +桔 +桜 +桠 +桡 +桢 +档 +桥 +桦 +桧 +桨 +桩 +桶 +桿 +梁 +梅 +梆 +梏 +梓 +梗 +條 +梟 +梢 +梦 +梧 +梨 +梭 +梯 +械 +梳 +梵 +梶 +检 +棂 +棄 +棉 +棋 +棍 +棒 +棕 +棗 +棘 +棚 +棟 +棠 +棣 +棧 +森 +棱 +棲 +棵 +棹 +棺 +椁 +椅 +椋 +植 +椎 +椒 +検 +椪 +椭 +椰 +椹 +椽 +椿 +楂 +楊 +楓 +楔 +楚 +楝 +楞 +楠 +楣 +楨 +楫 +業 +楮 +極 +楷 +楸 +楹 +楼 +楽 +概 +榄 +榆 +榈 +榉 +榔 +榕 +榖 +榛 +榜 +榨 +榫 +榭 +榮 +榱 +榴 +榷 +榻 +槁 +槃 +構 +槌 +槍 +槎 +槐 +槓 +様 +槛 +槟 +槤 +槭 +槲 +槳 +槻 +槽 +槿 +樁 +樂 +樊 +樑 +樓 +標 +樞 +樟 +模 +樣 +権 +横 +樫 +樯 +樱 +樵 +樸 +樹 +樺 +樽 +樾 +橄 +橇 +橋 +橐 +橘 +橙 +機 +橡 +橢 +橫 +橱 +橹 +橼 +檀 +檄 +檎 +檐 +檔 +檗 +檜 +檢 +檬 +檯 +檳 +檸 +檻 +櫃 +櫚 +櫛 +櫥 +櫸 +櫻 +欄 +權 +欒 +欖 +欠 +次 +欢 +欣 +欧 +欲 +欸 +欺 +欽 +款 +歆 +歇 +歉 +歌 +歎 +歐 +歓 +歙 +歛 +歡 +止 +正 +此 +步 +武 +歧 +歩 +歪 +歯 +歲 +歳 +歴 +歷 +歸 +歹 +死 +歼 +殁 +殃 +殆 +殇 +殉 +殊 +残 +殒 +殓 +殖 +殘 +殞 +殡 +殤 +殭 +殯 +殲 +殴 +段 +殷 +殺 +殼 +殿 +毀 +毁 
+毂 +毅 +毆 +毋 +母 +毎 +每 +毒 +毓 +比 +毕 +毗 +毘 +毙 +毛 +毡 +毫 +毯 +毽 +氈 +氏 +氐 +民 +氓 +气 +氖 +気 +氙 +氛 +氟 +氡 +氢 +氣 +氤 +氦 +氧 +氨 +氪 +氫 +氮 +氯 +氰 +氲 +水 +氷 +永 +氹 +氾 +汀 +汁 +求 +汆 +汇 +汉 +汎 +汐 +汕 +汗 +汙 +汛 +汝 +汞 +江 +池 +污 +汤 +汨 +汩 +汪 +汰 +汲 +汴 +汶 +汹 +決 +汽 +汾 +沁 +沂 +沃 +沅 +沈 +沉 +沌 +沏 +沐 +沒 +沓 +沖 +沙 +沛 +沟 +没 +沢 +沣 +沥 +沦 +沧 +沪 +沫 +沭 +沮 +沱 +河 +沸 +油 +治 +沼 +沽 +沾 +沿 +況 +泄 +泉 +泊 +泌 +泓 +法 +泗 +泛 +泞 +泠 +泡 +波 +泣 +泥 +注 +泪 +泫 +泮 +泯 +泰 +泱 +泳 +泵 +泷 +泸 +泻 +泼 +泽 +泾 +洁 +洄 +洋 +洒 +洗 +洙 +洛 +洞 +津 +洩 +洪 +洮 +洱 +洲 +洵 +洶 +洸 +洹 +活 +洼 +洽 +派 +流 +浃 +浄 +浅 +浆 +浇 +浊 +测 +济 +浏 +浑 +浒 +浓 +浔 +浙 +浚 +浜 +浣 +浦 +浩 +浪 +浬 +浮 +浯 +浴 +海 +浸 +涂 +涅 +涇 +消 +涉 +涌 +涎 +涓 +涔 +涕 +涙 +涛 +涝 +涞 +涟 +涠 +涡 +涣 +涤 +润 +涧 +涨 +涩 +涪 +涮 +涯 +液 +涵 +涸 +涼 +涿 +淀 +淄 +淅 +淆 +淇 +淋 +淌 +淑 +淒 +淖 +淘 +淙 +淚 +淞 +淡 +淤 +淦 +淨 +淩 +淪 +淫 +淬 +淮 +深 +淳 +淵 +混 +淹 +淺 +添 +淼 +清 +済 +渉 +渊 +渋 +渍 +渎 +渐 +渔 +渗 +渙 +渚 +減 +渝 +渠 +渡 +渣 +渤 +渥 +渦 +温 +測 +渭 +港 +渲 +渴 +游 +渺 +渾 +湃 +湄 +湊 +湍 +湖 +湘 +湛 +湟 +湧 +湫 +湮 +湯 +湳 +湾 +湿 +満 +溃 +溅 +溉 +溏 +源 +準 +溜 +溝 +溟 +溢 +溥 +溧 +溪 +溫 +溯 +溱 +溴 +溶 +溺 +溼 +滁 +滂 +滄 +滅 +滇 +滋 +滌 +滑 +滓 +滔 +滕 +滙 +滚 +滝 +滞 +滟 +满 +滢 +滤 +滥 +滦 +滨 +滩 +滬 +滯 +滲 +滴 +滷 +滸 +滾 +滿 +漁 +漂 +漆 +漉 +漏 +漓 +演 +漕 +漠 +漢 +漣 +漩 +漪 +漫 +漬 +漯 +漱 +漲 +漳 +漸 +漾 +漿 +潆 +潇 +潋 +潍 +潑 +潔 +潘 +潛 +潜 +潞 +潟 +潢 +潤 +潦 +潧 +潭 +潮 +潰 +潴 +潸 +潺 +潼 +澀 +澄 +澆 +澈 +澍 +澎 +澗 +澜 +澡 +澤 +澧 +澱 +澳 +澹 +激 +濁 +濂 +濃 +濑 +濒 +濕 +濘 +濛 +濟 +濠 +濡 +濤 +濫 +濬 +濮 +濯 +濱 +濺 +濾 +瀅 +瀆 +瀉 +瀋 +瀏 +瀑 +瀕 +瀘 +瀚 +瀛 +瀝 +瀞 +瀟 +瀧 +瀨 +瀬 +瀰 +瀾 +灌 +灏 +灑 +灘 +灝 +灞 +灣 +火 +灬 +灭 +灯 +灰 +灵 +灶 +灸 +灼 +災 +灾 +灿 +炀 +炁 +炅 +炉 +炊 +炎 +炒 +炔 +炕 +炖 +炙 +炜 +炫 +炬 +炭 +炮 +炯 +炳 +炷 +炸 +点 +為 +炼 +炽 +烁 +烂 +烃 +烈 +烊 +烏 +烘 +烙 +烛 +烟 +烤 +烦 +烧 +烨 +烩 +烫 +烬 +热 +烯 +烷 +烹 +烽 +焉 +焊 +焕 +焖 +焗 +焘 +焙 +焚 +焜 +無 +焦 +焯 +焰 +焱 +然 +焼 +煅 +煉 +煊 +煌 +煎 +煒 +煖 +煙 +煜 +煞 +煤 +煥 +煦 +照 +煨 +煩 +煮 +煲 +煸 +煽 +熄 +熊 +熏 +熒 +熔 +熙 +熟 +熠 +熨 +熬 +熱 +熵 +熹 +熾 +燁 +燃 +燄 +燈 +燉 +燊 +燎 +燒 +燔 +燕 +燙 +燜 +營 +燥 +燦 +燧 +燭 +燮 +燴 +燻 +燼 +燿 +爆 +爍 +爐 +爛 +爪 +爬 +爭 +爰 +爱 +爲 +爵 +父 +爷 +爸 +爹 +爺 +爻 +爽 +爾 +牆 +片 +版 +牌 +牍 +牒 +牙 +牛 +牝 +牟 +牠 +牡 +牢 +牦 +牧 +物 +牯 +牲 +牴 +牵 +特 +牺 +牽 +犀 +犁 +犄 +犊 +犍 +犒 +犢 +犧 +犬 +犯 +状 +犷 +犸 +犹 +狀 +狂 +狄 +狈 +狎 +狐 +狒 +狗 +狙 +狞 +狠 +狡 +狩 +独 +狭 +狮 +狰 +狱 +狸 +狹 +狼 +狽 +猎 +猕 +猖 +猗 +猙 +猛 +猜 
+猝 +猥 +猩 +猪 +猫 +猬 +献 +猴 +猶 +猷 +猾 +猿 +獄 +獅 +獎 +獐 +獒 +獗 +獠 +獣 +獨 +獭 +獰 +獲 +獵 +獷 +獸 +獺 +獻 +獼 +獾 +玄 +率 +玉 +王 +玑 +玖 +玛 +玟 +玠 +玥 +玩 +玫 +玮 +环 +现 +玲 +玳 +玷 +玺 +玻 +珀 +珂 +珅 +珈 +珉 +珊 +珍 +珏 +珐 +珑 +珙 +珞 +珠 +珣 +珥 +珩 +珪 +班 +珮 +珲 +珺 +現 +球 +琅 +理 +琇 +琉 +琊 +琍 +琏 +琐 +琛 +琢 +琥 +琦 +琨 +琪 +琬 +琮 +琰 +琲 +琳 +琴 +琵 +琶 +琺 +琼 +瑀 +瑁 +瑄 +瑋 +瑕 +瑗 +瑙 +瑚 +瑛 +瑜 +瑞 +瑟 +瑠 +瑣 +瑤 +瑩 +瑪 +瑯 +瑰 +瑶 +瑾 +璀 +璁 +璃 +璇 +璉 +璋 +璎 +璐 +璜 +璞 +璟 +璧 +璨 +環 +璽 +璿 +瓊 +瓏 +瓒 +瓜 +瓢 +瓣 +瓤 +瓦 +瓮 +瓯 +瓴 +瓶 +瓷 +甄 +甌 +甕 +甘 +甙 +甚 +甜 +生 +產 +産 +甥 +甦 +用 +甩 +甫 +甬 +甭 +甯 +田 +由 +甲 +申 +电 +男 +甸 +町 +画 +甾 +畀 +畅 +界 +畏 +畑 +畔 +留 +畜 +畝 +畢 +略 +畦 +番 +畫 +異 +畲 +畳 +畴 +當 +畸 +畹 +畿 +疆 +疇 +疊 +疏 +疑 +疔 +疖 +疗 +疙 +疚 +疝 +疟 +疡 +疣 +疤 +疥 +疫 +疮 +疯 +疱 +疲 +疳 +疵 +疸 +疹 +疼 +疽 +疾 +痂 +病 +症 +痈 +痉 +痊 +痍 +痒 +痔 +痕 +痘 +痙 +痛 +痞 +痠 +痢 +痣 +痤 +痧 +痨 +痪 +痫 +痰 +痱 +痴 +痹 +痺 +痼 +痿 +瘀 +瘁 +瘋 +瘍 +瘓 +瘘 +瘙 +瘟 +瘠 +瘡 +瘢 +瘤 +瘦 +瘧 +瘩 +瘪 +瘫 +瘴 +瘸 +瘾 +療 +癇 +癌 +癒 +癖 +癜 +癞 +癡 +癢 +癣 +癥 +癫 +癬 +癮 +癱 +癲 +癸 +発 +登 +發 +白 +百 +皂 +的 +皆 +皇 +皈 +皋 +皎 +皑 +皓 +皖 +皙 +皚 +皮 +皰 +皱 +皴 +皺 +皿 +盂 +盃 +盅 +盆 +盈 +益 +盎 +盏 +盐 +监 +盒 +盔 +盖 +盗 +盘 +盛 +盜 +盞 +盟 +盡 +監 +盤 +盥 +盧 +盪 +目 +盯 +盱 +盲 +直 +相 +盹 +盼 +盾 +省 +眈 +眉 +看 +県 +眙 +眞 +真 +眠 +眦 +眨 +眩 +眯 +眶 +眷 +眸 +眺 +眼 +眾 +着 +睁 +睇 +睏 +睐 +睑 +睛 +睜 +睞 +睡 +睢 +督 +睥 +睦 +睨 +睪 +睫 +睬 +睹 +睽 +睾 +睿 +瞄 +瞅 +瞇 +瞋 +瞌 +瞎 +瞑 +瞒 +瞓 +瞞 +瞟 +瞠 +瞥 +瞧 +瞩 +瞪 +瞬 +瞭 +瞰 +瞳 +瞻 +瞼 +瞿 +矇 +矍 +矗 +矚 +矛 +矜 +矢 +矣 +知 +矩 +矫 +短 +矮 +矯 +石 +矶 +矽 +矾 +矿 +码 +砂 +砌 +砍 +砒 +研 +砖 +砗 +砚 +砝 +砣 +砥 +砧 +砭 +砰 +砲 +破 +砷 +砸 +砺 +砼 +砾 +础 +硅 +硐 +硒 +硕 +硝 +硫 +硬 +确 +硯 +硼 +碁 +碇 +碉 +碌 +碍 +碎 +碑 +碓 +碗 +碘 +碚 +碛 +碟 +碣 +碧 +碩 +碰 +碱 +碳 +碴 +確 +碼 +碾 +磁 +磅 +磊 +磋 +磐 +磕 +磚 +磡 +磨 +磬 +磯 +磲 +磷 +磺 +礁 +礎 +礙 +礡 +礦 +礪 +礫 +礴 +示 +礼 +社 +祀 +祁 +祂 +祇 +祈 +祉 +祎 +祐 +祕 +祖 +祗 +祚 +祛 +祜 +祝 +神 +祟 +祠 +祢 +祥 +票 +祭 +祯 +祷 +祸 +祺 +祿 +禀 +禁 +禄 +禅 +禍 +禎 +福 +禛 +禦 +禧 +禪 +禮 +禱 +禹 +禺 +离 +禽 +禾 +禿 +秀 +私 +秃 +秆 +秉 +秋 +种 +科 +秒 +秘 +租 +秣 +秤 +秦 +秧 +秩 +秭 +积 +称 +秸 +移 +秽 +稀 +稅 +程 +稍 +税 +稔 +稗 +稚 +稜 +稞 +稟 +稠 +稣 +種 +稱 +稲 +稳 +稷 +稹 +稻 +稼 +稽 +稿 +穀 +穂 +穆 +穌 +積 +穎 +穗 +穢 +穩 +穫 +穴 +究 +穷 +穹 +空 +穿 +突 +窃 +窄 +窈 +窍 +窑 +窒 +窓 +窕 +窖 +窗 +窘 +窜 +窝 +窟 +窠 +窥 +窦 +窨 +窩 +窪 +窮 +窯 +窺 +窿 +竄 +竅 +竇 +竊 +立 +竖 +站 +竜 +竞 +竟 +章 +竣 +童 +竭 +端 +競 +竹 +竺 +竽 +竿 +笃 
+笆 +笈 +笋 +笏 +笑 +笔 +笙 +笛 +笞 +笠 +符 +笨 +第 +笹 +笺 +笼 +筆 +等 +筊 +筋 +筍 +筏 +筐 +筑 +筒 +答 +策 +筛 +筝 +筠 +筱 +筲 +筵 +筷 +筹 +签 +简 +箇 +箋 +箍 +箏 +箐 +箔 +箕 +算 +箝 +管 +箩 +箫 +箭 +箱 +箴 +箸 +節 +篁 +範 +篆 +篇 +築 +篑 +篓 +篙 +篝 +篠 +篡 +篤 +篩 +篪 +篮 +篱 +篷 +簇 +簌 +簍 +簡 +簦 +簧 +簪 +簫 +簷 +簸 +簽 +簾 +簿 +籁 +籃 +籌 +籍 +籐 +籟 +籠 +籤 +籬 +籮 +籲 +米 +类 +籼 +籽 +粄 +粉 +粑 +粒 +粕 +粗 +粘 +粟 +粤 +粥 +粧 +粪 +粮 +粱 +粲 +粳 +粵 +粹 +粼 +粽 +精 +粿 +糅 +糊 +糍 +糕 +糖 +糗 +糙 +糜 +糞 +糟 +糠 +糧 +糬 +糯 +糰 +糸 +系 +糾 +紀 +紂 +約 +紅 +紉 +紊 +紋 +納 +紐 +紓 +純 +紗 +紘 +紙 +級 +紛 +紜 +素 +紡 +索 +紧 +紫 +紮 +累 +細 +紳 +紹 +紺 +終 +絃 +組 +絆 +経 +結 +絕 +絞 +絡 +絢 +給 +絨 +絮 +統 +絲 +絳 +絵 +絶 +絹 +綁 +綏 +綑 +經 +継 +続 +綜 +綠 +綢 +綦 +綫 +綬 +維 +綱 +網 +綴 +綵 +綸 +綺 +綻 +綽 +綾 +綿 +緊 +緋 +総 +緑 +緒 +緘 +線 +緝 +緞 +締 +緣 +編 +緩 +緬 +緯 +練 +緹 +緻 +縁 +縄 +縈 +縛 +縝 +縣 +縫 +縮 +縱 +縴 +縷 +總 +績 +繁 +繃 +繆 +繇 +繋 +織 +繕 +繚 +繞 +繡 +繩 +繪 +繫 +繭 +繳 +繹 +繼 +繽 +纂 +續 +纍 +纏 +纓 +纔 +纖 +纜 +纠 +红 +纣 +纤 +约 +级 +纨 +纪 +纫 +纬 +纭 +纯 +纰 +纱 +纲 +纳 +纵 +纶 +纷 +纸 +纹 +纺 +纽 +纾 +线 +绀 +练 +组 +绅 +细 +织 +终 +绊 +绍 +绎 +经 +绑 +绒 +结 +绔 +绕 +绘 +给 +绚 +绛 +络 +绝 +绞 +统 +绡 +绢 +绣 +绥 +绦 +继 +绩 +绪 +绫 +续 +绮 +绯 +绰 +绳 +维 +绵 +绶 +绷 +绸 +绻 +综 +绽 +绾 +绿 +缀 +缄 +缅 +缆 +缇 +缈 +缉 +缎 +缓 +缔 +缕 +编 +缘 +缙 +缚 +缜 +缝 +缠 +缢 +缤 +缥 +缨 +缩 +缪 +缭 +缮 +缰 +缱 +缴 +缸 +缺 +缽 +罂 +罄 +罌 +罐 +网 +罔 +罕 +罗 +罚 +罡 +罢 +罩 +罪 +置 +罰 +署 +罵 +罷 +罹 +羁 +羅 +羈 +羊 +羌 +美 +羔 +羚 +羞 +羟 +羡 +羣 +群 +羥 +羧 +羨 +義 +羯 +羲 +羸 +羹 +羽 +羿 +翁 +翅 +翊 +翌 +翎 +習 +翔 +翘 +翟 +翠 +翡 +翦 +翩 +翰 +翱 +翳 +翹 +翻 +翼 +耀 +老 +考 +耄 +者 +耆 +耋 +而 +耍 +耐 +耒 +耕 +耗 +耘 +耙 +耦 +耨 +耳 +耶 +耷 +耸 +耻 +耽 +耿 +聂 +聆 +聊 +聋 +职 +聒 +联 +聖 +聘 +聚 +聞 +聪 +聯 +聰 +聲 +聳 +聴 +聶 +職 +聽 +聾 +聿 +肃 +肄 +肅 +肆 +肇 +肉 +肋 +肌 +肏 +肓 +肖 +肘 +肚 +肛 +肝 +肠 +股 +肢 +肤 +肥 +肩 +肪 +肮 +肯 +肱 +育 +肴 +肺 +肽 +肾 +肿 +胀 +胁 +胃 +胄 +胆 +背 +胍 +胎 +胖 +胚 +胛 +胜 +胝 +胞 +胡 +胤 +胥 +胧 +胫 +胭 +胯 +胰 +胱 +胳 +胴 +胶 +胸 +胺 +能 +脂 +脅 +脆 +脇 +脈 +脉 +脊 +脍 +脏 +脐 +脑 +脓 +脖 +脘 +脚 +脛 +脣 +脩 +脫 +脯 +脱 +脲 +脳 +脸 +脹 +脾 +腆 +腈 +腊 +腋 +腌 +腎 +腐 +腑 +腓 +腔 +腕 +腥 +腦 +腩 +腫 +腭 +腮 +腰 +腱 +腳 +腴 +腸 +腹 +腺 +腻 +腼 +腾 +腿 +膀 +膈 +膊 +膏 +膑 +膘 +膚 +膛 +膜 +膝 +膠 +膦 +膨 +膩 +膳 +膺 +膻 +膽 +膾 +膿 +臀 +臂 +臃 +臆 +臉 +臊 +臍 +臓 +臘 +臟 +臣 +臥 +臧 +臨 +自 +臬 +臭 +至 +致 +臺 +臻 +臼 +臾 +舀 +舂 +舅 +舆 +與 +興 +舉 +舊 +舌 +舍 +舎 +舐 +舒 +舔 +舖 +舗 +舛 +舜 +舞 +舟 +航 +舫 +般 +舰 +舱 +舵 +舶 +舷 +舸 +船 +舺 
+舾 +艇 +艋 +艘 +艙 +艦 +艮 +良 +艰 +艱 +色 +艳 +艷 +艹 +艺 +艾 +节 +芃 +芈 +芊 +芋 +芍 +芎 +芒 +芙 +芜 +芝 +芡 +芥 +芦 +芩 +芪 +芫 +芬 +芭 +芮 +芯 +花 +芳 +芷 +芸 +芹 +芻 +芽 +芾 +苁 +苄 +苇 +苋 +苍 +苏 +苑 +苒 +苓 +苔 +苕 +苗 +苛 +苜 +苞 +苟 +苡 +苣 +若 +苦 +苫 +苯 +英 +苷 +苹 +苻 +茁 +茂 +范 +茄 +茅 +茉 +茎 +茏 +茗 +茜 +茧 +茨 +茫 +茬 +茭 +茯 +茱 +茲 +茴 +茵 +茶 +茸 +茹 +茼 +荀 +荃 +荆 +草 +荊 +荏 +荐 +荒 +荔 +荖 +荘 +荚 +荞 +荟 +荠 +荡 +荣 +荤 +荥 +荧 +荨 +荪 +荫 +药 +荳 +荷 +荸 +荻 +荼 +荽 +莅 +莆 +莉 +莊 +莎 +莒 +莓 +莖 +莘 +莞 +莠 +莢 +莧 +莪 +莫 +莱 +莲 +莴 +获 +莹 +莺 +莽 +莿 +菀 +菁 +菅 +菇 +菈 +菊 +菌 +菏 +菓 +菖 +菘 +菜 +菟 +菠 +菡 +菩 +華 +菱 +菲 +菸 +菽 +萁 +萃 +萄 +萊 +萋 +萌 +萍 +萎 +萘 +萝 +萤 +营 +萦 +萧 +萨 +萩 +萬 +萱 +萵 +萸 +萼 +落 +葆 +葉 +著 +葚 +葛 +葡 +董 +葦 +葩 +葫 +葬 +葭 +葯 +葱 +葳 +葵 +葷 +葺 +蒂 +蒋 +蒐 +蒔 +蒙 +蒜 +蒞 +蒟 +蒡 +蒨 +蒲 +蒸 +蒹 +蒻 +蒼 +蒿 +蓁 +蓄 +蓆 +蓉 +蓋 +蓑 +蓓 +蓖 +蓝 +蓟 +蓦 +蓬 +蓮 +蓼 +蓿 +蔑 +蔓 +蔔 +蔗 +蔘 +蔚 +蔡 +蔣 +蔥 +蔫 +蔬 +蔭 +蔵 +蔷 +蔺 +蔻 +蔼 +蔽 +蕁 +蕃 +蕈 +蕉 +蕊 +蕎 +蕙 +蕤 +蕨 +蕩 +蕪 +蕭 +蕲 +蕴 +蕻 +蕾 +薄 +薅 +薇 +薈 +薊 +薏 +薑 +薔 +薙 +薛 +薦 +薨 +薩 +薪 +薬 +薯 +薰 +薹 +藉 +藍 +藏 +藐 +藓 +藕 +藜 +藝 +藤 +藥 +藩 +藹 +藻 +藿 +蘆 +蘇 +蘊 +蘋 +蘑 +蘚 +蘭 +蘸 +蘼 +蘿 +虎 +虏 +虐 +虑 +虔 +處 +虚 +虛 +虜 +虞 +號 +虢 +虧 +虫 +虬 +虱 +虹 +虻 +虽 +虾 +蚀 +蚁 +蚂 +蚊 +蚌 +蚓 +蚕 +蚜 +蚝 +蚣 +蚤 +蚩 +蚪 +蚯 +蚱 +蚵 +蛀 +蛆 +蛇 +蛊 +蛋 +蛎 +蛐 +蛔 +蛙 +蛛 +蛟 +蛤 +蛭 +蛮 +蛰 +蛳 +蛹 +蛻 +蛾 +蜀 +蜂 +蜃 +蜆 +蜇 +蜈 +蜊 +蜍 +蜒 +蜓 +蜕 +蜗 +蜘 +蜚 +蜜 +蜡 +蜢 +蜥 +蜱 +蜴 +蜷 +蜻 +蜿 +蝇 +蝈 +蝉 +蝌 +蝎 +蝕 +蝗 +蝙 +蝟 +蝠 +蝦 +蝨 +蝴 +蝶 +蝸 +蝼 +螂 +螃 +融 +螞 +螢 +螨 +螯 +螳 +螺 +蟀 +蟄 +蟆 +蟋 +蟎 +蟑 +蟒 +蟠 +蟬 +蟲 +蟹 +蟻 +蟾 +蠅 +蠍 +蠔 +蠕 +蠛 +蠟 +蠡 +蠢 +蠣 +蠱 +蠶 +蠹 +蠻 +血 +衄 +衅 +衆 +行 +衍 +術 +衔 +街 +衙 +衛 +衝 +衞 +衡 +衢 +衣 +补 +表 +衩 +衫 +衬 +衮 +衰 +衲 +衷 +衹 +衾 +衿 +袁 +袂 +袄 +袅 +袈 +袋 +袍 +袒 +袖 +袜 +袞 +袤 +袪 +被 +袭 +袱 +裁 +裂 +装 +裆 +裊 +裏 +裔 +裕 +裘 +裙 +補 +裝 +裟 +裡 +裤 +裨 +裱 +裳 +裴 +裸 +裹 +製 +裾 +褂 +複 +褐 +褒 +褓 +褔 +褚 +褥 +褪 +褫 +褲 +褶 +褻 +襁 +襄 +襟 +襠 +襪 +襬 +襯 +襲 +西 +要 +覃 +覆 +覇 +見 +規 +覓 +視 +覚 +覦 +覧 +親 +覬 +観 +覷 +覺 +覽 +觀 +见 +观 +规 +觅 +视 +览 +觉 +觊 +觎 +觐 +觑 +角 +觞 +解 +觥 +触 +觸 +言 +訂 +計 +訊 +討 +訓 +訕 +訖 +託 +記 +訛 +訝 +訟 +訣 +訥 +訪 +設 +許 +訳 +訴 +訶 +診 +註 +証 +詆 +詐 +詔 +評 +詛 +詞 +詠 +詡 +詢 +詣 +試 +詩 +詫 +詬 +詭 +詮 +詰 +話 +該 +詳 +詹 +詼 +誅 +誇 +誉 +誌 +認 +誓 +誕 +誘 +語 +誠 +誡 +誣 +誤 +誥 +誦 +誨 +說 +説 +読 +誰 +課 +誹 +誼 +調 +諄 +談 +請 +諏 +諒 +論 +諗 +諜 +諡 +諦 +諧 +諫 +諭 +諮 +諱 +諳 +諷 +諸 +諺 +諾 +謀 +謁 +謂 +謄 +謊 +謎 +謐 
+謔 +謗 +謙 +講 +謝 +謠 +謨 +謬 +謹 +謾 +譁 +證 +譎 +譏 +識 +譙 +譚 +譜 +警 +譬 +譯 +議 +譲 +譴 +護 +譽 +讀 +變 +讓 +讚 +讞 +计 +订 +认 +讥 +讧 +讨 +让 +讪 +讫 +训 +议 +讯 +记 +讲 +讳 +讴 +讶 +讷 +许 +讹 +论 +讼 +讽 +设 +访 +诀 +证 +诃 +评 +诅 +识 +诈 +诉 +诊 +诋 +词 +诏 +译 +试 +诗 +诘 +诙 +诚 +诛 +话 +诞 +诟 +诠 +诡 +询 +诣 +诤 +该 +详 +诧 +诩 +诫 +诬 +语 +误 +诰 +诱 +诲 +说 +诵 +诶 +请 +诸 +诺 +读 +诽 +课 +诿 +谀 +谁 +调 +谄 +谅 +谆 +谈 +谊 +谋 +谌 +谍 +谎 +谏 +谐 +谑 +谒 +谓 +谔 +谕 +谗 +谘 +谙 +谚 +谛 +谜 +谟 +谢 +谣 +谤 +谥 +谦 +谧 +谨 +谩 +谪 +谬 +谭 +谯 +谱 +谲 +谴 +谶 +谷 +豁 +豆 +豇 +豈 +豉 +豊 +豌 +豎 +豐 +豔 +豚 +象 +豢 +豪 +豫 +豬 +豹 +豺 +貂 +貅 +貌 +貓 +貔 +貘 +貝 +貞 +負 +財 +貢 +貧 +貨 +販 +貪 +貫 +責 +貯 +貰 +貳 +貴 +貶 +買 +貸 +費 +貼 +貽 +貿 +賀 +賁 +賂 +賃 +賄 +資 +賈 +賊 +賑 +賓 +賜 +賞 +賠 +賡 +賢 +賣 +賤 +賦 +質 +賬 +賭 +賴 +賺 +購 +賽 +贅 +贈 +贊 +贍 +贏 +贓 +贖 +贛 +贝 +贞 +负 +贡 +财 +责 +贤 +败 +账 +货 +质 +贩 +贪 +贫 +贬 +购 +贮 +贯 +贰 +贱 +贲 +贴 +贵 +贷 +贸 +费 +贺 +贻 +贼 +贾 +贿 +赁 +赂 +赃 +资 +赅 +赈 +赊 +赋 +赌 +赎 +赏 +赐 +赓 +赔 +赖 +赘 +赚 +赛 +赝 +赞 +赠 +赡 +赢 +赣 +赤 +赦 +赧 +赫 +赭 +走 +赳 +赴 +赵 +赶 +起 +趁 +超 +越 +趋 +趕 +趙 +趟 +趣 +趨 +足 +趴 +趵 +趸 +趺 +趾 +跃 +跄 +跆 +跋 +跌 +跎 +跑 +跖 +跚 +跛 +距 +跟 +跡 +跤 +跨 +跩 +跪 +路 +跳 +践 +跷 +跹 +跺 +跻 +踉 +踊 +踌 +踏 +踐 +踝 +踞 +踟 +踢 +踩 +踪 +踮 +踱 +踴 +踵 +踹 +蹂 +蹄 +蹇 +蹈 +蹉 +蹊 +蹋 +蹑 +蹒 +蹙 +蹟 +蹣 +蹤 +蹦 +蹩 +蹬 +蹭 +蹲 +蹴 +蹶 +蹺 +蹼 +蹿 +躁 +躇 +躉 +躊 +躋 +躍 +躏 +躪 +身 +躬 +躯 +躲 +躺 +軀 +車 +軋 +軌 +軍 +軒 +軟 +転 +軸 +軼 +軽 +軾 +較 +載 +輒 +輓 +輔 +輕 +輛 +輝 +輟 +輩 +輪 +輯 +輸 +輻 +輾 +輿 +轄 +轅 +轆 +轉 +轍 +轎 +轟 +车 +轧 +轨 +轩 +转 +轭 +轮 +软 +轰 +轲 +轴 +轶 +轻 +轼 +载 +轿 +较 +辄 +辅 +辆 +辇 +辈 +辉 +辊 +辍 +辐 +辑 +输 +辕 +辖 +辗 +辘 +辙 +辛 +辜 +辞 +辟 +辣 +辦 +辨 +辩 +辫 +辭 +辮 +辯 +辰 +辱 +農 +边 +辺 +辻 +込 +辽 +达 +迁 +迂 +迄 +迅 +过 +迈 +迎 +运 +近 +返 +还 +这 +进 +远 +违 +连 +迟 +迢 +迤 +迥 +迦 +迩 +迪 +迫 +迭 +述 +迴 +迷 +迸 +迹 +迺 +追 +退 +送 +适 +逃 +逅 +逆 +选 +逊 +逍 +透 +逐 +递 +途 +逕 +逗 +這 +通 +逛 +逝 +逞 +速 +造 +逢 +連 +逮 +週 +進 +逵 +逶 +逸 +逻 +逼 +逾 +遁 +遂 +遅 +遇 +遊 +運 +遍 +過 +遏 +遐 +遑 +遒 +道 +達 +違 +遗 +遙 +遛 +遜 +遞 +遠 +遢 +遣 +遥 +遨 +適 +遭 +遮 +遲 +遴 +遵 +遶 +遷 +選 +遺 +遼 +遽 +避 +邀 +邁 +邂 +邃 +還 +邇 +邈 +邊 +邋 +邏 +邑 +邓 +邕 +邛 +邝 +邢 +那 +邦 +邨 +邪 +邬 +邮 +邯 +邰 +邱 +邳 +邵 +邸 +邹 +邺 +邻 +郁 +郅 +郊 +郎 +郑 +郜 +郝 +郡 +郢 +郤 +郦 +郧 +部 +郫 +郭 +郴 +郵 +郷 +郸 +都 +鄂 +鄉 +鄒 +鄔 +鄙 +鄞 +鄢 +鄧 +鄭 +鄰 +鄱 +鄲 +鄺 +酉 +酊 +酋 +酌 +配 +酐 +酒 +酗 +酚 +酝 +酢 +酣 +酥 +酩 +酪 +酬 +酮 +酯 +酰 +酱 +酵 +酶 +酷 +酸 +酿 +醃 +醇 
+醉 +醋 +醍 +醐 +醒 +醚 +醛 +醜 +醞 +醣 +醪 +醫 +醬 +醮 +醯 +醴 +醺 +釀 +釁 +采 +釉 +释 +釋 +里 +重 +野 +量 +釐 +金 +釗 +釘 +釜 +針 +釣 +釦 +釧 +釵 +鈀 +鈉 +鈍 +鈎 +鈔 +鈕 +鈞 +鈣 +鈦 +鈪 +鈴 +鈺 +鈾 +鉀 +鉄 +鉅 +鉉 +鉑 +鉗 +鉚 +鉛 +鉤 +鉴 +鉻 +銀 +銃 +銅 +銑 +銓 +銖 +銘 +銜 +銬 +銭 +銮 +銳 +銷 +銹 +鋁 +鋅 +鋒 +鋤 +鋪 +鋰 +鋸 +鋼 +錄 +錐 +錘 +錚 +錠 +錢 +錦 +錨 +錫 +錮 +錯 +録 +錳 +錶 +鍊 +鍋 +鍍 +鍛 +鍥 +鍰 +鍵 +鍺 +鍾 +鎂 +鎊 +鎌 +鎏 +鎔 +鎖 +鎗 +鎚 +鎧 +鎬 +鎮 +鎳 +鏈 +鏖 +鏗 +鏘 +鏞 +鏟 +鏡 +鏢 +鏤 +鏽 +鐘 +鐮 +鐲 +鐳 +鐵 +鐸 +鐺 +鑄 +鑊 +鑑 +鑒 +鑣 +鑫 +鑰 +鑲 +鑼 +鑽 +鑾 +鑿 +针 +钉 +钊 +钎 +钏 +钒 +钓 +钗 +钙 +钛 +钜 +钝 +钞 +钟 +钠 +钡 +钢 +钣 +钤 +钥 +钦 +钧 +钨 +钩 +钮 +钯 +钰 +钱 +钳 +钴 +钵 +钺 +钻 +钼 +钾 +钿 +铀 +铁 +铂 +铃 +铄 +铅 +铆 +铉 +铎 +铐 +铛 +铜 +铝 +铠 +铡 +铢 +铣 +铤 +铨 +铩 +铬 +铭 +铮 +铰 +铲 +铵 +银 +铸 +铺 +链 +铿 +销 +锁 +锂 +锄 +锅 +锆 +锈 +锉 +锋 +锌 +锏 +锐 +锑 +错 +锚 +锟 +锡 +锢 +锣 +锤 +锥 +锦 +锭 +键 +锯 +锰 +锲 +锵 +锹 +锺 +锻 +镀 +镁 +镂 +镇 +镉 +镌 +镍 +镐 +镑 +镕 +镖 +镗 +镛 +镜 +镣 +镭 +镯 +镰 +镳 +镶 +長 +长 +門 +閃 +閉 +開 +閎 +閏 +閑 +閒 +間 +閔 +閘 +閡 +関 +閣 +閥 +閨 +閩 +閱 +閲 +閹 +閻 +閾 +闆 +闇 +闊 +闌 +闍 +闔 +闕 +闖 +闘 +關 +闡 +闢 +门 +闪 +闫 +闭 +问 +闯 +闰 +闲 +间 +闵 +闷 +闸 +闹 +闺 +闻 +闽 +闾 +阀 +阁 +阂 +阅 +阆 +阇 +阈 +阉 +阎 +阐 +阑 +阔 +阕 +阖 +阙 +阚 +阜 +队 +阡 +阪 +阮 +阱 +防 +阳 +阴 +阵 +阶 +阻 +阿 +陀 +陂 +附 +际 +陆 +陇 +陈 +陋 +陌 +降 +限 +陕 +陛 +陝 +陞 +陟 +陡 +院 +陣 +除 +陨 +险 +陪 +陰 +陲 +陳 +陵 +陶 +陷 +陸 +険 +陽 +隅 +隆 +隈 +隊 +隋 +隍 +階 +随 +隐 +隔 +隕 +隘 +隙 +際 +障 +隠 +隣 +隧 +隨 +險 +隱 +隴 +隶 +隸 +隻 +隼 +隽 +难 +雀 +雁 +雄 +雅 +集 +雇 +雉 +雋 +雌 +雍 +雎 +雏 +雑 +雒 +雕 +雖 +雙 +雛 +雜 +雞 +離 +難 +雨 +雪 +雯 +雰 +雲 +雳 +零 +雷 +雹 +電 +雾 +需 +霁 +霄 +霆 +震 +霈 +霉 +霊 +霍 +霎 +霏 +霑 +霓 +霖 +霜 +霞 +霧 +霭 +霰 +露 +霸 +霹 +霽 +霾 +靂 +靄 +靈 +青 +靓 +靖 +静 +靚 +靛 +靜 +非 +靠 +靡 +面 +靥 +靦 +革 +靳 +靴 +靶 +靼 +鞅 +鞋 +鞍 +鞏 +鞑 +鞘 +鞠 +鞣 +鞦 +鞭 +韆 +韋 +韌 +韓 +韜 +韦 +韧 +韩 +韬 +韭 +音 +韵 +韶 +韻 +響 +頁 +頂 +頃 +項 +順 +須 +頌 +預 +頑 +頒 +頓 +頗 +領 +頜 +頡 +頤 +頫 +頭 +頰 +頷 +頸 +頹 +頻 +頼 +顆 +題 +額 +顎 +顏 +顔 +願 +顛 +類 +顧 +顫 +顯 +顱 +顴 +页 +顶 +顷 +项 +顺 +须 +顼 +顽 +顾 +顿 +颁 +颂 +预 +颅 +领 +颇 +颈 +颉 +颊 +颌 +颍 +颐 +频 +颓 +颔 +颖 +颗 +题 +颚 +颛 +颜 +额 +颞 +颠 +颡 +颢 +颤 +颦 +颧 +風 +颯 +颱 +颳 +颶 +颼 +飄 +飆 +风 +飒 +飓 +飕 +飘 +飙 +飚 +飛 +飞 +食 +飢 +飨 +飩 +飪 +飯 +飲 +飼 +飽 +飾 +餃 +餅 +餉 +養 +餌 +餐 +餒 +餓 +餘 +餚 +餛 +餞 +餡 +館 +餮 +餵 +餾 +饅 +饈 +饋 +饌 +饍 +饑 +饒 +饕 +饗 +饞 +饥 +饨 +饪 +饬 +饭 +饮 +饯 +饰 +饱 +饲 +饴 +饵 +饶 +饷 +饺 +饼 +饽 +饿 +馀 +馁 +馄 +馅 +馆 +馈 +馋 
+馍 +馏 +馒 +馔 +首 +馗 +香 +馥 +馨 +馬 +馭 +馮 +馳 +馴 +駁 +駄 +駅 +駆 +駐 +駒 +駕 +駛 +駝 +駭 +駱 +駿 +騁 +騎 +騏 +験 +騙 +騨 +騰 +騷 +驀 +驅 +驊 +驍 +驒 +驕 +驗 +驚 +驛 +驟 +驢 +驥 +马 +驭 +驮 +驯 +驰 +驱 +驳 +驴 +驶 +驷 +驸 +驹 +驻 +驼 +驾 +驿 +骁 +骂 +骄 +骅 +骆 +骇 +骈 +骊 +骋 +验 +骏 +骐 +骑 +骗 +骚 +骛 +骜 +骞 +骠 +骡 +骤 +骥 +骧 +骨 +骯 +骰 +骶 +骷 +骸 +骼 +髂 +髅 +髋 +髏 +髒 +髓 +體 +髖 +高 +髦 +髪 +髮 +髯 +髻 +鬃 +鬆 +鬍 +鬓 +鬚 +鬟 +鬢 +鬣 +鬥 +鬧 +鬱 +鬼 +魁 +魂 +魄 +魅 +魇 +魍 +魏 +魔 +魘 +魚 +魯 +魷 +鮑 +鮨 +鮪 +鮭 +鮮 +鯉 +鯊 +鯖 +鯛 +鯨 +鯰 +鯽 +鰍 +鰓 +鰭 +鰲 +鰻 +鰾 +鱈 +鱉 +鱔 +鱗 +鱷 +鱸 +鱼 +鱿 +鲁 +鲈 +鲍 +鲑 +鲛 +鲜 +鲟 +鲢 +鲤 +鲨 +鲫 +鲱 +鲲 +鲶 +鲷 +鲸 +鳃 +鳄 +鳅 +鳌 +鳍 +鳕 +鳖 +鳗 +鳝 +鳞 +鳥 +鳩 +鳳 +鳴 +鳶 +鴉 +鴕 +鴛 +鴦 +鴨 +鴻 +鴿 +鵑 +鵜 +鵝 +鵡 +鵬 +鵰 +鵲 +鶘 +鶩 +鶯 +鶴 +鷗 +鷲 +鷹 +鷺 +鸚 +鸞 +鸟 +鸠 +鸡 +鸢 +鸣 +鸥 +鸦 +鸨 +鸪 +鸭 +鸯 +鸳 +鸵 +鸽 +鸾 +鸿 +鹂 +鹃 +鹄 +鹅 +鹈 +鹉 +鹊 +鹌 +鹏 +鹑 +鹕 +鹘 +鹜 +鹞 +鹤 +鹦 +鹧 +鹫 +鹭 +鹰 +鹳 +鹵 +鹹 +鹼 +鹽 +鹿 +麂 +麋 +麒 +麓 +麗 +麝 +麟 +麥 +麦 +麩 +麴 +麵 +麸 +麺 +麻 +麼 +麽 +麾 +黃 +黄 +黍 +黎 +黏 +黑 +黒 +黔 +默 +黛 +黜 +黝 +點 +黠 +黨 +黯 +黴 +鼋 +鼎 +鼐 +鼓 +鼠 +鼬 +鼹 +鼻 +鼾 +齁 +齊 +齋 +齐 +齒 +齡 +齢 +齣 +齦 +齿 +龄 +龅 +龈 +龊 +龋 +龌 +龍 +龐 +龔 +龕 +龙 +龚 +龛 +龜 +龟 +︰ +︱ +︶ +︿ +﹁ +﹂ +﹍ +﹏ +﹐ +﹑ +﹒ +﹔ +﹕ +﹖ +﹗ +﹙ +﹚ +﹝ +﹞ +﹡ +﹣ +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +@ +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +。 +「 +」 +、 +・ +ッ +ー +イ +ク +シ +ス +ト +ノ +フ +ラ +ル +ン +゙ +゚ + ̄ +¥ +👍 +🔥 +😂 +😎 +... 
+yam +10 +2017 +12 +11 +2016 +20 +30 +15 +06 +lofter +##s +2015 +by +16 +14 +18 +13 +24 +17 +2014 +21 +##0 +22 +19 +25 +23 +com +100 +00 +05 +2013 +##a +03 +09 +08 +28 +##2 +50 +01 +04 +##1 +27 +02 +2012 +##3 +26 +##e +07 +##8 +##5 +##6 +##4 +##9 +##7 +29 +2011 +40 +##t +2010 +##o +##d +##i +2009 +##n +app +www +the +##m +31 +##c +##l +##y +##r +##g +2008 +60 +http +200 +qq +##p +80 +##f +google +pixnet +90 +cookies +tripadvisor +500 +##er +##k +35 +##h +facebook +2007 +2000 +70 +##b +of +##x +##u +45 +300 +iphone +32 +1000 +2006 +48 +ip +36 +in +38 +3d +##w +##ing +55 +ctrip +##on +##v +33 +##の +to +34 +400 +id +2005 +it +37 +windows +llc +top +99 +42 +39 +000 +led +at +##an +41 +51 +52 +46 +49 +43 +53 +44 +##z +android +58 +and +59 +2004 +56 +vr +##か +5000 +2003 +47 +blogthis +twitter +54 +##le +150 +ok +2018 +57 +75 +cn +no +ios +##in +##mm +##00 +800 +on +te +3000 +65 +2001 +360 +95 +ig +lv +120 +##ng +##を +##us +##に +pc +てす +── +600 +##te +85 +2002 +88 +##ed +html +ncc +wifi +email +64 +blog +is +##10 +##て +mail +online +##al +dvd +##ic +studio +##は +##℃ +##ia +##と +line +vip +72 +##q +98 +##ce +##en +for +##is +##ra +##es +##j +usb +net +cp +1999 +asia +4g +##cm +diy +new +3c +##お +ta +66 +language +vs +apple +tw +86 +web +##ne +ipad +62 +you +##re +101 +68 +##tion +ps +de +bt +pony +atm +##2017 +1998 +67 +##ch +ceo +##or +go +##na +av +pro +cafe +96 +pinterest +97 +63 +pixstyleme3c +##ta +more +said +##2016 +1997 +mp3 +700 +##ll +nba +jun +##20 +92 +tv +1995 +pm +61 +76 +nbsp +250 +##ie +linux +##ma +cd +110 +hd +##17 +78 +##ion +77 +6000 +am +##th +##st +94 +##se +##et +69 +180 +gdp +my +105 +81 +abc +89 +flash +79 +one +93 +1990 +1996 +##ck +gps +##も +##ly +web885 +106 +2020 +91 +##ge +4000 +1500 +xd +boss +isbn +1994 +org +##ry +me +love +##11 +0fork +73 +##12 +3g +##ter +##ar +71 +82 +##la +hotel +130 +1970 +pk +83 +87 +140 +ie +##os +##30 +##el +74 +##50 +seo +cpu +##ml +p2p +84 +may +##る +sun +tue +internet +cc +posted +youtube +##at +##ン +##man +ii 
+##ル +##15 +abs +nt +pdf +yahoo +ago +1980 +##it +news +mac +104 +##てす +##me +##り +java +1992 +spa +##de +##nt +hk +all +plus +la +1993 +##mb +##16 +##ve +west +##da +160 +air +##い +##ps +から +##to +1989 +logo +htc +php +https +fi +momo +##son +sat +##ke +##80 +ebd +suv +wi +day +apk +##88 +##um +mv +galaxy +wiki +or +brake +##ス +1200 +する +this +1991 +mon +##こ +❤2017 +po +##ない +javascript +life +home +june +##ss +system +900 +##ー +##0 +pp +1988 +world +fb +4k +br +##as +ic +ai +leonardo +safari +##60 +live +free +xx +wed +win7 +kiehl +##co +lg +o2o +##go +us +235 +1949 +mm +しい +vfm +kanye +##90 +##2015 +##id +jr +##ey +123 +rss +##sa +##ro +##am +##no +thu +fri +350 +##sh +##ki +103 +comments +name +##のて +##pe +##ine +max +1987 +8000 +uber +##mi +##ton +wordpress +office +1986 +1985 +##ment +107 +bd +win10 +##ld +##li +gmail +bb +dior +##rs +##ri +##rd +##ます +up +cad +##® +dr +して +read +##21 +をお +##io +##99 +url +1984 +pvc +paypal +show +policy +##40 +##ty +##18 +with +##★ +##01 +txt +102 +##ba +dna +from +post +mini +ar +taiwan +john +##ga +privacy +agoda +##13 +##ny +word +##24 +##22 +##by +##ur +##hz +1982 +##ang +265 +cookie +netscape +108 +##ka +##~ +##ad +house +share +note +ibm +code +hello +nike +sim +survey +##016 +1979 +1950 +wikia +##32 +##017 +5g +cbc +##tor +##kg +1983 +##rt +##14 +campaign +store +2500 +os +##ct +##ts +##° +170 +api +##ns +365 +excel +##な +##ao +##ら +##し +~~ +##nd +university +163 +には +518 +##70 +##ya +##il +##25 +pierre +ipo +0020 +897 +##23 +hotels +##ian +のお +125 +years +6606 +##ers +##26 +high +##day +time +##ay +bug +##line +##く +##す +##be +xp +talk2yam +yamservice +10000 +coco +##dy +sony +##ies +1978 +microsoft +david +people +##ha +1960 +instagram +intel +その +##ot +iso +1981 +##va +115 +##mo +##land +xxx +man +co +ltxsw +##ation +baby +220 +##pa +##ol +1945 +7000 +tag +450 +##ue +msn +##31 +oppo +##ト +##ca +control +##om +st +chrome +##ure +##ん +be +##き +lol +##19 +した +##bo +240 +lady +##100 +##way +##から +4600 +##ko +##do +##un 
+4s +corporation +168 +##ni +herme +##28 +cp +978 +##up +##06 +ui +##ds +ppt +admin +three +します +bbc +re +128 +##48 +ca +##015 +##35 +hp +##ee +tpp +##た +##ive +×× +root +##cc +##ました +##ble +##ity +adobe +park +114 +et +oled +city +##ex +##ler +##ap +china +##book +20000 +view +##ice +global +##km +your +hong +##mg +out +##ms +ng +ebay +##29 +menu +ubuntu +##cy +rom +##view +open +ktv +do +server +##lo +if +english +##ね +##5 +##oo +1600 +##02 +step1 +kong +club +135 +july +inc +1976 +mr +hi +##net +touch +##ls +##ii +michael +lcd +##05 +##33 +phone +james +step2 +1300 +ios9 +##box +dc +##2 +##ley +samsung +111 +280 +pokemon +css +##ent +##les +いいえ +##1 +s8 +atom +play +bmw +##said +sa +etf +ctrl +♥yoyo♥ +##55 +2025 +##2014 +##66 +adidas +amazon +1958 +##ber +##ner +visa +##77 +##der +1800 +connectivity +##hi +firefox +109 +118 +hr +so +style +mark +pop +ol +skip +1975 +as +##27 +##ir +##61 +190 +mba +##う +##ai +le +##ver +1900 +cafe2017 +lte +super +113 +129 +##ron +amd +like +##☆ +are +##ster +we +##sk +paul +data +international +##ft +longchamp +ssd +good +##ート +##ti +reply +##my +↓↓↓ +apr +star +##ker +source +136 +js +112 +get +force +photo +##one +126 +##2013 +##ow +link +bbs +1972 +goods +##lin +python +119 +##ip +game +##ics +##ません +blue +##● +520 +##45 +page +itunes +##03 +1955 +260 +1968 +gt +gif +618 +##ff +##47 +group +くたさい +about +bar +ganji +##nce +music +lee +not +1977 +1971 +1973 +##per +an +faq +comment +##って +days +##ock +116 +##bs +1974 +1969 +v1 +player +1956 +xbox +sql +fm +f1 +139 +##ah +210 +##lv +##mp +##000 +melody +1957 +##3 +550 +17life +199 +1966 +xml +market +##au +##71 +999 +##04 +what +gl +##95 +##age +tips +##68 +book +##ting +mysql +can +1959 +230 +##ung +wonderland +watch +10℃ +##ction +9000 +mar +mobile +1946 +1962 +article +##db +part +▲top +party +って +1967 +1964 +1948 +##07 +##ore +##op +この +dj +##78 +##38 +010 +main +225 +1965 +##ong +art +320 +ad +134 +020 +##73 +117 +pm2 +japan +228 +##08 +ts +1963 +##ica +der +sm +##36 +2019 
+##wa +ct +##7 +##や +##64 +1937 +homemesh +search +##85 +##れは +##tv +##di +macbook +##9 +##くたさい +service +##♥ +type +った +750 +##ier +##si +##75 +##います +##ok +best +##ット +goris +lock +##った +cf +3m +big +##ut +ftp +carol +##vi +10 +1961 +happy +sd +##ac +122 +anti +pe +cnn +iii +1920 +138 +##ラ +1940 +esp +jan +tags +##98 +##51 +august +vol +##86 +154 +##™ +##fs +##れ +##sion +design +ac +##ム +press +jordan +ppp +that +key +check +##6 +##tt +##㎡ +1080p +##lt +power +##42 +1952 +##bc +vivi +##ック +he +133 +121 +jpg +##rry +201 +175 +3500 +1947 +nb +##ted +##rn +しています +1954 +usd +##t00 +master +##ンク +001 +model +##58 +al +##09 +1953 +##34 +ram +goo +ても +##ui +127 +1930 +red +##ary +rpg +item +##pm +##41 +270 +##za +project +##2012 +hot +td +blogabstract +##ger +##62 +650 +##44 +gr2 +##します +##m +black +electronic +nfc +year +asus +また +html5 +cindy +##hd +m3 +132 +esc +##od +booking +##53 +fed +tvb +##81 +##ina +mit +165 +##いる +chan +192 +distribution +next +になる +peter +bios +steam +cm +1941 +にも +pk10 +##ix +##65 +##91 +dec +nasa +##ana +icecat +00z +b1 +will +##46 +li +se +##ji +##み +##ard +oct +##ain +jp +##ze +##bi +cio +##56 +smart +h5 +##39 +##port +curve +vpn +##nm +##dia +utc +##あり +12345678910 +##52 +rmvb +chanel +a4 +miss +##and +##im +media +who +##63 +she +girl +5s +124 +vera +##して +class +vivo +king +##フ +##ei +national +ab +1951 +5cm +888 +145 +ipod +ap +1100 +5mm +211 +ms +2756 +##69 +mp4 +msci +##po +##89 +131 +mg +index +380 +##bit +##out +##zz +##97 +##67 +158 +apec +##8 +photoshop +opec +¥799 +ては +##96 +##tes +##ast +2g +○○ +##ール +¥2899 +##ling +##よ +##ory +1938 +##ical +kitty +content +##43 +step3 +##cn +win8 +155 +vc +1400 +iphone7 +robert +##した +tcl +137 +beauty +##87 +en +dollars +##ys +##oc +step +pay +yy +a1 +##2011 +##lly +##ks +##♪ +1939 +188 +download +1944 +sep +exe +ph +います +school +gb +center +pr +street +##board +uv +##37 +##lan +winrar +##que +##ua +##com +1942 +1936 +480 +gpu +##4 +ettoday +fu +tom +##54 +##ren +##via +149 +##72 +b2b +144 
+##79 +##tch +rose +arm +mb +##49 +##ial +##nn +nvidia +step4 +mvp +00㎡ +york +156 +##イ +how +cpi +591 +2765 +gov +kg +joe +##xx +mandy +pa +##ser +copyright +fashion +1935 +don +##け +ecu +##ist +##art +erp +wap +have +##lm +talk +##ek +##ning +##if +ch +##ite +video +1943 +cs +san +iot +look +##84 +##2010 +##ku +october +##ux +trump +##hs +##ide +box +141 +first +##ins +april +##ight +##83 +185 +angel +protected +aa +151 +162 +x1 +m2 +##fe +##× +##ho +size +143 +min +ofo +fun +gomaji +ex +hdmi +food +dns +march +chris +kevin +##のか +##lla +##pp +##ec +ag +ems +6s +720p +##rm +##ham +off +##92 +asp +team +fandom +ed +299 +▌♥ +##ell +info +されています +##82 +sina +4066 +161 +##able +##ctor +330 +399 +315 +dll +rights +ltd +idc +jul +3kg +1927 +142 +ma +surface +##76 +##ク +~~~ +304 +mall +eps +146 +green +##59 +map +space +donald +v2 +sodu +##light +1931 +148 +1700 +まて +310 +reserved +htm +##han +##57 +2d +178 +mod +##ise +##tions +152 +ti +##shi +doc +1933 +icp +055 +wang +##ram +shopping +aug +##pi +##well +now +wam +b2 +からお +##hu +236 +1928 +##gb +266 +f2 +##93 +153 +mix +##ef +##uan +bwl +##plus +##res +core +##ess +tea +5℃ +hktvmall +nhk +##ate +list +##ese +301 +feb +4m +inn +ての +nov +159 +12345 +daniel +##ci +pass +##bet +##nk +coffee +202 +ssl +airbnb +##ute +fbi +woshipm +skype +ea +cg +sp +##fc +##www +yes +edge +alt +007 +##94 +fpga +##ght +##gs +iso9001 +さい +##ile +##wood +##uo +image +lin +icon +american +##em +1932 +set +says +##king +##tive +blogger +##74 +なと +256 +147 +##ox +##zy +##red +##ium +##lf +nokia +claire +##リ +##ding +november +lohas +##500 +##tic +##マ +##cs +##ある +##che +##ire +##gy +##ult +db +january +win +##カ +166 +road +ptt +##ま +##つ +198 +##fa +##mer +anna +pchome +はい +udn +ef +420 +##time +##tte +2030 +##ア +g20 +white +かかります +1929 +308 +garden +eleven +di +##おります +chen +309b +777 +172 +young +cosplay +ちてない +4500 +bat +##123 +##tra +##ては +kindle +npc +steve +etc +##ern +##| +call +xperia +ces +travel +sk +s7 +##ous +1934 +##int +みいたたけます +183 
+edu +file +cho +qr +##car +##our +186 +##ant +##d +eric +1914 +rends +##jo +##する +mastercard +##2000 +kb +##min +290 +##ino +vista +##ris +##ud +jack +2400 +##set +169 +pos +1912 +##her +##ou +taipei +しく +205 +beta +##ませんか +232 +##fi +express +255 +body +##ill +aphojoy +user +december +meiki +##ick +tweet +richard +##av +##ᆫ +iphone6 +##dd +ちてすか +views +##mark +321 +pd +##00 +times +##▲ +level +##ash +10g +point +5l +##ome +208 +koreanmall +##ak +george +q2 +206 +wma +tcp +##200 +スタッフ +full +mlb +##lle +##watch +tm +run +179 +911 +smith +business +##und +1919 +color +##tal +222 +171 +##less +moon +4399 +##rl +update +pcb +shop +499 +157 +little +なし +end +##mhz +van +dsp +easy +660 +##house +##key +history +##o +oh +##001 +##hy +##web +oem +let +was +##2009 +##gg +review +##wan +182 +##°c +203 +uc +title +##val +united +233 +2021 +##ons +doi +trivago +overdope +sbs +##ance +##ち +grand +special +573032185 +imf +216 +wx17house +##so +##ーム +audi +##he +london +william +##rp +##ake +science +beach +cfa +amp +ps4 +880 +##800 +##link +##hp +crm +ferragamo +bell +make +##eng +195 +under +zh +photos +2300 +##style +##ント +via +176 +da +##gi +company +i7 +##ray +thomas +370 +ufo +i5 +##max +plc +ben +back +research +8g +173 +mike +##pc +##ッフ +september +189 +##ace +vps +february +167 +pantos +wp +lisa +1921 +★★ +jquery +night +long +offer +##berg +##news +1911 +##いて +ray +fks +wto +せます +over +164 +340 +##all +##rus +1924 +##888 +##works +blogtitle +loftpermalink +##→ +187 +martin +test +ling +km +##め +15000 +fda +v3 +##ja +##ロ +wedding +かある +outlet +family +##ea +をこ +##top +story +##ness +salvatore +##lu +204 +swift +215 +room +している +oracle +##ul +1925 +sam +b2c +week +pi +rock +##のは +##a +##けと +##ean +##300 +##gle +cctv +after +chinese +##back +powered +x2 +##tan +1918 +##nes +##イン +canon +only +181 +##zi +##las +say +##oe +184 +##sd +221 +##bot +##world +##zo +sky +made +top100 +just +1926 +pmi +802 +234 +gap +##vr +177 +les +174 +▲topoct +ball +vogue +vi +ing +ofweek +cos 
+##list +##ort +▲topmay +##なら +##lon +として +last +##tc +##of +##bus +##gen +real +eva +##コ +a3 +nas +##lie +##ria +##coin +##bt +▲topapr +his +212 +cat +nata +vive +health +⋯⋯ +drive +sir +▲topmar +du +cup +##カー +##ook +##よう +##sy +alex +msg +tour +しました +3ce +##word +193 +ebooks +r8 +block +318 +##より +2200 +nice +pvp +207 +months +1905 +rewards +##ther +1917 +0800 +##xi +##チ +##sc +micro +850 +gg +blogfp +op +1922 +daily +m1 +264 +true +##bb +ml +##tar +##のお +##ky +anthony +196 +253 +##yo +state +218 +##ara +##aa +##rc +##tz +##ston +より +gear +##eo +##ade +ge +see +1923 +##win +##ura +ss +heart +##den +##ita +down +##sm +el +png +2100 +610 +rakuten +whatsapp +bay +dream +add +##use +680 +311 +pad +gucci +mpv +##ode +##fo +island +▲topjun +##▼ +223 +jason +214 +chicago +##❤ +しの +##hone +io +##れる +##ことか +sogo +be2 +##ology +990 +cloud +vcd +##con +2~3 +##ford +##joy +##kb +##こさいます +##rade +but +##ach +docker +##ful +rfid +ul +##ase +hit +ford +##star +580 +##○ +11 +a2 +sdk +reading +edited +##are +cmos +##mc +238 +siri +light +##ella +##ため +bloomberg +##read +pizza +##ison +jimmy +##vm +college +node +journal +ba +18k +##play +245 +##cer +20 +magic +##yu +191 +jump +288 +tt +##ings +asr +##lia +3200 +step5 +network +##cd +mc +いします +1234 +pixstyleme +273 +##600 +2800 +money +★★★★★ +1280 +12 +430 +bl +みの +act +##tus +tokyo +##rial +##life +emba +##ae +saas +tcs +##rk +##wang +summer +##sp +ko +##ving +390 +premium +##その +netflix +##ヒ +uk +mt +##lton +right +frank +two +209 +える +##ple +##cal +021 +##んな +##sen +##ville +hold +nexus +dd +##ius +てお +##mah +##なく +tila +zero +820 +ce +##tin +resort +##ws +charles +old +p10 +5d +report +##360 +##ru +##には +bus +vans +lt +##est +pv +##レ +links +rebecca +##ツ +##dm +azure +##365 +きな +limited +bit +4gb +##mon +1910 +moto +##eam +213 +1913 +var +eos +なとの +226 +blogspot +された +699 +e3 +dos +dm +fc +##ments +##ik +##kw +boy +##bin +##ata +960 +er +##せ +219 +##vin +##tu +##ula +194 +##∥ +station +##ろ +##ature +835 +files +zara +hdr 
+top10 +nature +950 +magazine +s6 +marriott +##シ +avira +case +##っと +tab +##ran +tony +##home +oculus +im +##ral +jean +saint +cry +307 +rosie +##force +##ini +ice +##bert +のある +##nder +##mber +pet +2600 +##◆ +plurk +▲topdec +##sis +00kg +▲topnov +720 +##ence +tim +##ω +##nc +##ても +##name +log +ips +great +ikea +malaysia +unix +##イト +3600 +##ncy +##nie +12000 +akb48 +##ye +##oid +404 +##chi +##いた +oa +xuehai +##1000 +##orm +##rf +275 +さん +##ware +##リー +980 +ho +##pro +text +##era +560 +bob +227 +##ub +##2008 +8891 +scp +avi +##zen +2022 +mi +wu +museum +qvod +apache +lake +jcb +▲topaug +★★★ +ni +##hr +hill +302 +ne +weibo +490 +ruby +##ーシ +##ヶ +##row +4d +▲topjul +iv +##ish +github +306 +mate +312 +##スト +##lot +##ane +andrew +のハイト +##tina +t1 +rf +ed2k +##vel +##900 +way +final +りの +ns +5a +705 +197 +##メ +sweet +bytes +##ene +▲topjan +231 +##cker +##2007 +##px +100g +topapp +229 +helpapp +rs +low +14k +g4g +care +630 +ldquo +あり +##fork +leave +rm +edition +##gan +##zon +##qq +▲topsep +##google +##ism +gold +224 +explorer +##zer +toyota +category +select +visual +##labels +restaurant +##md +posts +s1 +##ico +もっと +angelababy +123456 +217 +sports +s3 +mbc +1915 +してくたさい +shell +x86 +candy +##new +kbs +face +xl +470 +##here +4a +swissinfo +v8 +▲topfeb +dram +##ual +##vice +3a +##wer +sport +q1 +ios10 +public +int +card +##c +ep +au +rt +##れた +1080 +bill +##mll +kim +30 +460 +wan +##uk +##ミ +x3 +298 +0t +scott +##ming +239 +e5 +##3d +h7n9 +worldcat +brown +##あります +##vo +##led +##580 +##ax +249 +410 +##ert +paris +##~6 +polo +925 +##lr +599 +##ナ +capital +##hing +bank +cv +1g +##chat +##s +##たい +adc +##ule +2m +##e +digital +hotmail +268 +##pad +870 +bbq +quot +##ring +before +wali +##まて +mcu +2k +2b +という +costco +316 +north +333 +switch +##city +##p +philips +##mann +management +panasonic +##cl +##vd +##ping +##rge +alice +##lk +##ましょう +css3 +##ney +vision +alpha +##ular +##400 +##tter +lz +にお +##ありません +mode +gre +1916 +pci +##tm +237 +1~2 +##yan +##そ +について +##let +##キ 
+work +war +coach +ah +mary +##ᅵ +huang +##pt +a8 +pt +follow +##berry +1895 +##ew +a5 +ghost +##ション +##wn +##og +south +##code +girls +##rid +action +villa +git +r11 +table +games +##cket +error +##anonymoussaid +##ag +here +##ame +##gc +qa +##■ +##lis +gmp +##gin +vmalife +##cher +yu +wedding +##tis +demo +dragon +530 +soho +social +bye +##rant +river +orz +acer +325 +##↑ +##ース +##ats +261 +del +##ven +440 +ups +##ように +##ター +305 +value +macd +yougou +##dn +661 +##ano +ll +##urt +##rent +continue +script +##wen +##ect +paper +263 +319 +shift +##chel +##フト +##cat +258 +x5 +fox +243 +##さん +car +aaa +##blog +loading +##yn +##tp +kuso +799 +si +sns +イカせるテンマ +ヒンクテンマ3 +rmb +vdc +forest +central +prime +help +ultra +##rmb +##ような +241 +square +688 +##しい +のないフロクに +##field +##reen +##ors +##ju +c1 +start +510 +##air +##map +cdn +##wo +cba +stephen +m8 +100km +##get +opera +##base +##ood +vsa +com™ +##aw +##ail +251 +なのて +count +t2 +##ᅡ +##een +2700 +hop +##gp +vsc +tree +##eg +##ose +816 +285 +##ories +##shop +alphago +v4 +1909 +simon +##ᆼ +fluke62max +zip +スホンサー +##sta +louis +cr +bas +##~10 +bc +##yer +hadoop +##ube +##wi +1906 +0755 +hola +##low +place +centre +5v +d3 +##fer +252 +##750 +##media +281 +540 +0l +exchange +262 +series +##ハー +##san +eb +##bank +##k +q3 +##nge +##mail +take +##lp +259 +1888 +client +east +cache +event +vincent +##ールを +きを +##nse +sui +855 +adchoice +##и +##stry +##なたの +246 +##zone +ga +apps +sea +##ab +248 +cisco +##タ +##rner +kymco +##care +dha +##pu +##yi +minkoff +royal +p1 +への +annie +269 +collection +kpi +playstation +257 +になります +866 +bh +##bar +queen +505 +radio +1904 +andy +armani +##xy +manager +iherb +##ery +##share +spring +raid +johnson +1908 +##ob +volvo +hall +##ball +v6 +our +taylor +##hk +bi +242 +##cp +kate +bo +water +technology +##rie +サイトは +277 +##ona +##sl +hpv +303 +gtx +hip +rdquo +jayz +stone +##lex +##rum +namespace +##やり +620 +##ale +##atic +des +##erson +##ql +##ves +##type +enter +##この +##てきます +d2 +##168 +##mix 
+##bian +との +a9 +jj +ky +##lc +access +movie +##hc +リストに +tower +##ration +##mit +ます +##nch +ua +tel +prefix +##o2 +1907 +##point +1901 +ott +~10 +##http +##ury +baidu +##ink +member +##logy +bigbang +nownews +##js +##shot +##tb +##こと +247 +eba +##tics +##lus +ける +v5 +spark +##ama +there +##ions +god +##lls +##down +hiv +##ress +burberry +day2 +##kv +◆◆ +jeff +related +film +edit +joseph +283 +##ark +cx +32gb +order +g9 +30000 +##ans +##tty +s5 +##bee +かあります +thread +xr +buy +sh +005 +land +spotify +mx +##ari +276 +##verse +×email +sf +why +##ことて +244 +7headlines +nego +sunny +dom +exo +401 +666 +positioning +fit +rgb +##tton +278 +kiss +alexa +adam +lp +みリストを +##g +mp +##ties +##llow +amy +##du +np +002 +institute +271 +##rth +##lar +2345 +590 +##des +sidebar +15 +imax +site +##cky +##kit +##ime +##009 +season +323 +##fun +##ンター +##ひ +gogoro +a7 +pu +lily +fire +twd600 +##ッセーシを +いて +##vis +30ml +##cture +##をお +information +##オ +close +friday +##くれる +yi +nick +てすか +##tta +##tel +6500 +##lock +cbd +economy +254 +かお +267 +tinker +double +375 +8gb +voice +##app +oops +channel +today +985 +##right +raw +xyz +##+ +jim +edm +##cent +7500 +supreme +814 +ds +##its +##asia +dropbox +##てすか +##tti +books +272 +100ml +##tle +##ller +##ken +##more +##boy +sex +309 +##dom +t3 +##ider +##なります +##unch +1903 +810 +feel +5500 +##かった +##put +により +s2 +mo +##gh +men +ka +amoled +div +##tr +##n1 +port +howard +##tags +ken +dnf +##nus +adsense +##а +ide +##へ +buff +thunder +##town +##ique +has +##body +auto +pin +##erry +tee +てした +295 +number +##the +##013 +object +psp +cool +udnbkk +16gb +##mic +miui +##tro +most +r2 +##alk +##nity +1880 +±0 +##いました +428 +s4 +law +version +##oa +n1 +sgs +docomo +##tf +##ack +henry +fc2 +##ded +##sco +##014 +##rite +286 +0mm +linkedin +##ada +##now +wii +##ndy +ucbug +##◎ +sputniknews +legalminer +##ika +##xp +2gb +##bu +q10 +oo +b6 +come +##rman +cheese +ming +maker +##gm +nikon +##fig +ppi +kelly +##ります +jchere +てきます +ted +md +003 +fgo +tech +##tto 
+dan +soc +##gl +##len +hair +earth +640 +521 +img +##pper +##a1 +##てきる +##ロク +acca +##ition +##ference +suite +##ig +outlook +##mond +##cation +398 +##pr +279 +101vip +358 +##999 +282 +64gb +3800 +345 +airport +##over +284 +##おり +jones +##ith +lab +##su +##いるのて +co2 +town +piece +##llo +no1 +vmware +24h +##qi +focus +reader +##admin +##ora +tb +false +##log +1898 +know +lan +838 +##ces +f4 +##ume +motel +stop +##oper +na +flickr +netcomponents +##af +##─ +pose +williams +local +##ound +##cg +##site +##iko +いお +274 +5m +gsm +con +##ath +1902 +friends +##hip +cell +317 +##rey +780 +cream +##cks +012 +##dp +facebooktwitterpinterestgoogle +sso +324 +shtml +song +swiss +##mw +##キンク +lumia +xdd +string +tiffany +522 +marc +られた +insee +russell +sc +dell +##ations +ok +camera +289 +##vs +##flow +##late +classic +287 +##nter +stay +g1 +mtv +512 +##ever +##lab +##nger +qe +sata +ryan +d1 +50ml +cms +##cing +su +292 +3300 +editor +296 +##nap +security +sunday +association +##ens +##700 +##bra +acg +##かり +sofascore +とは +mkv +##ign +jonathan +gary +build +labels +##oto +tesla +moba +qi +gohappy +general +ajax +1024 +##かる +サイト +society +##test +##urs +wps +fedora +##ich +mozilla +328 +##480 +##dr +usa +urn +##lina +##r +grace +##die +##try +##ader +1250 +##なり +elle +570 +##chen +##ᆯ +price +##ten +uhz +##ough +eq +##hen +states +push +session +balance +wow +506 +##cus +##py +when +##ward +##ep +34e +wong +library +prada +##サイト +##cle +running +##ree +313 +ck +date +q4 +##ctive +##ool +##> +mk +##ira +##163 +388 +die +secret +rq +dota +buffet +は1ヶ +e6 +##ez +pan +368 +ha +##card +##cha +2a +##さ +alan +day3 +eye +f3 +##end +france +keep +adi +rna +tvbs +##ala +solo +nova +##え +##tail +##ょう +support +##ries +##なる +##ved +base +copy +iis +fps +##ways +hero +hgih +profile +fish +mu +ssh +entertainment +chang +##wd +click +cake +##ond +pre +##tom +kic +pixel +##ov +##fl +product +6a +##pd +dear +##gate +es +yumi +audio +##² +##sky +echo +bin +where +##ture +329 +##ape +find +sap 
+isis +##なと +nand +##101 +##load +##ream +band +a6 +525 +never +##post +festival +50cm +##we +555 +guide +314 +zenfone +##ike +335 +gd +forum +jessica +strong +alexander +##ould +software +allen +##ious +program +360° +else +lohasthree +##gar +することかてきます +please +##れます +rc +##ggle +##ric +bim +50000 +##own +eclipse +355 +brian +3ds +##side +061 +361 +##other +##ける +##tech +##ator +485 +engine +##ged +##t +plaza +##fit +cia +ngo +westbrook +shi +tbs +50mm +##みませんか +sci +291 +reuters +##ily +contextlink +##hn +af +##cil +bridge +very +##cel +1890 +cambridge +##ize +15g +##aid +##data +790 +frm +##head +award +butler +##sun +meta +##mar +america +ps3 +puma +pmid +##すか +lc +670 +kitchen +##lic +オーフン5 +きなしソフトサーヒス +そして +day1 +future +★★★★ +##text +##page +##rris +pm1 +##ket +fans +##っています +1001 +christian +bot +kids +trackback +##hai +c3 +display +##hl +n2 +1896 +idea +さんも +##sent +airmail +##ug +##men +pwm +けます +028 +##lution +369 +852 +awards +schemas +354 +asics +wikipedia +font +##tional +##vy +c2 +293 +##れている +##dget +##ein +っている +contact +pepper +スキル +339 +##~5 +294 +##uel +##ument +730 +##hang +みてす +q5 +##sue +rain +##ndi +wei +swatch +##cept +わせ +331 +popular +##ste +##tag +p2 +501 +trc +1899 +##west +##live +justin +honda +ping +messenger +##rap +v9 +543 +##とは +unity +appqq +はすへて +025 +leo +##tone +##テ +##ass +uniqlo +##010 +502 +her +jane +memory +moneydj +##tical +human +12306 +していると +##m2 +coc +miacare +##mn +tmt +##core +vim +kk +##may +fan +target +use +too +338 +435 +2050 +867 +737 +fast +##2c +services +##ope +omega +energy +##わ +pinkoi +1a +##なから +##rain +jackson +##ement +##シャンルの +374 +366 +そんな +p9 +rd +##ᆨ +1111 +##tier +##vic +zone +##│ +385 +690 +dl +isofix +cpa +m4 +322 +kimi +めて +davis +##lay +lulu +##uck +050 +weeks +qs +##hop +920 +##n +ae +##ear +~5 +eia +405 +##fly +korea +jpeg +boost +##ship +small +##リア +1860 +eur +297 +425 +valley +##iel +simple +##ude +rn +k2 +##ena +されます +non +patrick +しているから +##ナー +feed +5757 +30g +process +well +qqmei 
+##thing +they +aws +lu +pink +##ters +##kin +または +board +##vertisement +wine +##ien +unicode +##dge +r1 +359 +##tant +いを +##twitter +##3c +cool1 +される +##れて +##l +isp +##012 +standard +45㎡2 +402 +##150 +matt +##fu +326 +##iner +googlemsn +pixnetfacebookyahoo +##ラン +x7 +886 +##uce +メーカー +sao +##ev +##きました +##file +9678 +403 +xddd +shirt +6l +##rio +##hat +3mm +givenchy +ya +bang +##lio +monday +crystal +ロクイン +##abc +336 +head +890 +ubuntuforumwikilinuxpastechat +##vc +##~20 +##rity +cnc +7866 +ipv6 +null +1897 +##ost +yang +imsean +tiger +##fet +##ンス +352 +##= +dji +327 +ji +maria +##come +##んて +foundation +3100 +##beth +##なった +1m +601 +active +##aft +##don +3p +sr +349 +emma +##khz +living +415 +353 +1889 +341 +709 +457 +sas +x6 +##face +pptv +x4 +##mate +han +sophie +##jing +337 +fifa +##mand +other +sale +inwedding +##gn +てきちゃいます +##mmy +##pmlast +bad +nana +nbc +してみてくたさいね +なとはお +##wu +##かあります +##あ +note7 +single +##340 +せからこ +してくたさい♪この +しにはとんとんワークケートを +するとあなたにもっとマッチした +ならワークケートへ +もみつかっちゃうかも +ワークケートの +##bel +window +##dio +##ht +union +age +382 +14 +##ivity +##y +コメント +domain +neo +##isa +##lter +5k +f5 +steven +##cts +powerpoint +tft +self +g2 +ft +##テル +zol +##act +mwc +381 +343 +もう +nbapop +408 +てある +eds +ace +##room +previous +author +tomtom +il +##ets +hu +financial +☆☆☆ +っています +bp +5t +chi +1gb +##hg +fairmont +cross +008 +gay +h2 +function +##けて +356 +also +1b +625 +##ータ +##raph +1894 +3~5 +##ils +i3 +334 +avenue +##host +による +##bon +##tsu +message +navigation +50g +fintech +h6 +##ことを +8cm +##ject +##vas +##firm +credit +##wf +xxxx +form +##nor +##space +huawei +plan +json +sbl +##dc +machine +921 +392 +wish +##120 +##sol +windows7 +edward +##ために +development +washington +##nsis +lo +818 +##sio +##ym +##bor +planet +##~8 +##wt +ieee +gpa +##めて +camp +ann +gm +##tw +##oka +connect +##rss +##work +##atus +wall +chicken +soul +2mm +##times +fa +##ather +##cord +009 +##eep +hitachi +gui +harry +##pan +e1 +disney +##press +##ーション +wind +386 +frigidaire +##tl 
+liu +hsu +332 +basic +von +ev +いた +てきる +スホンサーサイト +learning +##ull +expedia +archives +change +##wei +santa +cut +ins +6gb +turbo +brand +cf1 +508 +004 +return +747 +##rip +h1 +##nis +##をこ +128gb +##にお +3t +application +しており +emc +rx +##oon +384 +quick +412 +15058 +wilson +wing +chapter +##bug +beyond +##cms +##dar +##oh +zoom +e2 +trip +sb +##nba +rcep +342 +aspx +ci +080 +gc +gnu +める +##count +advanced +dance +dv +##url +##ging +367 +8591 +am09 +shadow +battle +346 +##i +##cia +##という +emily +##のてす +##tation +host +ff +techorz +sars +##mini +##mporary +##ering +nc +4200 +798 +##next +cma +##mbps +##gas +##ift +##dot +##ィ +455 +##~17 +amana +##りの +426 +##ros +ir +00㎡1 +##eet +##ible +##↓ +710 +ˋ▽ˊ +##aka +dcs +iq +##v +l1 +##lor +maggie +##011 +##iu +588 +##~1 +830 +##gt +1tb +articles +create +##burg +##iki +database +fantasy +##rex +##cam +dlc +dean +##you +hard +path +gaming +victoria +maps +cb +##lee +##itor +overchicstoretvhome +systems +##xt +416 +p3 +sarah +760 +##nan +407 +486 +x9 +install +second +626 +##ann +##ph +##rcle +##nic +860 +##nar +ec +##とう +768 +metro +chocolate +##rian +~4 +##table +##しています +skin +##sn +395 +mountain +##0mm +inparadise +6m +7x24 +ib +4800 +##jia +eeworld +creative +g5 +g3 +357 +parker +ecfa +village +からの +18000 +sylvia +サーヒス +hbl +##ques +##onsored +##x2 +##きます +##v4 +##tein +ie6 +383 +##stack +389 +ver +##ads +##baby +sound +bbe +##110 +##lone +##uid +ads +022 +gundam +351 +thinkpad +006 +scrum +match +##ave +mems +##470 +##oy +##なりました +##talk +glass +lamigo +span +##eme +job +##a5 +jay +wade +kde +498 +##lace +ocean +tvg +##covery +##r3 +##ners +##rea +junior +think +##aine +cover +##ision +##sia +↓↓ +##bow +msi +413 +458 +406 +##love +711 +801 +soft +z2 +##pl +456 +1840 +mobil +mind +##uy +427 +nginx +##oi +めた +##rr +6221 +##mple +##sson +##ーシてす +371 +##nts +91tv +comhd +crv3000 +##uard +1868 +397 +deep +lost +field +gallery +##bia +rate +spf +redis +traction +930 +icloud +011 +なら +fe +jose +372 +##tory +into +sohu +fx +899 
+379 +kicstart2 +##hia +すく +##~3 +##sit +ra +24 +##walk +##xure +500g +##pact +pacific +xa +natural +carlo +##250 +##walker +1850 +##can +cto +gigi +516 +##サー +pen +##hoo +ob +matlab +##b +##yy +13913459 +##iti +mango +##bbs +sense +c5 +oxford +##ニア +walker +jennifer +##ola +course +##bre +701 +##pus +##rder +lucky +075 +##ぁ +ivy +なお +##nia +sotheby +side +##ugh +joy +##orage +##ush +##bat +##dt +364 +r9 +##2d +##gio +511 +country +wear +##lax +##~7 +##moon +393 +seven +study +411 +348 +lonzo +8k +##ェ +evolution +##イフ +##kk +gs +kd +##レス +arduino +344 +b12 +##lux +arpg +##rdon +cook +##x5 +dark +five +##als +##ida +とても +sign +362 +##ちの +something +20mm +##nda +387 +##posted +fresh +tf +1870 +422 +cam +##mine +##skip +##form +##ssion +education +394 +##tee +dyson +stage +##jie +want +##night +epson +pack +あります +##ppy +テリヘル +##█ +wd +##eh +##rence +left +##lvin +golden +mhz +discovery +##trix +##n2 +loft +##uch +##dra +##sse +speed +~1 +1mdb +sorry +welcome +##urn +wave +gaga +##lmer +teddy +##160 +トラックハック +せよ +611 +##f2016 +378 +rp +##sha +rar +##あなたに +##きた +840 +holiday +##ュー +373 +074 +##vg +##nos +##rail +gartner +gi +6p +##dium +kit +488 +b3 +eco +##ろう +20g +sean +##stone +autocad +nu +##np +f16 +write +029 +m5 +##ias +images +atp +##dk +fsm +504 +1350 +ve +52kb +##xxx +##のに +##cake +414 +unit +lim +ru +1v +##ification +published +angela +16g +analytics +ak +##q +##nel +gmt +##icon +again +##₂ +##bby +ios11 +445 +かこさいます +waze +いてす +##ハ +9985 +##ust +##ティー +framework +##007 +iptv +delete +52sykb +cl +wwdc +027 +30cm +##fw +##ての +1389 +##xon +brandt +##ses +##dragon +tc +vetements +anne +monte +modern +official +##へて +##ere +##nne +##oud +もちろん +50 +etnews +##a2 +##graphy +421 +863 +##ちゃん +444 +##rtex +##てお +l2 +##gma +mount +ccd +たと +archive +morning +tan +ddos +e7 +##ホ +day4 +##ウ +gis +453 +its +495 +factory +bruce +pg +##ito +ってくたさい +guest +cdma +##lling +536 +n3 +しかし +3~4 +mega +eyes +ro +13 +women +dac +church +##jun +singapore +##facebook +6991 +starbucks 
+##tos +##stin +##shine +zen +##mu +tina +20℃ +1893 +##たけて +503 +465 +request +##gence +qt +##っ +1886 +347 +363 +q7 +##zzi +diary +##tore +409 +##ead +468 +cst +##osa +canada +agent +va +##jiang +##ちは +##ーク +##lam +sg +##nix +##sday +##よって +g6 +##master +bing +##zl +charlie +16 +8mm +nb40 +##ーン +thai +##ルフ +ln284ct +##itz +##2f +bonnie +##food +##lent +originals +##stro +##lts +418 +∟∣ +##bscribe +children +ntd +yesstyle +##かも +hmv +##tment +d5 +2cm +arts +sms +##pn +##я +##いい +topios9 +539 +lifestyle +virtual +##ague +xz +##deo +muji +024 +unt +##nnis +##ᅩ +faq1 +1884 +396 +##ette +fly +64㎡ +はしめまして +441 +curry +##pop +のこ +release +##← +##◆◆ +##cast +073 +ありな +500ml +##ews +5c +##stle +ios7 +##ima +787 +dog +lenovo +##r4 +roger +013 +cbs +vornado +100m +417 +##desk +##クok +##ald +1867 +9595 +2900 +##van +oil +##x +some +break +common +##jy +##lines +g7 +twice +419 +ella +nano +belle +にこ +##mes +##self +##note +jb +##ことかてきます +benz +##との +##ova +451 +save +##wing +##ますのて +kai +りは +##hua +##rect +rainer +##unge +448 +##0m +adsl +##かな +guestname +##uma +##kins +##zu +tokichoi +##price +county +##med +##mus +rmk +391 +address +vm +えて +openload +##group +##hin +##iginal +amg +urban +##oz +jobs +emi +##public +beautiful +##sch +album +##dden +##bell +jerry +works +hostel +miller +##drive +##rmin +##10 +376 +boot +828 +##370 +##fx +##cm~ +1885 +##nome +##ctionary +##oman +##lish +##cr +##hm +433 +##how +432 +francis +xi +c919 +b5 +evernote +##uc +vga +##3000 +coupe +##urg +##cca +##uality +019 +6g +れる +multi +##また +##ett +em +hey +##ani +##tax +##rma +inside +than +740 +leonnhurt +##jin +ict +れた +bird +notes +200mm +くの +##dical +##lli +result +442 +iu +ee +438 +smap +gopro +##last +yin +pure +998 +32g +けた +5kg +##dan +##rame +mama +##oot +bean +marketing +##hur +2l +bella +sync +xuite +##ground +515 +discuz +##getrelax +##ince +##bay +##5s +cj +##イス +gmat +apt +##pass +jing +##rix +c4 +rich +##とても +niusnews +##ello +bag +770 +##eting +##mobile +18 +culture +015 +##のてすか 
+377 +1020 +area +##ience +616 +details +gp +universal +silver +dit +はお +private +ddd +u11 +kanshu +##ified +fung +##nny +dx +##520 +tai +475 +023 +##fr +##lean +3s +##pin +429 +##rin +25000 +ly +rick +##bility +usb3 +banner +##baru +##gion +metal +dt +vdf +1871 +karl +qualcomm +bear +1010 +oldid +ian +jo +##tors +population +##ernel +1882 +mmorpg +##mv +##bike +603 +##© +ww +friend +##ager +exhibition +##del +##pods +fpx +structure +##free +##tings +kl +##rley +##copyright +##mma +california +3400 +orange +yoga +4l +canmake +honey +##anda +##コメント +595 +nikkie +##ルハイト +dhl +publishing +##mall +##gnet +20cm +513 +##クセス +##┅ +e88 +970 +##dog +fishbase +##! +##" +### +##$ +##% +##& +##' +##( +##) +##* +##+ +##, +##- +##. +##/ +##: +##; +##< +##= +##> +##? +##@ +##[ +##\ +##] +##^ +##_ +##{ +##| +##} +##~ +##£ +##¤ +##¥ +##§ +##« +##± +##³ +##µ +##· +##¹ +##º +##» +##¼ +##ß +##æ +##÷ +##ø +##đ +##ŋ +##ɔ +##ə +##ɡ +##ʰ +##ˇ +##ˈ +##ˊ +##ˋ +##ˍ +##ː +##˙ +##˚ +##ˢ +##α +##β +##γ +##δ +##ε +##η +##θ +##ι +##κ +##λ +##μ +##ν +##ο +##π +##ρ +##ς +##σ +##τ +##υ +##φ +##χ +##ψ +##б +##в +##г +##д +##е +##ж +##з +##к +##л +##м +##н +##о +##п +##р +##с +##т +##у +##ф +##х +##ц +##ч +##ш +##ы +##ь +##і +##ا +##ب +##ة +##ت +##د +##ر +##س +##ع +##ل +##م +##ن +##ه +##و +##ي +##۩ +##ก +##ง +##น +##ม +##ย +##ร +##อ +##า +##เ +##๑ +##་ +##ღ +##ᄀ +##ᄁ +##ᄂ +##ᄃ +##ᄅ +##ᄆ +##ᄇ +##ᄈ +##ᄉ +##ᄋ +##ᄌ +##ᄎ +##ᄏ +##ᄐ +##ᄑ +##ᄒ +##ᅢ +##ᅣ +##ᅥ +##ᅦ +##ᅧ +##ᅨ +##ᅪ +##ᅬ +##ᅭ +##ᅮ +##ᅯ +##ᅲ +##ᅳ +##ᅴ +##ᆷ +##ᆸ +##ᆺ +##ᆻ +##ᗜ +##ᵃ +##ᵉ +##ᵍ +##ᵏ +##ᵐ +##ᵒ +##ᵘ +##‖ +##„ +##† +##• +##‥ +##‧ +##
 +##‰ +##′ +##″ +##‹ +##› +##※ +##‿ +##⁄ +##ⁱ +##⁺ +##ⁿ +##₁ +##₃ +##₄ +##€ +##№ +##ⅰ +##ⅱ +##ⅲ +##ⅳ +##ⅴ +##↔ +##↗ +##↘ +##⇒ +##∀ +##− +##∕ +##∙ +##√ +##∞ +##∟ +##∠ +##∣ +##∩ +##∮ +##∶ +##∼ +##∽ +##≈ +##≒ +##≡ +##≤ +##≥ +##≦ +##≧ +##≪ +##≫ +##⊙ +##⋅ +##⋈ +##⋯ +##⌒ +##① +##② +##③ +##④ +##⑤ +##⑥ +##⑦ +##⑧ +##⑨ +##⑩ +##⑴ +##⑵ +##⑶ +##⑷ +##⑸ +##⒈ +##⒉ +##⒊ +##⒋ +##ⓒ +##ⓔ +##ⓘ +##━ +##┃ +##┆ +##┊ +##┌ +##└ +##├ +##┣ +##═ +##║ +##╚ +##╞ +##╠ +##╭ +##╮ +##╯ +##╰ +##╱ +##╳ +##▂ +##▃ +##▅ +##▇ +##▉ +##▋ +##▌ +##▍ +##▎ +##□ +##▪ +##▫ +##▬ +##△ +##▶ +##► +##▽ +##◇ +##◕ +##◠ +##◢ +##◤ +##☀ +##☕ +##☞ +##☺ +##☼ +##♀ +##♂ +##♠ +##♡ +##♣ +##♦ +##♫ +##♬ +##✈ +##✔ +##✕ +##✖ +##✦ +##✨ +##✪ +##✰ +##✿ +##❀ +##➜ +##➤ +##⦿ +##、 +##。 +##〃 +##々 +##〇 +##〈 +##〉 +##《 +##》 +##「 +##」 +##『 +##』 +##【 +##】 +##〓 +##〔 +##〕 +##〖 +##〗 +##〜 +##〝 +##〞 +##ぃ +##ぇ +##ぬ +##ふ +##ほ +##む +##ゃ +##ゅ +##ゆ +##ょ +##゜ +##ゝ +##ァ +##ゥ +##エ +##ォ +##ケ +##サ +##セ +##ソ +##ッ +##ニ +##ヌ +##ネ +##ノ +##ヘ +##モ +##ャ +##ヤ +##ュ +##ユ +##ョ +##ヨ +##ワ +##ヲ +##・ +##ヽ +##ㄅ +##ㄆ +##ㄇ +##ㄉ +##ㄋ +##ㄌ +##ㄍ +##ㄎ +##ㄏ +##ㄒ +##ㄚ +##ㄛ +##ㄞ +##ㄟ +##ㄢ +##ㄤ +##ㄥ +##ㄧ +##ㄨ +##ㆍ +##㈦ +##㊣ +##㗎 +##一 +##丁 +##七 +##万 +##丈 +##三 +##上 +##下 +##不 +##与 +##丐 +##丑 +##专 +##且 +##丕 +##世 +##丘 +##丙 +##业 +##丛 +##东 +##丝 +##丞 +##丟 +##両 +##丢 +##两 +##严 +##並 +##丧 +##丨 +##个 +##丫 +##中 +##丰 +##串 +##临 +##丶 +##丸 +##丹 +##为 +##主 +##丼 +##丽 +##举 +##丿 +##乂 +##乃 +##久 +##么 +##义 +##之 +##乌 +##乍 +##乎 +##乏 +##乐 +##乒 +##乓 +##乔 +##乖 +##乗 +##乘 +##乙 +##乜 +##九 +##乞 +##也 +##习 +##乡 +##书 +##乩 +##买 +##乱 +##乳 +##乾 +##亀 +##亂 +##了 +##予 +##争 +##事 +##二 +##于 +##亏 +##云 +##互 +##五 +##井 +##亘 +##亙 +##亚 +##些 +##亜 +##亞 +##亟 +##亡 +##亢 +##交 +##亥 +##亦 +##产 +##亨 +##亩 +##享 +##京 +##亭 +##亮 +##亲 +##亳 +##亵 +##人 +##亿 +##什 +##仁 +##仃 +##仄 +##仅 +##仆 +##仇 +##今 +##介 +##仍 +##从 +##仏 +##仑 +##仓 +##仔 +##仕 +##他 +##仗 +##付 +##仙 +##仝 +##仞 +##仟 +##代 +##令 +##以 +##仨 +##仪 +##们 +##仮 +##仰 +##仲 +##件 +##价 +##任 +##份 +##仿 +##企 +##伉 +##伊 +##伍 +##伎 +##伏 +##伐 +##休 +##伕 +##众 +##优 +##伙 +##会 +##伝 +##伞 +##伟 +##传 +##伢 +##伤 +##伦 +##伪 +##伫 +##伯 +##估 
+##伴 +##伶 +##伸 +##伺 +##似 +##伽 +##佃 +##但 +##佇 +##佈 +##位 +##低 +##住 +##佐 +##佑 +##体 +##佔 +##何 +##佗 +##佘 +##余 +##佚 +##佛 +##作 +##佝 +##佞 +##佟 +##你 +##佢 +##佣 +##佤 +##佥 +##佩 +##佬 +##佯 +##佰 +##佳 +##併 +##佶 +##佻 +##佼 +##使 +##侃 +##侄 +##來 +##侈 +##例 +##侍 +##侏 +##侑 +##侖 +##侗 +##供 +##依 +##侠 +##価 +##侣 +##侥 +##侦 +##侧 +##侨 +##侬 +##侮 +##侯 +##侵 +##侶 +##侷 +##便 +##係 +##促 +##俄 +##俊 +##俎 +##俏 +##俐 +##俑 +##俗 +##俘 +##俚 +##保 +##俞 +##俟 +##俠 +##信 +##俨 +##俩 +##俪 +##俬 +##俭 +##修 +##俯 +##俱 +##俳 +##俸 +##俺 +##俾 +##倆 +##倉 +##個 +##倌 +##倍 +##倏 +##們 +##倒 +##倔 +##倖 +##倘 +##候 +##倚 +##倜 +##借 +##倡 +##値 +##倦 +##倩 +##倪 +##倫 +##倬 +##倭 +##倶 +##债 +##值 +##倾 +##偃 +##假 +##偈 +##偉 +##偌 +##偎 +##偏 +##偕 +##做 +##停 +##健 +##側 +##偵 +##偶 +##偷 +##偻 +##偽 +##偿 +##傀 +##傅 +##傍 +##傑 +##傘 +##備 +##傚 +##傢 +##傣 +##傥 +##储 +##傩 +##催 +##傭 +##傲 +##傳 +##債 +##傷 +##傻 +##傾 +##僅 +##働 +##像 +##僑 +##僕 +##僖 +##僚 +##僥 +##僧 +##僭 +##僮 +##僱 +##僵 +##價 +##僻 +##儀 +##儂 +##億 +##儆 +##儉 +##儋 +##儒 +##儕 +##儘 +##償 +##儡 +##優 +##儲 +##儷 +##儼 +##儿 +##兀 +##允 +##元 +##兄 +##充 +##兆 +##兇 +##先 +##光 +##克 +##兌 +##免 +##児 +##兑 +##兒 +##兔 +##兖 +##党 +##兜 +##兢 +##入 +##內 +##全 +##兩 +##八 +##公 +##六 +##兮 +##兰 +##共 +##兲 +##关 +##兴 +##兵 +##其 +##具 +##典 +##兹 +##养 +##兼 +##兽 +##冀 +##内 +##円 +##冇 +##冈 +##冉 +##冊 +##册 +##再 +##冏 +##冒 +##冕 +##冗 +##写 +##军 +##农 +##冠 +##冢 +##冤 +##冥 +##冨 +##冪 +##冬 +##冯 +##冰 +##冲 +##决 +##况 +##冶 +##冷 +##冻 +##冼 +##冽 +##冾 +##净 +##凄 +##准 +##凇 +##凈 +##凉 +##凋 +##凌 +##凍 +##减 +##凑 +##凛 +##凜 +##凝 +##几 +##凡 +##凤 +##処 +##凪 +##凭 +##凯 +##凰 +##凱 +##凳 +##凶 +##凸 +##凹 +##出 +##击 +##函 +##凿 +##刀 +##刁 +##刃 +##分 +##切 +##刈 +##刊 +##刍 +##刎 +##刑 +##划 +##列 +##刘 +##则 +##刚 +##创 +##初 +##删 +##判 +##別 +##刨 +##利 +##刪 +##别 +##刮 +##到 +##制 +##刷 +##券 +##刹 +##刺 +##刻 +##刽 +##剁 +##剂 +##剃 +##則 +##剉 +##削 +##剋 +##剌 +##前 +##剎 +##剐 +##剑 +##剔 +##剖 +##剛 +##剜 +##剝 +##剣 +##剤 +##剥 +##剧 +##剩 +##剪 +##副 +##割 +##創 +##剷 +##剽 +##剿 +##劃 +##劇 +##劈 +##劉 +##劊 +##劍 +##劏 +##劑 +##力 +##劝 +##办 +##功 +##加 +##务 +##劣 +##动 +##助 +##努 +##劫 +##劭 +##励 +##劲 +##劳 +##労 +##劵 +##効 +##劾 +##势 +##勁 +##勃 +##勇 +##勉 +##勋 +##勐 +##勒 +##動 +##勖 +##勘 +##務 +##勛 
+##勝 +##勞 +##募 +##勢 +##勤 +##勧 +##勳 +##勵 +##勸 +##勺 +##勻 +##勾 +##勿 +##匀 +##包 +##匆 +##匈 +##匍 +##匐 +##匕 +##化 +##北 +##匙 +##匝 +##匠 +##匡 +##匣 +##匪 +##匮 +##匯 +##匱 +##匹 +##区 +##医 +##匾 +##匿 +##區 +##十 +##千 +##卅 +##升 +##午 +##卉 +##半 +##卍 +##华 +##协 +##卑 +##卒 +##卓 +##協 +##单 +##卖 +##南 +##単 +##博 +##卜 +##卞 +##卟 +##占 +##卡 +##卢 +##卤 +##卦 +##卧 +##卫 +##卮 +##卯 +##印 +##危 +##即 +##却 +##卵 +##卷 +##卸 +##卻 +##卿 +##厂 +##厄 +##厅 +##历 +##厉 +##压 +##厌 +##厕 +##厘 +##厚 +##厝 +##原 +##厢 +##厥 +##厦 +##厨 +##厩 +##厭 +##厮 +##厲 +##厳 +##去 +##县 +##叁 +##参 +##參 +##又 +##叉 +##及 +##友 +##双 +##反 +##収 +##发 +##叔 +##取 +##受 +##变 +##叙 +##叛 +##叟 +##叠 +##叡 +##叢 +##口 +##古 +##句 +##另 +##叨 +##叩 +##只 +##叫 +##召 +##叭 +##叮 +##可 +##台 +##叱 +##史 +##右 +##叵 +##叶 +##号 +##司 +##叹 +##叻 +##叼 +##叽 +##吁 +##吃 +##各 +##吆 +##合 +##吉 +##吊 +##吋 +##同 +##名 +##后 +##吏 +##吐 +##向 +##吒 +##吓 +##吕 +##吖 +##吗 +##君 +##吝 +##吞 +##吟 +##吠 +##吡 +##否 +##吧 +##吨 +##吩 +##含 +##听 +##吭 +##吮 +##启 +##吱 +##吳 +##吴 +##吵 +##吶 +##吸 +##吹 +##吻 +##吼 +##吽 +##吾 +##呀 +##呂 +##呃 +##呆 +##呈 +##告 +##呋 +##呎 +##呐 +##呓 +##呕 +##呗 +##员 +##呛 +##呜 +##呢 +##呤 +##呦 +##周 +##呱 +##呲 +##味 +##呵 +##呷 +##呸 +##呻 +##呼 +##命 +##咀 +##咁 +##咂 +##咄 +##咆 +##咋 +##和 +##咎 +##咏 +##咐 +##咒 +##咔 +##咕 +##咖 +##咗 +##咘 +##咙 +##咚 +##咛 +##咣 +##咤 +##咦 +##咧 +##咨 +##咩 +##咪 +##咫 +##咬 +##咭 +##咯 +##咱 +##咲 +##咳 +##咸 +##咻 +##咽 +##咿 +##哀 +##品 +##哂 +##哄 +##哆 +##哇 +##哈 +##哉 +##哋 +##哌 +##响 +##哎 +##哏 +##哐 +##哑 +##哒 +##哔 +##哗 +##哟 +##員 +##哥 +##哦 +##哧 +##哨 +##哩 +##哪 +##哭 +##哮 +##哲 +##哺 +##哼 +##哽 +##唁 +##唄 +##唆 +##唇 +##唉 +##唏 +##唐 +##唑 +##唔 +##唠 +##唤 +##唧 +##唬 +##售 +##唯 +##唰 +##唱 +##唳 +##唷 +##唸 +##唾 +##啃 +##啄 +##商 +##啉 +##啊 +##問 +##啓 +##啕 +##啖 +##啜 +##啞 +##啟 +##啡 +##啤 +##啥 +##啦 +##啧 +##啪 +##啫 +##啬 +##啮 +##啰 +##啱 +##啲 +##啵 +##啶 +##啷 +##啸 +##啻 +##啼 +##啾 +##喀 +##喂 +##喃 +##善 +##喆 +##喇 +##喉 +##喊 +##喋 +##喎 +##喏 +##喔 +##喘 +##喙 +##喚 +##喜 +##喝 +##喟 +##喧 +##喪 +##喫 +##喬 +##單 +##喰 +##喱 +##喲 +##喳 +##喵 +##営 +##喷 +##喹 +##喺 +##喻 +##喽 +##嗅 +##嗆 +##嗇 +##嗎 +##嗑 +##嗒 +##嗓 +##嗔 +##嗖 +##嗚 +##嗜 +##嗝 +##嗟 +##嗡 +##嗣 +##嗤 +##嗦 +##嗨 +##嗪 +##嗬 +##嗯 +##嗰 +##嗲 +##嗳 +##嗶 +##嗷 +##嗽 
+##嘀 +##嘅 +##嘆 +##嘈 +##嘉 +##嘌 +##嘍 +##嘎 +##嘔 +##嘖 +##嘗 +##嘘 +##嘚 +##嘛 +##嘜 +##嘞 +##嘟 +##嘢 +##嘣 +##嘤 +##嘧 +##嘩 +##嘭 +##嘮 +##嘯 +##嘰 +##嘱 +##嘲 +##嘴 +##嘶 +##嘸 +##嘹 +##嘻 +##嘿 +##噁 +##噌 +##噎 +##噓 +##噔 +##噗 +##噙 +##噜 +##噠 +##噢 +##噤 +##器 +##噩 +##噪 +##噬 +##噱 +##噴 +##噶 +##噸 +##噹 +##噻 +##噼 +##嚀 +##嚇 +##嚎 +##嚏 +##嚐 +##嚓 +##嚕 +##嚟 +##嚣 +##嚥 +##嚨 +##嚮 +##嚴 +##嚷 +##嚼 +##囂 +##囉 +##囊 +##囍 +##囑 +##囔 +##囗 +##囚 +##四 +##囝 +##回 +##囟 +##因 +##囡 +##团 +##団 +##囤 +##囧 +##囪 +##囫 +##园 +##困 +##囱 +##囲 +##図 +##围 +##囹 +##固 +##国 +##图 +##囿 +##圃 +##圄 +##圆 +##圈 +##國 +##圍 +##圏 +##園 +##圓 +##圖 +##團 +##圜 +##土 +##圣 +##圧 +##在 +##圩 +##圭 +##地 +##圳 +##场 +##圻 +##圾 +##址 +##坂 +##均 +##坊 +##坍 +##坎 +##坏 +##坐 +##坑 +##块 +##坚 +##坛 +##坝 +##坞 +##坟 +##坠 +##坡 +##坤 +##坦 +##坨 +##坪 +##坯 +##坳 +##坵 +##坷 +##垂 +##垃 +##垄 +##型 +##垒 +##垚 +##垛 +##垠 +##垢 +##垣 +##垦 +##垩 +##垫 +##垭 +##垮 +##垵 +##埂 +##埃 +##埋 +##城 +##埔 +##埕 +##埗 +##域 +##埠 +##埤 +##埵 +##執 +##埸 +##培 +##基 +##埼 +##堀 +##堂 +##堃 +##堅 +##堆 +##堇 +##堑 +##堕 +##堙 +##堡 +##堤 +##堪 +##堯 +##堰 +##報 +##場 +##堵 +##堺 +##堿 +##塊 +##塌 +##塑 +##塔 +##塗 +##塘 +##塚 +##塞 +##塢 +##塩 +##填 +##塬 +##塭 +##塵 +##塾 +##墀 +##境 +##墅 +##墉 +##墊 +##墒 +##墓 +##増 +##墘 +##墙 +##墜 +##增 +##墟 +##墨 +##墩 +##墮 +##墳 +##墻 +##墾 +##壁 +##壅 +##壆 +##壇 +##壊 +##壑 +##壓 +##壕 +##壘 +##壞 +##壟 +##壢 +##壤 +##壩 +##士 +##壬 +##壮 +##壯 +##声 +##売 +##壳 +##壶 +##壹 +##壺 +##壽 +##处 +##备 +##変 +##复 +##夏 +##夔 +##夕 +##外 +##夙 +##多 +##夜 +##够 +##夠 +##夢 +##夥 +##大 +##天 +##太 +##夫 +##夭 +##央 +##夯 +##失 +##头 +##夷 +##夸 +##夹 +##夺 +##夾 +##奂 +##奄 +##奇 +##奈 +##奉 +##奋 +##奎 +##奏 +##奐 +##契 +##奔 +##奕 +##奖 +##套 +##奘 +##奚 +##奠 +##奢 +##奥 +##奧 +##奪 +##奬 +##奮 +##女 +##奴 +##奶 +##奸 +##她 +##好 +##如 +##妃 +##妄 +##妆 +##妇 +##妈 +##妊 +##妍 +##妒 +##妓 +##妖 +##妘 +##妙 +##妝 +##妞 +##妣 +##妤 +##妥 +##妨 +##妩 +##妪 +##妮 +##妲 +##妳 +##妹 +##妻 +##妾 +##姆 +##姉 +##姊 +##始 +##姍 +##姐 +##姑 +##姒 +##姓 +##委 +##姗 +##姚 +##姜 +##姝 +##姣 +##姥 +##姦 +##姨 +##姪 +##姫 +##姬 +##姹 +##姻 +##姿 +##威 +##娃 +##娄 +##娅 +##娆 +##娇 +##娉 +##娑 +##娓 +##娘 +##娛 +##娜 +##娟 +##娠 +##娣 +##娥 +##娩 +##娱 +##娲 +##娴 +##娶 +##娼 +##婀 +##婁 +##婆 +##婉 +##婊 +##婕 +##婚 +##婢 +##婦 
+##婧 +##婪 +##婭 +##婴 +##婵 +##婶 +##婷 +##婺 +##婿 +##媒 +##媚 +##媛 +##媞 +##媧 +##媲 +##媳 +##媽 +##媾 +##嫁 +##嫂 +##嫉 +##嫌 +##嫑 +##嫔 +##嫖 +##嫘 +##嫚 +##嫡 +##嫣 +##嫦 +##嫩 +##嫲 +##嫵 +##嫻 +##嬅 +##嬉 +##嬌 +##嬗 +##嬛 +##嬢 +##嬤 +##嬪 +##嬰 +##嬴 +##嬷 +##嬸 +##嬿 +##孀 +##孃 +##子 +##孑 +##孔 +##孕 +##孖 +##字 +##存 +##孙 +##孚 +##孛 +##孜 +##孝 +##孟 +##孢 +##季 +##孤 +##学 +##孩 +##孪 +##孫 +##孬 +##孰 +##孱 +##孳 +##孵 +##學 +##孺 +##孽 +##孿 +##宁 +##它 +##宅 +##宇 +##守 +##安 +##宋 +##完 +##宏 +##宓 +##宕 +##宗 +##官 +##宙 +##定 +##宛 +##宜 +##宝 +##实 +##実 +##宠 +##审 +##客 +##宣 +##室 +##宥 +##宦 +##宪 +##宫 +##宮 +##宰 +##害 +##宴 +##宵 +##家 +##宸 +##容 +##宽 +##宾 +##宿 +##寂 +##寄 +##寅 +##密 +##寇 +##富 +##寐 +##寒 +##寓 +##寛 +##寝 +##寞 +##察 +##寡 +##寢 +##寥 +##實 +##寧 +##寨 +##審 +##寫 +##寬 +##寮 +##寰 +##寵 +##寶 +##寸 +##对 +##寺 +##寻 +##导 +##対 +##寿 +##封 +##専 +##射 +##将 +##將 +##專 +##尉 +##尊 +##尋 +##對 +##導 +##小 +##少 +##尔 +##尕 +##尖 +##尘 +##尚 +##尝 +##尤 +##尧 +##尬 +##就 +##尴 +##尷 +##尸 +##尹 +##尺 +##尻 +##尼 +##尽 +##尾 +##尿 +##局 +##屁 +##层 +##屄 +##居 +##屆 +##屈 +##屉 +##届 +##屋 +##屌 +##屍 +##屎 +##屏 +##屐 +##屑 +##展 +##屜 +##属 +##屠 +##屡 +##屢 +##層 +##履 +##屬 +##屯 +##山 +##屹 +##屿 +##岀 +##岁 +##岂 +##岌 +##岐 +##岑 +##岔 +##岖 +##岗 +##岘 +##岙 +##岚 +##岛 +##岡 +##岩 +##岫 +##岬 +##岭 +##岱 +##岳 +##岷 +##岸 +##峇 +##峋 +##峒 +##峙 +##峡 +##峤 +##峥 +##峦 +##峨 +##峪 +##峭 +##峯 +##峰 +##峴 +##島 +##峻 +##峽 +##崁 +##崂 +##崆 +##崇 +##崎 +##崑 +##崔 +##崖 +##崗 +##崙 +##崛 +##崧 +##崩 +##崭 +##崴 +##崽 +##嵇 +##嵊 +##嵋 +##嵌 +##嵐 +##嵘 +##嵩 +##嵬 +##嵯 +##嶂 +##嶄 +##嶇 +##嶋 +##嶙 +##嶺 +##嶼 +##嶽 +##巅 +##巍 +##巒 +##巔 +##巖 +##川 +##州 +##巡 +##巢 +##工 +##左 +##巧 +##巨 +##巩 +##巫 +##差 +##己 +##已 +##巳 +##巴 +##巷 +##巻 +##巽 +##巾 +##巿 +##币 +##市 +##布 +##帅 +##帆 +##师 +##希 +##帐 +##帑 +##帕 +##帖 +##帘 +##帚 +##帛 +##帜 +##帝 +##帥 +##带 +##帧 +##師 +##席 +##帮 +##帯 +##帰 +##帳 +##帶 +##帷 +##常 +##帼 +##帽 +##幀 +##幂 +##幄 +##幅 +##幌 +##幔 +##幕 +##幟 +##幡 +##幢 +##幣 +##幫 +##干 +##平 +##年 +##并 +##幸 +##幹 +##幺 +##幻 +##幼 +##幽 +##幾 +##广 +##庁 +##広 +##庄 +##庆 +##庇 +##床 +##序 +##庐 +##库 +##应 +##底 +##庖 +##店 +##庙 +##庚 +##府 +##庞 +##废 +##庠 +##度 +##座 +##庫 +##庭 +##庵 +##庶 +##康 +##庸 +##庹 +##庾 +##廁 +##廂 +##廃 +##廈 +##廉 +##廊 +##廓 
+##廖 +##廚 +##廝 +##廟 +##廠 +##廢 +##廣 +##廬 +##廳 +##延 +##廷 +##建 +##廿 +##开 +##弁 +##异 +##弃 +##弄 +##弈 +##弊 +##弋 +##式 +##弑 +##弒 +##弓 +##弔 +##引 +##弗 +##弘 +##弛 +##弟 +##张 +##弥 +##弦 +##弧 +##弩 +##弭 +##弯 +##弱 +##張 +##強 +##弹 +##强 +##弼 +##弾 +##彅 +##彆 +##彈 +##彌 +##彎 +##归 +##当 +##录 +##彗 +##彙 +##彝 +##形 +##彤 +##彥 +##彦 +##彧 +##彩 +##彪 +##彫 +##彬 +##彭 +##彰 +##影 +##彷 +##役 +##彻 +##彼 +##彿 +##往 +##征 +##径 +##待 +##徇 +##很 +##徉 +##徊 +##律 +##後 +##徐 +##徑 +##徒 +##従 +##徕 +##得 +##徘 +##徙 +##徜 +##從 +##徠 +##御 +##徨 +##復 +##循 +##徬 +##微 +##徳 +##徴 +##徵 +##德 +##徹 +##徼 +##徽 +##心 +##必 +##忆 +##忌 +##忍 +##忏 +##忐 +##忑 +##忒 +##忖 +##志 +##忘 +##忙 +##応 +##忠 +##忡 +##忤 +##忧 +##忪 +##快 +##忱 +##念 +##忻 +##忽 +##忿 +##怀 +##态 +##怂 +##怅 +##怆 +##怎 +##怏 +##怒 +##怔 +##怕 +##怖 +##怙 +##怜 +##思 +##怠 +##怡 +##急 +##怦 +##性 +##怨 +##怪 +##怯 +##怵 +##总 +##怼 +##恁 +##恃 +##恆 +##恋 +##恍 +##恐 +##恒 +##恕 +##恙 +##恚 +##恢 +##恣 +##恤 +##恥 +##恨 +##恩 +##恪 +##恫 +##恬 +##恭 +##息 +##恰 +##恳 +##恵 +##恶 +##恸 +##恺 +##恻 +##恼 +##恿 +##悄 +##悅 +##悉 +##悌 +##悍 +##悔 +##悖 +##悚 +##悟 +##悠 +##患 +##悦 +##您 +##悩 +##悪 +##悬 +##悯 +##悱 +##悲 +##悴 +##悵 +##悶 +##悸 +##悻 +##悼 +##悽 +##情 +##惆 +##惇 +##惊 +##惋 +##惑 +##惕 +##惘 +##惚 +##惜 +##惟 +##惠 +##惡 +##惦 +##惧 +##惨 +##惩 +##惫 +##惬 +##惭 +##惮 +##惯 +##惰 +##惱 +##想 +##惴 +##惶 +##惹 +##惺 +##愁 +##愆 +##愈 +##愉 +##愍 +##意 +##愕 +##愚 +##愛 +##愜 +##感 +##愣 +##愤 +##愧 +##愫 +##愷 +##愿 +##慄 +##慈 +##態 +##慌 +##慎 +##慑 +##慕 +##慘 +##慚 +##慟 +##慢 +##慣 +##慧 +##慨 +##慫 +##慮 +##慰 +##慳 +##慵 +##慶 +##慷 +##慾 +##憂 +##憊 +##憋 +##憎 +##憐 +##憑 +##憔 +##憚 +##憤 +##憧 +##憨 +##憩 +##憫 +##憬 +##憲 +##憶 +##憾 +##懂 +##懇 +##懈 +##應 +##懊 +##懋 +##懑 +##懒 +##懦 +##懲 +##懵 +##懶 +##懷 +##懸 +##懺 +##懼 +##懾 +##懿 +##戀 +##戈 +##戊 +##戌 +##戍 +##戎 +##戏 +##成 +##我 +##戒 +##戕 +##或 +##战 +##戚 +##戛 +##戟 +##戡 +##戦 +##截 +##戬 +##戮 +##戰 +##戲 +##戳 +##戴 +##戶 +##户 +##戸 +##戻 +##戾 +##房 +##所 +##扁 +##扇 +##扈 +##扉 +##手 +##才 +##扎 +##扑 +##扒 +##打 +##扔 +##払 +##托 +##扛 +##扣 +##扦 +##执 +##扩 +##扪 +##扫 +##扬 +##扭 +##扮 +##扯 +##扰 +##扱 +##扳 +##扶 +##批 +##扼 +##找 +##承 +##技 +##抄 +##抉 +##把 +##抑 +##抒 +##抓 +##投 +##抖 +##抗 +##折 +##抚 +##抛 +##抜 +##択 +##抟 +##抠 +##抡 +##抢 +##护 
+##报 +##抨 +##披 +##抬 +##抱 +##抵 +##抹 +##押 +##抽 +##抿 +##拂 +##拄 +##担 +##拆 +##拇 +##拈 +##拉 +##拋 +##拌 +##拍 +##拎 +##拐 +##拒 +##拓 +##拔 +##拖 +##拗 +##拘 +##拙 +##拚 +##招 +##拜 +##拟 +##拡 +##拢 +##拣 +##拥 +##拦 +##拧 +##拨 +##择 +##括 +##拭 +##拮 +##拯 +##拱 +##拳 +##拴 +##拷 +##拼 +##拽 +##拾 +##拿 +##持 +##挂 +##指 +##挈 +##按 +##挎 +##挑 +##挖 +##挙 +##挚 +##挛 +##挝 +##挞 +##挟 +##挠 +##挡 +##挣 +##挤 +##挥 +##挨 +##挪 +##挫 +##振 +##挲 +##挹 +##挺 +##挽 +##挾 +##捂 +##捅 +##捆 +##捉 +##捋 +##捌 +##捍 +##捎 +##捏 +##捐 +##捕 +##捞 +##损 +##捡 +##换 +##捣 +##捧 +##捨 +##捩 +##据 +##捱 +##捲 +##捶 +##捷 +##捺 +##捻 +##掀 +##掂 +##掃 +##掇 +##授 +##掉 +##掌 +##掏 +##掐 +##排 +##掖 +##掘 +##掙 +##掛 +##掠 +##採 +##探 +##掣 +##接 +##控 +##推 +##掩 +##措 +##掬 +##掰 +##掲 +##掳 +##掴 +##掷 +##掸 +##掺 +##揀 +##揃 +##揄 +##揆 +##揉 +##揍 +##描 +##提 +##插 +##揖 +##揚 +##換 +##握 +##揣 +##揩 +##揪 +##揭 +##揮 +##援 +##揶 +##揸 +##揹 +##揽 +##搀 +##搁 +##搂 +##搅 +##損 +##搏 +##搐 +##搓 +##搔 +##搖 +##搗 +##搜 +##搞 +##搡 +##搪 +##搬 +##搭 +##搵 +##搶 +##携 +##搽 +##摀 +##摁 +##摄 +##摆 +##摇 +##摈 +##摊 +##摒 +##摔 +##摘 +##摞 +##摟 +##摧 +##摩 +##摯 +##摳 +##摸 +##摹 +##摺 +##摻 +##撂 +##撃 +##撅 +##撇 +##撈 +##撐 +##撑 +##撒 +##撓 +##撕 +##撚 +##撞 +##撤 +##撥 +##撩 +##撫 +##撬 +##播 +##撮 +##撰 +##撲 +##撵 +##撷 +##撸 +##撻 +##撼 +##撿 +##擀 +##擁 +##擂 +##擄 +##擅 +##擇 +##擊 +##擋 +##操 +##擎 +##擒 +##擔 +##擘 +##據 +##擞 +##擠 +##擡 +##擢 +##擦 +##擬 +##擰 +##擱 +##擲 +##擴 +##擷 +##擺 +##擼 +##擾 +##攀 +##攏 +##攒 +##攔 +##攘 +##攙 +##攜 +##攝 +##攞 +##攢 +##攣 +##攤 +##攥 +##攪 +##攫 +##攬 +##支 +##收 +##攸 +##改 +##攻 +##放 +##政 +##故 +##效 +##敌 +##敍 +##敎 +##敏 +##救 +##敕 +##敖 +##敗 +##敘 +##教 +##敛 +##敝 +##敞 +##敢 +##散 +##敦 +##敬 +##数 +##敲 +##整 +##敵 +##敷 +##數 +##斂 +##斃 +##文 +##斋 +##斌 +##斎 +##斐 +##斑 +##斓 +##斗 +##料 +##斛 +##斜 +##斟 +##斡 +##斤 +##斥 +##斧 +##斩 +##斫 +##斬 +##断 +##斯 +##新 +##斷 +##方 +##於 +##施 +##旁 +##旃 +##旅 +##旋 +##旌 +##旎 +##族 +##旖 +##旗 +##无 +##既 +##日 +##旦 +##旧 +##旨 +##早 +##旬 +##旭 +##旮 +##旱 +##时 +##旷 +##旺 +##旻 +##昀 +##昂 +##昆 +##昇 +##昉 +##昊 +##昌 +##明 +##昏 +##易 +##昔 +##昕 +##昙 +##星 +##映 +##春 +##昧 +##昨 +##昭 +##是 +##昱 +##昴 +##昵 +##昶 +##昼 +##显 +##晁 +##時 +##晃 +##晉 +##晋 +##晌 +##晏 +##晒 +##晓 +##晔 +##晕 +##晖 +##晗 +##晚 +##晝 +##晞 +##晟 
+##晤 +##晦 +##晨 +##晩 +##普 +##景 +##晰 +##晴 +##晶 +##晷 +##智 +##晾 +##暂 +##暄 +##暇 +##暈 +##暉 +##暌 +##暐 +##暑 +##暖 +##暗 +##暝 +##暢 +##暧 +##暨 +##暫 +##暮 +##暱 +##暴 +##暸 +##暹 +##曄 +##曆 +##曇 +##曉 +##曖 +##曙 +##曜 +##曝 +##曠 +##曦 +##曬 +##曰 +##曲 +##曳 +##更 +##書 +##曹 +##曼 +##曾 +##替 +##最 +##會 +##月 +##有 +##朋 +##服 +##朐 +##朔 +##朕 +##朗 +##望 +##朝 +##期 +##朦 +##朧 +##木 +##未 +##末 +##本 +##札 +##朮 +##术 +##朱 +##朴 +##朵 +##机 +##朽 +##杀 +##杂 +##权 +##杆 +##杈 +##杉 +##李 +##杏 +##材 +##村 +##杓 +##杖 +##杜 +##杞 +##束 +##杠 +##条 +##来 +##杨 +##杭 +##杯 +##杰 +##東 +##杳 +##杵 +##杷 +##杼 +##松 +##板 +##极 +##构 +##枇 +##枉 +##枋 +##析 +##枕 +##林 +##枚 +##果 +##枝 +##枢 +##枣 +##枪 +##枫 +##枭 +##枯 +##枰 +##枱 +##枳 +##架 +##枷 +##枸 +##柄 +##柏 +##某 +##柑 +##柒 +##染 +##柔 +##柘 +##柚 +##柜 +##柞 +##柠 +##柢 +##查 +##柩 +##柬 +##柯 +##柱 +##柳 +##柴 +##柵 +##査 +##柿 +##栀 +##栃 +##栄 +##栅 +##标 +##栈 +##栉 +##栋 +##栎 +##栏 +##树 +##栓 +##栖 +##栗 +##校 +##栩 +##株 +##样 +##核 +##根 +##格 +##栽 +##栾 +##桀 +##桁 +##桂 +##桃 +##桅 +##框 +##案 +##桉 +##桌 +##桎 +##桐 +##桑 +##桓 +##桔 +##桜 +##桠 +##桡 +##桢 +##档 +##桥 +##桦 +##桧 +##桨 +##桩 +##桶 +##桿 +##梁 +##梅 +##梆 +##梏 +##梓 +##梗 +##條 +##梟 +##梢 +##梦 +##梧 +##梨 +##梭 +##梯 +##械 +##梳 +##梵 +##梶 +##检 +##棂 +##棄 +##棉 +##棋 +##棍 +##棒 +##棕 +##棗 +##棘 +##棚 +##棟 +##棠 +##棣 +##棧 +##森 +##棱 +##棲 +##棵 +##棹 +##棺 +##椁 +##椅 +##椋 +##植 +##椎 +##椒 +##検 +##椪 +##椭 +##椰 +##椹 +##椽 +##椿 +##楂 +##楊 +##楓 +##楔 +##楚 +##楝 +##楞 +##楠 +##楣 +##楨 +##楫 +##業 +##楮 +##極 +##楷 +##楸 +##楹 +##楼 +##楽 +##概 +##榄 +##榆 +##榈 +##榉 +##榔 +##榕 +##榖 +##榛 +##榜 +##榨 +##榫 +##榭 +##榮 +##榱 +##榴 +##榷 +##榻 +##槁 +##槃 +##構 +##槌 +##槍 +##槎 +##槐 +##槓 +##様 +##槛 +##槟 +##槤 +##槭 +##槲 +##槳 +##槻 +##槽 +##槿 +##樁 +##樂 +##樊 +##樑 +##樓 +##標 +##樞 +##樟 +##模 +##樣 +##権 +##横 +##樫 +##樯 +##樱 +##樵 +##樸 +##樹 +##樺 +##樽 +##樾 +##橄 +##橇 +##橋 +##橐 +##橘 +##橙 +##機 +##橡 +##橢 +##橫 +##橱 +##橹 +##橼 +##檀 +##檄 +##檎 +##檐 +##檔 +##檗 +##檜 +##檢 +##檬 +##檯 +##檳 +##檸 +##檻 +##櫃 +##櫚 +##櫛 +##櫥 +##櫸 +##櫻 +##欄 +##權 +##欒 +##欖 +##欠 +##次 +##欢 +##欣 +##欧 +##欲 +##欸 +##欺 +##欽 +##款 +##歆 +##歇 +##歉 +##歌 +##歎 +##歐 +##歓 +##歙 +##歛 +##歡 +##止 +##正 +##此 +##步 +##武 +##歧 +##歩 +##歪 +##歯 +##歲 +##歳 +##歴 +##歷 
+##歸 +##歹 +##死 +##歼 +##殁 +##殃 +##殆 +##殇 +##殉 +##殊 +##残 +##殒 +##殓 +##殖 +##殘 +##殞 +##殡 +##殤 +##殭 +##殯 +##殲 +##殴 +##段 +##殷 +##殺 +##殼 +##殿 +##毀 +##毁 +##毂 +##毅 +##毆 +##毋 +##母 +##毎 +##每 +##毒 +##毓 +##比 +##毕 +##毗 +##毘 +##毙 +##毛 +##毡 +##毫 +##毯 +##毽 +##氈 +##氏 +##氐 +##民 +##氓 +##气 +##氖 +##気 +##氙 +##氛 +##氟 +##氡 +##氢 +##氣 +##氤 +##氦 +##氧 +##氨 +##氪 +##氫 +##氮 +##氯 +##氰 +##氲 +##水 +##氷 +##永 +##氹 +##氾 +##汀 +##汁 +##求 +##汆 +##汇 +##汉 +##汎 +##汐 +##汕 +##汗 +##汙 +##汛 +##汝 +##汞 +##江 +##池 +##污 +##汤 +##汨 +##汩 +##汪 +##汰 +##汲 +##汴 +##汶 +##汹 +##決 +##汽 +##汾 +##沁 +##沂 +##沃 +##沅 +##沈 +##沉 +##沌 +##沏 +##沐 +##沒 +##沓 +##沖 +##沙 +##沛 +##沟 +##没 +##沢 +##沣 +##沥 +##沦 +##沧 +##沪 +##沫 +##沭 +##沮 +##沱 +##河 +##沸 +##油 +##治 +##沼 +##沽 +##沾 +##沿 +##況 +##泄 +##泉 +##泊 +##泌 +##泓 +##法 +##泗 +##泛 +##泞 +##泠 +##泡 +##波 +##泣 +##泥 +##注 +##泪 +##泫 +##泮 +##泯 +##泰 +##泱 +##泳 +##泵 +##泷 +##泸 +##泻 +##泼 +##泽 +##泾 +##洁 +##洄 +##洋 +##洒 +##洗 +##洙 +##洛 +##洞 +##津 +##洩 +##洪 +##洮 +##洱 +##洲 +##洵 +##洶 +##洸 +##洹 +##活 +##洼 +##洽 +##派 +##流 +##浃 +##浄 +##浅 +##浆 +##浇 +##浊 +##测 +##济 +##浏 +##浑 +##浒 +##浓 +##浔 +##浙 +##浚 +##浜 +##浣 +##浦 +##浩 +##浪 +##浬 +##浮 +##浯 +##浴 +##海 +##浸 +##涂 +##涅 +##涇 +##消 +##涉 +##涌 +##涎 +##涓 +##涔 +##涕 +##涙 +##涛 +##涝 +##涞 +##涟 +##涠 +##涡 +##涣 +##涤 +##润 +##涧 +##涨 +##涩 +##涪 +##涮 +##涯 +##液 +##涵 +##涸 +##涼 +##涿 +##淀 +##淄 +##淅 +##淆 +##淇 +##淋 +##淌 +##淑 +##淒 +##淖 +##淘 +##淙 +##淚 +##淞 +##淡 +##淤 +##淦 +##淨 +##淩 +##淪 +##淫 +##淬 +##淮 +##深 +##淳 +##淵 +##混 +##淹 +##淺 +##添 +##淼 +##清 +##済 +##渉 +##渊 +##渋 +##渍 +##渎 +##渐 +##渔 +##渗 +##渙 +##渚 +##減 +##渝 +##渠 +##渡 +##渣 +##渤 +##渥 +##渦 +##温 +##測 +##渭 +##港 +##渲 +##渴 +##游 +##渺 +##渾 +##湃 +##湄 +##湊 +##湍 +##湖 +##湘 +##湛 +##湟 +##湧 +##湫 +##湮 +##湯 +##湳 +##湾 +##湿 +##満 +##溃 +##溅 +##溉 +##溏 +##源 +##準 +##溜 +##溝 +##溟 +##溢 +##溥 +##溧 +##溪 +##溫 +##溯 +##溱 +##溴 +##溶 +##溺 +##溼 +##滁 +##滂 +##滄 +##滅 +##滇 +##滋 +##滌 +##滑 +##滓 +##滔 +##滕 +##滙 +##滚 +##滝 +##滞 +##滟 +##满 +##滢 +##滤 +##滥 +##滦 +##滨 +##滩 +##滬 +##滯 +##滲 +##滴 +##滷 +##滸 +##滾 +##滿 +##漁 +##漂 +##漆 +##漉 +##漏 +##漓 +##演 +##漕 +##漠 +##漢 +##漣 +##漩 +##漪 +##漫 +##漬 +##漯 +##漱 +##漲 +##漳 +##漸 +##漾 +##漿 +##潆 
+##潇 +##潋 +##潍 +##潑 +##潔 +##潘 +##潛 +##潜 +##潞 +##潟 +##潢 +##潤 +##潦 +##潧 +##潭 +##潮 +##潰 +##潴 +##潸 +##潺 +##潼 +##澀 +##澄 +##澆 +##澈 +##澍 +##澎 +##澗 +##澜 +##澡 +##澤 +##澧 +##澱 +##澳 +##澹 +##激 +##濁 +##濂 +##濃 +##濑 +##濒 +##濕 +##濘 +##濛 +##濟 +##濠 +##濡 +##濤 +##濫 +##濬 +##濮 +##濯 +##濱 +##濺 +##濾 +##瀅 +##瀆 +##瀉 +##瀋 +##瀏 +##瀑 +##瀕 +##瀘 +##瀚 +##瀛 +##瀝 +##瀞 +##瀟 +##瀧 +##瀨 +##瀬 +##瀰 +##瀾 +##灌 +##灏 +##灑 +##灘 +##灝 +##灞 +##灣 +##火 +##灬 +##灭 +##灯 +##灰 +##灵 +##灶 +##灸 +##灼 +##災 +##灾 +##灿 +##炀 +##炁 +##炅 +##炉 +##炊 +##炎 +##炒 +##炔 +##炕 +##炖 +##炙 +##炜 +##炫 +##炬 +##炭 +##炮 +##炯 +##炳 +##炷 +##炸 +##点 +##為 +##炼 +##炽 +##烁 +##烂 +##烃 +##烈 +##烊 +##烏 +##烘 +##烙 +##烛 +##烟 +##烤 +##烦 +##烧 +##烨 +##烩 +##烫 +##烬 +##热 +##烯 +##烷 +##烹 +##烽 +##焉 +##焊 +##焕 +##焖 +##焗 +##焘 +##焙 +##焚 +##焜 +##無 +##焦 +##焯 +##焰 +##焱 +##然 +##焼 +##煅 +##煉 +##煊 +##煌 +##煎 +##煒 +##煖 +##煙 +##煜 +##煞 +##煤 +##煥 +##煦 +##照 +##煨 +##煩 +##煮 +##煲 +##煸 +##煽 +##熄 +##熊 +##熏 +##熒 +##熔 +##熙 +##熟 +##熠 +##熨 +##熬 +##熱 +##熵 +##熹 +##熾 +##燁 +##燃 +##燄 +##燈 +##燉 +##燊 +##燎 +##燒 +##燔 +##燕 +##燙 +##燜 +##營 +##燥 +##燦 +##燧 +##燭 +##燮 +##燴 +##燻 +##燼 +##燿 +##爆 +##爍 +##爐 +##爛 +##爪 +##爬 +##爭 +##爰 +##爱 +##爲 +##爵 +##父 +##爷 +##爸 +##爹 +##爺 +##爻 +##爽 +##爾 +##牆 +##片 +##版 +##牌 +##牍 +##牒 +##牙 +##牛 +##牝 +##牟 +##牠 +##牡 +##牢 +##牦 +##牧 +##物 +##牯 +##牲 +##牴 +##牵 +##特 +##牺 +##牽 +##犀 +##犁 +##犄 +##犊 +##犍 +##犒 +##犢 +##犧 +##犬 +##犯 +##状 +##犷 +##犸 +##犹 +##狀 +##狂 +##狄 +##狈 +##狎 +##狐 +##狒 +##狗 +##狙 +##狞 +##狠 +##狡 +##狩 +##独 +##狭 +##狮 +##狰 +##狱 +##狸 +##狹 +##狼 +##狽 +##猎 +##猕 +##猖 +##猗 +##猙 +##猛 +##猜 +##猝 +##猥 +##猩 +##猪 +##猫 +##猬 +##献 +##猴 +##猶 +##猷 +##猾 +##猿 +##獄 +##獅 +##獎 +##獐 +##獒 +##獗 +##獠 +##獣 +##獨 +##獭 +##獰 +##獲 +##獵 +##獷 +##獸 +##獺 +##獻 +##獼 +##獾 +##玄 +##率 +##玉 +##王 +##玑 +##玖 +##玛 +##玟 +##玠 +##玥 +##玩 +##玫 +##玮 +##环 +##现 +##玲 +##玳 +##玷 +##玺 +##玻 +##珀 +##珂 +##珅 +##珈 +##珉 +##珊 +##珍 +##珏 +##珐 +##珑 +##珙 +##珞 +##珠 +##珣 +##珥 +##珩 +##珪 +##班 +##珮 +##珲 +##珺 +##現 +##球 +##琅 +##理 +##琇 +##琉 +##琊 +##琍 +##琏 +##琐 +##琛 +##琢 +##琥 +##琦 +##琨 +##琪 +##琬 +##琮 +##琰 +##琲 +##琳 +##琴 +##琵 +##琶 +##琺 +##琼 +##瑀 +##瑁 +##瑄 +##瑋 +##瑕 +##瑗 +##瑙 
+##瑚 +##瑛 +##瑜 +##瑞 +##瑟 +##瑠 +##瑣 +##瑤 +##瑩 +##瑪 +##瑯 +##瑰 +##瑶 +##瑾 +##璀 +##璁 +##璃 +##璇 +##璉 +##璋 +##璎 +##璐 +##璜 +##璞 +##璟 +##璧 +##璨 +##環 +##璽 +##璿 +##瓊 +##瓏 +##瓒 +##瓜 +##瓢 +##瓣 +##瓤 +##瓦 +##瓮 +##瓯 +##瓴 +##瓶 +##瓷 +##甄 +##甌 +##甕 +##甘 +##甙 +##甚 +##甜 +##生 +##產 +##産 +##甥 +##甦 +##用 +##甩 +##甫 +##甬 +##甭 +##甯 +##田 +##由 +##甲 +##申 +##电 +##男 +##甸 +##町 +##画 +##甾 +##畀 +##畅 +##界 +##畏 +##畑 +##畔 +##留 +##畜 +##畝 +##畢 +##略 +##畦 +##番 +##畫 +##異 +##畲 +##畳 +##畴 +##當 +##畸 +##畹 +##畿 +##疆 +##疇 +##疊 +##疏 +##疑 +##疔 +##疖 +##疗 +##疙 +##疚 +##疝 +##疟 +##疡 +##疣 +##疤 +##疥 +##疫 +##疮 +##疯 +##疱 +##疲 +##疳 +##疵 +##疸 +##疹 +##疼 +##疽 +##疾 +##痂 +##病 +##症 +##痈 +##痉 +##痊 +##痍 +##痒 +##痔 +##痕 +##痘 +##痙 +##痛 +##痞 +##痠 +##痢 +##痣 +##痤 +##痧 +##痨 +##痪 +##痫 +##痰 +##痱 +##痴 +##痹 +##痺 +##痼 +##痿 +##瘀 +##瘁 +##瘋 +##瘍 +##瘓 +##瘘 +##瘙 +##瘟 +##瘠 +##瘡 +##瘢 +##瘤 +##瘦 +##瘧 +##瘩 +##瘪 +##瘫 +##瘴 +##瘸 +##瘾 +##療 +##癇 +##癌 +##癒 +##癖 +##癜 +##癞 +##癡 +##癢 +##癣 +##癥 +##癫 +##癬 +##癮 +##癱 +##癲 +##癸 +##発 +##登 +##發 +##白 +##百 +##皂 +##的 +##皆 +##皇 +##皈 +##皋 +##皎 +##皑 +##皓 +##皖 +##皙 +##皚 +##皮 +##皰 +##皱 +##皴 +##皺 +##皿 +##盂 +##盃 +##盅 +##盆 +##盈 +##益 +##盎 +##盏 +##盐 +##监 +##盒 +##盔 +##盖 +##盗 +##盘 +##盛 +##盜 +##盞 +##盟 +##盡 +##監 +##盤 +##盥 +##盧 +##盪 +##目 +##盯 +##盱 +##盲 +##直 +##相 +##盹 +##盼 +##盾 +##省 +##眈 +##眉 +##看 +##県 +##眙 +##眞 +##真 +##眠 +##眦 +##眨 +##眩 +##眯 +##眶 +##眷 +##眸 +##眺 +##眼 +##眾 +##着 +##睁 +##睇 +##睏 +##睐 +##睑 +##睛 +##睜 +##睞 +##睡 +##睢 +##督 +##睥 +##睦 +##睨 +##睪 +##睫 +##睬 +##睹 +##睽 +##睾 +##睿 +##瞄 +##瞅 +##瞇 +##瞋 +##瞌 +##瞎 +##瞑 +##瞒 +##瞓 +##瞞 +##瞟 +##瞠 +##瞥 +##瞧 +##瞩 +##瞪 +##瞬 +##瞭 +##瞰 +##瞳 +##瞻 +##瞼 +##瞿 +##矇 +##矍 +##矗 +##矚 +##矛 +##矜 +##矢 +##矣 +##知 +##矩 +##矫 +##短 +##矮 +##矯 +##石 +##矶 +##矽 +##矾 +##矿 +##码 +##砂 +##砌 +##砍 +##砒 +##研 +##砖 +##砗 +##砚 +##砝 +##砣 +##砥 +##砧 +##砭 +##砰 +##砲 +##破 +##砷 +##砸 +##砺 +##砼 +##砾 +##础 +##硅 +##硐 +##硒 +##硕 +##硝 +##硫 +##硬 +##确 +##硯 +##硼 +##碁 +##碇 +##碉 +##碌 +##碍 +##碎 +##碑 +##碓 +##碗 +##碘 +##碚 +##碛 +##碟 +##碣 +##碧 +##碩 +##碰 +##碱 +##碳 +##碴 +##確 +##碼 +##碾 +##磁 +##磅 +##磊 +##磋 +##磐 +##磕 +##磚 +##磡 +##磨 +##磬 +##磯 +##磲 +##磷 +##磺 +##礁 +##礎 +##礙 
+##礡 +##礦 +##礪 +##礫 +##礴 +##示 +##礼 +##社 +##祀 +##祁 +##祂 +##祇 +##祈 +##祉 +##祎 +##祐 +##祕 +##祖 +##祗 +##祚 +##祛 +##祜 +##祝 +##神 +##祟 +##祠 +##祢 +##祥 +##票 +##祭 +##祯 +##祷 +##祸 +##祺 +##祿 +##禀 +##禁 +##禄 +##禅 +##禍 +##禎 +##福 +##禛 +##禦 +##禧 +##禪 +##禮 +##禱 +##禹 +##禺 +##离 +##禽 +##禾 +##禿 +##秀 +##私 +##秃 +##秆 +##秉 +##秋 +##种 +##科 +##秒 +##秘 +##租 +##秣 +##秤 +##秦 +##秧 +##秩 +##秭 +##积 +##称 +##秸 +##移 +##秽 +##稀 +##稅 +##程 +##稍 +##税 +##稔 +##稗 +##稚 +##稜 +##稞 +##稟 +##稠 +##稣 +##種 +##稱 +##稲 +##稳 +##稷 +##稹 +##稻 +##稼 +##稽 +##稿 +##穀 +##穂 +##穆 +##穌 +##積 +##穎 +##穗 +##穢 +##穩 +##穫 +##穴 +##究 +##穷 +##穹 +##空 +##穿 +##突 +##窃 +##窄 +##窈 +##窍 +##窑 +##窒 +##窓 +##窕 +##窖 +##窗 +##窘 +##窜 +##窝 +##窟 +##窠 +##窥 +##窦 +##窨 +##窩 +##窪 +##窮 +##窯 +##窺 +##窿 +##竄 +##竅 +##竇 +##竊 +##立 +##竖 +##站 +##竜 +##竞 +##竟 +##章 +##竣 +##童 +##竭 +##端 +##競 +##竹 +##竺 +##竽 +##竿 +##笃 +##笆 +##笈 +##笋 +##笏 +##笑 +##笔 +##笙 +##笛 +##笞 +##笠 +##符 +##笨 +##第 +##笹 +##笺 +##笼 +##筆 +##等 +##筊 +##筋 +##筍 +##筏 +##筐 +##筑 +##筒 +##答 +##策 +##筛 +##筝 +##筠 +##筱 +##筲 +##筵 +##筷 +##筹 +##签 +##简 +##箇 +##箋 +##箍 +##箏 +##箐 +##箔 +##箕 +##算 +##箝 +##管 +##箩 +##箫 +##箭 +##箱 +##箴 +##箸 +##節 +##篁 +##範 +##篆 +##篇 +##築 +##篑 +##篓 +##篙 +##篝 +##篠 +##篡 +##篤 +##篩 +##篪 +##篮 +##篱 +##篷 +##簇 +##簌 +##簍 +##簡 +##簦 +##簧 +##簪 +##簫 +##簷 +##簸 +##簽 +##簾 +##簿 +##籁 +##籃 +##籌 +##籍 +##籐 +##籟 +##籠 +##籤 +##籬 +##籮 +##籲 +##米 +##类 +##籼 +##籽 +##粄 +##粉 +##粑 +##粒 +##粕 +##粗 +##粘 +##粟 +##粤 +##粥 +##粧 +##粪 +##粮 +##粱 +##粲 +##粳 +##粵 +##粹 +##粼 +##粽 +##精 +##粿 +##糅 +##糊 +##糍 +##糕 +##糖 +##糗 +##糙 +##糜 +##糞 +##糟 +##糠 +##糧 +##糬 +##糯 +##糰 +##糸 +##系 +##糾 +##紀 +##紂 +##約 +##紅 +##紉 +##紊 +##紋 +##納 +##紐 +##紓 +##純 +##紗 +##紘 +##紙 +##級 +##紛 +##紜 +##素 +##紡 +##索 +##紧 +##紫 +##紮 +##累 +##細 +##紳 +##紹 +##紺 +##終 +##絃 +##組 +##絆 +##経 +##結 +##絕 +##絞 +##絡 +##絢 +##給 +##絨 +##絮 +##統 +##絲 +##絳 +##絵 +##絶 +##絹 +##綁 +##綏 +##綑 +##經 +##継 +##続 +##綜 +##綠 +##綢 +##綦 +##綫 +##綬 +##維 +##綱 +##網 +##綴 +##綵 +##綸 +##綺 +##綻 +##綽 +##綾 +##綿 +##緊 +##緋 +##総 +##緑 +##緒 +##緘 +##線 +##緝 +##緞 +##締 +##緣 +##編 +##緩 +##緬 +##緯 +##練 +##緹 +##緻 +##縁 +##縄 +##縈 +##縛 +##縝 +##縣 +##縫 +##縮 +##縱 +##縴 +##縷 +##總 
+##績 +##繁 +##繃 +##繆 +##繇 +##繋 +##織 +##繕 +##繚 +##繞 +##繡 +##繩 +##繪 +##繫 +##繭 +##繳 +##繹 +##繼 +##繽 +##纂 +##續 +##纍 +##纏 +##纓 +##纔 +##纖 +##纜 +##纠 +##红 +##纣 +##纤 +##约 +##级 +##纨 +##纪 +##纫 +##纬 +##纭 +##纯 +##纰 +##纱 +##纲 +##纳 +##纵 +##纶 +##纷 +##纸 +##纹 +##纺 +##纽 +##纾 +##线 +##绀 +##练 +##组 +##绅 +##细 +##织 +##终 +##绊 +##绍 +##绎 +##经 +##绑 +##绒 +##结 +##绔 +##绕 +##绘 +##给 +##绚 +##绛 +##络 +##绝 +##绞 +##统 +##绡 +##绢 +##绣 +##绥 +##绦 +##继 +##绩 +##绪 +##绫 +##续 +##绮 +##绯 +##绰 +##绳 +##维 +##绵 +##绶 +##绷 +##绸 +##绻 +##综 +##绽 +##绾 +##绿 +##缀 +##缄 +##缅 +##缆 +##缇 +##缈 +##缉 +##缎 +##缓 +##缔 +##缕 +##编 +##缘 +##缙 +##缚 +##缜 +##缝 +##缠 +##缢 +##缤 +##缥 +##缨 +##缩 +##缪 +##缭 +##缮 +##缰 +##缱 +##缴 +##缸 +##缺 +##缽 +##罂 +##罄 +##罌 +##罐 +##网 +##罔 +##罕 +##罗 +##罚 +##罡 +##罢 +##罩 +##罪 +##置 +##罰 +##署 +##罵 +##罷 +##罹 +##羁 +##羅 +##羈 +##羊 +##羌 +##美 +##羔 +##羚 +##羞 +##羟 +##羡 +##羣 +##群 +##羥 +##羧 +##羨 +##義 +##羯 +##羲 +##羸 +##羹 +##羽 +##羿 +##翁 +##翅 +##翊 +##翌 +##翎 +##習 +##翔 +##翘 +##翟 +##翠 +##翡 +##翦 +##翩 +##翰 +##翱 +##翳 +##翹 +##翻 +##翼 +##耀 +##老 +##考 +##耄 +##者 +##耆 +##耋 +##而 +##耍 +##耐 +##耒 +##耕 +##耗 +##耘 +##耙 +##耦 +##耨 +##耳 +##耶 +##耷 +##耸 +##耻 +##耽 +##耿 +##聂 +##聆 +##聊 +##聋 +##职 +##聒 +##联 +##聖 +##聘 +##聚 +##聞 +##聪 +##聯 +##聰 +##聲 +##聳 +##聴 +##聶 +##職 +##聽 +##聾 +##聿 +##肃 +##肄 +##肅 +##肆 +##肇 +##肉 +##肋 +##肌 +##肏 +##肓 +##肖 +##肘 +##肚 +##肛 +##肝 +##肠 +##股 +##肢 +##肤 +##肥 +##肩 +##肪 +##肮 +##肯 +##肱 +##育 +##肴 +##肺 +##肽 +##肾 +##肿 +##胀 +##胁 +##胃 +##胄 +##胆 +##背 +##胍 +##胎 +##胖 +##胚 +##胛 +##胜 +##胝 +##胞 +##胡 +##胤 +##胥 +##胧 +##胫 +##胭 +##胯 +##胰 +##胱 +##胳 +##胴 +##胶 +##胸 +##胺 +##能 +##脂 +##脅 +##脆 +##脇 +##脈 +##脉 +##脊 +##脍 +##脏 +##脐 +##脑 +##脓 +##脖 +##脘 +##脚 +##脛 +##脣 +##脩 +##脫 +##脯 +##脱 +##脲 +##脳 +##脸 +##脹 +##脾 +##腆 +##腈 +##腊 +##腋 +##腌 +##腎 +##腐 +##腑 +##腓 +##腔 +##腕 +##腥 +##腦 +##腩 +##腫 +##腭 +##腮 +##腰 +##腱 +##腳 +##腴 +##腸 +##腹 +##腺 +##腻 +##腼 +##腾 +##腿 +##膀 +##膈 +##膊 +##膏 +##膑 +##膘 +##膚 +##膛 +##膜 +##膝 +##膠 +##膦 +##膨 +##膩 +##膳 +##膺 +##膻 +##膽 +##膾 +##膿 +##臀 +##臂 +##臃 +##臆 +##臉 +##臊 +##臍 +##臓 +##臘 +##臟 +##臣 +##臥 +##臧 +##臨 +##自 +##臬 +##臭 +##至 +##致 +##臺 +##臻 +##臼 +##臾 +##舀 +##舂 +##舅 +##舆 
+##與 +##興 +##舉 +##舊 +##舌 +##舍 +##舎 +##舐 +##舒 +##舔 +##舖 +##舗 +##舛 +##舜 +##舞 +##舟 +##航 +##舫 +##般 +##舰 +##舱 +##舵 +##舶 +##舷 +##舸 +##船 +##舺 +##舾 +##艇 +##艋 +##艘 +##艙 +##艦 +##艮 +##良 +##艰 +##艱 +##色 +##艳 +##艷 +##艹 +##艺 +##艾 +##节 +##芃 +##芈 +##芊 +##芋 +##芍 +##芎 +##芒 +##芙 +##芜 +##芝 +##芡 +##芥 +##芦 +##芩 +##芪 +##芫 +##芬 +##芭 +##芮 +##芯 +##花 +##芳 +##芷 +##芸 +##芹 +##芻 +##芽 +##芾 +##苁 +##苄 +##苇 +##苋 +##苍 +##苏 +##苑 +##苒 +##苓 +##苔 +##苕 +##苗 +##苛 +##苜 +##苞 +##苟 +##苡 +##苣 +##若 +##苦 +##苫 +##苯 +##英 +##苷 +##苹 +##苻 +##茁 +##茂 +##范 +##茄 +##茅 +##茉 +##茎 +##茏 +##茗 +##茜 +##茧 +##茨 +##茫 +##茬 +##茭 +##茯 +##茱 +##茲 +##茴 +##茵 +##茶 +##茸 +##茹 +##茼 +##荀 +##荃 +##荆 +##草 +##荊 +##荏 +##荐 +##荒 +##荔 +##荖 +##荘 +##荚 +##荞 +##荟 +##荠 +##荡 +##荣 +##荤 +##荥 +##荧 +##荨 +##荪 +##荫 +##药 +##荳 +##荷 +##荸 +##荻 +##荼 +##荽 +##莅 +##莆 +##莉 +##莊 +##莎 +##莒 +##莓 +##莖 +##莘 +##莞 +##莠 +##莢 +##莧 +##莪 +##莫 +##莱 +##莲 +##莴 +##获 +##莹 +##莺 +##莽 +##莿 +##菀 +##菁 +##菅 +##菇 +##菈 +##菊 +##菌 +##菏 +##菓 +##菖 +##菘 +##菜 +##菟 +##菠 +##菡 +##菩 +##華 +##菱 +##菲 +##菸 +##菽 +##萁 +##萃 +##萄 +##萊 +##萋 +##萌 +##萍 +##萎 +##萘 +##萝 +##萤 +##营 +##萦 +##萧 +##萨 +##萩 +##萬 +##萱 +##萵 +##萸 +##萼 +##落 +##葆 +##葉 +##著 +##葚 +##葛 +##葡 +##董 +##葦 +##葩 +##葫 +##葬 +##葭 +##葯 +##葱 +##葳 +##葵 +##葷 +##葺 +##蒂 +##蒋 +##蒐 +##蒔 +##蒙 +##蒜 +##蒞 +##蒟 +##蒡 +##蒨 +##蒲 +##蒸 +##蒹 +##蒻 +##蒼 +##蒿 +##蓁 +##蓄 +##蓆 +##蓉 +##蓋 +##蓑 +##蓓 +##蓖 +##蓝 +##蓟 +##蓦 +##蓬 +##蓮 +##蓼 +##蓿 +##蔑 +##蔓 +##蔔 +##蔗 +##蔘 +##蔚 +##蔡 +##蔣 +##蔥 +##蔫 +##蔬 +##蔭 +##蔵 +##蔷 +##蔺 +##蔻 +##蔼 +##蔽 +##蕁 +##蕃 +##蕈 +##蕉 +##蕊 +##蕎 +##蕙 +##蕤 +##蕨 +##蕩 +##蕪 +##蕭 +##蕲 +##蕴 +##蕻 +##蕾 +##薄 +##薅 +##薇 +##薈 +##薊 +##薏 +##薑 +##薔 +##薙 +##薛 +##薦 +##薨 +##薩 +##薪 +##薬 +##薯 +##薰 +##薹 +##藉 +##藍 +##藏 +##藐 +##藓 +##藕 +##藜 +##藝 +##藤 +##藥 +##藩 +##藹 +##藻 +##藿 +##蘆 +##蘇 +##蘊 +##蘋 +##蘑 +##蘚 +##蘭 +##蘸 +##蘼 +##蘿 +##虎 +##虏 +##虐 +##虑 +##虔 +##處 +##虚 +##虛 +##虜 +##虞 +##號 +##虢 +##虧 +##虫 +##虬 +##虱 +##虹 +##虻 +##虽 +##虾 +##蚀 +##蚁 +##蚂 +##蚊 +##蚌 +##蚓 +##蚕 +##蚜 +##蚝 +##蚣 +##蚤 +##蚩 +##蚪 +##蚯 +##蚱 +##蚵 +##蛀 +##蛆 +##蛇 +##蛊 +##蛋 +##蛎 +##蛐 +##蛔 +##蛙 +##蛛 +##蛟 +##蛤 +##蛭 +##蛮 +##蛰 +##蛳 +##蛹 +##蛻 +##蛾 +##蜀 +##蜂 
+##蜃 +##蜆 +##蜇 +##蜈 +##蜊 +##蜍 +##蜒 +##蜓 +##蜕 +##蜗 +##蜘 +##蜚 +##蜜 +##蜡 +##蜢 +##蜥 +##蜱 +##蜴 +##蜷 +##蜻 +##蜿 +##蝇 +##蝈 +##蝉 +##蝌 +##蝎 +##蝕 +##蝗 +##蝙 +##蝟 +##蝠 +##蝦 +##蝨 +##蝴 +##蝶 +##蝸 +##蝼 +##螂 +##螃 +##融 +##螞 +##螢 +##螨 +##螯 +##螳 +##螺 +##蟀 +##蟄 +##蟆 +##蟋 +##蟎 +##蟑 +##蟒 +##蟠 +##蟬 +##蟲 +##蟹 +##蟻 +##蟾 +##蠅 +##蠍 +##蠔 +##蠕 +##蠛 +##蠟 +##蠡 +##蠢 +##蠣 +##蠱 +##蠶 +##蠹 +##蠻 +##血 +##衄 +##衅 +##衆 +##行 +##衍 +##術 +##衔 +##街 +##衙 +##衛 +##衝 +##衞 +##衡 +##衢 +##衣 +##补 +##表 +##衩 +##衫 +##衬 +##衮 +##衰 +##衲 +##衷 +##衹 +##衾 +##衿 +##袁 +##袂 +##袄 +##袅 +##袈 +##袋 +##袍 +##袒 +##袖 +##袜 +##袞 +##袤 +##袪 +##被 +##袭 +##袱 +##裁 +##裂 +##装 +##裆 +##裊 +##裏 +##裔 +##裕 +##裘 +##裙 +##補 +##裝 +##裟 +##裡 +##裤 +##裨 +##裱 +##裳 +##裴 +##裸 +##裹 +##製 +##裾 +##褂 +##複 +##褐 +##褒 +##褓 +##褔 +##褚 +##褥 +##褪 +##褫 +##褲 +##褶 +##褻 +##襁 +##襄 +##襟 +##襠 +##襪 +##襬 +##襯 +##襲 +##西 +##要 +##覃 +##覆 +##覇 +##見 +##規 +##覓 +##視 +##覚 +##覦 +##覧 +##親 +##覬 +##観 +##覷 +##覺 +##覽 +##觀 +##见 +##观 +##规 +##觅 +##视 +##览 +##觉 +##觊 +##觎 +##觐 +##觑 +##角 +##觞 +##解 +##觥 +##触 +##觸 +##言 +##訂 +##計 +##訊 +##討 +##訓 +##訕 +##訖 +##託 +##記 +##訛 +##訝 +##訟 +##訣 +##訥 +##訪 +##設 +##許 +##訳 +##訴 +##訶 +##診 +##註 +##証 +##詆 +##詐 +##詔 +##評 +##詛 +##詞 +##詠 +##詡 +##詢 +##詣 +##試 +##詩 +##詫 +##詬 +##詭 +##詮 +##詰 +##話 +##該 +##詳 +##詹 +##詼 +##誅 +##誇 +##誉 +##誌 +##認 +##誓 +##誕 +##誘 +##語 +##誠 +##誡 +##誣 +##誤 +##誥 +##誦 +##誨 +##說 +##説 +##読 +##誰 +##課 +##誹 +##誼 +##調 +##諄 +##談 +##請 +##諏 +##諒 +##論 +##諗 +##諜 +##諡 +##諦 +##諧 +##諫 +##諭 +##諮 +##諱 +##諳 +##諷 +##諸 +##諺 +##諾 +##謀 +##謁 +##謂 +##謄 +##謊 +##謎 +##謐 +##謔 +##謗 +##謙 +##講 +##謝 +##謠 +##謨 +##謬 +##謹 +##謾 +##譁 +##證 +##譎 +##譏 +##識 +##譙 +##譚 +##譜 +##警 +##譬 +##譯 +##議 +##譲 +##譴 +##護 +##譽 +##讀 +##變 +##讓 +##讚 +##讞 +##计 +##订 +##认 +##讥 +##讧 +##讨 +##让 +##讪 +##讫 +##训 +##议 +##讯 +##记 +##讲 +##讳 +##讴 +##讶 +##讷 +##许 +##讹 +##论 +##讼 +##讽 +##设 +##访 +##诀 +##证 +##诃 +##评 +##诅 +##识 +##诈 +##诉 +##诊 +##诋 +##词 +##诏 +##译 +##试 +##诗 +##诘 +##诙 +##诚 +##诛 +##话 +##诞 +##诟 +##诠 +##诡 +##询 +##诣 +##诤 +##该 +##详 +##诧 +##诩 +##诫 +##诬 +##语 +##误 +##诰 +##诱 +##诲 +##说 +##诵 +##诶 +##请 +##诸 +##诺 +##读 +##诽 +##课 +##诿 +##谀 +##谁 +##调 
+##谄 +##谅 +##谆 +##谈 +##谊 +##谋 +##谌 +##谍 +##谎 +##谏 +##谐 +##谑 +##谒 +##谓 +##谔 +##谕 +##谗 +##谘 +##谙 +##谚 +##谛 +##谜 +##谟 +##谢 +##谣 +##谤 +##谥 +##谦 +##谧 +##谨 +##谩 +##谪 +##谬 +##谭 +##谯 +##谱 +##谲 +##谴 +##谶 +##谷 +##豁 +##豆 +##豇 +##豈 +##豉 +##豊 +##豌 +##豎 +##豐 +##豔 +##豚 +##象 +##豢 +##豪 +##豫 +##豬 +##豹 +##豺 +##貂 +##貅 +##貌 +##貓 +##貔 +##貘 +##貝 +##貞 +##負 +##財 +##貢 +##貧 +##貨 +##販 +##貪 +##貫 +##責 +##貯 +##貰 +##貳 +##貴 +##貶 +##買 +##貸 +##費 +##貼 +##貽 +##貿 +##賀 +##賁 +##賂 +##賃 +##賄 +##資 +##賈 +##賊 +##賑 +##賓 +##賜 +##賞 +##賠 +##賡 +##賢 +##賣 +##賤 +##賦 +##質 +##賬 +##賭 +##賴 +##賺 +##購 +##賽 +##贅 +##贈 +##贊 +##贍 +##贏 +##贓 +##贖 +##贛 +##贝 +##贞 +##负 +##贡 +##财 +##责 +##贤 +##败 +##账 +##货 +##质 +##贩 +##贪 +##贫 +##贬 +##购 +##贮 +##贯 +##贰 +##贱 +##贲 +##贴 +##贵 +##贷 +##贸 +##费 +##贺 +##贻 +##贼 +##贾 +##贿 +##赁 +##赂 +##赃 +##资 +##赅 +##赈 +##赊 +##赋 +##赌 +##赎 +##赏 +##赐 +##赓 +##赔 +##赖 +##赘 +##赚 +##赛 +##赝 +##赞 +##赠 +##赡 +##赢 +##赣 +##赤 +##赦 +##赧 +##赫 +##赭 +##走 +##赳 +##赴 +##赵 +##赶 +##起 +##趁 +##超 +##越 +##趋 +##趕 +##趙 +##趟 +##趣 +##趨 +##足 +##趴 +##趵 +##趸 +##趺 +##趾 +##跃 +##跄 +##跆 +##跋 +##跌 +##跎 +##跑 +##跖 +##跚 +##跛 +##距 +##跟 +##跡 +##跤 +##跨 +##跩 +##跪 +##路 +##跳 +##践 +##跷 +##跹 +##跺 +##跻 +##踉 +##踊 +##踌 +##踏 +##踐 +##踝 +##踞 +##踟 +##踢 +##踩 +##踪 +##踮 +##踱 +##踴 +##踵 +##踹 +##蹂 +##蹄 +##蹇 +##蹈 +##蹉 +##蹊 +##蹋 +##蹑 +##蹒 +##蹙 +##蹟 +##蹣 +##蹤 +##蹦 +##蹩 +##蹬 +##蹭 +##蹲 +##蹴 +##蹶 +##蹺 +##蹼 +##蹿 +##躁 +##躇 +##躉 +##躊 +##躋 +##躍 +##躏 +##躪 +##身 +##躬 +##躯 +##躲 +##躺 +##軀 +##車 +##軋 +##軌 +##軍 +##軒 +##軟 +##転 +##軸 +##軼 +##軽 +##軾 +##較 +##載 +##輒 +##輓 +##輔 +##輕 +##輛 +##輝 +##輟 +##輩 +##輪 +##輯 +##輸 +##輻 +##輾 +##輿 +##轄 +##轅 +##轆 +##轉 +##轍 +##轎 +##轟 +##车 +##轧 +##轨 +##轩 +##转 +##轭 +##轮 +##软 +##轰 +##轲 +##轴 +##轶 +##轻 +##轼 +##载 +##轿 +##较 +##辄 +##辅 +##辆 +##辇 +##辈 +##辉 +##辊 +##辍 +##辐 +##辑 +##输 +##辕 +##辖 +##辗 +##辘 +##辙 +##辛 +##辜 +##辞 +##辟 +##辣 +##辦 +##辨 +##辩 +##辫 +##辭 +##辮 +##辯 +##辰 +##辱 +##農 +##边 +##辺 +##辻 +##込 +##辽 +##达 +##迁 +##迂 +##迄 +##迅 +##过 +##迈 +##迎 +##运 +##近 +##返 +##还 +##这 +##进 +##远 +##违 +##连 +##迟 +##迢 +##迤 +##迥 +##迦 +##迩 +##迪 +##迫 +##迭 +##述 +##迴 +##迷 +##迸 +##迹 +##迺 +##追 +##退 +##送 +##适 
+##逃 +##逅 +##逆 +##选 +##逊 +##逍 +##透 +##逐 +##递 +##途 +##逕 +##逗 +##這 +##通 +##逛 +##逝 +##逞 +##速 +##造 +##逢 +##連 +##逮 +##週 +##進 +##逵 +##逶 +##逸 +##逻 +##逼 +##逾 +##遁 +##遂 +##遅 +##遇 +##遊 +##運 +##遍 +##過 +##遏 +##遐 +##遑 +##遒 +##道 +##達 +##違 +##遗 +##遙 +##遛 +##遜 +##遞 +##遠 +##遢 +##遣 +##遥 +##遨 +##適 +##遭 +##遮 +##遲 +##遴 +##遵 +##遶 +##遷 +##選 +##遺 +##遼 +##遽 +##避 +##邀 +##邁 +##邂 +##邃 +##還 +##邇 +##邈 +##邊 +##邋 +##邏 +##邑 +##邓 +##邕 +##邛 +##邝 +##邢 +##那 +##邦 +##邨 +##邪 +##邬 +##邮 +##邯 +##邰 +##邱 +##邳 +##邵 +##邸 +##邹 +##邺 +##邻 +##郁 +##郅 +##郊 +##郎 +##郑 +##郜 +##郝 +##郡 +##郢 +##郤 +##郦 +##郧 +##部 +##郫 +##郭 +##郴 +##郵 +##郷 +##郸 +##都 +##鄂 +##鄉 +##鄒 +##鄔 +##鄙 +##鄞 +##鄢 +##鄧 +##鄭 +##鄰 +##鄱 +##鄲 +##鄺 +##酉 +##酊 +##酋 +##酌 +##配 +##酐 +##酒 +##酗 +##酚 +##酝 +##酢 +##酣 +##酥 +##酩 +##酪 +##酬 +##酮 +##酯 +##酰 +##酱 +##酵 +##酶 +##酷 +##酸 +##酿 +##醃 +##醇 +##醉 +##醋 +##醍 +##醐 +##醒 +##醚 +##醛 +##醜 +##醞 +##醣 +##醪 +##醫 +##醬 +##醮 +##醯 +##醴 +##醺 +##釀 +##釁 +##采 +##釉 +##释 +##釋 +##里 +##重 +##野 +##量 +##釐 +##金 +##釗 +##釘 +##釜 +##針 +##釣 +##釦 +##釧 +##釵 +##鈀 +##鈉 +##鈍 +##鈎 +##鈔 +##鈕 +##鈞 +##鈣 +##鈦 +##鈪 +##鈴 +##鈺 +##鈾 +##鉀 +##鉄 +##鉅 +##鉉 +##鉑 +##鉗 +##鉚 +##鉛 +##鉤 +##鉴 +##鉻 +##銀 +##銃 +##銅 +##銑 +##銓 +##銖 +##銘 +##銜 +##銬 +##銭 +##銮 +##銳 +##銷 +##銹 +##鋁 +##鋅 +##鋒 +##鋤 +##鋪 +##鋰 +##鋸 +##鋼 +##錄 +##錐 +##錘 +##錚 +##錠 +##錢 +##錦 +##錨 +##錫 +##錮 +##錯 +##録 +##錳 +##錶 +##鍊 +##鍋 +##鍍 +##鍛 +##鍥 +##鍰 +##鍵 +##鍺 +##鍾 +##鎂 +##鎊 +##鎌 +##鎏 +##鎔 +##鎖 +##鎗 +##鎚 +##鎧 +##鎬 +##鎮 +##鎳 +##鏈 +##鏖 +##鏗 +##鏘 +##鏞 +##鏟 +##鏡 +##鏢 +##鏤 +##鏽 +##鐘 +##鐮 +##鐲 +##鐳 +##鐵 +##鐸 +##鐺 +##鑄 +##鑊 +##鑑 +##鑒 +##鑣 +##鑫 +##鑰 +##鑲 +##鑼 +##鑽 +##鑾 +##鑿 +##针 +##钉 +##钊 +##钎 +##钏 +##钒 +##钓 +##钗 +##钙 +##钛 +##钜 +##钝 +##钞 +##钟 +##钠 +##钡 +##钢 +##钣 +##钤 +##钥 +##钦 +##钧 +##钨 +##钩 +##钮 +##钯 +##钰 +##钱 +##钳 +##钴 +##钵 +##钺 +##钻 +##钼 +##钾 +##钿 +##铀 +##铁 +##铂 +##铃 +##铄 +##铅 +##铆 +##铉 +##铎 +##铐 +##铛 +##铜 +##铝 +##铠 +##铡 +##铢 +##铣 +##铤 +##铨 +##铩 +##铬 +##铭 +##铮 +##铰 +##铲 +##铵 +##银 +##铸 +##铺 +##链 +##铿 +##销 +##锁 +##锂 +##锄 +##锅 +##锆 +##锈 +##锉 +##锋 +##锌 +##锏 +##锐 +##锑 +##错 +##锚 +##锟 +##锡 +##锢 +##锣 +##锤 +##锥 +##锦 +##锭 +##键 +##锯 +##锰 +##锲 
+##锵 +##锹 +##锺 +##锻 +##镀 +##镁 +##镂 +##镇 +##镉 +##镌 +##镍 +##镐 +##镑 +##镕 +##镖 +##镗 +##镛 +##镜 +##镣 +##镭 +##镯 +##镰 +##镳 +##镶 +##長 +##长 +##門 +##閃 +##閉 +##開 +##閎 +##閏 +##閑 +##閒 +##間 +##閔 +##閘 +##閡 +##関 +##閣 +##閥 +##閨 +##閩 +##閱 +##閲 +##閹 +##閻 +##閾 +##闆 +##闇 +##闊 +##闌 +##闍 +##闔 +##闕 +##闖 +##闘 +##關 +##闡 +##闢 +##门 +##闪 +##闫 +##闭 +##问 +##闯 +##闰 +##闲 +##间 +##闵 +##闷 +##闸 +##闹 +##闺 +##闻 +##闽 +##闾 +##阀 +##阁 +##阂 +##阅 +##阆 +##阇 +##阈 +##阉 +##阎 +##阐 +##阑 +##阔 +##阕 +##阖 +##阙 +##阚 +##阜 +##队 +##阡 +##阪 +##阮 +##阱 +##防 +##阳 +##阴 +##阵 +##阶 +##阻 +##阿 +##陀 +##陂 +##附 +##际 +##陆 +##陇 +##陈 +##陋 +##陌 +##降 +##限 +##陕 +##陛 +##陝 +##陞 +##陟 +##陡 +##院 +##陣 +##除 +##陨 +##险 +##陪 +##陰 +##陲 +##陳 +##陵 +##陶 +##陷 +##陸 +##険 +##陽 +##隅 +##隆 +##隈 +##隊 +##隋 +##隍 +##階 +##随 +##隐 +##隔 +##隕 +##隘 +##隙 +##際 +##障 +##隠 +##隣 +##隧 +##隨 +##險 +##隱 +##隴 +##隶 +##隸 +##隻 +##隼 +##隽 +##难 +##雀 +##雁 +##雄 +##雅 +##集 +##雇 +##雉 +##雋 +##雌 +##雍 +##雎 +##雏 +##雑 +##雒 +##雕 +##雖 +##雙 +##雛 +##雜 +##雞 +##離 +##難 +##雨 +##雪 +##雯 +##雰 +##雲 +##雳 +##零 +##雷 +##雹 +##電 +##雾 +##需 +##霁 +##霄 +##霆 +##震 +##霈 +##霉 +##霊 +##霍 +##霎 +##霏 +##霑 +##霓 +##霖 +##霜 +##霞 +##霧 +##霭 +##霰 +##露 +##霸 +##霹 +##霽 +##霾 +##靂 +##靄 +##靈 +##青 +##靓 +##靖 +##静 +##靚 +##靛 +##靜 +##非 +##靠 +##靡 +##面 +##靥 +##靦 +##革 +##靳 +##靴 +##靶 +##靼 +##鞅 +##鞋 +##鞍 +##鞏 +##鞑 +##鞘 +##鞠 +##鞣 +##鞦 +##鞭 +##韆 +##韋 +##韌 +##韓 +##韜 +##韦 +##韧 +##韩 +##韬 +##韭 +##音 +##韵 +##韶 +##韻 +##響 +##頁 +##頂 +##頃 +##項 +##順 +##須 +##頌 +##預 +##頑 +##頒 +##頓 +##頗 +##領 +##頜 +##頡 +##頤 +##頫 +##頭 +##頰 +##頷 +##頸 +##頹 +##頻 +##頼 +##顆 +##題 +##額 +##顎 +##顏 +##顔 +##願 +##顛 +##類 +##顧 +##顫 +##顯 +##顱 +##顴 +##页 +##顶 +##顷 +##项 +##顺 +##须 +##顼 +##顽 +##顾 +##顿 +##颁 +##颂 +##预 +##颅 +##领 +##颇 +##颈 +##颉 +##颊 +##颌 +##颍 +##颐 +##频 +##颓 +##颔 +##颖 +##颗 +##题 +##颚 +##颛 +##颜 +##额 +##颞 +##颠 +##颡 +##颢 +##颤 +##颦 +##颧 +##風 +##颯 +##颱 +##颳 +##颶 +##颼 +##飄 +##飆 +##风 +##飒 +##飓 +##飕 +##飘 +##飙 +##飚 +##飛 +##飞 +##食 +##飢 +##飨 +##飩 +##飪 +##飯 +##飲 +##飼 +##飽 +##飾 +##餃 +##餅 +##餉 +##養 +##餌 +##餐 +##餒 +##餓 +##餘 +##餚 +##餛 +##餞 +##餡 +##館 +##餮 +##餵 +##餾 +##饅 +##饈 +##饋 +##饌 +##饍 +##饑 +##饒 +##饕 +##饗 +##饞 
+##饥 +##饨 +##饪 +##饬 +##饭 +##饮 +##饯 +##饰 +##饱 +##饲 +##饴 +##饵 +##饶 +##饷 +##饺 +##饼 +##饽 +##饿 +##馀 +##馁 +##馄 +##馅 +##馆 +##馈 +##馋 +##馍 +##馏 +##馒 +##馔 +##首 +##馗 +##香 +##馥 +##馨 +##馬 +##馭 +##馮 +##馳 +##馴 +##駁 +##駄 +##駅 +##駆 +##駐 +##駒 +##駕 +##駛 +##駝 +##駭 +##駱 +##駿 +##騁 +##騎 +##騏 +##験 +##騙 +##騨 +##騰 +##騷 +##驀 +##驅 +##驊 +##驍 +##驒 +##驕 +##驗 +##驚 +##驛 +##驟 +##驢 +##驥 +##马 +##驭 +##驮 +##驯 +##驰 +##驱 +##驳 +##驴 +##驶 +##驷 +##驸 +##驹 +##驻 +##驼 +##驾 +##驿 +##骁 +##骂 +##骄 +##骅 +##骆 +##骇 +##骈 +##骊 +##骋 +##验 +##骏 +##骐 +##骑 +##骗 +##骚 +##骛 +##骜 +##骞 +##骠 +##骡 +##骤 +##骥 +##骧 +##骨 +##骯 +##骰 +##骶 +##骷 +##骸 +##骼 +##髂 +##髅 +##髋 +##髏 +##髒 +##髓 +##體 +##髖 +##高 +##髦 +##髪 +##髮 +##髯 +##髻 +##鬃 +##鬆 +##鬍 +##鬓 +##鬚 +##鬟 +##鬢 +##鬣 +##鬥 +##鬧 +##鬱 +##鬼 +##魁 +##魂 +##魄 +##魅 +##魇 +##魍 +##魏 +##魔 +##魘 +##魚 +##魯 +##魷 +##鮑 +##鮨 +##鮪 +##鮭 +##鮮 +##鯉 +##鯊 +##鯖 +##鯛 +##鯨 +##鯰 +##鯽 +##鰍 +##鰓 +##鰭 +##鰲 +##鰻 +##鰾 +##鱈 +##鱉 +##鱔 +##鱗 +##鱷 +##鱸 +##鱼 +##鱿 +##鲁 +##鲈 +##鲍 +##鲑 +##鲛 +##鲜 +##鲟 +##鲢 +##鲤 +##鲨 +##鲫 +##鲱 +##鲲 +##鲶 +##鲷 +##鲸 +##鳃 +##鳄 +##鳅 +##鳌 +##鳍 +##鳕 +##鳖 +##鳗 +##鳝 +##鳞 +##鳥 +##鳩 +##鳳 +##鳴 +##鳶 +##鴉 +##鴕 +##鴛 +##鴦 +##鴨 +##鴻 +##鴿 +##鵑 +##鵜 +##鵝 +##鵡 +##鵬 +##鵰 +##鵲 +##鶘 +##鶩 +##鶯 +##鶴 +##鷗 +##鷲 +##鷹 +##鷺 +##鸚 +##鸞 +##鸟 +##鸠 +##鸡 +##鸢 +##鸣 +##鸥 +##鸦 +##鸨 +##鸪 +##鸭 +##鸯 +##鸳 +##鸵 +##鸽 +##鸾 +##鸿 +##鹂 +##鹃 +##鹄 +##鹅 +##鹈 +##鹉 +##鹊 +##鹌 +##鹏 +##鹑 +##鹕 +##鹘 +##鹜 +##鹞 +##鹤 +##鹦 +##鹧 +##鹫 +##鹭 +##鹰 +##鹳 +##鹵 +##鹹 +##鹼 +##鹽 +##鹿 +##麂 +##麋 +##麒 +##麓 +##麗 +##麝 +##麟 +##麥 +##麦 +##麩 +##麴 +##麵 +##麸 +##麺 +##麻 +##麼 +##麽 +##麾 +##黃 +##黄 +##黍 +##黎 +##黏 +##黑 +##黒 +##黔 +##默 +##黛 +##黜 +##黝 +##點 +##黠 +##黨 +##黯 +##黴 +##鼋 +##鼎 +##鼐 +##鼓 +##鼠 +##鼬 +##鼹 +##鼻 +##鼾 +##齁 +##齊 +##齋 +##齐 +##齒 +##齡 +##齢 +##齣 +##齦 +##齿 +##龄 +##龅 +##龈 +##龊 +##龋 +##龌 +##龍 +##龐 +##龔 +##龕 +##龙 +##龚 +##龛 +##龜 +##龟 +##︰ +##︱ +##︶ +##︿ +##﹁ +##﹂ +##﹍ +##﹏ +##﹐ +##﹑ +##﹒ +##﹔ +##﹕ +##﹖ +##﹗ +##﹙ +##﹚ +##﹝ +##﹞ +##﹡ +##﹣ +##! +##" +### +##$ +##% +##& +##' +##( +##) +##* +##, +##- +##. +##/ +##: +##; +##< +##? 
+##@ +##[ +##\ +##] +##^ +##_ +##` +##f +##h +##j +##u +##w +##z +##{ +##} +##。 +##「 +##」 +##、 +##・ +##ッ +##ー +##イ +##ク +##シ +##ス +##ト +##ノ +##フ +##ラ +##ル +##ン +##゙ +##゚ +## ̄ +##¥ +##👍 +##🔥 +##😂 +##😎 diff --git a/example/nlp_to_mindrecord/zhwiki/README.md b/example/nlp_to_mindrecord/zhwiki/README.md new file mode 100644 index 0000000000..d2c0fd51c0 --- /dev/null +++ b/example/nlp_to_mindrecord/zhwiki/README.md @@ -0,0 +1,107 @@ +# Guideline to Convert Training Data zhwiki to MindRecord For Bert Pre Training + + + +- [What does the example do](#what-does-the-example-do) +- [Run simple test](#run-simple-test) +- [How to use the example to process zhwiki](#how-to-use-the-example-to-process-zhwiki) + - [Download zhwiki training data](#download-zhwiki-training-data) + - [Extract the zhwiki](#extract-the-zhwiki) + - [Generate MindRecord](#generate-mindrecord) + - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord) + + + + +## What does the example do + +This example is based on [zhwiki](https://dumps.wikimedia.org/zhwiki) training data, generating MindRecord file, and finally used for Bert network training. + +1. run.sh: generate MindRecord entry script. + - create_pretraining_data.py: the script from [google-research/bert](https://github.com/google-research/bert), we just change the part of the generated tfrecord to MindRecord. + - tokenization.py: the script from [google-research/bert](https://github.com/google-research/bert). + - vocab.txt: the file from [huawei-noah/Pretrained-Language-Model](https://github.com/huawei-noah/Pretrained-Language-Model/tree/master/NEZHA-TensorFlow/nezha). + - sample_text.txt: the file from [google-research/bert](https://github.com/google-research/bert). +2. run_read.py: create MindDataset by MindRecord entry script. + - create_dataset.py: use MindDataset to read MindRecord to generate dataset. 
+ +## Run simple test + +Follow these steps: + +```bash +bash run.sh # generate zhwiki.mindrecord* by sample_text.txt +bash run_read.sh # use MindDataset to read zhwiki.mindrecord* +``` + +## How to use the example to process zhwiki + +Download the zhwiki data, extract it, convert it to MindRecord, and then use MindDataset to read the MindRecord files. + +### Download zhwiki training data + +> [zhwiki dataset download address](https://dumps.wikimedia.org/zhwiki) **-> 20200401 -> zhwiki-20200401-pages-articles-multistream.xml.bz2** + +### Extract the zhwiki + +1. Download [wikiextractor](https://github.com/attardi/wikiextractor) script. + +2. Extract the zhwiki. + ```bash + python WikiExtractor.py -o {output_path}/extract {input_path}/zhwiki-20200401-pages-articles-multistream.xml.bz2 + ``` + +3. The extracted output looks like this: + ``` + $ ls {output_path}/extract + AA AB AC AD AE AF AG AH AI AJ AK AL AM AN + ``` + +### Generate MindRecord + +1. Modify the parameters in run.sh: --input_file, --output_file, --partition_number. + ``` + --input_file: Input raw text file (or comma-separated list of files). + --output_file: Output MindRecord file. + --partition_number: The number of partitions the MindRecord file will be split into. + ``` + +2. Run the run.sh script. + ``` + bash run.sh + ``` + > Caution: This process is slow, please wait patiently. Running it on a server is recommended. + +3. The output looks like this: + ``` + ... + [INFO] ME(23485,python):2020-04-28-17:16:40.670.744 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + [INFO] ME(23485,python):2020-04-28-17:16:40.671.227 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + [INFO] ME(23485,python):2020-04-28-17:16:40.671.660 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + [INFO] ME(23485,python):2020-04-28-17:16:40.672.037 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. 
+ [INFO] ME(23485,python):2020-04-28-17:16:40.672.453 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + [INFO] ME(23485,python):2020-04-28-17:16:40.672.833 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. + ... + [INFO] ME(23485:140354285963072,MainProcess):2020-04-28-17:16:40.718.039 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['zhwiki.mindrecord0', 'zhwiki.mindrecord1', 'zhwiki.mindrecord2', 'zhwiki.mindrecord3'], and the list of index files are: ['zhwiki.mindrecord0.db', 'zhwiki.mindrecord1.db', 'zhwiki.mindrecord2.db', 'zhwiki.mindrecord3.db'] + ... + ``` + +### Create MindDataset By MindRecord + +1. Run the run_read.sh script. + ```bash + bash run_read.sh + ``` + +2. The output like this: + ``` + ... + example 74: input_ids: [ 101 8168 118 12847 8783 9977 15908 117 8256 9245 11643 8168 8847 8588 11575 8154 8228 143 8384 8376 9197 10241 103 10564 11421 8199 12268 112 161 8228 11541 9586 8436 8174 8363 9864 9702 103 103 119 103 9947 10564 103 8436 8806 11479 103 8912 119 103 103 103 12209 8303 103 8757 8824 117 8256 103 8619 8168 11541 102 11684 8196 103 8228 8847 11523 117 9059 9064 12410 8358 8181 10764 117 11167 11706 9920 148 8332 11390 8936 8205 10951 11997 103 8154 117 103 8670 10467 112 161 10951 13139 12413 117 10288 143 10425 8205 152 10795 8472 8196 103 161 12126 9172 13129 12106 8217 8174 12244 8205 143 103 8461 8277 10628 160 8221 119 102] + example 74: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] + example 74: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] + example 74: masked_lm_positions: [ 6 22 37 38 40 43 47 50 51 52 55 60 67 76 89 92 98 109 120 0] + example 74: masked_lm_ids: [ 8118 8165 8329 8890 8554 8458 119 8850 8565 10392 8174 11467 10291 8181 8549 12718 13139 112 158 0] + example 74: masked_lm_weights: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.] + example 74: next_sentence_labels: [0] + ... + ``` diff --git a/example/nlp_to_mindrecord/zhwiki/create_dataset.py b/example/nlp_to_mindrecord/zhwiki/create_dataset.py new file mode 100644 index 0000000000..8404662bd4 --- /dev/null +++ b/example/nlp_to_mindrecord/zhwiki/create_dataset.py @@ -0,0 +1,43 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def create_dataset(data_file):
    """Read a MindRecord file through MindDataset and print every example.

    Args:
        data_file: MindRecord file name, passed to ``ds.MindDataset`` as
            ``dataset_file``.

    Prints each feature of each example, a progress line every 1000 rows,
    and the total row count at the end. Returns nothing.
    """
    num_readers = 4
    data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True)

    # Features printed for every example, in output order.
    field_names = (
        'input_ids',
        'input_mask',
        'segment_ids',
        'masked_lm_positions',
        'masked_lm_ids',
        'masked_lm_weights',
        'next_sentence_labels',
    )

    index = 0
    for item in data_set.create_dict_iterator():
        for field in field_names:
            print("example {}: {}: {}".format(index, field, item[field]))
        index += 1
        if index % 1000 == 0:
            print("read rows: {}".format(index))
    print("total rows: {}".format(index))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str, required=True, help='Input mindrecord file')
    args = parser.parse_args()

    create_dataset(args.input_file)
class TrainingInstance(object):
    """One pre-training example (a sentence pair) and its masking metadata."""

    def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm_labels,
                 is_random_next):
        # Plain value holder: every argument is stored under its own name.
        self.tokens = tokens
        self.segment_ids = segment_ids
        self.masked_lm_positions = masked_lm_positions
        self.masked_lm_labels = masked_lm_labels
        self.is_random_next = is_random_next

    def __str__(self):
        """Render every field on its own line, followed by one blank line."""
        tokens_text = " ".join(tokenization.printable_text(tok) for tok in self.tokens)
        segments_text = " ".join(str(seg) for seg in self.segment_ids)
        positions_text = " ".join(str(pos) for pos in self.masked_lm_positions)
        labels_text = " ".join(tokenization.printable_text(lab) for lab in self.masked_lm_labels)
        return (
            "tokens: %s\n" % tokens_text
            + "segment_ids: %s\n" % segments_text
            + "is_random_next: %s\n" % self.is_random_next
            + "masked_lm_positions: %s\n" % positions_text
            + "masked_lm_labels: %s\n" % labels_text
            + "\n"
        )

    def __repr__(self):
        # repr and str are intentionally the same text.
        return str(self)
from `TrainingInstance`s.""" + writer = FileWriter(output_file, int(partition_number)) + + data_schema = {"input_ids": {"type": "int64", "shape": [-1]}, + "input_mask": {"type": "int64", "shape": [-1]}, + "segment_ids": {"type": "int64", "shape": [-1]}, + "masked_lm_positions": {"type": "int64", "shape": [-1]}, + "masked_lm_ids": {"type": "int64", "shape": [-1]}, + "masked_lm_weights": {"type": "float64", "shape": [-1]}, + "next_sentence_labels": {"type": "int64", "shape": [-1]}, + } + writer.add_schema(data_schema, "zhwiki schema") + + total_written = 0 + for (inst_index, instance) in enumerate(instances): + input_ids = tokenizer.convert_tokens_to_ids(instance.tokens) + input_mask = [1] * len(input_ids) + segment_ids = list(instance.segment_ids) + assert len(input_ids) <= max_seq_length + + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + masked_lm_positions = list(instance.masked_lm_positions) + masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels) + masked_lm_weights = [1.0] * len(masked_lm_ids) + + while len(masked_lm_positions) < max_predictions_per_seq: + masked_lm_positions.append(0) + masked_lm_ids.append(0) + masked_lm_weights.append(0.0) + + next_sentence_label = 1 if instance.is_random_next else 0 + + features = collections.OrderedDict() + features["input_ids"] = np.asarray(input_ids) + features["input_mask"] = np.asarray(input_mask) + features["segment_ids"] = np.asarray(segment_ids) + features["masked_lm_positions"] = np.asarray(masked_lm_positions) + features["masked_lm_ids"] = np.asarray(masked_lm_ids) + features["masked_lm_weights"] = np.asarray(masked_lm_weights) + features["next_sentence_labels"] = np.asarray([next_sentence_label]) + + total_written += 1 + + if inst_index < 20: + logging.info("*** Example ***") + logging.info("tokens: 
%s" % " ".join( + [tokenization.printable_text(x) for x in instance.tokens])) + + for feature_name in features.keys(): + feature = features[feature_name] + logging.info( + "%s: %s" % (feature_name, " ".join([str(x) for x in feature]))) + writer.write_raw_data([features]) + + writer.commit() + + logging.info("Wrote %d total instances", total_written) + + +def create_training_instances(input_files, tokenizer, max_seq_length, + dupe_factor, short_seq_prob, masked_lm_prob, + max_predictions_per_seq, rng, do_whole_word_mask): + """Create `TrainingInstance`s from raw text.""" + all_documents = [[]] + + # Input file format: + # (1) One sentence per line. These should ideally be actual sentences, not + # entire paragraphs or arbitrary spans of text. (Because we use the + # sentence boundaries for the "next sentence prediction" task). + # (2) Blank lines between documents. Document boundaries are needed so + # that the "next sentence prediction" task doesn't span between documents. + for input_file in input_files: + with open(input_file, "r") as reader: + while True: + line = tokenization.convert_to_unicode(reader.readline()) + if not line: + break + line = line.strip() + + # Empty lines are used as document delimiters + if not line: + all_documents.append([]) + tokens = tokenizer.tokenize(line) + if tokens: + all_documents[-1].append(tokens) + + # Remove empty documents + all_documents = [x for x in all_documents if x] + rng.shuffle(all_documents) + + vocab_words = list(tokenizer.vocab.keys()) + instances = [] + for _ in range(dupe_factor): + for document_index in range(len(all_documents)): + instances.extend( + create_instances_from_document( + all_documents, document_index, max_seq_length, short_seq_prob, + masked_lm_prob, max_predictions_per_seq, vocab_words, rng, do_whole_word_mask)) + + rng.shuffle(instances) + return instances + + +def create_instances_from_document( + all_documents, document_index, max_seq_length, short_seq_prob, + masked_lm_prob, 
max_predictions_per_seq, vocab_words, rng, do_whole_word_mask): + """Creates `TrainingInstance`s for a single document.""" + document = all_documents[document_index] + + # Account for [CLS], [SEP], [SEP] + max_num_tokens = max_seq_length - 3 + + # We *usually* want to fill up the entire sequence since we are padding + # to `max_seq_length` anyways, so short sequences are generally wasted + # computation. However, we *sometimes* + # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter + # sequences to minimize the mismatch between pre-training and fine-tuning. + # The `target_seq_length` is just a rough target however, whereas + # `max_seq_length` is a hard limit. + target_seq_length = max_num_tokens + if rng.random() < short_seq_prob: + target_seq_length = rng.randint(2, max_num_tokens) + + # We DON'T just concatenate all of the tokens from a document into a long + # sequence and choose an arbitrary split point because this would make the + # next sentence prediction task too easy. Instead, we split the input into + # segments "A" and "B" based on the actual "sentences" provided by the user + # input. + instances = [] + current_chunk = [] + current_length = 0 + i = 0 + while i < len(document): + segment = document[i] + current_chunk.append(segment) + current_length += len(segment) + if i == len(document) - 1 or current_length >= target_seq_length: + if current_chunk: + # `a_end` is how many segments from `current_chunk` go into the `A` + # (first) sentence. + a_end = 1 + if len(current_chunk) >= 2: + a_end = rng.randint(1, len(current_chunk) - 1) + + tokens_a = [] + for j in range(a_end): + tokens_a.extend(current_chunk[j]) + + tokens_b = [] + # Random next + is_random_next = False + if len(current_chunk) == 1 or rng.random() < 0.5: + is_random_next = True + target_b_length = target_seq_length - len(tokens_a) + + # This should rarely go for more than one iteration for large + # corpora. 
However, just to be careful, we try to make sure that + # the random document is not the same as the document + # we're processing. + for _ in range(10): + random_document_index = rng.randint(0, len(all_documents) - 1) + if random_document_index != document_index: + break + + random_document = all_documents[random_document_index] + random_start = rng.randint(0, len(random_document) - 1) + for j in range(random_start, len(random_document)): + tokens_b.extend(random_document[j]) + if len(tokens_b) >= target_b_length: + break + # We didn't actually use these segments so we "put them back" so + # they don't go to waste. + num_unused_segments = len(current_chunk) - a_end + i -= num_unused_segments + # Actual next + else: + is_random_next = False + for j in range(a_end, len(current_chunk)): + tokens_b.extend(current_chunk[j]) + truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng) + + assert len(tokens_a) >= 1 + assert len(tokens_b) >= 1 + + tokens = [] + segment_ids = [] + tokens.append("[CLS]") + segment_ids.append(0) + for token in tokens_a: + tokens.append(token) + segment_ids.append(0) + + tokens.append("[SEP]") + segment_ids.append(0) + + for token in tokens_b: + tokens.append(token) + segment_ids.append(1) + tokens.append("[SEP]") + segment_ids.append(1) + + (tokens, masked_lm_positions, + masked_lm_labels) = create_masked_lm_predictions( + tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng, do_whole_word_mask) + instance = TrainingInstance( + tokens=tokens, + segment_ids=segment_ids, + is_random_next=is_random_next, + masked_lm_positions=masked_lm_positions, + masked_lm_labels=masked_lm_labels) + instances.append(instance) + current_chunk = [] + current_length = 0 + i += 1 + + return instances + + +MaskedLmInstance = collections.namedtuple("MaskedLmInstance", + ["index", "label"]) + + +def create_masked_lm_predictions(tokens, masked_lm_prob, + max_predictions_per_seq, vocab_words, rng, do_whole_word_mask): + """Creates the predictions for the 
masked LM objective.""" + + cand_indexes = [] + for (i, token) in enumerate(tokens): + if token == "[CLS]" or token == "[SEP]": + continue + # Whole Word Masking means that if we mask all of the wordpieces + # corresponding to an original word. When a word has been split into + # WordPieces, the first token does not have any marker and any subsequence + # tokens are prefixed with ##. So whenever we see the ## token, we + # append it to the previous set of word indexes. + # + # Note that Whole Word Masking does *not* change the training code + # at all -- we still predict each WordPiece independently, softmaxed + # over the entire vocabulary. + if (do_whole_word_mask and len(cand_indexes) >= 1 and + token.startswith("##")): + cand_indexes[-1].append(i) + else: + cand_indexes.append([i]) + + rng.shuffle(cand_indexes) + + output_tokens = list(tokens) + + num_to_predict = min(max_predictions_per_seq, + max(1, int(round(len(tokens) * masked_lm_prob)))) + + masked_lms = [] + covered_indexes = set() + for index_set in cand_indexes: + if len(masked_lms) >= num_to_predict: + break + # If adding a whole-word mask would exceed the maximum number of + # predictions, then just skip this candidate. 
+ if len(masked_lms) + len(index_set) > num_to_predict: + continue + is_any_index_covered = False + for index in index_set: + if index in covered_indexes: + is_any_index_covered = True + break + if is_any_index_covered: + continue + for index in index_set: + covered_indexes.add(index) + + masked_token = None + # 80% of the time, replace with [MASK] + if rng.random() < 0.8: + masked_token = "[MASK]" + else: + # 10% of the time, keep original + if rng.random() < 0.5: + masked_token = tokens[index] + # 10% of the time, replace with random word + else: + masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)] + + output_tokens[index] = masked_token + + masked_lms.append(MaskedLmInstance(index=index, label=tokens[index])) + assert len(masked_lms) <= num_to_predict + masked_lms = sorted(masked_lms, key=lambda x: x.index) + + masked_lm_positions = [] + masked_lm_labels = [] + for p in masked_lms: + masked_lm_positions.append(p.index) + masked_lm_labels.append(p.label) + + return (output_tokens, masked_lm_positions, masked_lm_labels) + + +def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng): + """Truncates a pair of sequences to a maximum sequence length.""" + while True: + total_length = len(tokens_a) + len(tokens_b) + if total_length <= max_num_tokens: + break + + trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b + assert len(trunc_tokens) >= 1 + + # We want to sometimes truncate from the front and sometimes from the + # back to add more randomness and avoid biases. 
+ if rng.random() < 0.5: + del trunc_tokens[0] + else: + trunc_tokens.pop() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_file", type=str, required=True, help='Input raw text file (or comma-separated list of files).') + parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file.') + parser.add_argument("--partition_number", type=int, default=1, help='The MindRecord file will be split into the number of partition.') + parser.add_argument("--vocab_file", type=str, required=True, help='The vocabulary file than the BERT model was trained on.') + parser.add_argument("--do_lower_case", type=bool, default=False, help='Whether to lower case the input text. Should be True for uncased models and False for cased models.') + parser.add_argument("--do_whole_word_mask", type=bool, default=False, help='Whether to use whole word masking rather than per-WordPiece masking.') + parser.add_argument("--max_seq_length", type=int, default=128, help='Maximum sequence length.') + parser.add_argument("--max_predictions_per_seq", type=int, default=20, help='Maximum number of masked LM predictions per sequence.') + parser.add_argument("--random_seed", type=int, default=12345, help='Random seed for data generation.') + parser.add_argument("--dupe_factor", type=int, default=10, help='Number of times to duplicate the input data (with diffrent masks).') + parser.add_argument("--masked_lm_prob", type=float, default=0.15, help='Masked LM probability.') + parser.add_argument("--short_seq_prob", type=float, default=0.1, help='Probability of creating sequences which are shorter than the maximum length.') + args = parser.parse_args() + + tokenizer = tokenization.FullTokenizer( + vocab_file=args.vocab_file, do_lower_case=args.do_lower_case) + + input_files = [] + for input_pattern in args.input_file.split(","): + input_files.append(input_pattern) + + logging.info("*** Reading from input files ***") + for input_file in input_files: + 
logging.info(" %s", input_file) + + rng = random.Random(args.random_seed) + instances = create_training_instances( + input_files, tokenizer, args.max_seq_length, args.dupe_factor, + args.short_seq_prob, args.masked_lm_prob, args.max_predictions_per_seq, + rng, args.do_whole_word_mask) + + write_instance_to_example_files(instances, tokenizer, args.max_seq_length, + args.max_predictions_per_seq, args.output_file, args.partition_number) + + +if __name__ == "__main__": + main() diff --git a/example/nlp_to_mindrecord/zhwiki/run.sh b/example/nlp_to_mindrecord/zhwiki/run.sh new file mode 100644 index 0000000000..4376ff0ff4 --- /dev/null +++ b/example/nlp_to_mindrecord/zhwiki/run.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+
+rm -f zhwiki.mindrecord*  # -f: stay quiet on a first run, when no earlier output exists
+
+python create_pretraining_data.py \
+--input_file=./sample_text.txt \
+--output_file=zhwiki.mindrecord \
+--partition_number=4 \
+--vocab_file=./vocab.txt \
+--do_lower_case=True \
+--max_seq_length=128 \
+--max_predictions_per_seq=20 \
+--masked_lm_prob=0.15 \
+--random_seed=12345 \
+--dupe_factor=5
diff --git a/example/nlp_to_mindrecord/zhwiki/run_read.sh b/example/nlp_to_mindrecord/zhwiki/run_read.sh
new file mode 100644
index 0000000000..b2d1cfb662
--- /dev/null
+++ b/example/nlp_to_mindrecord/zhwiki/run_read.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+python create_dataset.py --input_file=zhwiki.mindrecord0
diff --git a/example/nlp_to_mindrecord/zhwiki/sample_text.txt b/example/nlp_to_mindrecord/zhwiki/sample_text.txt
new file mode 100644
index 0000000000..a42812060c
--- /dev/null
+++ b/example/nlp_to_mindrecord/zhwiki/sample_text.txt
@@ -0,0 +1,33 @@
+This text is included to make sure Unicode is handled properly: 力加勝北区ᴵᴺᵀᵃছজটডণত
+Text should be one-sentence-per-line, with empty lines between documents.
+This sample text is public domain and was randomly selected from Project Guttenberg.
+ +The rain had only ceased with the gray streaks of morning at Blazing Star, and the settlement awoke to a moral sense of cleanliness, and the finding of forgotten knives, tin cups, and smaller camp utensils, where the heavy showers had washed away the debris and dust heaps before the cabin doors. +Indeed, it was recorded in Blazing Star that a fortunate early riser had once picked up on the highway a solid chunk of gold quartz which the rain had freed from its incumbering soil, and washed into immediate and glittering popularity. +Possibly this may have been the reason why early risers in that locality, during the rainy season, adopted a thoughtful habit of body, and seldom lifted their eyes to the rifted or india-ink washed skies above them. +"Cass" Beard had risen early that morning, but not with a view to discovery. +A leak in his cabin roof,--quite consistent with his careless, improvident habits,--had roused him at 4 A. M., with a flooded "bunk" and wet blankets. +The chips from his wood pile refused to kindle a fire to dry his bed-clothes, and he had recourse to a more provident neighbor's to supply the deficiency. +This was nearly opposite. +Mr. Cassius crossed the highway, and stopped suddenly. +Something glittered in the nearest red pool before him. +Gold, surely! +But, wonderful to relate, not an irregular, shapeless fragment of crude ore, fresh from Nature's crucible, but a bit of jeweler's handicraft in the form of a plain gold ring. +Looking at it more attentively, he saw that it bore the inscription, "May to Cass." +Like most of his fellow gold-seekers, Cass was superstitious. + +The fountain of classic wisdom, Hypatia herself. +As the ancient sage--the name is unimportant to a monk--pumped water nightly that he might study by day, so I, the guardian of cloaks and parasols, at the sacred doors of her lecture-room, imbibe celestial knowledge. +From my youth I felt in me a soul above the matter-entangled herd. 
+She revealed to me the glorious fact, that I am a spark of Divinity itself. +A fallen star, I am, sir!' continued he, pensively, stroking his lean stomach--'a fallen star!--fallen, if the dignity of philosophy will allow of the simile, among the hogs of the lower world--indeed, even into the hog-bucket itself. Well, after all, I will show you the way to the Archbishop's. +There is a philosophic pleasure in opening one's treasures to the modest young. +Perhaps you will assist me by carrying this basket of fruit?' And the little man jumped up, put his basket on Philammon's head, and trotted off up a neighbouring street. +Philammon followed, half contemptuous, half wondering at what this philosophy might be, which could feed the self-conceit of anything so abject as his ragged little apish guide; +but the novel roar and whirl of the street, the perpetual stream of busy faces, the line of curricles, palanquins, laden asses, camels, elephants, which met and passed him, and squeezed him up steps and into doorways, as they threaded their way through the great Moon-gate into the ample street beyond, drove everything from his mind but wondering curiosity, and a vague, helpless dread of that great living wilderness, more terrible than any dead wilderness of sand which he had left behind. +Already he longed for the repose, the silence of the Laura--for faces which knew him and smiled upon him; but it was too late to turn back now. +His guide held on for more than a mile up the great main street, crossed in the centre of the city, at right angles, by one equally magnificent, at each end of which, miles away, appeared, dim and distant over the heads of the living stream of passengers, the yellow sand-hills of the desert; +while at the end of the vista in front of them gleamed the blue harbour, through a network of countless masts. 
+At last they reached the quay at the opposite end of the street;
+and there burst on Philammon's astonished eyes a vast semicircle of blue sea, ringed with palaces and towers.
+He stopped involuntarily; and his little guide stopped also, and looked askance at the young monk, to watch the effect which that grand panorama should produce on him.
diff --git a/example/nlp_to_mindrecord/zhwiki/tokenization.py b/example/nlp_to_mindrecord/zhwiki/tokenization.py
new file mode 100644
index 0000000000..50e9445a19
--- /dev/null
+++ b/example/nlp_to_mindrecord/zhwiki/tokenization.py
@@ -0,0 +1,404 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tokenization classes."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import io
+import re
+import unicodedata
+import six
+
+# pylint: skip-file
+
+def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
+    """Checks whether the casing config is consistent with the checkpoint name."""
+
+    # The casing has to be passed in by the user and there is no explicit check
+    # as to whether it matches the checkpoint. The casing information probably
+    # should have been stored in the bert_config.json file, but it's not, so
+    # we have to heuristically detect it to validate.
+
+    if not init_checkpoint:
+        return
+
+    m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
+    if m is None:
+        return
+
+    model_name = m.group(1)
+
+    lower_models = [
+        "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12",
+        "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12"
+    ]
+
+    cased_models = [
+        "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16",
+        "multi_cased_L-12_H-768_A-12"
+    ]
+
+    is_bad_config = False
+    if model_name in lower_models and not do_lower_case:
+        is_bad_config = True
+        actual_flag = "False"
+        case_name = "lowercased"
+        opposite_flag = "True"
+
+    if model_name in cased_models and do_lower_case:
+        is_bad_config = True
+        actual_flag = "True"
+        case_name = "cased"
+        opposite_flag = "False"
+
+    if is_bad_config:
+        raise ValueError(
+            "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. "
+            "However, `%s` seems to be a %s model, so you "
+            "should pass in `--do_lower_case=%s` so that the fine-tuning matches "
+            "how the model was pre-training. If this error is wrong, please "
+            "just comment out this check." % (actual_flag, init_checkpoint,
+                                              model_name, case_name, opposite_flag))
+
+
+def convert_to_unicode(text):
+    """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
+    if six.PY3:
+        if isinstance(text, str):
+            return text
+        elif isinstance(text, bytes):
+            return text.decode("utf-8", "ignore")
+        else:
+            raise ValueError("Unsupported string type: %s" % (type(text)))
+    elif six.PY2:
+        if isinstance(text, str):
+            return text.decode("utf-8", "ignore")
+        elif isinstance(text, unicode):
+            return text
+        else:
+            raise ValueError("Unsupported string type: %s" % (type(text)))
+    else:
+        raise ValueError("Not running on Python2 or Python 3?")
+
+
+def printable_text(text):
+    """Returns text encoded in a way suitable for print or logging."""
+
+    # These functions want `str` for both Python2 and Python3, but in one case
+    # it's a Unicode string and in the other it's a byte string.
+    if six.PY3:
+        if isinstance(text, str):
+            return text
+        elif isinstance(text, bytes):
+            return text.decode("utf-8", "ignore")
+        else:
+            raise ValueError("Unsupported string type: %s" % (type(text)))
+    elif six.PY2:
+        if isinstance(text, str):
+            return text
+        elif isinstance(text, unicode):
+            return text.encode("utf-8")
+        else:
+            raise ValueError("Unsupported string type: %s" % (type(text)))
+    else:
+        raise ValueError("Not running on Python2 or Python 3?")
+
+
+def load_vocab(vocab_file):
+    """Loads a UTF-8 vocabulary file into an ordered token -> line-index dictionary."""
+    vocab = collections.OrderedDict()
+    index = 0
+    # Decode as UTF-8 explicitly: the locale's default encoding is not UTF-8 on
+    # every platform. `io.open` accepts `encoding` on Python 2 as well.
+    with io.open(vocab_file, "r", encoding="utf-8") as reader:
+        while True:
+            token = convert_to_unicode(reader.readline())
+            if not token:
+                break
+            token = token.strip()
+            vocab[token] = index
+            index += 1
+    return vocab
+
+
+def convert_by_vocab(vocab, items):
+    """Converts a sequence of [tokens|ids] using the vocab."""
+    output = []
+    for item in items:
+        output.append(vocab[item])
+    return output
+
+
+def convert_tokens_to_ids(vocab, tokens):
+    """Maps each token to its id via `vocab`; raises KeyError on a missing token."""
+    return convert_by_vocab(vocab, tokens)
+
+
+def convert_ids_to_tokens(inv_vocab, ids):
+    """Maps each id to its token via `inv_vocab`; raises KeyError on a missing id."""
+    return convert_by_vocab(inv_vocab, ids)
+
+
+def whitespace_tokenize(text):
+    """Runs basic whitespace cleaning and splitting on a piece of text."""
+    text = text.strip()
+    if not text:
+        return []
+    tokens = text.split()
+    return tokens
+
+
+class FullTokenizer(object):
+    """Runs end-to-end tokenization."""
+
+    def __init__(self, vocab_file, do_lower_case=True):
+        """Loads the vocabulary and builds the basic and WordPiece tokenizers."""
+        self.vocab = load_vocab(vocab_file)
+        self.inv_vocab = {v: k for k, v in self.vocab.items()}
+        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
+        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
+
+    def tokenize(self, text):
+        """Splits `text` into basic tokens, then each of those into WordPieces."""
+        split_tokens = []
+        for token in self.basic_tokenizer.tokenize(text):
+            for sub_token in self.wordpiece_tokenizer.tokenize(token):
+                split_tokens.append(sub_token)
+
+        return split_tokens
+
+    def convert_tokens_to_ids(self, tokens):
+        """Maps tokens to vocabulary ids."""
+        return convert_by_vocab(self.vocab, tokens)
+
+    def convert_ids_to_tokens(self, ids):
+        """Maps vocabulary ids back to tokens."""
+        return convert_by_vocab(self.inv_vocab, ids)
+
+
+class BasicTokenizer(object):
+    """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
+
+    def __init__(self, do_lower_case=True):
+        """Constructs a BasicTokenizer.
+        Args:
+          do_lower_case: Whether to lower case the input.
+        """
+        self.do_lower_case = do_lower_case
+
+    def tokenize(self, text):
+        """Tokenizes a piece of text."""
+        text = convert_to_unicode(text)
+        text = self._clean_text(text)
+
+        # This was added on November 1st, 2018 for the multilingual and Chinese
+        # models. This is also applied to the English models now, but it doesn't
+        # matter since the English models were not trained on any Chinese data
+        # and generally don't have any Chinese data in them (there are Chinese
+        # characters in the vocabulary because Wikipedia does have some Chinese
+        # words in the English Wikipedia.).
+        text = self._tokenize_chinese_chars(text)
+
+        orig_tokens = whitespace_tokenize(text)
+        split_tokens = []
+        for token in orig_tokens:
+            if self.do_lower_case:
+                token = token.lower()
+                token = self._run_strip_accents(token)
+            split_tokens.extend(self._run_split_on_punc(token))
+
+        output_tokens = whitespace_tokenize(" ".join(split_tokens))
+        return output_tokens
+
+    def _run_strip_accents(self, text):
+        """Strips accents from a piece of text."""
+        text = unicodedata.normalize("NFD", text)
+        output = []
+        for char in text:
+            cat = unicodedata.category(char)
+            if cat == "Mn":
+                continue
+            output.append(char)
+        return "".join(output)
+
+    def _run_split_on_punc(self, text):
+        """Splits punctuation on a piece of text."""
+        chars = list(text)
+        i = 0
+        start_new_word = True
+        output = []
+        while i < len(chars):
+            char = chars[i]
+            if _is_punctuation(char):
+                output.append([char])
+                start_new_word = True
+            else:
+                if start_new_word:
+                    output.append([])
+                start_new_word = False
+                output[-1].append(char)
+            i += 1
+
+        return ["".join(x) for x in output]
+
+    def _tokenize_chinese_chars(self, text):
+        """Adds whitespace around any CJK character."""
+        output = []
+        for char in text:
+            cp = ord(char)
+            if self._is_chinese_char(cp):
+                output.append(" ")
+                output.append(char)
+                output.append(" ")
+            else:
+                output.append(char)
+        return "".join(output)
+
+    def _is_chinese_char(self, cp):
+        """Checks whether CP is the codepoint of a CJK character."""
+        # This defines a "chinese character" as anything in the CJK Unicode block:
+        # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+        #
+        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
+        # despite its name. The modern Korean Hangul alphabet is a different block,
+        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
+        # space-separated words, so they are not treated specially and handled
+        # like the all of the other languages.
+        if ((cp >= 0x4E00 and cp <= 0x9FFF) or  #
+                (cp >= 0x3400 and cp <= 0x4DBF) or  #
+                (cp >= 0x20000 and cp <= 0x2A6DF) or  #
+                (cp >= 0x2A700 and cp <= 0x2B73F) or  #
+                (cp >= 0x2B740 and cp <= 0x2B81F) or  #
+                (cp >= 0x2B820 and cp <= 0x2CEAF) or
+                (cp >= 0xF900 and cp <= 0xFAFF) or  #
+                (cp >= 0x2F800 and cp <= 0x2FA1F)):  #
+            return True
+
+        return False
+
+    def _clean_text(self, text):
+        """Performs invalid character removal and whitespace cleanup on text."""
+        output = []
+        for char in text:
+            cp = ord(char)
+            if cp == 0 or cp == 0xfffd or _is_control(char):
+                continue
+            if _is_whitespace(char):
+                output.append(" ")
+            else:
+                output.append(char)
+        return "".join(output)
+
+
+class WordpieceTokenizer(object):
+    """Runs WordPiece tokenization."""
+
+    def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200):
+        """Stores the vocabulary, the unknown-token marker and the word length cap."""
+        self.vocab = vocab
+        self.unk_token = unk_token
+        self.max_input_chars_per_word = max_input_chars_per_word
+
+    def tokenize(self, text):
+        """Tokenizes a piece of text into its word pieces.
+        This uses a greedy longest-match-first algorithm to perform tokenization
+        using the given vocabulary.
+        For example:
+          input = "unaffable"
+          output = ["un", "##aff", "##able"]
+        Args:
+          text: A single token or whitespace separated tokens. This should have
+            already been passed through `BasicTokenizer`.
+        Returns:
+          A list of wordpiece tokens.
+        """
+
+        text = convert_to_unicode(text)
+
+        output_tokens = []
+        for token in whitespace_tokenize(text):
+            chars = list(token)
+            if len(chars) > self.max_input_chars_per_word:
+                output_tokens.append(self.unk_token)
+                continue
+
+            is_bad = False
+            start = 0
+            sub_tokens = []
+            while start < len(chars):
+                end = len(chars)
+                cur_substr = None
+                while start < end:
+                    substr = "".join(chars[start:end])
+                    if start > 0:
+                        substr = "##" + substr
+                    if substr in self.vocab:
+                        cur_substr = substr
+                        break
+                    end -= 1
+                if cur_substr is None:
+                    is_bad = True
+                    break
+                sub_tokens.append(cur_substr)
+                start = end
+
+            if is_bad:
+                output_tokens.append(self.unk_token)
+            else:
+                output_tokens.extend(sub_tokens)
+        return output_tokens
+
+
+def _is_whitespace(char):
+    """Checks whether `char` is a whitespace character."""
+    # \t, \n, and \r are technically control characters but we treat them
+    # as whitespace since they are generally considered as such.
+    if char == " " or char == "\t" or char == "\n" or char == "\r":
+        return True
+    cat = unicodedata.category(char)
+    if cat == "Zs":
+        return True
+    return False
+
+
+def _is_control(char):
+    """Checks whether `char` is a control character."""
+    # These are technically control characters but we count them as whitespace
+    # characters.
+    if char == "\t" or char == "\n" or char == "\r":
+        return False
+    cat = unicodedata.category(char)
+    if cat in ("Cc", "Cf"):
+        return True
+    return False
+
+
+def _is_punctuation(char):
+    """Checks whether `char` is a punctuation character."""
+    cp = ord(char)
+    # We treat all non-letter/number ASCII as punctuation.
+    # Characters such as "^", "$", and "`" are not in the Unicode
+    # Punctuation class but we treat them as punctuation anyways, for
+    # consistency.
+    if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
+            (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
+        return True
+    cat = unicodedata.category(char)
+    if cat.startswith("P"):
+        return True
+    return False
diff --git a/example/nlp_to_mindrecord/zhwiki/vocab.txt b/example/nlp_to_mindrecord/zhwiki/vocab.txt
new file mode 100644
index 0000000000..ca4f978103
--- /dev/null
+++ b/example/nlp_to_mindrecord/zhwiki/vocab.txt
@@ -0,0 +1,21128 @@
+[PAD]
+[unused1]
+[unused2]
+[unused3]
+[unused4]
+[unused5]
+[unused6]
+[unused7]
+[unused8]
+[unused9]
+[unused10]
+[unused11]
+[unused12]
+[unused13]
+[unused14]
+[unused15]
+[unused16]
+[unused17]
+[unused18]
+[unused19]
+[unused20]
+[unused21]
+[unused22]
+[unused23]
+[unused24]
+[unused25]
+[unused26]
+[unused27]
+[unused28]
+[unused29]
+[unused30]
+[unused31]
+[unused32]
+[unused33]
+[unused34]
+[unused35]
+[unused36]
+[unused37]
+[unused38]
+[unused39]
+[unused40]
+[unused41]
+[unused42]
+[unused43]
+[unused44]
+[unused45]
+[unused46]
+[unused47]
+[unused48]
+[unused49]
+[unused50]
+[unused51]
+[unused52]
+[unused53]
+[unused54]
+[unused55]
+[unused56]
+[unused57]
+[unused58]
+[unused59]
+[unused60]
+[unused61]
+[unused62]
+[unused63]
+[unused64]
+[unused65]
+[unused66]
+[unused67]
+[unused68]
+[unused69]
+[unused70]
+[unused71]
+[unused72]
+[unused73]
+[unused74]
+[unused75]
+[unused76]
+[unused77]
+[unused78]
+[unused79]
+[unused80]
+[unused81]
+[unused82]
+[unused83]
+[unused84]
+[unused85]
+[unused86]
+[unused87]
+[unused88]
+[unused89]
+[unused90]
+[unused91]
+[unused92]
+[unused93]
+[unused94]
+[unused95]
+[unused96]
+[unused97]
+[unused98]
+[unused99]
+[UNK]
+[CLS]
+[SEP]
+[MASK]
+
+
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@ +[ +\ +] +^ +_ +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +£ +¤ +¥ +§ +© +« +® +° +± +² +³ +µ +· +¹ +º +» +¼ +× +ß +æ +÷ +ø +đ +ŋ +ɔ +ə +ɡ +ʰ +ˇ +ˈ +ˊ +ˋ +ˍ +ː +˙ +˚ +ˢ +α +β +γ +δ +ε +η +θ +ι +κ +λ +μ +ν +ο +π +ρ +ς +σ +τ +υ +φ +χ +ψ +ω +а +б +в +г +д +е +ж +з +и +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +ы +ь +я +і +ا +ب +ة +ت +د +ر +س +ع +ل +م +ن +ه +و +ي +۩ +ก +ง +น +ม +ย +ร +อ +า +เ +๑ +་ +ღ +ᄀ +ᄁ +ᄂ +ᄃ +ᄅ +ᄆ +ᄇ +ᄈ +ᄉ +ᄋ +ᄌ +ᄎ +ᄏ +ᄐ +ᄑ +ᄒ +ᅡ +ᅢ +ᅣ +ᅥ +ᅦ +ᅧ +ᅨ +ᅩ +ᅪ +ᅬ +ᅭ +ᅮ +ᅯ +ᅲ +ᅳ +ᅴ +ᅵ +ᆨ +ᆫ +ᆯ +ᆷ +ᆸ +ᆺ +ᆻ +ᆼ +ᗜ +ᵃ +ᵉ +ᵍ +ᵏ +ᵐ +ᵒ +ᵘ +‖ +„ +† +• +‥ +‧ +
 +‰ +′ +″ +‹ +› +※ +‿ +⁄ +ⁱ +⁺ +ⁿ +₁ +₂ +₃ +₄ +€ +℃ +№ +™ +ⅰ +ⅱ +ⅲ +ⅳ +ⅴ +← +↑ +→ +↓ +↔ +↗ +↘ +⇒ +∀ +− +∕ +∙ +√ +∞ +∟ +∠ +∣ +∥ +∩ +∮ +∶ +∼ +∽ +≈ +≒ +≡ +≤ +≥ +≦ +≧ +≪ +≫ +⊙ +⋅ +⋈ +⋯ +⌒ +① +② +③ +④ +⑤ +⑥ +⑦ +⑧ +⑨ +⑩ +⑴ +⑵ +⑶ +⑷ +⑸ +⒈ +⒉ +⒊ +⒋ +ⓒ +ⓔ +ⓘ +─ +━ +│ +┃ +┅ +┆ +┊ +┌ +└ +├ +┣ +═ +║ +╚ +╞ +╠ +╭ +╮ +╯ +╰ +╱ +╳ +▂ +▃ +▅ +▇ +█ +▉ +▋ +▌ +▍ +▎ +■ +□ +▪ +▫ +▬ +▲ +△ +▶ +► +▼ +▽ +◆ +◇ +○ +◎ +● +◕ +◠ +◢ +◤ +☀ +★ +☆ +☕ +☞ +☺ +☼ +♀ +♂ +♠ +♡ +♣ +♥ +♦ +♪ +♫ +♬ +✈ +✔ +✕ +✖ +✦ +✨ +✪ +✰ +✿ +❀ +❤ +➜ +➤ +⦿ +、 +。 +〃 +々 +〇 +〈 +〉 +《 +》 +「 +」 +『 +』 +【 +】 +〓 +〔 +〕 +〖 +〗 +〜 +〝 +〞 +ぁ +あ +ぃ +い +う +ぇ +え +お +か +き +く +け +こ +さ +し +す +せ +そ +た +ち +っ +つ +て +と +な +に +ぬ +ね +の +は +ひ +ふ +へ +ほ +ま +み +む +め +も +ゃ +や +ゅ +ゆ +ょ +よ +ら +り +る +れ +ろ +わ +を +ん +゜ +ゝ +ァ +ア +ィ +イ +ゥ +ウ +ェ +エ +ォ +オ +カ +キ +ク +ケ +コ +サ +シ +ス +セ +ソ +タ +チ +ッ +ツ +テ +ト +ナ +ニ +ヌ +ネ +ノ +ハ +ヒ +フ +ヘ +ホ +マ +ミ +ム +メ +モ +ャ +ヤ +ュ +ユ +ョ +ヨ +ラ +リ +ル +レ +ロ +ワ +ヲ +ン +ヶ +・ +ー +ヽ +ㄅ +ㄆ +ㄇ +ㄉ +ㄋ +ㄌ +ㄍ +ㄎ +ㄏ +ㄒ +ㄚ +ㄛ +ㄞ +ㄟ +ㄢ +ㄤ +ㄥ +ㄧ +ㄨ +ㆍ +㈦ +㊣ +㎡ +㗎 +一 +丁 +七 +万 +丈 +三 +上 +下 +不 +与 +丐 +丑 +专 +且 +丕 +世 +丘 +丙 +业 +丛 +东 +丝 +丞 +丟 +両 +丢 +两 +严 +並 +丧 +丨 +个 +丫 +中 +丰 +串 +临 +丶 +丸 +丹 +为 +主 +丼 +丽 +举 +丿 +乂 +乃 +久 +么 +义 +之 +乌 +乍 +乎 +乏 +乐 +乒 +乓 +乔 +乖 +乗 +乘 +乙 +乜 +九 +乞 +也 +习 +乡 +书 +乩 +买 +乱 +乳 +乾 +亀 +亂 +了 +予 +争 +事 +二 +于 +亏 +云 +互 +五 +井 +亘 +亙 +亚 +些 +亜 +亞 +亟 +亡 +亢 +交 +亥 +亦 +产 +亨 +亩 +享 +京 +亭 +亮 +亲 +亳 +亵 +人 +亿 +什 +仁 +仃 +仄 +仅 +仆 +仇 +今 +介 +仍 +从 +仏 +仑 +仓 +仔 +仕 +他 +仗 +付 +仙 +仝 +仞 +仟 +代 +令 +以 +仨 +仪 +们 +仮 +仰 +仲 +件 +价 +任 +份 +仿 +企 +伉 +伊 +伍 +伎 +伏 +伐 +休 +伕 +众 +优 +伙 +会 +伝 +伞 +伟 +传 +伢 +伤 +伦 +伪 +伫 +伯 +估 +伴 +伶 +伸 +伺 +似 +伽 +佃 +但 +佇 +佈 +位 +低 +住 +佐 +佑 +体 +佔 +何 +佗 +佘 +余 +佚 +佛 +作 +佝 +佞 +佟 +你 +佢 +佣 +佤 +佥 +佩 +佬 +佯 +佰 +佳 +併 +佶 +佻 +佼 +使 +侃 +侄 +來 +侈 +例 +侍 +侏 +侑 +侖 +侗 +供 +依 +侠 +価 +侣 +侥 +侦 +侧 +侨 +侬 +侮 +侯 +侵 +侶 +侷 +便 +係 +促 +俄 +俊 +俎 +俏 +俐 +俑 +俗 +俘 +俚 +保 +俞 +俟 +俠 +信 +俨 +俩 +俪 +俬 +俭 +修 +俯 +俱 +俳 +俸 +俺 +俾 +倆 +倉 +個 +倌 +倍 +倏 +們 +倒 +倔 +倖 +倘 +候 +倚 +倜 +借 +倡 +値 +倦 +倩 +倪 +倫 +倬 +倭 +倶 +债 +值 +倾 +偃 +假 +偈 +偉 +偌 +偎 +偏 +偕 +做 +停 +健 +側 +偵 +偶 +偷 +偻 +偽 +偿 +傀 +傅 +傍 +傑 +傘 +備 +傚 +傢 +傣 +傥 +储 +傩 +催 +傭 +傲 +傳 +債 +傷 +傻 +傾 +僅 +働 +像 +僑 
+僕 +僖 +僚 +僥 +僧 +僭 +僮 +僱 +僵 +價 +僻 +儀 +儂 +億 +儆 +儉 +儋 +儒 +儕 +儘 +償 +儡 +優 +儲 +儷 +儼 +儿 +兀 +允 +元 +兄 +充 +兆 +兇 +先 +光 +克 +兌 +免 +児 +兑 +兒 +兔 +兖 +党 +兜 +兢 +入 +內 +全 +兩 +八 +公 +六 +兮 +兰 +共 +兲 +关 +兴 +兵 +其 +具 +典 +兹 +养 +兼 +兽 +冀 +内 +円 +冇 +冈 +冉 +冊 +册 +再 +冏 +冒 +冕 +冗 +写 +军 +农 +冠 +冢 +冤 +冥 +冨 +冪 +冬 +冯 +冰 +冲 +决 +况 +冶 +冷 +冻 +冼 +冽 +冾 +净 +凄 +准 +凇 +凈 +凉 +凋 +凌 +凍 +减 +凑 +凛 +凜 +凝 +几 +凡 +凤 +処 +凪 +凭 +凯 +凰 +凱 +凳 +凶 +凸 +凹 +出 +击 +函 +凿 +刀 +刁 +刃 +分 +切 +刈 +刊 +刍 +刎 +刑 +划 +列 +刘 +则 +刚 +创 +初 +删 +判 +別 +刨 +利 +刪 +别 +刮 +到 +制 +刷 +券 +刹 +刺 +刻 +刽 +剁 +剂 +剃 +則 +剉 +削 +剋 +剌 +前 +剎 +剐 +剑 +剔 +剖 +剛 +剜 +剝 +剣 +剤 +剥 +剧 +剩 +剪 +副 +割 +創 +剷 +剽 +剿 +劃 +劇 +劈 +劉 +劊 +劍 +劏 +劑 +力 +劝 +办 +功 +加 +务 +劣 +动 +助 +努 +劫 +劭 +励 +劲 +劳 +労 +劵 +効 +劾 +势 +勁 +勃 +勇 +勉 +勋 +勐 +勒 +動 +勖 +勘 +務 +勛 +勝 +勞 +募 +勢 +勤 +勧 +勳 +勵 +勸 +勺 +勻 +勾 +勿 +匀 +包 +匆 +匈 +匍 +匐 +匕 +化 +北 +匙 +匝 +匠 +匡 +匣 +匪 +匮 +匯 +匱 +匹 +区 +医 +匾 +匿 +區 +十 +千 +卅 +升 +午 +卉 +半 +卍 +华 +协 +卑 +卒 +卓 +協 +单 +卖 +南 +単 +博 +卜 +卞 +卟 +占 +卡 +卢 +卤 +卦 +卧 +卫 +卮 +卯 +印 +危 +即 +却 +卵 +卷 +卸 +卻 +卿 +厂 +厄 +厅 +历 +厉 +压 +厌 +厕 +厘 +厚 +厝 +原 +厢 +厥 +厦 +厨 +厩 +厭 +厮 +厲 +厳 +去 +县 +叁 +参 +參 +又 +叉 +及 +友 +双 +反 +収 +发 +叔 +取 +受 +变 +叙 +叛 +叟 +叠 +叡 +叢 +口 +古 +句 +另 +叨 +叩 +只 +叫 +召 +叭 +叮 +可 +台 +叱 +史 +右 +叵 +叶 +号 +司 +叹 +叻 +叼 +叽 +吁 +吃 +各 +吆 +合 +吉 +吊 +吋 +同 +名 +后 +吏 +吐 +向 +吒 +吓 +吕 +吖 +吗 +君 +吝 +吞 +吟 +吠 +吡 +否 +吧 +吨 +吩 +含 +听 +吭 +吮 +启 +吱 +吳 +吴 +吵 +吶 +吸 +吹 +吻 +吼 +吽 +吾 +呀 +呂 +呃 +呆 +呈 +告 +呋 +呎 +呐 +呓 +呕 +呗 +员 +呛 +呜 +呢 +呤 +呦 +周 +呱 +呲 +味 +呵 +呷 +呸 +呻 +呼 +命 +咀 +咁 +咂 +咄 +咆 +咋 +和 +咎 +咏 +咐 +咒 +咔 +咕 +咖 +咗 +咘 +咙 +咚 +咛 +咣 +咤 +咦 +咧 +咨 +咩 +咪 +咫 +咬 +咭 +咯 +咱 +咲 +咳 +咸 +咻 +咽 +咿 +哀 +品 +哂 +哄 +哆 +哇 +哈 +哉 +哋 +哌 +响 +哎 +哏 +哐 +哑 +哒 +哔 +哗 +哟 +員 +哥 +哦 +哧 +哨 +哩 +哪 +哭 +哮 +哲 +哺 +哼 +哽 +唁 +唄 +唆 +唇 +唉 +唏 +唐 +唑 +唔 +唠 +唤 +唧 +唬 +售 +唯 +唰 +唱 +唳 +唷 +唸 +唾 +啃 +啄 +商 +啉 +啊 +問 +啓 +啕 +啖 +啜 +啞 +啟 +啡 +啤 +啥 +啦 +啧 +啪 +啫 +啬 +啮 +啰 +啱 +啲 +啵 +啶 +啷 +啸 +啻 +啼 +啾 +喀 +喂 +喃 +善 +喆 +喇 +喉 +喊 +喋 +喎 +喏 +喔 +喘 +喙 +喚 +喜 +喝 +喟 +喧 +喪 +喫 +喬 +單 +喰 +喱 +喲 +喳 +喵 +営 +喷 +喹 +喺 +喻 +喽 +嗅 +嗆 +嗇 +嗎 +嗑 +嗒 +嗓 +嗔 +嗖 +嗚 +嗜 +嗝 +嗟 +嗡 +嗣 +嗤 +嗦 +嗨 +嗪 +嗬 +嗯 +嗰 +嗲 +嗳 +嗶 +嗷 +嗽 +嘀 +嘅 +嘆 +嘈 +嘉 +嘌 +嘍 +嘎 +嘔 +嘖 +嘗 +嘘 +嘚 +嘛 +嘜 +嘞 +嘟 +嘢 +嘣 +嘤 +嘧 +嘩 +嘭 +嘮 +嘯 +嘰 +嘱 +嘲 +嘴 +嘶 +嘸 
+嘹 +嘻 +嘿 +噁 +噌 +噎 +噓 +噔 +噗 +噙 +噜 +噠 +噢 +噤 +器 +噩 +噪 +噬 +噱 +噴 +噶 +噸 +噹 +噻 +噼 +嚀 +嚇 +嚎 +嚏 +嚐 +嚓 +嚕 +嚟 +嚣 +嚥 +嚨 +嚮 +嚴 +嚷 +嚼 +囂 +囉 +囊 +囍 +囑 +囔 +囗 +囚 +四 +囝 +回 +囟 +因 +囡 +团 +団 +囤 +囧 +囪 +囫 +园 +困 +囱 +囲 +図 +围 +囹 +固 +国 +图 +囿 +圃 +圄 +圆 +圈 +國 +圍 +圏 +園 +圓 +圖 +團 +圜 +土 +圣 +圧 +在 +圩 +圭 +地 +圳 +场 +圻 +圾 +址 +坂 +均 +坊 +坍 +坎 +坏 +坐 +坑 +块 +坚 +坛 +坝 +坞 +坟 +坠 +坡 +坤 +坦 +坨 +坪 +坯 +坳 +坵 +坷 +垂 +垃 +垄 +型 +垒 +垚 +垛 +垠 +垢 +垣 +垦 +垩 +垫 +垭 +垮 +垵 +埂 +埃 +埋 +城 +埔 +埕 +埗 +域 +埠 +埤 +埵 +執 +埸 +培 +基 +埼 +堀 +堂 +堃 +堅 +堆 +堇 +堑 +堕 +堙 +堡 +堤 +堪 +堯 +堰 +報 +場 +堵 +堺 +堿 +塊 +塌 +塑 +塔 +塗 +塘 +塚 +塞 +塢 +塩 +填 +塬 +塭 +塵 +塾 +墀 +境 +墅 +墉 +墊 +墒 +墓 +増 +墘 +墙 +墜 +增 +墟 +墨 +墩 +墮 +墳 +墻 +墾 +壁 +壅 +壆 +壇 +壊 +壑 +壓 +壕 +壘 +壞 +壟 +壢 +壤 +壩 +士 +壬 +壮 +壯 +声 +売 +壳 +壶 +壹 +壺 +壽 +处 +备 +変 +复 +夏 +夔 +夕 +外 +夙 +多 +夜 +够 +夠 +夢 +夥 +大 +天 +太 +夫 +夭 +央 +夯 +失 +头 +夷 +夸 +夹 +夺 +夾 +奂 +奄 +奇 +奈 +奉 +奋 +奎 +奏 +奐 +契 +奔 +奕 +奖 +套 +奘 +奚 +奠 +奢 +奥 +奧 +奪 +奬 +奮 +女 +奴 +奶 +奸 +她 +好 +如 +妃 +妄 +妆 +妇 +妈 +妊 +妍 +妒 +妓 +妖 +妘 +妙 +妝 +妞 +妣 +妤 +妥 +妨 +妩 +妪 +妮 +妲 +妳 +妹 +妻 +妾 +姆 +姉 +姊 +始 +姍 +姐 +姑 +姒 +姓 +委 +姗 +姚 +姜 +姝 +姣 +姥 +姦 +姨 +姪 +姫 +姬 +姹 +姻 +姿 +威 +娃 +娄 +娅 +娆 +娇 +娉 +娑 +娓 +娘 +娛 +娜 +娟 +娠 +娣 +娥 +娩 +娱 +娲 +娴 +娶 +娼 +婀 +婁 +婆 +婉 +婊 +婕 +婚 +婢 +婦 +婧 +婪 +婭 +婴 +婵 +婶 +婷 +婺 +婿 +媒 +媚 +媛 +媞 +媧 +媲 +媳 +媽 +媾 +嫁 +嫂 +嫉 +嫌 +嫑 +嫔 +嫖 +嫘 +嫚 +嫡 +嫣 +嫦 +嫩 +嫲 +嫵 +嫻 +嬅 +嬉 +嬌 +嬗 +嬛 +嬢 +嬤 +嬪 +嬰 +嬴 +嬷 +嬸 +嬿 +孀 +孃 +子 +孑 +孔 +孕 +孖 +字 +存 +孙 +孚 +孛 +孜 +孝 +孟 +孢 +季 +孤 +学 +孩 +孪 +孫 +孬 +孰 +孱 +孳 +孵 +學 +孺 +孽 +孿 +宁 +它 +宅 +宇 +守 +安 +宋 +完 +宏 +宓 +宕 +宗 +官 +宙 +定 +宛 +宜 +宝 +实 +実 +宠 +审 +客 +宣 +室 +宥 +宦 +宪 +宫 +宮 +宰 +害 +宴 +宵 +家 +宸 +容 +宽 +宾 +宿 +寂 +寄 +寅 +密 +寇 +富 +寐 +寒 +寓 +寛 +寝 +寞 +察 +寡 +寢 +寥 +實 +寧 +寨 +審 +寫 +寬 +寮 +寰 +寵 +寶 +寸 +对 +寺 +寻 +导 +対 +寿 +封 +専 +射 +将 +將 +專 +尉 +尊 +尋 +對 +導 +小 +少 +尔 +尕 +尖 +尘 +尚 +尝 +尤 +尧 +尬 +就 +尴 +尷 +尸 +尹 +尺 +尻 +尼 +尽 +尾 +尿 +局 +屁 +层 +屄 +居 +屆 +屈 +屉 +届 +屋 +屌 +屍 +屎 +屏 +屐 +屑 +展 +屜 +属 +屠 +屡 +屢 +層 +履 +屬 +屯 +山 +屹 +屿 +岀 +岁 +岂 +岌 +岐 +岑 +岔 +岖 +岗 +岘 +岙 +岚 +岛 +岡 +岩 +岫 +岬 +岭 +岱 +岳 +岷 +岸 +峇 +峋 +峒 +峙 +峡 +峤 +峥 +峦 +峨 +峪 +峭 +峯 +峰 +峴 +島 +峻 +峽 +崁 +崂 +崆 +崇 +崎 +崑 +崔 +崖 +崗 +崙 +崛 +崧 +崩 +崭 +崴 +崽 +嵇 +嵊 +嵋 +嵌 +嵐 +嵘 +嵩 +嵬 +嵯 +嶂 +嶄 +嶇 +嶋 +嶙 +嶺 +嶼 +嶽 +巅 +巍 +巒 +巔 +巖 +川 +州 +巡 +巢 +工 +左 +巧 
+巨 +巩 +巫 +差 +己 +已 +巳 +巴 +巷 +巻 +巽 +巾 +巿 +币 +市 +布 +帅 +帆 +师 +希 +帐 +帑 +帕 +帖 +帘 +帚 +帛 +帜 +帝 +帥 +带 +帧 +師 +席 +帮 +帯 +帰 +帳 +帶 +帷 +常 +帼 +帽 +幀 +幂 +幄 +幅 +幌 +幔 +幕 +幟 +幡 +幢 +幣 +幫 +干 +平 +年 +并 +幸 +幹 +幺 +幻 +幼 +幽 +幾 +广 +庁 +広 +庄 +庆 +庇 +床 +序 +庐 +库 +应 +底 +庖 +店 +庙 +庚 +府 +庞 +废 +庠 +度 +座 +庫 +庭 +庵 +庶 +康 +庸 +庹 +庾 +廁 +廂 +廃 +廈 +廉 +廊 +廓 +廖 +廚 +廝 +廟 +廠 +廢 +廣 +廬 +廳 +延 +廷 +建 +廿 +开 +弁 +异 +弃 +弄 +弈 +弊 +弋 +式 +弑 +弒 +弓 +弔 +引 +弗 +弘 +弛 +弟 +张 +弥 +弦 +弧 +弩 +弭 +弯 +弱 +張 +強 +弹 +强 +弼 +弾 +彅 +彆 +彈 +彌 +彎 +归 +当 +录 +彗 +彙 +彝 +形 +彤 +彥 +彦 +彧 +彩 +彪 +彫 +彬 +彭 +彰 +影 +彷 +役 +彻 +彼 +彿 +往 +征 +径 +待 +徇 +很 +徉 +徊 +律 +後 +徐 +徑 +徒 +従 +徕 +得 +徘 +徙 +徜 +從 +徠 +御 +徨 +復 +循 +徬 +微 +徳 +徴 +徵 +德 +徹 +徼 +徽 +心 +必 +忆 +忌 +忍 +忏 +忐 +忑 +忒 +忖 +志 +忘 +忙 +応 +忠 +忡 +忤 +忧 +忪 +快 +忱 +念 +忻 +忽 +忿 +怀 +态 +怂 +怅 +怆 +怎 +怏 +怒 +怔 +怕 +怖 +怙 +怜 +思 +怠 +怡 +急 +怦 +性 +怨 +怪 +怯 +怵 +总 +怼 +恁 +恃 +恆 +恋 +恍 +恐 +恒 +恕 +恙 +恚 +恢 +恣 +恤 +恥 +恨 +恩 +恪 +恫 +恬 +恭 +息 +恰 +恳 +恵 +恶 +恸 +恺 +恻 +恼 +恿 +悄 +悅 +悉 +悌 +悍 +悔 +悖 +悚 +悟 +悠 +患 +悦 +您 +悩 +悪 +悬 +悯 +悱 +悲 +悴 +悵 +悶 +悸 +悻 +悼 +悽 +情 +惆 +惇 +惊 +惋 +惑 +惕 +惘 +惚 +惜 +惟 +惠 +惡 +惦 +惧 +惨 +惩 +惫 +惬 +惭 +惮 +惯 +惰 +惱 +想 +惴 +惶 +惹 +惺 +愁 +愆 +愈 +愉 +愍 +意 +愕 +愚 +愛 +愜 +感 +愣 +愤 +愧 +愫 +愷 +愿 +慄 +慈 +態 +慌 +慎 +慑 +慕 +慘 +慚 +慟 +慢 +慣 +慧 +慨 +慫 +慮 +慰 +慳 +慵 +慶 +慷 +慾 +憂 +憊 +憋 +憎 +憐 +憑 +憔 +憚 +憤 +憧 +憨 +憩 +憫 +憬 +憲 +憶 +憾 +懂 +懇 +懈 +應 +懊 +懋 +懑 +懒 +懦 +懲 +懵 +懶 +懷 +懸 +懺 +懼 +懾 +懿 +戀 +戈 +戊 +戌 +戍 +戎 +戏 +成 +我 +戒 +戕 +或 +战 +戚 +戛 +戟 +戡 +戦 +截 +戬 +戮 +戰 +戲 +戳 +戴 +戶 +户 +戸 +戻 +戾 +房 +所 +扁 +扇 +扈 +扉 +手 +才 +扎 +扑 +扒 +打 +扔 +払 +托 +扛 +扣 +扦 +执 +扩 +扪 +扫 +扬 +扭 +扮 +扯 +扰 +扱 +扳 +扶 +批 +扼 +找 +承 +技 +抄 +抉 +把 +抑 +抒 +抓 +投 +抖 +抗 +折 +抚 +抛 +抜 +択 +抟 +抠 +抡 +抢 +护 +报 +抨 +披 +抬 +抱 +抵 +抹 +押 +抽 +抿 +拂 +拄 +担 +拆 +拇 +拈 +拉 +拋 +拌 +拍 +拎 +拐 +拒 +拓 +拔 +拖 +拗 +拘 +拙 +拚 +招 +拜 +拟 +拡 +拢 +拣 +拥 +拦 +拧 +拨 +择 +括 +拭 +拮 +拯 +拱 +拳 +拴 +拷 +拼 +拽 +拾 +拿 +持 +挂 +指 +挈 +按 +挎 +挑 +挖 +挙 +挚 +挛 +挝 +挞 +挟 +挠 +挡 +挣 +挤 +挥 +挨 +挪 +挫 +振 +挲 +挹 +挺 +挽 +挾 +捂 +捅 +捆 +捉 +捋 +捌 +捍 +捎 +捏 +捐 +捕 +捞 +损 +捡 +换 +捣 +捧 +捨 +捩 +据 +捱 +捲 +捶 +捷 +捺 +捻 +掀 +掂 +掃 +掇 +授 +掉 +掌 +掏 +掐 +排 +掖 +掘 +掙 +掛 +掠 +採 +探 +掣 +接 +控 +推 +掩 +措 +掬 +掰 +掲 +掳 +掴 +掷 +掸 +掺 +揀 +揃 +揄 +揆 +揉 +揍 +描 +提 +插 +揖 +揚 +換 +握 +揣 +揩 +揪 +揭 +揮 +援 +揶 +揸 +揹 +揽 +搀 +搁 
+搂 +搅 +損 +搏 +搐 +搓 +搔 +搖 +搗 +搜 +搞 +搡 +搪 +搬 +搭 +搵 +搶 +携 +搽 +摀 +摁 +摄 +摆 +摇 +摈 +摊 +摒 +摔 +摘 +摞 +摟 +摧 +摩 +摯 +摳 +摸 +摹 +摺 +摻 +撂 +撃 +撅 +撇 +撈 +撐 +撑 +撒 +撓 +撕 +撚 +撞 +撤 +撥 +撩 +撫 +撬 +播 +撮 +撰 +撲 +撵 +撷 +撸 +撻 +撼 +撿 +擀 +擁 +擂 +擄 +擅 +擇 +擊 +擋 +操 +擎 +擒 +擔 +擘 +據 +擞 +擠 +擡 +擢 +擦 +擬 +擰 +擱 +擲 +擴 +擷 +擺 +擼 +擾 +攀 +攏 +攒 +攔 +攘 +攙 +攜 +攝 +攞 +攢 +攣 +攤 +攥 +攪 +攫 +攬 +支 +收 +攸 +改 +攻 +放 +政 +故 +效 +敌 +敍 +敎 +敏 +救 +敕 +敖 +敗 +敘 +教 +敛 +敝 +敞 +敢 +散 +敦 +敬 +数 +敲 +整 +敵 +敷 +數 +斂 +斃 +文 +斋 +斌 +斎 +斐 +斑 +斓 +斗 +料 +斛 +斜 +斟 +斡 +斤 +斥 +斧 +斩 +斫 +斬 +断 +斯 +新 +斷 +方 +於 +施 +旁 +旃 +旅 +旋 +旌 +旎 +族 +旖 +旗 +无 +既 +日 +旦 +旧 +旨 +早 +旬 +旭 +旮 +旱 +时 +旷 +旺 +旻 +昀 +昂 +昆 +昇 +昉 +昊 +昌 +明 +昏 +易 +昔 +昕 +昙 +星 +映 +春 +昧 +昨 +昭 +是 +昱 +昴 +昵 +昶 +昼 +显 +晁 +時 +晃 +晉 +晋 +晌 +晏 +晒 +晓 +晔 +晕 +晖 +晗 +晚 +晝 +晞 +晟 +晤 +晦 +晨 +晩 +普 +景 +晰 +晴 +晶 +晷 +智 +晾 +暂 +暄 +暇 +暈 +暉 +暌 +暐 +暑 +暖 +暗 +暝 +暢 +暧 +暨 +暫 +暮 +暱 +暴 +暸 +暹 +曄 +曆 +曇 +曉 +曖 +曙 +曜 +曝 +曠 +曦 +曬 +曰 +曲 +曳 +更 +書 +曹 +曼 +曾 +替 +最 +會 +月 +有 +朋 +服 +朐 +朔 +朕 +朗 +望 +朝 +期 +朦 +朧 +木 +未 +末 +本 +札 +朮 +术 +朱 +朴 +朵 +机 +朽 +杀 +杂 +权 +杆 +杈 +杉 +李 +杏 +材 +村 +杓 +杖 +杜 +杞 +束 +杠 +条 +来 +杨 +杭 +杯 +杰 +東 +杳 +杵 +杷 +杼 +松 +板 +极 +构 +枇 +枉 +枋 +析 +枕 +林 +枚 +果 +枝 +枢 +枣 +枪 +枫 +枭 +枯 +枰 +枱 +枳 +架 +枷 +枸 +柄 +柏 +某 +柑 +柒 +染 +柔 +柘 +柚 +柜 +柞 +柠 +柢 +查 +柩 +柬 +柯 +柱 +柳 +柴 +柵 +査 +柿 +栀 +栃 +栄 +栅 +标 +栈 +栉 +栋 +栎 +栏 +树 +栓 +栖 +栗 +校 +栩 +株 +样 +核 +根 +格 +栽 +栾 +桀 +桁 +桂 +桃 +桅 +框 +案 +桉 +桌 +桎 +桐 +桑 +桓 +桔 +桜 +桠 +桡 +桢 +档 +桥 +桦 +桧 +桨 +桩 +桶 +桿 +梁 +梅 +梆 +梏 +梓 +梗 +條 +梟 +梢 +梦 +梧 +梨 +梭 +梯 +械 +梳 +梵 +梶 +检 +棂 +棄 +棉 +棋 +棍 +棒 +棕 +棗 +棘 +棚 +棟 +棠 +棣 +棧 +森 +棱 +棲 +棵 +棹 +棺 +椁 +椅 +椋 +植 +椎 +椒 +検 +椪 +椭 +椰 +椹 +椽 +椿 +楂 +楊 +楓 +楔 +楚 +楝 +楞 +楠 +楣 +楨 +楫 +業 +楮 +極 +楷 +楸 +楹 +楼 +楽 +概 +榄 +榆 +榈 +榉 +榔 +榕 +榖 +榛 +榜 +榨 +榫 +榭 +榮 +榱 +榴 +榷 +榻 +槁 +槃 +構 +槌 +槍 +槎 +槐 +槓 +様 +槛 +槟 +槤 +槭 +槲 +槳 +槻 +槽 +槿 +樁 +樂 +樊 +樑 +樓 +標 +樞 +樟 +模 +樣 +権 +横 +樫 +樯 +樱 +樵 +樸 +樹 +樺 +樽 +樾 +橄 +橇 +橋 +橐 +橘 +橙 +機 +橡 +橢 +橫 +橱 +橹 +橼 +檀 +檄 +檎 +檐 +檔 +檗 +檜 +檢 +檬 +檯 +檳 +檸 +檻 +櫃 +櫚 +櫛 +櫥 +櫸 +櫻 +欄 +權 +欒 +欖 +欠 +次 +欢 +欣 +欧 +欲 +欸 +欺 +欽 +款 +歆 +歇 +歉 +歌 +歎 +歐 +歓 +歙 +歛 +歡 +止 +正 +此 +步 +武 +歧 +歩 +歪 +歯 +歲 +歳 +歴 +歷 +歸 +歹 +死 +歼 +殁 +殃 +殆 +殇 +殉 +殊 +残 +殒 +殓 +殖 +殘 +殞 +殡 +殤 +殭 +殯 +殲 +殴 +段 +殷 +殺 +殼 +殿 +毀 +毁 
+毂 +毅 +毆 +毋 +母 +毎 +每 +毒 +毓 +比 +毕 +毗 +毘 +毙 +毛 +毡 +毫 +毯 +毽 +氈 +氏 +氐 +民 +氓 +气 +氖 +気 +氙 +氛 +氟 +氡 +氢 +氣 +氤 +氦 +氧 +氨 +氪 +氫 +氮 +氯 +氰 +氲 +水 +氷 +永 +氹 +氾 +汀 +汁 +求 +汆 +汇 +汉 +汎 +汐 +汕 +汗 +汙 +汛 +汝 +汞 +江 +池 +污 +汤 +汨 +汩 +汪 +汰 +汲 +汴 +汶 +汹 +決 +汽 +汾 +沁 +沂 +沃 +沅 +沈 +沉 +沌 +沏 +沐 +沒 +沓 +沖 +沙 +沛 +沟 +没 +沢 +沣 +沥 +沦 +沧 +沪 +沫 +沭 +沮 +沱 +河 +沸 +油 +治 +沼 +沽 +沾 +沿 +況 +泄 +泉 +泊 +泌 +泓 +法 +泗 +泛 +泞 +泠 +泡 +波 +泣 +泥 +注 +泪 +泫 +泮 +泯 +泰 +泱 +泳 +泵 +泷 +泸 +泻 +泼 +泽 +泾 +洁 +洄 +洋 +洒 +洗 +洙 +洛 +洞 +津 +洩 +洪 +洮 +洱 +洲 +洵 +洶 +洸 +洹 +活 +洼 +洽 +派 +流 +浃 +浄 +浅 +浆 +浇 +浊 +测 +济 +浏 +浑 +浒 +浓 +浔 +浙 +浚 +浜 +浣 +浦 +浩 +浪 +浬 +浮 +浯 +浴 +海 +浸 +涂 +涅 +涇 +消 +涉 +涌 +涎 +涓 +涔 +涕 +涙 +涛 +涝 +涞 +涟 +涠 +涡 +涣 +涤 +润 +涧 +涨 +涩 +涪 +涮 +涯 +液 +涵 +涸 +涼 +涿 +淀 +淄 +淅 +淆 +淇 +淋 +淌 +淑 +淒 +淖 +淘 +淙 +淚 +淞 +淡 +淤 +淦 +淨 +淩 +淪 +淫 +淬 +淮 +深 +淳 +淵 +混 +淹 +淺 +添 +淼 +清 +済 +渉 +渊 +渋 +渍 +渎 +渐 +渔 +渗 +渙 +渚 +減 +渝 +渠 +渡 +渣 +渤 +渥 +渦 +温 +測 +渭 +港 +渲 +渴 +游 +渺 +渾 +湃 +湄 +湊 +湍 +湖 +湘 +湛 +湟 +湧 +湫 +湮 +湯 +湳 +湾 +湿 +満 +溃 +溅 +溉 +溏 +源 +準 +溜 +溝 +溟 +溢 +溥 +溧 +溪 +溫 +溯 +溱 +溴 +溶 +溺 +溼 +滁 +滂 +滄 +滅 +滇 +滋 +滌 +滑 +滓 +滔 +滕 +滙 +滚 +滝 +滞 +滟 +满 +滢 +滤 +滥 +滦 +滨 +滩 +滬 +滯 +滲 +滴 +滷 +滸 +滾 +滿 +漁 +漂 +漆 +漉 +漏 +漓 +演 +漕 +漠 +漢 +漣 +漩 +漪 +漫 +漬 +漯 +漱 +漲 +漳 +漸 +漾 +漿 +潆 +潇 +潋 +潍 +潑 +潔 +潘 +潛 +潜 +潞 +潟 +潢 +潤 +潦 +潧 +潭 +潮 +潰 +潴 +潸 +潺 +潼 +澀 +澄 +澆 +澈 +澍 +澎 +澗 +澜 +澡 +澤 +澧 +澱 +澳 +澹 +激 +濁 +濂 +濃 +濑 +濒 +濕 +濘 +濛 +濟 +濠 +濡 +濤 +濫 +濬 +濮 +濯 +濱 +濺 +濾 +瀅 +瀆 +瀉 +瀋 +瀏 +瀑 +瀕 +瀘 +瀚 +瀛 +瀝 +瀞 +瀟 +瀧 +瀨 +瀬 +瀰 +瀾 +灌 +灏 +灑 +灘 +灝 +灞 +灣 +火 +灬 +灭 +灯 +灰 +灵 +灶 +灸 +灼 +災 +灾 +灿 +炀 +炁 +炅 +炉 +炊 +炎 +炒 +炔 +炕 +炖 +炙 +炜 +炫 +炬 +炭 +炮 +炯 +炳 +炷 +炸 +点 +為 +炼 +炽 +烁 +烂 +烃 +烈 +烊 +烏 +烘 +烙 +烛 +烟 +烤 +烦 +烧 +烨 +烩 +烫 +烬 +热 +烯 +烷 +烹 +烽 +焉 +焊 +焕 +焖 +焗 +焘 +焙 +焚 +焜 +無 +焦 +焯 +焰 +焱 +然 +焼 +煅 +煉 +煊 +煌 +煎 +煒 +煖 +煙 +煜 +煞 +煤 +煥 +煦 +照 +煨 +煩 +煮 +煲 +煸 +煽 +熄 +熊 +熏 +熒 +熔 +熙 +熟 +熠 +熨 +熬 +熱 +熵 +熹 +熾 +燁 +燃 +燄 +燈 +燉 +燊 +燎 +燒 +燔 +燕 +燙 +燜 +營 +燥 +燦 +燧 +燭 +燮 +燴 +燻 +燼 +燿 +爆 +爍 +爐 +爛 +爪 +爬 +爭 +爰 +爱 +爲 +爵 +父 +爷 +爸 +爹 +爺 +爻 +爽 +爾 +牆 +片 +版 +牌 +牍 +牒 +牙 +牛 +牝 +牟 +牠 +牡 +牢 +牦 +牧 +物 +牯 +牲 +牴 +牵 +特 +牺 +牽 +犀 +犁 +犄 +犊 +犍 +犒 +犢 +犧 +犬 +犯 +状 +犷 +犸 +犹 +狀 +狂 +狄 +狈 +狎 +狐 +狒 +狗 +狙 +狞 +狠 +狡 +狩 +独 +狭 +狮 +狰 +狱 +狸 +狹 +狼 +狽 +猎 +猕 +猖 +猗 +猙 +猛 +猜 
+猝 +猥 +猩 +猪 +猫 +猬 +献 +猴 +猶 +猷 +猾 +猿 +獄 +獅 +獎 +獐 +獒 +獗 +獠 +獣 +獨 +獭 +獰 +獲 +獵 +獷 +獸 +獺 +獻 +獼 +獾 +玄 +率 +玉 +王 +玑 +玖 +玛 +玟 +玠 +玥 +玩 +玫 +玮 +环 +现 +玲 +玳 +玷 +玺 +玻 +珀 +珂 +珅 +珈 +珉 +珊 +珍 +珏 +珐 +珑 +珙 +珞 +珠 +珣 +珥 +珩 +珪 +班 +珮 +珲 +珺 +現 +球 +琅 +理 +琇 +琉 +琊 +琍 +琏 +琐 +琛 +琢 +琥 +琦 +琨 +琪 +琬 +琮 +琰 +琲 +琳 +琴 +琵 +琶 +琺 +琼 +瑀 +瑁 +瑄 +瑋 +瑕 +瑗 +瑙 +瑚 +瑛 +瑜 +瑞 +瑟 +瑠 +瑣 +瑤 +瑩 +瑪 +瑯 +瑰 +瑶 +瑾 +璀 +璁 +璃 +璇 +璉 +璋 +璎 +璐 +璜 +璞 +璟 +璧 +璨 +環 +璽 +璿 +瓊 +瓏 +瓒 +瓜 +瓢 +瓣 +瓤 +瓦 +瓮 +瓯 +瓴 +瓶 +瓷 +甄 +甌 +甕 +甘 +甙 +甚 +甜 +生 +產 +産 +甥 +甦 +用 +甩 +甫 +甬 +甭 +甯 +田 +由 +甲 +申 +电 +男 +甸 +町 +画 +甾 +畀 +畅 +界 +畏 +畑 +畔 +留 +畜 +畝 +畢 +略 +畦 +番 +畫 +異 +畲 +畳 +畴 +當 +畸 +畹 +畿 +疆 +疇 +疊 +疏 +疑 +疔 +疖 +疗 +疙 +疚 +疝 +疟 +疡 +疣 +疤 +疥 +疫 +疮 +疯 +疱 +疲 +疳 +疵 +疸 +疹 +疼 +疽 +疾 +痂 +病 +症 +痈 +痉 +痊 +痍 +痒 +痔 +痕 +痘 +痙 +痛 +痞 +痠 +痢 +痣 +痤 +痧 +痨 +痪 +痫 +痰 +痱 +痴 +痹 +痺 +痼 +痿 +瘀 +瘁 +瘋 +瘍 +瘓 +瘘 +瘙 +瘟 +瘠 +瘡 +瘢 +瘤 +瘦 +瘧 +瘩 +瘪 +瘫 +瘴 +瘸 +瘾 +療 +癇 +癌 +癒 +癖 +癜 +癞 +癡 +癢 +癣 +癥 +癫 +癬 +癮 +癱 +癲 +癸 +発 +登 +發 +白 +百 +皂 +的 +皆 +皇 +皈 +皋 +皎 +皑 +皓 +皖 +皙 +皚 +皮 +皰 +皱 +皴 +皺 +皿 +盂 +盃 +盅 +盆 +盈 +益 +盎 +盏 +盐 +监 +盒 +盔 +盖 +盗 +盘 +盛 +盜 +盞 +盟 +盡 +監 +盤 +盥 +盧 +盪 +目 +盯 +盱 +盲 +直 +相 +盹 +盼 +盾 +省 +眈 +眉 +看 +県 +眙 +眞 +真 +眠 +眦 +眨 +眩 +眯 +眶 +眷 +眸 +眺 +眼 +眾 +着 +睁 +睇 +睏 +睐 +睑 +睛 +睜 +睞 +睡 +睢 +督 +睥 +睦 +睨 +睪 +睫 +睬 +睹 +睽 +睾 +睿 +瞄 +瞅 +瞇 +瞋 +瞌 +瞎 +瞑 +瞒 +瞓 +瞞 +瞟 +瞠 +瞥 +瞧 +瞩 +瞪 +瞬 +瞭 +瞰 +瞳 +瞻 +瞼 +瞿 +矇 +矍 +矗 +矚 +矛 +矜 +矢 +矣 +知 +矩 +矫 +短 +矮 +矯 +石 +矶 +矽 +矾 +矿 +码 +砂 +砌 +砍 +砒 +研 +砖 +砗 +砚 +砝 +砣 +砥 +砧 +砭 +砰 +砲 +破 +砷 +砸 +砺 +砼 +砾 +础 +硅 +硐 +硒 +硕 +硝 +硫 +硬 +确 +硯 +硼 +碁 +碇 +碉 +碌 +碍 +碎 +碑 +碓 +碗 +碘 +碚 +碛 +碟 +碣 +碧 +碩 +碰 +碱 +碳 +碴 +確 +碼 +碾 +磁 +磅 +磊 +磋 +磐 +磕 +磚 +磡 +磨 +磬 +磯 +磲 +磷 +磺 +礁 +礎 +礙 +礡 +礦 +礪 +礫 +礴 +示 +礼 +社 +祀 +祁 +祂 +祇 +祈 +祉 +祎 +祐 +祕 +祖 +祗 +祚 +祛 +祜 +祝 +神 +祟 +祠 +祢 +祥 +票 +祭 +祯 +祷 +祸 +祺 +祿 +禀 +禁 +禄 +禅 +禍 +禎 +福 +禛 +禦 +禧 +禪 +禮 +禱 +禹 +禺 +离 +禽 +禾 +禿 +秀 +私 +秃 +秆 +秉 +秋 +种 +科 +秒 +秘 +租 +秣 +秤 +秦 +秧 +秩 +秭 +积 +称 +秸 +移 +秽 +稀 +稅 +程 +稍 +税 +稔 +稗 +稚 +稜 +稞 +稟 +稠 +稣 +種 +稱 +稲 +稳 +稷 +稹 +稻 +稼 +稽 +稿 +穀 +穂 +穆 +穌 +積 +穎 +穗 +穢 +穩 +穫 +穴 +究 +穷 +穹 +空 +穿 +突 +窃 +窄 +窈 +窍 +窑 +窒 +窓 +窕 +窖 +窗 +窘 +窜 +窝 +窟 +窠 +窥 +窦 +窨 +窩 +窪 +窮 +窯 +窺 +窿 +竄 +竅 +竇 +竊 +立 +竖 +站 +竜 +竞 +竟 +章 +竣 +童 +竭 +端 +競 +竹 +竺 +竽 +竿 +笃 
+笆 +笈 +笋 +笏 +笑 +笔 +笙 +笛 +笞 +笠 +符 +笨 +第 +笹 +笺 +笼 +筆 +等 +筊 +筋 +筍 +筏 +筐 +筑 +筒 +答 +策 +筛 +筝 +筠 +筱 +筲 +筵 +筷 +筹 +签 +简 +箇 +箋 +箍 +箏 +箐 +箔 +箕 +算 +箝 +管 +箩 +箫 +箭 +箱 +箴 +箸 +節 +篁 +範 +篆 +篇 +築 +篑 +篓 +篙 +篝 +篠 +篡 +篤 +篩 +篪 +篮 +篱 +篷 +簇 +簌 +簍 +簡 +簦 +簧 +簪 +簫 +簷 +簸 +簽 +簾 +簿 +籁 +籃 +籌 +籍 +籐 +籟 +籠 +籤 +籬 +籮 +籲 +米 +类 +籼 +籽 +粄 +粉 +粑 +粒 +粕 +粗 +粘 +粟 +粤 +粥 +粧 +粪 +粮 +粱 +粲 +粳 +粵 +粹 +粼 +粽 +精 +粿 +糅 +糊 +糍 +糕 +糖 +糗 +糙 +糜 +糞 +糟 +糠 +糧 +糬 +糯 +糰 +糸 +系 +糾 +紀 +紂 +約 +紅 +紉 +紊 +紋 +納 +紐 +紓 +純 +紗 +紘 +紙 +級 +紛 +紜 +素 +紡 +索 +紧 +紫 +紮 +累 +細 +紳 +紹 +紺 +終 +絃 +組 +絆 +経 +結 +絕 +絞 +絡 +絢 +給 +絨 +絮 +統 +絲 +絳 +絵 +絶 +絹 +綁 +綏 +綑 +經 +継 +続 +綜 +綠 +綢 +綦 +綫 +綬 +維 +綱 +網 +綴 +綵 +綸 +綺 +綻 +綽 +綾 +綿 +緊 +緋 +総 +緑 +緒 +緘 +線 +緝 +緞 +締 +緣 +編 +緩 +緬 +緯 +練 +緹 +緻 +縁 +縄 +縈 +縛 +縝 +縣 +縫 +縮 +縱 +縴 +縷 +總 +績 +繁 +繃 +繆 +繇 +繋 +織 +繕 +繚 +繞 +繡 +繩 +繪 +繫 +繭 +繳 +繹 +繼 +繽 +纂 +續 +纍 +纏 +纓 +纔 +纖 +纜 +纠 +红 +纣 +纤 +约 +级 +纨 +纪 +纫 +纬 +纭 +纯 +纰 +纱 +纲 +纳 +纵 +纶 +纷 +纸 +纹 +纺 +纽 +纾 +线 +绀 +练 +组 +绅 +细 +织 +终 +绊 +绍 +绎 +经 +绑 +绒 +结 +绔 +绕 +绘 +给 +绚 +绛 +络 +绝 +绞 +统 +绡 +绢 +绣 +绥 +绦 +继 +绩 +绪 +绫 +续 +绮 +绯 +绰 +绳 +维 +绵 +绶 +绷 +绸 +绻 +综 +绽 +绾 +绿 +缀 +缄 +缅 +缆 +缇 +缈 +缉 +缎 +缓 +缔 +缕 +编 +缘 +缙 +缚 +缜 +缝 +缠 +缢 +缤 +缥 +缨 +缩 +缪 +缭 +缮 +缰 +缱 +缴 +缸 +缺 +缽 +罂 +罄 +罌 +罐 +网 +罔 +罕 +罗 +罚 +罡 +罢 +罩 +罪 +置 +罰 +署 +罵 +罷 +罹 +羁 +羅 +羈 +羊 +羌 +美 +羔 +羚 +羞 +羟 +羡 +羣 +群 +羥 +羧 +羨 +義 +羯 +羲 +羸 +羹 +羽 +羿 +翁 +翅 +翊 +翌 +翎 +習 +翔 +翘 +翟 +翠 +翡 +翦 +翩 +翰 +翱 +翳 +翹 +翻 +翼 +耀 +老 +考 +耄 +者 +耆 +耋 +而 +耍 +耐 +耒 +耕 +耗 +耘 +耙 +耦 +耨 +耳 +耶 +耷 +耸 +耻 +耽 +耿 +聂 +聆 +聊 +聋 +职 +聒 +联 +聖 +聘 +聚 +聞 +聪 +聯 +聰 +聲 +聳 +聴 +聶 +職 +聽 +聾 +聿 +肃 +肄 +肅 +肆 +肇 +肉 +肋 +肌 +肏 +肓 +肖 +肘 +肚 +肛 +肝 +肠 +股 +肢 +肤 +肥 +肩 +肪 +肮 +肯 +肱 +育 +肴 +肺 +肽 +肾 +肿 +胀 +胁 +胃 +胄 +胆 +背 +胍 +胎 +胖 +胚 +胛 +胜 +胝 +胞 +胡 +胤 +胥 +胧 +胫 +胭 +胯 +胰 +胱 +胳 +胴 +胶 +胸 +胺 +能 +脂 +脅 +脆 +脇 +脈 +脉 +脊 +脍 +脏 +脐 +脑 +脓 +脖 +脘 +脚 +脛 +脣 +脩 +脫 +脯 +脱 +脲 +脳 +脸 +脹 +脾 +腆 +腈 +腊 +腋 +腌 +腎 +腐 +腑 +腓 +腔 +腕 +腥 +腦 +腩 +腫 +腭 +腮 +腰 +腱 +腳 +腴 +腸 +腹 +腺 +腻 +腼 +腾 +腿 +膀 +膈 +膊 +膏 +膑 +膘 +膚 +膛 +膜 +膝 +膠 +膦 +膨 +膩 +膳 +膺 +膻 +膽 +膾 +膿 +臀 +臂 +臃 +臆 +臉 +臊 +臍 +臓 +臘 +臟 +臣 +臥 +臧 +臨 +自 +臬 +臭 +至 +致 +臺 +臻 +臼 +臾 +舀 +舂 +舅 +舆 +與 +興 +舉 +舊 +舌 +舍 +舎 +舐 +舒 +舔 +舖 +舗 +舛 +舜 +舞 +舟 +航 +舫 +般 +舰 +舱 +舵 +舶 +舷 +舸 +船 +舺 
+舾 +艇 +艋 +艘 +艙 +艦 +艮 +良 +艰 +艱 +色 +艳 +艷 +艹 +艺 +艾 +节 +芃 +芈 +芊 +芋 +芍 +芎 +芒 +芙 +芜 +芝 +芡 +芥 +芦 +芩 +芪 +芫 +芬 +芭 +芮 +芯 +花 +芳 +芷 +芸 +芹 +芻 +芽 +芾 +苁 +苄 +苇 +苋 +苍 +苏 +苑 +苒 +苓 +苔 +苕 +苗 +苛 +苜 +苞 +苟 +苡 +苣 +若 +苦 +苫 +苯 +英 +苷 +苹 +苻 +茁 +茂 +范 +茄 +茅 +茉 +茎 +茏 +茗 +茜 +茧 +茨 +茫 +茬 +茭 +茯 +茱 +茲 +茴 +茵 +茶 +茸 +茹 +茼 +荀 +荃 +荆 +草 +荊 +荏 +荐 +荒 +荔 +荖 +荘 +荚 +荞 +荟 +荠 +荡 +荣 +荤 +荥 +荧 +荨 +荪 +荫 +药 +荳 +荷 +荸 +荻 +荼 +荽 +莅 +莆 +莉 +莊 +莎 +莒 +莓 +莖 +莘 +莞 +莠 +莢 +莧 +莪 +莫 +莱 +莲 +莴 +获 +莹 +莺 +莽 +莿 +菀 +菁 +菅 +菇 +菈 +菊 +菌 +菏 +菓 +菖 +菘 +菜 +菟 +菠 +菡 +菩 +華 +菱 +菲 +菸 +菽 +萁 +萃 +萄 +萊 +萋 +萌 +萍 +萎 +萘 +萝 +萤 +营 +萦 +萧 +萨 +萩 +萬 +萱 +萵 +萸 +萼 +落 +葆 +葉 +著 +葚 +葛 +葡 +董 +葦 +葩 +葫 +葬 +葭 +葯 +葱 +葳 +葵 +葷 +葺 +蒂 +蒋 +蒐 +蒔 +蒙 +蒜 +蒞 +蒟 +蒡 +蒨 +蒲 +蒸 +蒹 +蒻 +蒼 +蒿 +蓁 +蓄 +蓆 +蓉 +蓋 +蓑 +蓓 +蓖 +蓝 +蓟 +蓦 +蓬 +蓮 +蓼 +蓿 +蔑 +蔓 +蔔 +蔗 +蔘 +蔚 +蔡 +蔣 +蔥 +蔫 +蔬 +蔭 +蔵 +蔷 +蔺 +蔻 +蔼 +蔽 +蕁 +蕃 +蕈 +蕉 +蕊 +蕎 +蕙 +蕤 +蕨 +蕩 +蕪 +蕭 +蕲 +蕴 +蕻 +蕾 +薄 +薅 +薇 +薈 +薊 +薏 +薑 +薔 +薙 +薛 +薦 +薨 +薩 +薪 +薬 +薯 +薰 +薹 +藉 +藍 +藏 +藐 +藓 +藕 +藜 +藝 +藤 +藥 +藩 +藹 +藻 +藿 +蘆 +蘇 +蘊 +蘋 +蘑 +蘚 +蘭 +蘸 +蘼 +蘿 +虎 +虏 +虐 +虑 +虔 +處 +虚 +虛 +虜 +虞 +號 +虢 +虧 +虫 +虬 +虱 +虹 +虻 +虽 +虾 +蚀 +蚁 +蚂 +蚊 +蚌 +蚓 +蚕 +蚜 +蚝 +蚣 +蚤 +蚩 +蚪 +蚯 +蚱 +蚵 +蛀 +蛆 +蛇 +蛊 +蛋 +蛎 +蛐 +蛔 +蛙 +蛛 +蛟 +蛤 +蛭 +蛮 +蛰 +蛳 +蛹 +蛻 +蛾 +蜀 +蜂 +蜃 +蜆 +蜇 +蜈 +蜊 +蜍 +蜒 +蜓 +蜕 +蜗 +蜘 +蜚 +蜜 +蜡 +蜢 +蜥 +蜱 +蜴 +蜷 +蜻 +蜿 +蝇 +蝈 +蝉 +蝌 +蝎 +蝕 +蝗 +蝙 +蝟 +蝠 +蝦 +蝨 +蝴 +蝶 +蝸 +蝼 +螂 +螃 +融 +螞 +螢 +螨 +螯 +螳 +螺 +蟀 +蟄 +蟆 +蟋 +蟎 +蟑 +蟒 +蟠 +蟬 +蟲 +蟹 +蟻 +蟾 +蠅 +蠍 +蠔 +蠕 +蠛 +蠟 +蠡 +蠢 +蠣 +蠱 +蠶 +蠹 +蠻 +血 +衄 +衅 +衆 +行 +衍 +術 +衔 +街 +衙 +衛 +衝 +衞 +衡 +衢 +衣 +补 +表 +衩 +衫 +衬 +衮 +衰 +衲 +衷 +衹 +衾 +衿 +袁 +袂 +袄 +袅 +袈 +袋 +袍 +袒 +袖 +袜 +袞 +袤 +袪 +被 +袭 +袱 +裁 +裂 +装 +裆 +裊 +裏 +裔 +裕 +裘 +裙 +補 +裝 +裟 +裡 +裤 +裨 +裱 +裳 +裴 +裸 +裹 +製 +裾 +褂 +複 +褐 +褒 +褓 +褔 +褚 +褥 +褪 +褫 +褲 +褶 +褻 +襁 +襄 +襟 +襠 +襪 +襬 +襯 +襲 +西 +要 +覃 +覆 +覇 +見 +規 +覓 +視 +覚 +覦 +覧 +親 +覬 +観 +覷 +覺 +覽 +觀 +见 +观 +规 +觅 +视 +览 +觉 +觊 +觎 +觐 +觑 +角 +觞 +解 +觥 +触 +觸 +言 +訂 +計 +訊 +討 +訓 +訕 +訖 +託 +記 +訛 +訝 +訟 +訣 +訥 +訪 +設 +許 +訳 +訴 +訶 +診 +註 +証 +詆 +詐 +詔 +評 +詛 +詞 +詠 +詡 +詢 +詣 +試 +詩 +詫 +詬 +詭 +詮 +詰 +話 +該 +詳 +詹 +詼 +誅 +誇 +誉 +誌 +認 +誓 +誕 +誘 +語 +誠 +誡 +誣 +誤 +誥 +誦 +誨 +說 +説 +読 +誰 +課 +誹 +誼 +調 +諄 +談 +請 +諏 +諒 +論 +諗 +諜 +諡 +諦 +諧 +諫 +諭 +諮 +諱 +諳 +諷 +諸 +諺 +諾 +謀 +謁 +謂 +謄 +謊 +謎 +謐 
+謔 +謗 +謙 +講 +謝 +謠 +謨 +謬 +謹 +謾 +譁 +證 +譎 +譏 +識 +譙 +譚 +譜 +警 +譬 +譯 +議 +譲 +譴 +護 +譽 +讀 +變 +讓 +讚 +讞 +计 +订 +认 +讥 +讧 +讨 +让 +讪 +讫 +训 +议 +讯 +记 +讲 +讳 +讴 +讶 +讷 +许 +讹 +论 +讼 +讽 +设 +访 +诀 +证 +诃 +评 +诅 +识 +诈 +诉 +诊 +诋 +词 +诏 +译 +试 +诗 +诘 +诙 +诚 +诛 +话 +诞 +诟 +诠 +诡 +询 +诣 +诤 +该 +详 +诧 +诩 +诫 +诬 +语 +误 +诰 +诱 +诲 +说 +诵 +诶 +请 +诸 +诺 +读 +诽 +课 +诿 +谀 +谁 +调 +谄 +谅 +谆 +谈 +谊 +谋 +谌 +谍 +谎 +谏 +谐 +谑 +谒 +谓 +谔 +谕 +谗 +谘 +谙 +谚 +谛 +谜 +谟 +谢 +谣 +谤 +谥 +谦 +谧 +谨 +谩 +谪 +谬 +谭 +谯 +谱 +谲 +谴 +谶 +谷 +豁 +豆 +豇 +豈 +豉 +豊 +豌 +豎 +豐 +豔 +豚 +象 +豢 +豪 +豫 +豬 +豹 +豺 +貂 +貅 +貌 +貓 +貔 +貘 +貝 +貞 +負 +財 +貢 +貧 +貨 +販 +貪 +貫 +責 +貯 +貰 +貳 +貴 +貶 +買 +貸 +費 +貼 +貽 +貿 +賀 +賁 +賂 +賃 +賄 +資 +賈 +賊 +賑 +賓 +賜 +賞 +賠 +賡 +賢 +賣 +賤 +賦 +質 +賬 +賭 +賴 +賺 +購 +賽 +贅 +贈 +贊 +贍 +贏 +贓 +贖 +贛 +贝 +贞 +负 +贡 +财 +责 +贤 +败 +账 +货 +质 +贩 +贪 +贫 +贬 +购 +贮 +贯 +贰 +贱 +贲 +贴 +贵 +贷 +贸 +费 +贺 +贻 +贼 +贾 +贿 +赁 +赂 +赃 +资 +赅 +赈 +赊 +赋 +赌 +赎 +赏 +赐 +赓 +赔 +赖 +赘 +赚 +赛 +赝 +赞 +赠 +赡 +赢 +赣 +赤 +赦 +赧 +赫 +赭 +走 +赳 +赴 +赵 +赶 +起 +趁 +超 +越 +趋 +趕 +趙 +趟 +趣 +趨 +足 +趴 +趵 +趸 +趺 +趾 +跃 +跄 +跆 +跋 +跌 +跎 +跑 +跖 +跚 +跛 +距 +跟 +跡 +跤 +跨 +跩 +跪 +路 +跳 +践 +跷 +跹 +跺 +跻 +踉 +踊 +踌 +踏 +踐 +踝 +踞 +踟 +踢 +踩 +踪 +踮 +踱 +踴 +踵 +踹 +蹂 +蹄 +蹇 +蹈 +蹉 +蹊 +蹋 +蹑 +蹒 +蹙 +蹟 +蹣 +蹤 +蹦 +蹩 +蹬 +蹭 +蹲 +蹴 +蹶 +蹺 +蹼 +蹿 +躁 +躇 +躉 +躊 +躋 +躍 +躏 +躪 +身 +躬 +躯 +躲 +躺 +軀 +車 +軋 +軌 +軍 +軒 +軟 +転 +軸 +軼 +軽 +軾 +較 +載 +輒 +輓 +輔 +輕 +輛 +輝 +輟 +輩 +輪 +輯 +輸 +輻 +輾 +輿 +轄 +轅 +轆 +轉 +轍 +轎 +轟 +车 +轧 +轨 +轩 +转 +轭 +轮 +软 +轰 +轲 +轴 +轶 +轻 +轼 +载 +轿 +较 +辄 +辅 +辆 +辇 +辈 +辉 +辊 +辍 +辐 +辑 +输 +辕 +辖 +辗 +辘 +辙 +辛 +辜 +辞 +辟 +辣 +辦 +辨 +辩 +辫 +辭 +辮 +辯 +辰 +辱 +農 +边 +辺 +辻 +込 +辽 +达 +迁 +迂 +迄 +迅 +过 +迈 +迎 +运 +近 +返 +还 +这 +进 +远 +违 +连 +迟 +迢 +迤 +迥 +迦 +迩 +迪 +迫 +迭 +述 +迴 +迷 +迸 +迹 +迺 +追 +退 +送 +适 +逃 +逅 +逆 +选 +逊 +逍 +透 +逐 +递 +途 +逕 +逗 +這 +通 +逛 +逝 +逞 +速 +造 +逢 +連 +逮 +週 +進 +逵 +逶 +逸 +逻 +逼 +逾 +遁 +遂 +遅 +遇 +遊 +運 +遍 +過 +遏 +遐 +遑 +遒 +道 +達 +違 +遗 +遙 +遛 +遜 +遞 +遠 +遢 +遣 +遥 +遨 +適 +遭 +遮 +遲 +遴 +遵 +遶 +遷 +選 +遺 +遼 +遽 +避 +邀 +邁 +邂 +邃 +還 +邇 +邈 +邊 +邋 +邏 +邑 +邓 +邕 +邛 +邝 +邢 +那 +邦 +邨 +邪 +邬 +邮 +邯 +邰 +邱 +邳 +邵 +邸 +邹 +邺 +邻 +郁 +郅 +郊 +郎 +郑 +郜 +郝 +郡 +郢 +郤 +郦 +郧 +部 +郫 +郭 +郴 +郵 +郷 +郸 +都 +鄂 +鄉 +鄒 +鄔 +鄙 +鄞 +鄢 +鄧 +鄭 +鄰 +鄱 +鄲 +鄺 +酉 +酊 +酋 +酌 +配 +酐 +酒 +酗 +酚 +酝 +酢 +酣 +酥 +酩 +酪 +酬 +酮 +酯 +酰 +酱 +酵 +酶 +酷 +酸 +酿 +醃 +醇 
+醉 +醋 +醍 +醐 +醒 +醚 +醛 +醜 +醞 +醣 +醪 +醫 +醬 +醮 +醯 +醴 +醺 +釀 +釁 +采 +釉 +释 +釋 +里 +重 +野 +量 +釐 +金 +釗 +釘 +釜 +針 +釣 +釦 +釧 +釵 +鈀 +鈉 +鈍 +鈎 +鈔 +鈕 +鈞 +鈣 +鈦 +鈪 +鈴 +鈺 +鈾 +鉀 +鉄 +鉅 +鉉 +鉑 +鉗 +鉚 +鉛 +鉤 +鉴 +鉻 +銀 +銃 +銅 +銑 +銓 +銖 +銘 +銜 +銬 +銭 +銮 +銳 +銷 +銹 +鋁 +鋅 +鋒 +鋤 +鋪 +鋰 +鋸 +鋼 +錄 +錐 +錘 +錚 +錠 +錢 +錦 +錨 +錫 +錮 +錯 +録 +錳 +錶 +鍊 +鍋 +鍍 +鍛 +鍥 +鍰 +鍵 +鍺 +鍾 +鎂 +鎊 +鎌 +鎏 +鎔 +鎖 +鎗 +鎚 +鎧 +鎬 +鎮 +鎳 +鏈 +鏖 +鏗 +鏘 +鏞 +鏟 +鏡 +鏢 +鏤 +鏽 +鐘 +鐮 +鐲 +鐳 +鐵 +鐸 +鐺 +鑄 +鑊 +鑑 +鑒 +鑣 +鑫 +鑰 +鑲 +鑼 +鑽 +鑾 +鑿 +针 +钉 +钊 +钎 +钏 +钒 +钓 +钗 +钙 +钛 +钜 +钝 +钞 +钟 +钠 +钡 +钢 +钣 +钤 +钥 +钦 +钧 +钨 +钩 +钮 +钯 +钰 +钱 +钳 +钴 +钵 +钺 +钻 +钼 +钾 +钿 +铀 +铁 +铂 +铃 +铄 +铅 +铆 +铉 +铎 +铐 +铛 +铜 +铝 +铠 +铡 +铢 +铣 +铤 +铨 +铩 +铬 +铭 +铮 +铰 +铲 +铵 +银 +铸 +铺 +链 +铿 +销 +锁 +锂 +锄 +锅 +锆 +锈 +锉 +锋 +锌 +锏 +锐 +锑 +错 +锚 +锟 +锡 +锢 +锣 +锤 +锥 +锦 +锭 +键 +锯 +锰 +锲 +锵 +锹 +锺 +锻 +镀 +镁 +镂 +镇 +镉 +镌 +镍 +镐 +镑 +镕 +镖 +镗 +镛 +镜 +镣 +镭 +镯 +镰 +镳 +镶 +長 +长 +門 +閃 +閉 +開 +閎 +閏 +閑 +閒 +間 +閔 +閘 +閡 +関 +閣 +閥 +閨 +閩 +閱 +閲 +閹 +閻 +閾 +闆 +闇 +闊 +闌 +闍 +闔 +闕 +闖 +闘 +關 +闡 +闢 +门 +闪 +闫 +闭 +问 +闯 +闰 +闲 +间 +闵 +闷 +闸 +闹 +闺 +闻 +闽 +闾 +阀 +阁 +阂 +阅 +阆 +阇 +阈 +阉 +阎 +阐 +阑 +阔 +阕 +阖 +阙 +阚 +阜 +队 +阡 +阪 +阮 +阱 +防 +阳 +阴 +阵 +阶 +阻 +阿 +陀 +陂 +附 +际 +陆 +陇 +陈 +陋 +陌 +降 +限 +陕 +陛 +陝 +陞 +陟 +陡 +院 +陣 +除 +陨 +险 +陪 +陰 +陲 +陳 +陵 +陶 +陷 +陸 +険 +陽 +隅 +隆 +隈 +隊 +隋 +隍 +階 +随 +隐 +隔 +隕 +隘 +隙 +際 +障 +隠 +隣 +隧 +隨 +險 +隱 +隴 +隶 +隸 +隻 +隼 +隽 +难 +雀 +雁 +雄 +雅 +集 +雇 +雉 +雋 +雌 +雍 +雎 +雏 +雑 +雒 +雕 +雖 +雙 +雛 +雜 +雞 +離 +難 +雨 +雪 +雯 +雰 +雲 +雳 +零 +雷 +雹 +電 +雾 +需 +霁 +霄 +霆 +震 +霈 +霉 +霊 +霍 +霎 +霏 +霑 +霓 +霖 +霜 +霞 +霧 +霭 +霰 +露 +霸 +霹 +霽 +霾 +靂 +靄 +靈 +青 +靓 +靖 +静 +靚 +靛 +靜 +非 +靠 +靡 +面 +靥 +靦 +革 +靳 +靴 +靶 +靼 +鞅 +鞋 +鞍 +鞏 +鞑 +鞘 +鞠 +鞣 +鞦 +鞭 +韆 +韋 +韌 +韓 +韜 +韦 +韧 +韩 +韬 +韭 +音 +韵 +韶 +韻 +響 +頁 +頂 +頃 +項 +順 +須 +頌 +預 +頑 +頒 +頓 +頗 +領 +頜 +頡 +頤 +頫 +頭 +頰 +頷 +頸 +頹 +頻 +頼 +顆 +題 +額 +顎 +顏 +顔 +願 +顛 +類 +顧 +顫 +顯 +顱 +顴 +页 +顶 +顷 +项 +顺 +须 +顼 +顽 +顾 +顿 +颁 +颂 +预 +颅 +领 +颇 +颈 +颉 +颊 +颌 +颍 +颐 +频 +颓 +颔 +颖 +颗 +题 +颚 +颛 +颜 +额 +颞 +颠 +颡 +颢 +颤 +颦 +颧 +風 +颯 +颱 +颳 +颶 +颼 +飄 +飆 +风 +飒 +飓 +飕 +飘 +飙 +飚 +飛 +飞 +食 +飢 +飨 +飩 +飪 +飯 +飲 +飼 +飽 +飾 +餃 +餅 +餉 +養 +餌 +餐 +餒 +餓 +餘 +餚 +餛 +餞 +餡 +館 +餮 +餵 +餾 +饅 +饈 +饋 +饌 +饍 +饑 +饒 +饕 +饗 +饞 +饥 +饨 +饪 +饬 +饭 +饮 +饯 +饰 +饱 +饲 +饴 +饵 +饶 +饷 +饺 +饼 +饽 +饿 +馀 +馁 +馄 +馅 +馆 +馈 +馋 
+馍 +馏 +馒 +馔 +首 +馗 +香 +馥 +馨 +馬 +馭 +馮 +馳 +馴 +駁 +駄 +駅 +駆 +駐 +駒 +駕 +駛 +駝 +駭 +駱 +駿 +騁 +騎 +騏 +験 +騙 +騨 +騰 +騷 +驀 +驅 +驊 +驍 +驒 +驕 +驗 +驚 +驛 +驟 +驢 +驥 +马 +驭 +驮 +驯 +驰 +驱 +驳 +驴 +驶 +驷 +驸 +驹 +驻 +驼 +驾 +驿 +骁 +骂 +骄 +骅 +骆 +骇 +骈 +骊 +骋 +验 +骏 +骐 +骑 +骗 +骚 +骛 +骜 +骞 +骠 +骡 +骤 +骥 +骧 +骨 +骯 +骰 +骶 +骷 +骸 +骼 +髂 +髅 +髋 +髏 +髒 +髓 +體 +髖 +高 +髦 +髪 +髮 +髯 +髻 +鬃 +鬆 +鬍 +鬓 +鬚 +鬟 +鬢 +鬣 +鬥 +鬧 +鬱 +鬼 +魁 +魂 +魄 +魅 +魇 +魍 +魏 +魔 +魘 +魚 +魯 +魷 +鮑 +鮨 +鮪 +鮭 +鮮 +鯉 +鯊 +鯖 +鯛 +鯨 +鯰 +鯽 +鰍 +鰓 +鰭 +鰲 +鰻 +鰾 +鱈 +鱉 +鱔 +鱗 +鱷 +鱸 +鱼 +鱿 +鲁 +鲈 +鲍 +鲑 +鲛 +鲜 +鲟 +鲢 +鲤 +鲨 +鲫 +鲱 +鲲 +鲶 +鲷 +鲸 +鳃 +鳄 +鳅 +鳌 +鳍 +鳕 +鳖 +鳗 +鳝 +鳞 +鳥 +鳩 +鳳 +鳴 +鳶 +鴉 +鴕 +鴛 +鴦 +鴨 +鴻 +鴿 +鵑 +鵜 +鵝 +鵡 +鵬 +鵰 +鵲 +鶘 +鶩 +鶯 +鶴 +鷗 +鷲 +鷹 +鷺 +鸚 +鸞 +鸟 +鸠 +鸡 +鸢 +鸣 +鸥 +鸦 +鸨 +鸪 +鸭 +鸯 +鸳 +鸵 +鸽 +鸾 +鸿 +鹂 +鹃 +鹄 +鹅 +鹈 +鹉 +鹊 +鹌 +鹏 +鹑 +鹕 +鹘 +鹜 +鹞 +鹤 +鹦 +鹧 +鹫 +鹭 +鹰 +鹳 +鹵 +鹹 +鹼 +鹽 +鹿 +麂 +麋 +麒 +麓 +麗 +麝 +麟 +麥 +麦 +麩 +麴 +麵 +麸 +麺 +麻 +麼 +麽 +麾 +黃 +黄 +黍 +黎 +黏 +黑 +黒 +黔 +默 +黛 +黜 +黝 +點 +黠 +黨 +黯 +黴 +鼋 +鼎 +鼐 +鼓 +鼠 +鼬 +鼹 +鼻 +鼾 +齁 +齊 +齋 +齐 +齒 +齡 +齢 +齣 +齦 +齿 +龄 +龅 +龈 +龊 +龋 +龌 +龍 +龐 +龔 +龕 +龙 +龚 +龛 +龜 +龟 +︰ +︱ +︶ +︿ +﹁ +﹂ +﹍ +﹏ +﹐ +﹑ +﹒ +﹔ +﹕ +﹖ +﹗ +﹙ +﹚ +﹝ +﹞ +﹡ +﹣ +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +@ +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +。 +「 +」 +、 +・ +ッ +ー +イ +ク +シ +ス +ト +ノ +フ +ラ +ル +ン +゙ +゚ + ̄ +¥ +👍 +🔥 +😂 +😎 +... 
+yam +10 +2017 +12 +11 +2016 +20 +30 +15 +06 +lofter +##s +2015 +by +16 +14 +18 +13 +24 +17 +2014 +21 +##0 +22 +19 +25 +23 +com +100 +00 +05 +2013 +##a +03 +09 +08 +28 +##2 +50 +01 +04 +##1 +27 +02 +2012 +##3 +26 +##e +07 +##8 +##5 +##6 +##4 +##9 +##7 +29 +2011 +40 +##t +2010 +##o +##d +##i +2009 +##n +app +www +the +##m +31 +##c +##l +##y +##r +##g +2008 +60 +http +200 +qq +##p +80 +##f +google +pixnet +90 +cookies +tripadvisor +500 +##er +##k +35 +##h +facebook +2007 +2000 +70 +##b +of +##x +##u +45 +300 +iphone +32 +1000 +2006 +48 +ip +36 +in +38 +3d +##w +##ing +55 +ctrip +##on +##v +33 +##の +to +34 +400 +id +2005 +it +37 +windows +llc +top +99 +42 +39 +000 +led +at +##an +41 +51 +52 +46 +49 +43 +53 +44 +##z +android +58 +and +59 +2004 +56 +vr +##か +5000 +2003 +47 +blogthis +twitter +54 +##le +150 +ok +2018 +57 +75 +cn +no +ios +##in +##mm +##00 +800 +on +te +3000 +65 +2001 +360 +95 +ig +lv +120 +##ng +##を +##us +##に +pc +てす +── +600 +##te +85 +2002 +88 +##ed +html +ncc +wifi +email +64 +blog +is +##10 +##て +mail +online +##al +dvd +##ic +studio +##は +##℃ +##ia +##と +line +vip +72 +##q +98 +##ce +##en +for +##is +##ra +##es +##j +usb +net +cp +1999 +asia +4g +##cm +diy +new +3c +##お +ta +66 +language +vs +apple +tw +86 +web +##ne +ipad +62 +you +##re +101 +68 +##tion +ps +de +bt +pony +atm +##2017 +1998 +67 +##ch +ceo +##or +go +##na +av +pro +cafe +96 +pinterest +97 +63 +pixstyleme3c +##ta +more +said +##2016 +1997 +mp3 +700 +##ll +nba +jun +##20 +92 +tv +1995 +pm +61 +76 +nbsp +250 +##ie +linux +##ma +cd +110 +hd +##17 +78 +##ion +77 +6000 +am +##th +##st +94 +##se +##et +69 +180 +gdp +my +105 +81 +abc +89 +flash +79 +one +93 +1990 +1996 +##ck +gps +##も +##ly +web885 +106 +2020 +91 +##ge +4000 +1500 +xd +boss +isbn +1994 +org +##ry +me +love +##11 +0fork +73 +##12 +3g +##ter +##ar +71 +82 +##la +hotel +130 +1970 +pk +83 +87 +140 +ie +##os +##30 +##el +74 +##50 +seo +cpu +##ml +p2p +84 +may +##る +sun +tue +internet +cc +posted +youtube +##at +##ン +##man +ii 
+##ル +##15 +abs +nt +pdf +yahoo +ago +1980 +##it +news +mac +104 +##てす +##me +##り +java +1992 +spa +##de +##nt +hk +all +plus +la +1993 +##mb +##16 +##ve +west +##da +160 +air +##い +##ps +から +##to +1989 +logo +htc +php +https +fi +momo +##son +sat +##ke +##80 +ebd +suv +wi +day +apk +##88 +##um +mv +galaxy +wiki +or +brake +##ス +1200 +する +this +1991 +mon +##こ +❤2017 +po +##ない +javascript +life +home +june +##ss +system +900 +##ー +##0 +pp +1988 +world +fb +4k +br +##as +ic +ai +leonardo +safari +##60 +live +free +xx +wed +win7 +kiehl +##co +lg +o2o +##go +us +235 +1949 +mm +しい +vfm +kanye +##90 +##2015 +##id +jr +##ey +123 +rss +##sa +##ro +##am +##no +thu +fri +350 +##sh +##ki +103 +comments +name +##のて +##pe +##ine +max +1987 +8000 +uber +##mi +##ton +wordpress +office +1986 +1985 +##ment +107 +bd +win10 +##ld +##li +gmail +bb +dior +##rs +##ri +##rd +##ます +up +cad +##® +dr +して +read +##21 +をお +##io +##99 +url +1984 +pvc +paypal +show +policy +##40 +##ty +##18 +with +##★ +##01 +txt +102 +##ba +dna +from +post +mini +ar +taiwan +john +##ga +privacy +agoda +##13 +##ny +word +##24 +##22 +##by +##ur +##hz +1982 +##ang +265 +cookie +netscape +108 +##ka +##~ +##ad +house +share +note +ibm +code +hello +nike +sim +survey +##016 +1979 +1950 +wikia +##32 +##017 +5g +cbc +##tor +##kg +1983 +##rt +##14 +campaign +store +2500 +os +##ct +##ts +##° +170 +api +##ns +365 +excel +##な +##ao +##ら +##し +~~ +##nd +university +163 +には +518 +##70 +##ya +##il +##25 +pierre +ipo +0020 +897 +##23 +hotels +##ian +のお +125 +years +6606 +##ers +##26 +high +##day +time +##ay +bug +##line +##く +##す +##be +xp +talk2yam +yamservice +10000 +coco +##dy +sony +##ies +1978 +microsoft +david +people +##ha +1960 +instagram +intel +その +##ot +iso +1981 +##va +115 +##mo +##land +xxx +man +co +ltxsw +##ation +baby +220 +##pa +##ol +1945 +7000 +tag +450 +##ue +msn +##31 +oppo +##ト +##ca +control +##om +st +chrome +##ure +##ん +be +##き +lol +##19 +した +##bo +240 +lady +##100 +##way +##から +4600 +##ko +##do +##un 
+4s +corporation +168 +##ni +herme +##28 +cp +978 +##up +##06 +ui +##ds +ppt +admin +three +します +bbc +re +128 +##48 +ca +##015 +##35 +hp +##ee +tpp +##た +##ive +×× +root +##cc +##ました +##ble +##ity +adobe +park +114 +et +oled +city +##ex +##ler +##ap +china +##book +20000 +view +##ice +global +##km +your +hong +##mg +out +##ms +ng +ebay +##29 +menu +ubuntu +##cy +rom +##view +open +ktv +do +server +##lo +if +english +##ね +##5 +##oo +1600 +##02 +step1 +kong +club +135 +july +inc +1976 +mr +hi +##net +touch +##ls +##ii +michael +lcd +##05 +##33 +phone +james +step2 +1300 +ios9 +##box +dc +##2 +##ley +samsung +111 +280 +pokemon +css +##ent +##les +いいえ +##1 +s8 +atom +play +bmw +##said +sa +etf +ctrl +♥yoyo♥ +##55 +2025 +##2014 +##66 +adidas +amazon +1958 +##ber +##ner +visa +##77 +##der +1800 +connectivity +##hi +firefox +109 +118 +hr +so +style +mark +pop +ol +skip +1975 +as +##27 +##ir +##61 +190 +mba +##う +##ai +le +##ver +1900 +cafe2017 +lte +super +113 +129 +##ron +amd +like +##☆ +are +##ster +we +##sk +paul +data +international +##ft +longchamp +ssd +good +##ート +##ti +reply +##my +↓↓↓ +apr +star +##ker +source +136 +js +112 +get +force +photo +##one +126 +##2013 +##ow +link +bbs +1972 +goods +##lin +python +119 +##ip +game +##ics +##ません +blue +##● +520 +##45 +page +itunes +##03 +1955 +260 +1968 +gt +gif +618 +##ff +##47 +group +くたさい +about +bar +ganji +##nce +music +lee +not +1977 +1971 +1973 +##per +an +faq +comment +##って +days +##ock +116 +##bs +1974 +1969 +v1 +player +1956 +xbox +sql +fm +f1 +139 +##ah +210 +##lv +##mp +##000 +melody +1957 +##3 +550 +17life +199 +1966 +xml +market +##au +##71 +999 +##04 +what +gl +##95 +##age +tips +##68 +book +##ting +mysql +can +1959 +230 +##ung +wonderland +watch +10℃ +##ction +9000 +mar +mobile +1946 +1962 +article +##db +part +▲top +party +って +1967 +1964 +1948 +##07 +##ore +##op +この +dj +##78 +##38 +010 +main +225 +1965 +##ong +art +320 +ad +134 +020 +##73 +117 +pm2 +japan +228 +##08 +ts +1963 +##ica +der +sm +##36 +2019 
+##wa +ct +##7 +##や +##64 +1937 +homemesh +search +##85 +##れは +##tv +##di +macbook +##9 +##くたさい +service +##♥ +type +った +750 +##ier +##si +##75 +##います +##ok +best +##ット +goris +lock +##った +cf +3m +big +##ut +ftp +carol +##vi +10 +1961 +happy +sd +##ac +122 +anti +pe +cnn +iii +1920 +138 +##ラ +1940 +esp +jan +tags +##98 +##51 +august +vol +##86 +154 +##™ +##fs +##れ +##sion +design +ac +##ム +press +jordan +ppp +that +key +check +##6 +##tt +##㎡ +1080p +##lt +power +##42 +1952 +##bc +vivi +##ック +he +133 +121 +jpg +##rry +201 +175 +3500 +1947 +nb +##ted +##rn +しています +1954 +usd +##t00 +master +##ンク +001 +model +##58 +al +##09 +1953 +##34 +ram +goo +ても +##ui +127 +1930 +red +##ary +rpg +item +##pm +##41 +270 +##za +project +##2012 +hot +td +blogabstract +##ger +##62 +650 +##44 +gr2 +##します +##m +black +electronic +nfc +year +asus +また +html5 +cindy +##hd +m3 +132 +esc +##od +booking +##53 +fed +tvb +##81 +##ina +mit +165 +##いる +chan +192 +distribution +next +になる +peter +bios +steam +cm +1941 +にも +pk10 +##ix +##65 +##91 +dec +nasa +##ana +icecat +00z +b1 +will +##46 +li +se +##ji +##み +##ard +oct +##ain +jp +##ze +##bi +cio +##56 +smart +h5 +##39 +##port +curve +vpn +##nm +##dia +utc +##あり +12345678910 +##52 +rmvb +chanel +a4 +miss +##and +##im +media +who +##63 +she +girl +5s +124 +vera +##して +class +vivo +king +##フ +##ei +national +ab +1951 +5cm +888 +145 +ipod +ap +1100 +5mm +211 +ms +2756 +##69 +mp4 +msci +##po +##89 +131 +mg +index +380 +##bit +##out +##zz +##97 +##67 +158 +apec +##8 +photoshop +opec +¥799 +ては +##96 +##tes +##ast +2g +○○ +##ール +¥2899 +##ling +##よ +##ory +1938 +##ical +kitty +content +##43 +step3 +##cn +win8 +155 +vc +1400 +iphone7 +robert +##した +tcl +137 +beauty +##87 +en +dollars +##ys +##oc +step +pay +yy +a1 +##2011 +##lly +##ks +##♪ +1939 +188 +download +1944 +sep +exe +ph +います +school +gb +center +pr +street +##board +uv +##37 +##lan +winrar +##que +##ua +##com +1942 +1936 +480 +gpu +##4 +ettoday +fu +tom +##54 +##ren +##via +149 +##72 +b2b +144 
+##79 +##tch +rose +arm +mb +##49 +##ial +##nn +nvidia +step4 +mvp +00㎡ +york +156 +##イ +how +cpi +591 +2765 +gov +kg +joe +##xx +mandy +pa +##ser +copyright +fashion +1935 +don +##け +ecu +##ist +##art +erp +wap +have +##lm +talk +##ek +##ning +##if +ch +##ite +video +1943 +cs +san +iot +look +##84 +##2010 +##ku +october +##ux +trump +##hs +##ide +box +141 +first +##ins +april +##ight +##83 +185 +angel +protected +aa +151 +162 +x1 +m2 +##fe +##× +##ho +size +143 +min +ofo +fun +gomaji +ex +hdmi +food +dns +march +chris +kevin +##のか +##lla +##pp +##ec +ag +ems +6s +720p +##rm +##ham +off +##92 +asp +team +fandom +ed +299 +▌♥ +##ell +info +されています +##82 +sina +4066 +161 +##able +##ctor +330 +399 +315 +dll +rights +ltd +idc +jul +3kg +1927 +142 +ma +surface +##76 +##ク +~~~ +304 +mall +eps +146 +green +##59 +map +space +donald +v2 +sodu +##light +1931 +148 +1700 +まて +310 +reserved +htm +##han +##57 +2d +178 +mod +##ise +##tions +152 +ti +##shi +doc +1933 +icp +055 +wang +##ram +shopping +aug +##pi +##well +now +wam +b2 +からお +##hu +236 +1928 +##gb +266 +f2 +##93 +153 +mix +##ef +##uan +bwl +##plus +##res +core +##ess +tea +5℃ +hktvmall +nhk +##ate +list +##ese +301 +feb +4m +inn +ての +nov +159 +12345 +daniel +##ci +pass +##bet +##nk +coffee +202 +ssl +airbnb +##ute +fbi +woshipm +skype +ea +cg +sp +##fc +##www +yes +edge +alt +007 +##94 +fpga +##ght +##gs +iso9001 +さい +##ile +##wood +##uo +image +lin +icon +american +##em +1932 +set +says +##king +##tive +blogger +##74 +なと +256 +147 +##ox +##zy +##red +##ium +##lf +nokia +claire +##リ +##ding +november +lohas +##500 +##tic +##マ +##cs +##ある +##che +##ire +##gy +##ult +db +january +win +##カ +166 +road +ptt +##ま +##つ +198 +##fa +##mer +anna +pchome +はい +udn +ef +420 +##time +##tte +2030 +##ア +g20 +white +かかります +1929 +308 +garden +eleven +di +##おります +chen +309b +777 +172 +young +cosplay +ちてない +4500 +bat +##123 +##tra +##ては +kindle +npc +steve +etc +##ern +##| +call +xperia +ces +travel +sk +s7 +##ous +1934 +##int +みいたたけます +183 
+edu +file +cho +qr +##car +##our +186 +##ant +##d +eric +1914 +rends +##jo +##する +mastercard +##2000 +kb +##min +290 +##ino +vista +##ris +##ud +jack +2400 +##set +169 +pos +1912 +##her +##ou +taipei +しく +205 +beta +##ませんか +232 +##fi +express +255 +body +##ill +aphojoy +user +december +meiki +##ick +tweet +richard +##av +##ᆫ +iphone6 +##dd +ちてすか +views +##mark +321 +pd +##00 +times +##▲ +level +##ash +10g +point +5l +##ome +208 +koreanmall +##ak +george +q2 +206 +wma +tcp +##200 +スタッフ +full +mlb +##lle +##watch +tm +run +179 +911 +smith +business +##und +1919 +color +##tal +222 +171 +##less +moon +4399 +##rl +update +pcb +shop +499 +157 +little +なし +end +##mhz +van +dsp +easy +660 +##house +##key +history +##o +oh +##001 +##hy +##web +oem +let +was +##2009 +##gg +review +##wan +182 +##°c +203 +uc +title +##val +united +233 +2021 +##ons +doi +trivago +overdope +sbs +##ance +##ち +grand +special +573032185 +imf +216 +wx17house +##so +##ーム +audi +##he +london +william +##rp +##ake +science +beach +cfa +amp +ps4 +880 +##800 +##link +##hp +crm +ferragamo +bell +make +##eng +195 +under +zh +photos +2300 +##style +##ント +via +176 +da +##gi +company +i7 +##ray +thomas +370 +ufo +i5 +##max +plc +ben +back +research +8g +173 +mike +##pc +##ッフ +september +189 +##ace +vps +february +167 +pantos +wp +lisa +1921 +★★ +jquery +night +long +offer +##berg +##news +1911 +##いて +ray +fks +wto +せます +over +164 +340 +##all +##rus +1924 +##888 +##works +blogtitle +loftpermalink +##→ +187 +martin +test +ling +km +##め +15000 +fda +v3 +##ja +##ロ +wedding +かある +outlet +family +##ea +をこ +##top +story +##ness +salvatore +##lu +204 +swift +215 +room +している +oracle +##ul +1925 +sam +b2c +week +pi +rock +##のは +##a +##けと +##ean +##300 +##gle +cctv +after +chinese +##back +powered +x2 +##tan +1918 +##nes +##イン +canon +only +181 +##zi +##las +say +##oe +184 +##sd +221 +##bot +##world +##zo +sky +made +top100 +just +1926 +pmi +802 +234 +gap +##vr +177 +les +174 +▲topoct +ball +vogue +vi +ing +ofweek +cos 
+##list +##ort +▲topmay +##なら +##lon +として +last +##tc +##of +##bus +##gen +real +eva +##コ +a3 +nas +##lie +##ria +##coin +##bt +▲topapr +his +212 +cat +nata +vive +health +⋯⋯ +drive +sir +▲topmar +du +cup +##カー +##ook +##よう +##sy +alex +msg +tour +しました +3ce +##word +193 +ebooks +r8 +block +318 +##より +2200 +nice +pvp +207 +months +1905 +rewards +##ther +1917 +0800 +##xi +##チ +##sc +micro +850 +gg +blogfp +op +1922 +daily +m1 +264 +true +##bb +ml +##tar +##のお +##ky +anthony +196 +253 +##yo +state +218 +##ara +##aa +##rc +##tz +##ston +より +gear +##eo +##ade +ge +see +1923 +##win +##ura +ss +heart +##den +##ita +down +##sm +el +png +2100 +610 +rakuten +whatsapp +bay +dream +add +##use +680 +311 +pad +gucci +mpv +##ode +##fo +island +▲topjun +##▼ +223 +jason +214 +chicago +##❤ +しの +##hone +io +##れる +##ことか +sogo +be2 +##ology +990 +cloud +vcd +##con +2~3 +##ford +##joy +##kb +##こさいます +##rade +but +##ach +docker +##ful +rfid +ul +##ase +hit +ford +##star +580 +##○ +11 +a2 +sdk +reading +edited +##are +cmos +##mc +238 +siri +light +##ella +##ため +bloomberg +##read +pizza +##ison +jimmy +##vm +college +node +journal +ba +18k +##play +245 +##cer +20 +magic +##yu +191 +jump +288 +tt +##ings +asr +##lia +3200 +step5 +network +##cd +mc +いします +1234 +pixstyleme +273 +##600 +2800 +money +★★★★★ +1280 +12 +430 +bl +みの +act +##tus +tokyo +##rial +##life +emba +##ae +saas +tcs +##rk +##wang +summer +##sp +ko +##ving +390 +premium +##その +netflix +##ヒ +uk +mt +##lton +right +frank +two +209 +える +##ple +##cal +021 +##んな +##sen +##ville +hold +nexus +dd +##ius +てお +##mah +##なく +tila +zero +820 +ce +##tin +resort +##ws +charles +old +p10 +5d +report +##360 +##ru +##には +bus +vans +lt +##est +pv +##レ +links +rebecca +##ツ +##dm +azure +##365 +きな +limited +bit +4gb +##mon +1910 +moto +##eam +213 +1913 +var +eos +なとの +226 +blogspot +された +699 +e3 +dos +dm +fc +##ments +##ik +##kw +boy +##bin +##ata +960 +er +##せ +219 +##vin +##tu +##ula +194 +##∥ +station +##ろ +##ature +835 +files +zara +hdr 
+top10 +nature +950 +magazine +s6 +marriott +##シ +avira +case +##っと +tab +##ran +tony +##home +oculus +im +##ral +jean +saint +cry +307 +rosie +##force +##ini +ice +##bert +のある +##nder +##mber +pet +2600 +##◆ +plurk +▲topdec +##sis +00kg +▲topnov +720 +##ence +tim +##ω +##nc +##ても +##name +log +ips +great +ikea +malaysia +unix +##イト +3600 +##ncy +##nie +12000 +akb48 +##ye +##oid +404 +##chi +##いた +oa +xuehai +##1000 +##orm +##rf +275 +さん +##ware +##リー +980 +ho +##pro +text +##era +560 +bob +227 +##ub +##2008 +8891 +scp +avi +##zen +2022 +mi +wu +museum +qvod +apache +lake +jcb +▲topaug +★★★ +ni +##hr +hill +302 +ne +weibo +490 +ruby +##ーシ +##ヶ +##row +4d +▲topjul +iv +##ish +github +306 +mate +312 +##スト +##lot +##ane +andrew +のハイト +##tina +t1 +rf +ed2k +##vel +##900 +way +final +りの +ns +5a +705 +197 +##メ +sweet +bytes +##ene +▲topjan +231 +##cker +##2007 +##px +100g +topapp +229 +helpapp +rs +low +14k +g4g +care +630 +ldquo +あり +##fork +leave +rm +edition +##gan +##zon +##qq +▲topsep +##google +##ism +gold +224 +explorer +##zer +toyota +category +select +visual +##labels +restaurant +##md +posts +s1 +##ico +もっと +angelababy +123456 +217 +sports +s3 +mbc +1915 +してくたさい +shell +x86 +candy +##new +kbs +face +xl +470 +##here +4a +swissinfo +v8 +▲topfeb +dram +##ual +##vice +3a +##wer +sport +q1 +ios10 +public +int +card +##c +ep +au +rt +##れた +1080 +bill +##mll +kim +30 +460 +wan +##uk +##ミ +x3 +298 +0t +scott +##ming +239 +e5 +##3d +h7n9 +worldcat +brown +##あります +##vo +##led +##580 +##ax +249 +410 +##ert +paris +##~6 +polo +925 +##lr +599 +##ナ +capital +##hing +bank +cv +1g +##chat +##s +##たい +adc +##ule +2m +##e +digital +hotmail +268 +##pad +870 +bbq +quot +##ring +before +wali +##まて +mcu +2k +2b +という +costco +316 +north +333 +switch +##city +##p +philips +##mann +management +panasonic +##cl +##vd +##ping +##rge +alice +##lk +##ましょう +css3 +##ney +vision +alpha +##ular +##400 +##tter +lz +にお +##ありません +mode +gre +1916 +pci +##tm +237 +1~2 +##yan +##そ +について +##let +##キ 
+work +war +coach +ah +mary +##ᅵ +huang +##pt +a8 +pt +follow +##berry +1895 +##ew +a5 +ghost +##ション +##wn +##og +south +##code +girls +##rid +action +villa +git +r11 +table +games +##cket +error +##anonymoussaid +##ag +here +##ame +##gc +qa +##■ +##lis +gmp +##gin +vmalife +##cher +yu +wedding +##tis +demo +dragon +530 +soho +social +bye +##rant +river +orz +acer +325 +##↑ +##ース +##ats +261 +del +##ven +440 +ups +##ように +##ター +305 +value +macd +yougou +##dn +661 +##ano +ll +##urt +##rent +continue +script +##wen +##ect +paper +263 +319 +shift +##chel +##フト +##cat +258 +x5 +fox +243 +##さん +car +aaa +##blog +loading +##yn +##tp +kuso +799 +si +sns +イカせるテンマ +ヒンクテンマ3 +rmb +vdc +forest +central +prime +help +ultra +##rmb +##ような +241 +square +688 +##しい +のないフロクに +##field +##reen +##ors +##ju +c1 +start +510 +##air +##map +cdn +##wo +cba +stephen +m8 +100km +##get +opera +##base +##ood +vsa +com™ +##aw +##ail +251 +なのて +count +t2 +##ᅡ +##een +2700 +hop +##gp +vsc +tree +##eg +##ose +816 +285 +##ories +##shop +alphago +v4 +1909 +simon +##ᆼ +fluke62max +zip +スホンサー +##sta +louis +cr +bas +##~10 +bc +##yer +hadoop +##ube +##wi +1906 +0755 +hola +##low +place +centre +5v +d3 +##fer +252 +##750 +##media +281 +540 +0l +exchange +262 +series +##ハー +##san +eb +##bank +##k +q3 +##nge +##mail +take +##lp +259 +1888 +client +east +cache +event +vincent +##ールを +きを +##nse +sui +855 +adchoice +##и +##stry +##なたの +246 +##zone +ga +apps +sea +##ab +248 +cisco +##タ +##rner +kymco +##care +dha +##pu +##yi +minkoff +royal +p1 +への +annie +269 +collection +kpi +playstation +257 +になります +866 +bh +##bar +queen +505 +radio +1904 +andy +armani +##xy +manager +iherb +##ery +##share +spring +raid +johnson +1908 +##ob +volvo +hall +##ball +v6 +our +taylor +##hk +bi +242 +##cp +kate +bo +water +technology +##rie +サイトは +277 +##ona +##sl +hpv +303 +gtx +hip +rdquo +jayz +stone +##lex +##rum +namespace +##やり +620 +##ale +##atic +des +##erson +##ql +##ves +##type +enter +##この +##てきます +d2 +##168 +##mix 
+##bian +との +a9 +jj +ky +##lc +access +movie +##hc +リストに +tower +##ration +##mit +ます +##nch +ua +tel +prefix +##o2 +1907 +##point +1901 +ott +~10 +##http +##ury +baidu +##ink +member +##logy +bigbang +nownews +##js +##shot +##tb +##こと +247 +eba +##tics +##lus +ける +v5 +spark +##ama +there +##ions +god +##lls +##down +hiv +##ress +burberry +day2 +##kv +◆◆ +jeff +related +film +edit +joseph +283 +##ark +cx +32gb +order +g9 +30000 +##ans +##tty +s5 +##bee +かあります +thread +xr +buy +sh +005 +land +spotify +mx +##ari +276 +##verse +×email +sf +why +##ことて +244 +7headlines +nego +sunny +dom +exo +401 +666 +positioning +fit +rgb +##tton +278 +kiss +alexa +adam +lp +みリストを +##g +mp +##ties +##llow +amy +##du +np +002 +institute +271 +##rth +##lar +2345 +590 +##des +sidebar +15 +imax +site +##cky +##kit +##ime +##009 +season +323 +##fun +##ンター +##ひ +gogoro +a7 +pu +lily +fire +twd600 +##ッセーシを +いて +##vis +30ml +##cture +##をお +information +##オ +close +friday +##くれる +yi +nick +てすか +##tta +##tel +6500 +##lock +cbd +economy +254 +かお +267 +tinker +double +375 +8gb +voice +##app +oops +channel +today +985 +##right +raw +xyz +##+ +jim +edm +##cent +7500 +supreme +814 +ds +##its +##asia +dropbox +##てすか +##tti +books +272 +100ml +##tle +##ller +##ken +##more +##boy +sex +309 +##dom +t3 +##ider +##なります +##unch +1903 +810 +feel +5500 +##かった +##put +により +s2 +mo +##gh +men +ka +amoled +div +##tr +##n1 +port +howard +##tags +ken +dnf +##nus +adsense +##а +ide +##へ +buff +thunder +##town +##ique +has +##body +auto +pin +##erry +tee +てした +295 +number +##the +##013 +object +psp +cool +udnbkk +16gb +##mic +miui +##tro +most +r2 +##alk +##nity +1880 +±0 +##いました +428 +s4 +law +version +##oa +n1 +sgs +docomo +##tf +##ack +henry +fc2 +##ded +##sco +##014 +##rite +286 +0mm +linkedin +##ada +##now +wii +##ndy +ucbug +##◎ +sputniknews +legalminer +##ika +##xp +2gb +##bu +q10 +oo +b6 +come +##rman +cheese +ming +maker +##gm +nikon +##fig +ppi +kelly +##ります +jchere +てきます +ted +md +003 +fgo +tech +##tto 
+dan +soc +##gl +##len +hair +earth +640 +521 +img +##pper +##a1 +##てきる +##ロク +acca +##ition +##ference +suite +##ig +outlook +##mond +##cation +398 +##pr +279 +101vip +358 +##999 +282 +64gb +3800 +345 +airport +##over +284 +##おり +jones +##ith +lab +##su +##いるのて +co2 +town +piece +##llo +no1 +vmware +24h +##qi +focus +reader +##admin +##ora +tb +false +##log +1898 +know +lan +838 +##ces +f4 +##ume +motel +stop +##oper +na +flickr +netcomponents +##af +##─ +pose +williams +local +##ound +##cg +##site +##iko +いお +274 +5m +gsm +con +##ath +1902 +friends +##hip +cell +317 +##rey +780 +cream +##cks +012 +##dp +facebooktwitterpinterestgoogle +sso +324 +shtml +song +swiss +##mw +##キンク +lumia +xdd +string +tiffany +522 +marc +られた +insee +russell +sc +dell +##ations +ok +camera +289 +##vs +##flow +##late +classic +287 +##nter +stay +g1 +mtv +512 +##ever +##lab +##nger +qe +sata +ryan +d1 +50ml +cms +##cing +su +292 +3300 +editor +296 +##nap +security +sunday +association +##ens +##700 +##bra +acg +##かり +sofascore +とは +mkv +##ign +jonathan +gary +build +labels +##oto +tesla +moba +qi +gohappy +general +ajax +1024 +##かる +サイト +society +##test +##urs +wps +fedora +##ich +mozilla +328 +##480 +##dr +usa +urn +##lina +##r +grace +##die +##try +##ader +1250 +##なり +elle +570 +##chen +##ᆯ +price +##ten +uhz +##ough +eq +##hen +states +push +session +balance +wow +506 +##cus +##py +when +##ward +##ep +34e +wong +library +prada +##サイト +##cle +running +##ree +313 +ck +date +q4 +##ctive +##ool +##> +mk +##ira +##163 +388 +die +secret +rq +dota +buffet +は1ヶ +e6 +##ez +pan +368 +ha +##card +##cha +2a +##さ +alan +day3 +eye +f3 +##end +france +keep +adi +rna +tvbs +##ala +solo +nova +##え +##tail +##ょう +support +##ries +##なる +##ved +base +copy +iis +fps +##ways +hero +hgih +profile +fish +mu +ssh +entertainment +chang +##wd +click +cake +##ond +pre +##tom +kic +pixel +##ov +##fl +product +6a +##pd +dear +##gate +es +yumi +audio +##² +##sky +echo +bin +where +##ture +329 +##ape +find +sap 
+isis +##なと +nand +##101 +##load +##ream +band +a6 +525 +never +##post +festival +50cm +##we +555 +guide +314 +zenfone +##ike +335 +gd +forum +jessica +strong +alexander +##ould +software +allen +##ious +program +360° +else +lohasthree +##gar +することかてきます +please +##れます +rc +##ggle +##ric +bim +50000 +##own +eclipse +355 +brian +3ds +##side +061 +361 +##other +##ける +##tech +##ator +485 +engine +##ged +##t +plaza +##fit +cia +ngo +westbrook +shi +tbs +50mm +##みませんか +sci +291 +reuters +##ily +contextlink +##hn +af +##cil +bridge +very +##cel +1890 +cambridge +##ize +15g +##aid +##data +790 +frm +##head +award +butler +##sun +meta +##mar +america +ps3 +puma +pmid +##すか +lc +670 +kitchen +##lic +オーフン5 +きなしソフトサーヒス +そして +day1 +future +★★★★ +##text +##page +##rris +pm1 +##ket +fans +##っています +1001 +christian +bot +kids +trackback +##hai +c3 +display +##hl +n2 +1896 +idea +さんも +##sent +airmail +##ug +##men +pwm +けます +028 +##lution +369 +852 +awards +schemas +354 +asics +wikipedia +font +##tional +##vy +c2 +293 +##れている +##dget +##ein +っている +contact +pepper +スキル +339 +##~5 +294 +##uel +##ument +730 +##hang +みてす +q5 +##sue +rain +##ndi +wei +swatch +##cept +わせ +331 +popular +##ste +##tag +p2 +501 +trc +1899 +##west +##live +justin +honda +ping +messenger +##rap +v9 +543 +##とは +unity +appqq +はすへて +025 +leo +##tone +##テ +##ass +uniqlo +##010 +502 +her +jane +memory +moneydj +##tical +human +12306 +していると +##m2 +coc +miacare +##mn +tmt +##core +vim +kk +##may +fan +target +use +too +338 +435 +2050 +867 +737 +fast +##2c +services +##ope +omega +energy +##わ +pinkoi +1a +##なから +##rain +jackson +##ement +##シャンルの +374 +366 +そんな +p9 +rd +##ᆨ +1111 +##tier +##vic +zone +##│ +385 +690 +dl +isofix +cpa +m4 +322 +kimi +めて +davis +##lay +lulu +##uck +050 +weeks +qs +##hop +920 +##n +ae +##ear +~5 +eia +405 +##fly +korea +jpeg +boost +##ship +small +##リア +1860 +eur +297 +425 +valley +##iel +simple +##ude +rn +k2 +##ena +されます +non +patrick +しているから +##ナー +feed +5757 +30g +process +well +qqmei 
+##thing +they +aws +lu +pink +##ters +##kin +または +board +##vertisement +wine +##ien +unicode +##dge +r1 +359 +##tant +いを +##twitter +##3c +cool1 +される +##れて +##l +isp +##012 +standard +45㎡2 +402 +##150 +matt +##fu +326 +##iner +googlemsn +pixnetfacebookyahoo +##ラン +x7 +886 +##uce +メーカー +sao +##ev +##きました +##file +9678 +403 +xddd +shirt +6l +##rio +##hat +3mm +givenchy +ya +bang +##lio +monday +crystal +ロクイン +##abc +336 +head +890 +ubuntuforumwikilinuxpastechat +##vc +##~20 +##rity +cnc +7866 +ipv6 +null +1897 +##ost +yang +imsean +tiger +##fet +##ンス +352 +##= +dji +327 +ji +maria +##come +##んて +foundation +3100 +##beth +##なった +1m +601 +active +##aft +##don +3p +sr +349 +emma +##khz +living +415 +353 +1889 +341 +709 +457 +sas +x6 +##face +pptv +x4 +##mate +han +sophie +##jing +337 +fifa +##mand +other +sale +inwedding +##gn +てきちゃいます +##mmy +##pmlast +bad +nana +nbc +してみてくたさいね +なとはお +##wu +##かあります +##あ +note7 +single +##340 +せからこ +してくたさい♪この +しにはとんとんワークケートを +するとあなたにもっとマッチした +ならワークケートへ +もみつかっちゃうかも +ワークケートの +##bel +window +##dio +##ht +union +age +382 +14 +##ivity +##y +コメント +domain +neo +##isa +##lter +5k +f5 +steven +##cts +powerpoint +tft +self +g2 +ft +##テル +zol +##act +mwc +381 +343 +もう +nbapop +408 +てある +eds +ace +##room +previous +author +tomtom +il +##ets +hu +financial +☆☆☆ +っています +bp +5t +chi +1gb +##hg +fairmont +cross +008 +gay +h2 +function +##けて +356 +also +1b +625 +##ータ +##raph +1894 +3~5 +##ils +i3 +334 +avenue +##host +による +##bon +##tsu +message +navigation +50g +fintech +h6 +##ことを +8cm +##ject +##vas +##firm +credit +##wf +xxxx +form +##nor +##space +huawei +plan +json +sbl +##dc +machine +921 +392 +wish +##120 +##sol +windows7 +edward +##ために +development +washington +##nsis +lo +818 +##sio +##ym +##bor +planet +##~8 +##wt +ieee +gpa +##めて +camp +ann +gm +##tw +##oka +connect +##rss +##work +##atus +wall +chicken +soul +2mm +##times +fa +##ather +##cord +009 +##eep +hitachi +gui +harry +##pan +e1 +disney +##press +##ーション +wind +386 +frigidaire +##tl 
+liu +hsu +332 +basic +von +ev +いた +てきる +スホンサーサイト +learning +##ull +expedia +archives +change +##wei +santa +cut +ins +6gb +turbo +brand +cf1 +508 +004 +return +747 +##rip +h1 +##nis +##をこ +128gb +##にお +3t +application +しており +emc +rx +##oon +384 +quick +412 +15058 +wilson +wing +chapter +##bug +beyond +##cms +##dar +##oh +zoom +e2 +trip +sb +##nba +rcep +342 +aspx +ci +080 +gc +gnu +める +##count +advanced +dance +dv +##url +##ging +367 +8591 +am09 +shadow +battle +346 +##i +##cia +##という +emily +##のてす +##tation +host +ff +techorz +sars +##mini +##mporary +##ering +nc +4200 +798 +##next +cma +##mbps +##gas +##ift +##dot +##ィ +455 +##~17 +amana +##りの +426 +##ros +ir +00㎡1 +##eet +##ible +##↓ +710 +ˋ▽ˊ +##aka +dcs +iq +##v +l1 +##lor +maggie +##011 +##iu +588 +##~1 +830 +##gt +1tb +articles +create +##burg +##iki +database +fantasy +##rex +##cam +dlc +dean +##you +hard +path +gaming +victoria +maps +cb +##lee +##itor +overchicstoretvhome +systems +##xt +416 +p3 +sarah +760 +##nan +407 +486 +x9 +install +second +626 +##ann +##ph +##rcle +##nic +860 +##nar +ec +##とう +768 +metro +chocolate +##rian +~4 +##table +##しています +skin +##sn +395 +mountain +##0mm +inparadise +6m +7x24 +ib +4800 +##jia +eeworld +creative +g5 +g3 +357 +parker +ecfa +village +からの +18000 +sylvia +サーヒス +hbl +##ques +##onsored +##x2 +##きます +##v4 +##tein +ie6 +383 +##stack +389 +ver +##ads +##baby +sound +bbe +##110 +##lone +##uid +ads +022 +gundam +351 +thinkpad +006 +scrum +match +##ave +mems +##470 +##oy +##なりました +##talk +glass +lamigo +span +##eme +job +##a5 +jay +wade +kde +498 +##lace +ocean +tvg +##covery +##r3 +##ners +##rea +junior +think +##aine +cover +##ision +##sia +↓↓ +##bow +msi +413 +458 +406 +##love +711 +801 +soft +z2 +##pl +456 +1840 +mobil +mind +##uy +427 +nginx +##oi +めた +##rr +6221 +##mple +##sson +##ーシてす +371 +##nts +91tv +comhd +crv3000 +##uard +1868 +397 +deep +lost +field +gallery +##bia +rate +spf +redis +traction +930 +icloud +011 +なら +fe +jose +372 +##tory +into +sohu +fx +899 
+379 +kicstart2 +##hia +すく +##~3 +##sit +ra +24 +##walk +##xure +500g +##pact +pacific +xa +natural +carlo +##250 +##walker +1850 +##can +cto +gigi +516 +##サー +pen +##hoo +ob +matlab +##b +##yy +13913459 +##iti +mango +##bbs +sense +c5 +oxford +##ニア +walker +jennifer +##ola +course +##bre +701 +##pus +##rder +lucky +075 +##ぁ +ivy +なお +##nia +sotheby +side +##ugh +joy +##orage +##ush +##bat +##dt +364 +r9 +##2d +##gio +511 +country +wear +##lax +##~7 +##moon +393 +seven +study +411 +348 +lonzo +8k +##ェ +evolution +##イフ +##kk +gs +kd +##レス +arduino +344 +b12 +##lux +arpg +##rdon +cook +##x5 +dark +five +##als +##ida +とても +sign +362 +##ちの +something +20mm +##nda +387 +##posted +fresh +tf +1870 +422 +cam +##mine +##skip +##form +##ssion +education +394 +##tee +dyson +stage +##jie +want +##night +epson +pack +あります +##ppy +テリヘル +##█ +wd +##eh +##rence +left +##lvin +golden +mhz +discovery +##trix +##n2 +loft +##uch +##dra +##sse +speed +~1 +1mdb +sorry +welcome +##urn +wave +gaga +##lmer +teddy +##160 +トラックハック +せよ +611 +##f2016 +378 +rp +##sha +rar +##あなたに +##きた +840 +holiday +##ュー +373 +074 +##vg +##nos +##rail +gartner +gi +6p +##dium +kit +488 +b3 +eco +##ろう +20g +sean +##stone +autocad +nu +##np +f16 +write +029 +m5 +##ias +images +atp +##dk +fsm +504 +1350 +ve +52kb +##xxx +##のに +##cake +414 +unit +lim +ru +1v +##ification +published +angela +16g +analytics +ak +##q +##nel +gmt +##icon +again +##₂ +##bby +ios11 +445 +かこさいます +waze +いてす +##ハ +9985 +##ust +##ティー +framework +##007 +iptv +delete +52sykb +cl +wwdc +027 +30cm +##fw +##ての +1389 +##xon +brandt +##ses +##dragon +tc +vetements +anne +monte +modern +official +##へて +##ere +##nne +##oud +もちろん +50 +etnews +##a2 +##graphy +421 +863 +##ちゃん +444 +##rtex +##てお +l2 +##gma +mount +ccd +たと +archive +morning +tan +ddos +e7 +##ホ +day4 +##ウ +gis +453 +its +495 +factory +bruce +pg +##ito +ってくたさい +guest +cdma +##lling +536 +n3 +しかし +3~4 +mega +eyes +ro +13 +women +dac +church +##jun +singapore +##facebook +6991 +starbucks 
+##tos +##stin +##shine +zen +##mu +tina +20℃ +1893 +##たけて +503 +465 +request +##gence +qt +##っ +1886 +347 +363 +q7 +##zzi +diary +##tore +409 +##ead +468 +cst +##osa +canada +agent +va +##jiang +##ちは +##ーク +##lam +sg +##nix +##sday +##よって +g6 +##master +bing +##zl +charlie +16 +8mm +nb40 +##ーン +thai +##ルフ +ln284ct +##itz +##2f +bonnie +##food +##lent +originals +##stro +##lts +418 +∟∣ +##bscribe +children +ntd +yesstyle +##かも +hmv +##tment +d5 +2cm +arts +sms +##pn +##я +##いい +topios9 +539 +lifestyle +virtual +##ague +xz +##deo +muji +024 +unt +##nnis +##ᅩ +faq1 +1884 +396 +##ette +fly +64㎡ +はしめまして +441 +curry +##pop +のこ +release +##← +##◆◆ +##cast +073 +ありな +500ml +##ews +5c +##stle +ios7 +##ima +787 +dog +lenovo +##r4 +roger +013 +cbs +vornado +100m +417 +##desk +##クok +##ald +1867 +9595 +2900 +##van +oil +##x +some +break +common +##jy +##lines +g7 +twice +419 +ella +nano +belle +にこ +##mes +##self +##note +jb +##ことかてきます +benz +##との +##ova +451 +save +##wing +##ますのて +kai +りは +##hua +##rect +rainer +##unge +448 +##0m +adsl +##かな +guestname +##uma +##kins +##zu +tokichoi +##price +county +##med +##mus +rmk +391 +address +vm +えて +openload +##group +##hin +##iginal +amg +urban +##oz +jobs +emi +##public +beautiful +##sch +album +##dden +##bell +jerry +works +hostel +miller +##drive +##rmin +##10 +376 +boot +828 +##370 +##fx +##cm~ +1885 +##nome +##ctionary +##oman +##lish +##cr +##hm +433 +##how +432 +francis +xi +c919 +b5 +evernote +##uc +vga +##3000 +coupe +##urg +##cca +##uality +019 +6g +れる +multi +##また +##ett +em +hey +##ani +##tax +##rma +inside +than +740 +leonnhurt +##jin +ict +れた +bird +notes +200mm +くの +##dical +##lli +result +442 +iu +ee +438 +smap +gopro +##last +yin +pure +998 +32g +けた +5kg +##dan +##rame +mama +##oot +bean +marketing +##hur +2l +bella +sync +xuite +##ground +515 +discuz +##getrelax +##ince +##bay +##5s +cj +##イス +gmat +apt +##pass +jing +##rix +c4 +rich +##とても +niusnews +##ello +bag +770 +##eting +##mobile +18 +culture +015 +##のてすか 
+377 +1020 +area +##ience +616 +details +gp +universal +silver +dit +はお +private +ddd +u11 +kanshu +##ified +fung +##nny +dx +##520 +tai +475 +023 +##fr +##lean +3s +##pin +429 +##rin +25000 +ly +rick +##bility +usb3 +banner +##baru +##gion +metal +dt +vdf +1871 +karl +qualcomm +bear +1010 +oldid +ian +jo +##tors +population +##ernel +1882 +mmorpg +##mv +##bike +603 +##© +ww +friend +##ager +exhibition +##del +##pods +fpx +structure +##free +##tings +kl +##rley +##copyright +##mma +california +3400 +orange +yoga +4l +canmake +honey +##anda +##コメント +595 +nikkie +##ルハイト +dhl +publishing +##mall +##gnet +20cm +513 +##クセス +##┅ +e88 +970 +##dog +fishbase +##! +##" +### +##$ +##% +##& +##' +##( +##) +##* +##+ +##, +##- +##. +##/ +##: +##; +##< +##= +##> +##? +##@ +##[ +##\ +##] +##^ +##_ +##{ +##| +##} +##~ +##£ +##¤ +##¥ +##§ +##« +##± +##³ +##µ +##· +##¹ +##º +##» +##¼ +##ß +##æ +##÷ +##ø +##đ +##ŋ +##ɔ +##ə +##ɡ +##ʰ +##ˇ +##ˈ +##ˊ +##ˋ +##ˍ +##ː +##˙ +##˚ +##ˢ +##α +##β +##γ +##δ +##ε +##η +##θ +##ι +##κ +##λ +##μ +##ν +##ο +##π +##ρ +##ς +##σ +##τ +##υ +##φ +##χ +##ψ +##б +##в +##г +##д +##е +##ж +##з +##к +##л +##м +##н +##о +##п +##р +##с +##т +##у +##ф +##х +##ц +##ч +##ш +##ы +##ь +##і +##ا +##ب +##ة +##ت +##د +##ر +##س +##ع +##ل +##م +##ن +##ه +##و +##ي +##۩ +##ก +##ง +##น +##ม +##ย +##ร +##อ +##า +##เ +##๑ +##་ +##ღ +##ᄀ +##ᄁ +##ᄂ +##ᄃ +##ᄅ +##ᄆ +##ᄇ +##ᄈ +##ᄉ +##ᄋ +##ᄌ +##ᄎ +##ᄏ +##ᄐ +##ᄑ +##ᄒ +##ᅢ +##ᅣ +##ᅥ +##ᅦ +##ᅧ +##ᅨ +##ᅪ +##ᅬ +##ᅭ +##ᅮ +##ᅯ +##ᅲ +##ᅳ +##ᅴ +##ᆷ +##ᆸ +##ᆺ +##ᆻ +##ᗜ +##ᵃ +##ᵉ +##ᵍ +##ᵏ +##ᵐ +##ᵒ +##ᵘ +##‖ +##„ +##† +##• +##‥ +##‧ +##
 +##‰ +##′ +##″ +##‹ +##› +##※ +##‿ +##⁄ +##ⁱ +##⁺ +##ⁿ +##₁ +##₃ +##₄ +##€ +##№ +##ⅰ +##ⅱ +##ⅲ +##ⅳ +##ⅴ +##↔ +##↗ +##↘ +##⇒ +##∀ +##− +##∕ +##∙ +##√ +##∞ +##∟ +##∠ +##∣ +##∩ +##∮ +##∶ +##∼ +##∽ +##≈ +##≒ +##≡ +##≤ +##≥ +##≦ +##≧ +##≪ +##≫ +##⊙ +##⋅ +##⋈ +##⋯ +##⌒ +##① +##② +##③ +##④ +##⑤ +##⑥ +##⑦ +##⑧ +##⑨ +##⑩ +##⑴ +##⑵ +##⑶ +##⑷ +##⑸ +##⒈ +##⒉ +##⒊ +##⒋ +##ⓒ +##ⓔ +##ⓘ +##━ +##┃ +##┆ +##┊ +##┌ +##└ +##├ +##┣ +##═ +##║ +##╚ +##╞ +##╠ +##╭ +##╮ +##╯ +##╰ +##╱ +##╳ +##▂ +##▃ +##▅ +##▇ +##▉ +##▋ +##▌ +##▍ +##▎ +##□ +##▪ +##▫ +##▬ +##△ +##▶ +##► +##▽ +##◇ +##◕ +##◠ +##◢ +##◤ +##☀ +##☕ +##☞ +##☺ +##☼ +##♀ +##♂ +##♠ +##♡ +##♣ +##♦ +##♫ +##♬ +##✈ +##✔ +##✕ +##✖ +##✦ +##✨ +##✪ +##✰ +##✿ +##❀ +##➜ +##➤ +##⦿ +##、 +##。 +##〃 +##々 +##〇 +##〈 +##〉 +##《 +##》 +##「 +##」 +##『 +##』 +##【 +##】 +##〓 +##〔 +##〕 +##〖 +##〗 +##〜 +##〝 +##〞 +##ぃ +##ぇ +##ぬ +##ふ +##ほ +##む +##ゃ +##ゅ +##ゆ +##ょ +##゜ +##ゝ +##ァ +##ゥ +##エ +##ォ +##ケ +##サ +##セ +##ソ +##ッ +##ニ +##ヌ +##ネ +##ノ +##ヘ +##モ +##ャ +##ヤ +##ュ +##ユ +##ョ +##ヨ +##ワ +##ヲ +##・ +##ヽ +##ㄅ +##ㄆ +##ㄇ +##ㄉ +##ㄋ +##ㄌ +##ㄍ +##ㄎ +##ㄏ +##ㄒ +##ㄚ +##ㄛ +##ㄞ +##ㄟ +##ㄢ +##ㄤ +##ㄥ +##ㄧ +##ㄨ +##ㆍ +##㈦ +##㊣ +##㗎 +##一 +##丁 +##七 +##万 +##丈 +##三 +##上 +##下 +##不 +##与 +##丐 +##丑 +##专 +##且 +##丕 +##世 +##丘 +##丙 +##业 +##丛 +##东 +##丝 +##丞 +##丟 +##両 +##丢 +##两 +##严 +##並 +##丧 +##丨 +##个 +##丫 +##中 +##丰 +##串 +##临 +##丶 +##丸 +##丹 +##为 +##主 +##丼 +##丽 +##举 +##丿 +##乂 +##乃 +##久 +##么 +##义 +##之 +##乌 +##乍 +##乎 +##乏 +##乐 +##乒 +##乓 +##乔 +##乖 +##乗 +##乘 +##乙 +##乜 +##九 +##乞 +##也 +##习 +##乡 +##书 +##乩 +##买 +##乱 +##乳 +##乾 +##亀 +##亂 +##了 +##予 +##争 +##事 +##二 +##于 +##亏 +##云 +##互 +##五 +##井 +##亘 +##亙 +##亚 +##些 +##亜 +##亞 +##亟 +##亡 +##亢 +##交 +##亥 +##亦 +##产 +##亨 +##亩 +##享 +##京 +##亭 +##亮 +##亲 +##亳 +##亵 +##人 +##亿 +##什 +##仁 +##仃 +##仄 +##仅 +##仆 +##仇 +##今 +##介 +##仍 +##从 +##仏 +##仑 +##仓 +##仔 +##仕 +##他 +##仗 +##付 +##仙 +##仝 +##仞 +##仟 +##代 +##令 +##以 +##仨 +##仪 +##们 +##仮 +##仰 +##仲 +##件 +##价 +##任 +##份 +##仿 +##企 +##伉 +##伊 +##伍 +##伎 +##伏 +##伐 +##休 +##伕 +##众 +##优 +##伙 +##会 +##伝 +##伞 +##伟 +##传 +##伢 +##伤 +##伦 +##伪 +##伫 +##伯 +##估 
+##伴 +##伶 +##伸 +##伺 +##似 +##伽 +##佃 +##但 +##佇 +##佈 +##位 +##低 +##住 +##佐 +##佑 +##体 +##佔 +##何 +##佗 +##佘 +##余 +##佚 +##佛 +##作 +##佝 +##佞 +##佟 +##你 +##佢 +##佣 +##佤 +##佥 +##佩 +##佬 +##佯 +##佰 +##佳 +##併 +##佶 +##佻 +##佼 +##使 +##侃 +##侄 +##來 +##侈 +##例 +##侍 +##侏 +##侑 +##侖 +##侗 +##供 +##依 +##侠 +##価 +##侣 +##侥 +##侦 +##侧 +##侨 +##侬 +##侮 +##侯 +##侵 +##侶 +##侷 +##便 +##係 +##促 +##俄 +##俊 +##俎 +##俏 +##俐 +##俑 +##俗 +##俘 +##俚 +##保 +##俞 +##俟 +##俠 +##信 +##俨 +##俩 +##俪 +##俬 +##俭 +##修 +##俯 +##俱 +##俳 +##俸 +##俺 +##俾 +##倆 +##倉 +##個 +##倌 +##倍 +##倏 +##們 +##倒 +##倔 +##倖 +##倘 +##候 +##倚 +##倜 +##借 +##倡 +##値 +##倦 +##倩 +##倪 +##倫 +##倬 +##倭 +##倶 +##债 +##值 +##倾 +##偃 +##假 +##偈 +##偉 +##偌 +##偎 +##偏 +##偕 +##做 +##停 +##健 +##側 +##偵 +##偶 +##偷 +##偻 +##偽 +##偿 +##傀 +##傅 +##傍 +##傑 +##傘 +##備 +##傚 +##傢 +##傣 +##傥 +##储 +##傩 +##催 +##傭 +##傲 +##傳 +##債 +##傷 +##傻 +##傾 +##僅 +##働 +##像 +##僑 +##僕 +##僖 +##僚 +##僥 +##僧 +##僭 +##僮 +##僱 +##僵 +##價 +##僻 +##儀 +##儂 +##億 +##儆 +##儉 +##儋 +##儒 +##儕 +##儘 +##償 +##儡 +##優 +##儲 +##儷 +##儼 +##儿 +##兀 +##允 +##元 +##兄 +##充 +##兆 +##兇 +##先 +##光 +##克 +##兌 +##免 +##児 +##兑 +##兒 +##兔 +##兖 +##党 +##兜 +##兢 +##入 +##內 +##全 +##兩 +##八 +##公 +##六 +##兮 +##兰 +##共 +##兲 +##关 +##兴 +##兵 +##其 +##具 +##典 +##兹 +##养 +##兼 +##兽 +##冀 +##内 +##円 +##冇 +##冈 +##冉 +##冊 +##册 +##再 +##冏 +##冒 +##冕 +##冗 +##写 +##军 +##农 +##冠 +##冢 +##冤 +##冥 +##冨 +##冪 +##冬 +##冯 +##冰 +##冲 +##决 +##况 +##冶 +##冷 +##冻 +##冼 +##冽 +##冾 +##净 +##凄 +##准 +##凇 +##凈 +##凉 +##凋 +##凌 +##凍 +##减 +##凑 +##凛 +##凜 +##凝 +##几 +##凡 +##凤 +##処 +##凪 +##凭 +##凯 +##凰 +##凱 +##凳 +##凶 +##凸 +##凹 +##出 +##击 +##函 +##凿 +##刀 +##刁 +##刃 +##分 +##切 +##刈 +##刊 +##刍 +##刎 +##刑 +##划 +##列 +##刘 +##则 +##刚 +##创 +##初 +##删 +##判 +##別 +##刨 +##利 +##刪 +##别 +##刮 +##到 +##制 +##刷 +##券 +##刹 +##刺 +##刻 +##刽 +##剁 +##剂 +##剃 +##則 +##剉 +##削 +##剋 +##剌 +##前 +##剎 +##剐 +##剑 +##剔 +##剖 +##剛 +##剜 +##剝 +##剣 +##剤 +##剥 +##剧 +##剩 +##剪 +##副 +##割 +##創 +##剷 +##剽 +##剿 +##劃 +##劇 +##劈 +##劉 +##劊 +##劍 +##劏 +##劑 +##力 +##劝 +##办 +##功 +##加 +##务 +##劣 +##动 +##助 +##努 +##劫 +##劭 +##励 +##劲 +##劳 +##労 +##劵 +##効 +##劾 +##势 +##勁 +##勃 +##勇 +##勉 +##勋 +##勐 +##勒 +##動 +##勖 +##勘 +##務 +##勛 
+##勝 +##勞 +##募 +##勢 +##勤 +##勧 +##勳 +##勵 +##勸 +##勺 +##勻 +##勾 +##勿 +##匀 +##包 +##匆 +##匈 +##匍 +##匐 +##匕 +##化 +##北 +##匙 +##匝 +##匠 +##匡 +##匣 +##匪 +##匮 +##匯 +##匱 +##匹 +##区 +##医 +##匾 +##匿 +##區 +##十 +##千 +##卅 +##升 +##午 +##卉 +##半 +##卍 +##华 +##协 +##卑 +##卒 +##卓 +##協 +##单 +##卖 +##南 +##単 +##博 +##卜 +##卞 +##卟 +##占 +##卡 +##卢 +##卤 +##卦 +##卧 +##卫 +##卮 +##卯 +##印 +##危 +##即 +##却 +##卵 +##卷 +##卸 +##卻 +##卿 +##厂 +##厄 +##厅 +##历 +##厉 +##压 +##厌 +##厕 +##厘 +##厚 +##厝 +##原 +##厢 +##厥 +##厦 +##厨 +##厩 +##厭 +##厮 +##厲 +##厳 +##去 +##县 +##叁 +##参 +##參 +##又 +##叉 +##及 +##友 +##双 +##反 +##収 +##发 +##叔 +##取 +##受 +##变 +##叙 +##叛 +##叟 +##叠 +##叡 +##叢 +##口 +##古 +##句 +##另 +##叨 +##叩 +##只 +##叫 +##召 +##叭 +##叮 +##可 +##台 +##叱 +##史 +##右 +##叵 +##叶 +##号 +##司 +##叹 +##叻 +##叼 +##叽 +##吁 +##吃 +##各 +##吆 +##合 +##吉 +##吊 +##吋 +##同 +##名 +##后 +##吏 +##吐 +##向 +##吒 +##吓 +##吕 +##吖 +##吗 +##君 +##吝 +##吞 +##吟 +##吠 +##吡 +##否 +##吧 +##吨 +##吩 +##含 +##听 +##吭 +##吮 +##启 +##吱 +##吳 +##吴 +##吵 +##吶 +##吸 +##吹 +##吻 +##吼 +##吽 +##吾 +##呀 +##呂 +##呃 +##呆 +##呈 +##告 +##呋 +##呎 +##呐 +##呓 +##呕 +##呗 +##员 +##呛 +##呜 +##呢 +##呤 +##呦 +##周 +##呱 +##呲 +##味 +##呵 +##呷 +##呸 +##呻 +##呼 +##命 +##咀 +##咁 +##咂 +##咄 +##咆 +##咋 +##和 +##咎 +##咏 +##咐 +##咒 +##咔 +##咕 +##咖 +##咗 +##咘 +##咙 +##咚 +##咛 +##咣 +##咤 +##咦 +##咧 +##咨 +##咩 +##咪 +##咫 +##咬 +##咭 +##咯 +##咱 +##咲 +##咳 +##咸 +##咻 +##咽 +##咿 +##哀 +##品 +##哂 +##哄 +##哆 +##哇 +##哈 +##哉 +##哋 +##哌 +##响 +##哎 +##哏 +##哐 +##哑 +##哒 +##哔 +##哗 +##哟 +##員 +##哥 +##哦 +##哧 +##哨 +##哩 +##哪 +##哭 +##哮 +##哲 +##哺 +##哼 +##哽 +##唁 +##唄 +##唆 +##唇 +##唉 +##唏 +##唐 +##唑 +##唔 +##唠 +##唤 +##唧 +##唬 +##售 +##唯 +##唰 +##唱 +##唳 +##唷 +##唸 +##唾 +##啃 +##啄 +##商 +##啉 +##啊 +##問 +##啓 +##啕 +##啖 +##啜 +##啞 +##啟 +##啡 +##啤 +##啥 +##啦 +##啧 +##啪 +##啫 +##啬 +##啮 +##啰 +##啱 +##啲 +##啵 +##啶 +##啷 +##啸 +##啻 +##啼 +##啾 +##喀 +##喂 +##喃 +##善 +##喆 +##喇 +##喉 +##喊 +##喋 +##喎 +##喏 +##喔 +##喘 +##喙 +##喚 +##喜 +##喝 +##喟 +##喧 +##喪 +##喫 +##喬 +##單 +##喰 +##喱 +##喲 +##喳 +##喵 +##営 +##喷 +##喹 +##喺 +##喻 +##喽 +##嗅 +##嗆 +##嗇 +##嗎 +##嗑 +##嗒 +##嗓 +##嗔 +##嗖 +##嗚 +##嗜 +##嗝 +##嗟 +##嗡 +##嗣 +##嗤 +##嗦 +##嗨 +##嗪 +##嗬 +##嗯 +##嗰 +##嗲 +##嗳 +##嗶 +##嗷 +##嗽 
+##嘀 +##嘅 +##嘆 +##嘈 +##嘉 +##嘌 +##嘍 +##嘎 +##嘔 +##嘖 +##嘗 +##嘘 +##嘚 +##嘛 +##嘜 +##嘞 +##嘟 +##嘢 +##嘣 +##嘤 +##嘧 +##嘩 +##嘭 +##嘮 +##嘯 +##嘰 +##嘱 +##嘲 +##嘴 +##嘶 +##嘸 +##嘹 +##嘻 +##嘿 +##噁 +##噌 +##噎 +##噓 +##噔 +##噗 +##噙 +##噜 +##噠 +##噢 +##噤 +##器 +##噩 +##噪 +##噬 +##噱 +##噴 +##噶 +##噸 +##噹 +##噻 +##噼 +##嚀 +##嚇 +##嚎 +##嚏 +##嚐 +##嚓 +##嚕 +##嚟 +##嚣 +##嚥 +##嚨 +##嚮 +##嚴 +##嚷 +##嚼 +##囂 +##囉 +##囊 +##囍 +##囑 +##囔 +##囗 +##囚 +##四 +##囝 +##回 +##囟 +##因 +##囡 +##团 +##団 +##囤 +##囧 +##囪 +##囫 +##园 +##困 +##囱 +##囲 +##図 +##围 +##囹 +##固 +##国 +##图 +##囿 +##圃 +##圄 +##圆 +##圈 +##國 +##圍 +##圏 +##園 +##圓 +##圖 +##團 +##圜 +##土 +##圣 +##圧 +##在 +##圩 +##圭 +##地 +##圳 +##场 +##圻 +##圾 +##址 +##坂 +##均 +##坊 +##坍 +##坎 +##坏 +##坐 +##坑 +##块 +##坚 +##坛 +##坝 +##坞 +##坟 +##坠 +##坡 +##坤 +##坦 +##坨 +##坪 +##坯 +##坳 +##坵 +##坷 +##垂 +##垃 +##垄 +##型 +##垒 +##垚 +##垛 +##垠 +##垢 +##垣 +##垦 +##垩 +##垫 +##垭 +##垮 +##垵 +##埂 +##埃 +##埋 +##城 +##埔 +##埕 +##埗 +##域 +##埠 +##埤 +##埵 +##執 +##埸 +##培 +##基 +##埼 +##堀 +##堂 +##堃 +##堅 +##堆 +##堇 +##堑 +##堕 +##堙 +##堡 +##堤 +##堪 +##堯 +##堰 +##報 +##場 +##堵 +##堺 +##堿 +##塊 +##塌 +##塑 +##塔 +##塗 +##塘 +##塚 +##塞 +##塢 +##塩 +##填 +##塬 +##塭 +##塵 +##塾 +##墀 +##境 +##墅 +##墉 +##墊 +##墒 +##墓 +##増 +##墘 +##墙 +##墜 +##增 +##墟 +##墨 +##墩 +##墮 +##墳 +##墻 +##墾 +##壁 +##壅 +##壆 +##壇 +##壊 +##壑 +##壓 +##壕 +##壘 +##壞 +##壟 +##壢 +##壤 +##壩 +##士 +##壬 +##壮 +##壯 +##声 +##売 +##壳 +##壶 +##壹 +##壺 +##壽 +##处 +##备 +##変 +##复 +##夏 +##夔 +##夕 +##外 +##夙 +##多 +##夜 +##够 +##夠 +##夢 +##夥 +##大 +##天 +##太 +##夫 +##夭 +##央 +##夯 +##失 +##头 +##夷 +##夸 +##夹 +##夺 +##夾 +##奂 +##奄 +##奇 +##奈 +##奉 +##奋 +##奎 +##奏 +##奐 +##契 +##奔 +##奕 +##奖 +##套 +##奘 +##奚 +##奠 +##奢 +##奥 +##奧 +##奪 +##奬 +##奮 +##女 +##奴 +##奶 +##奸 +##她 +##好 +##如 +##妃 +##妄 +##妆 +##妇 +##妈 +##妊 +##妍 +##妒 +##妓 +##妖 +##妘 +##妙 +##妝 +##妞 +##妣 +##妤 +##妥 +##妨 +##妩 +##妪 +##妮 +##妲 +##妳 +##妹 +##妻 +##妾 +##姆 +##姉 +##姊 +##始 +##姍 +##姐 +##姑 +##姒 +##姓 +##委 +##姗 +##姚 +##姜 +##姝 +##姣 +##姥 +##姦 +##姨 +##姪 +##姫 +##姬 +##姹 +##姻 +##姿 +##威 +##娃 +##娄 +##娅 +##娆 +##娇 +##娉 +##娑 +##娓 +##娘 +##娛 +##娜 +##娟 +##娠 +##娣 +##娥 +##娩 +##娱 +##娲 +##娴 +##娶 +##娼 +##婀 +##婁 +##婆 +##婉 +##婊 +##婕 +##婚 +##婢 +##婦 
+##婧 +##婪 +##婭 +##婴 +##婵 +##婶 +##婷 +##婺 +##婿 +##媒 +##媚 +##媛 +##媞 +##媧 +##媲 +##媳 +##媽 +##媾 +##嫁 +##嫂 +##嫉 +##嫌 +##嫑 +##嫔 +##嫖 +##嫘 +##嫚 +##嫡 +##嫣 +##嫦 +##嫩 +##嫲 +##嫵 +##嫻 +##嬅 +##嬉 +##嬌 +##嬗 +##嬛 +##嬢 +##嬤 +##嬪 +##嬰 +##嬴 +##嬷 +##嬸 +##嬿 +##孀 +##孃 +##子 +##孑 +##孔 +##孕 +##孖 +##字 +##存 +##孙 +##孚 +##孛 +##孜 +##孝 +##孟 +##孢 +##季 +##孤 +##学 +##孩 +##孪 +##孫 +##孬 +##孰 +##孱 +##孳 +##孵 +##學 +##孺 +##孽 +##孿 +##宁 +##它 +##宅 +##宇 +##守 +##安 +##宋 +##完 +##宏 +##宓 +##宕 +##宗 +##官 +##宙 +##定 +##宛 +##宜 +##宝 +##实 +##実 +##宠 +##审 +##客 +##宣 +##室 +##宥 +##宦 +##宪 +##宫 +##宮 +##宰 +##害 +##宴 +##宵 +##家 +##宸 +##容 +##宽 +##宾 +##宿 +##寂 +##寄 +##寅 +##密 +##寇 +##富 +##寐 +##寒 +##寓 +##寛 +##寝 +##寞 +##察 +##寡 +##寢 +##寥 +##實 +##寧 +##寨 +##審 +##寫 +##寬 +##寮 +##寰 +##寵 +##寶 +##寸 +##对 +##寺 +##寻 +##导 +##対 +##寿 +##封 +##専 +##射 +##将 +##將 +##專 +##尉 +##尊 +##尋 +##對 +##導 +##小 +##少 +##尔 +##尕 +##尖 +##尘 +##尚 +##尝 +##尤 +##尧 +##尬 +##就 +##尴 +##尷 +##尸 +##尹 +##尺 +##尻 +##尼 +##尽 +##尾 +##尿 +##局 +##屁 +##层 +##屄 +##居 +##屆 +##屈 +##屉 +##届 +##屋 +##屌 +##屍 +##屎 +##屏 +##屐 +##屑 +##展 +##屜 +##属 +##屠 +##屡 +##屢 +##層 +##履 +##屬 +##屯 +##山 +##屹 +##屿 +##岀 +##岁 +##岂 +##岌 +##岐 +##岑 +##岔 +##岖 +##岗 +##岘 +##岙 +##岚 +##岛 +##岡 +##岩 +##岫 +##岬 +##岭 +##岱 +##岳 +##岷 +##岸 +##峇 +##峋 +##峒 +##峙 +##峡 +##峤 +##峥 +##峦 +##峨 +##峪 +##峭 +##峯 +##峰 +##峴 +##島 +##峻 +##峽 +##崁 +##崂 +##崆 +##崇 +##崎 +##崑 +##崔 +##崖 +##崗 +##崙 +##崛 +##崧 +##崩 +##崭 +##崴 +##崽 +##嵇 +##嵊 +##嵋 +##嵌 +##嵐 +##嵘 +##嵩 +##嵬 +##嵯 +##嶂 +##嶄 +##嶇 +##嶋 +##嶙 +##嶺 +##嶼 +##嶽 +##巅 +##巍 +##巒 +##巔 +##巖 +##川 +##州 +##巡 +##巢 +##工 +##左 +##巧 +##巨 +##巩 +##巫 +##差 +##己 +##已 +##巳 +##巴 +##巷 +##巻 +##巽 +##巾 +##巿 +##币 +##市 +##布 +##帅 +##帆 +##师 +##希 +##帐 +##帑 +##帕 +##帖 +##帘 +##帚 +##帛 +##帜 +##帝 +##帥 +##带 +##帧 +##師 +##席 +##帮 +##帯 +##帰 +##帳 +##帶 +##帷 +##常 +##帼 +##帽 +##幀 +##幂 +##幄 +##幅 +##幌 +##幔 +##幕 +##幟 +##幡 +##幢 +##幣 +##幫 +##干 +##平 +##年 +##并 +##幸 +##幹 +##幺 +##幻 +##幼 +##幽 +##幾 +##广 +##庁 +##広 +##庄 +##庆 +##庇 +##床 +##序 +##庐 +##库 +##应 +##底 +##庖 +##店 +##庙 +##庚 +##府 +##庞 +##废 +##庠 +##度 +##座 +##庫 +##庭 +##庵 +##庶 +##康 +##庸 +##庹 +##庾 +##廁 +##廂 +##廃 +##廈 +##廉 +##廊 +##廓 
+##廖 +##廚 +##廝 +##廟 +##廠 +##廢 +##廣 +##廬 +##廳 +##延 +##廷 +##建 +##廿 +##开 +##弁 +##异 +##弃 +##弄 +##弈 +##弊 +##弋 +##式 +##弑 +##弒 +##弓 +##弔 +##引 +##弗 +##弘 +##弛 +##弟 +##张 +##弥 +##弦 +##弧 +##弩 +##弭 +##弯 +##弱 +##張 +##強 +##弹 +##强 +##弼 +##弾 +##彅 +##彆 +##彈 +##彌 +##彎 +##归 +##当 +##录 +##彗 +##彙 +##彝 +##形 +##彤 +##彥 +##彦 +##彧 +##彩 +##彪 +##彫 +##彬 +##彭 +##彰 +##影 +##彷 +##役 +##彻 +##彼 +##彿 +##往 +##征 +##径 +##待 +##徇 +##很 +##徉 +##徊 +##律 +##後 +##徐 +##徑 +##徒 +##従 +##徕 +##得 +##徘 +##徙 +##徜 +##從 +##徠 +##御 +##徨 +##復 +##循 +##徬 +##微 +##徳 +##徴 +##徵 +##德 +##徹 +##徼 +##徽 +##心 +##必 +##忆 +##忌 +##忍 +##忏 +##忐 +##忑 +##忒 +##忖 +##志 +##忘 +##忙 +##応 +##忠 +##忡 +##忤 +##忧 +##忪 +##快 +##忱 +##念 +##忻 +##忽 +##忿 +##怀 +##态 +##怂 +##怅 +##怆 +##怎 +##怏 +##怒 +##怔 +##怕 +##怖 +##怙 +##怜 +##思 +##怠 +##怡 +##急 +##怦 +##性 +##怨 +##怪 +##怯 +##怵 +##总 +##怼 +##恁 +##恃 +##恆 +##恋 +##恍 +##恐 +##恒 +##恕 +##恙 +##恚 +##恢 +##恣 +##恤 +##恥 +##恨 +##恩 +##恪 +##恫 +##恬 +##恭 +##息 +##恰 +##恳 +##恵 +##恶 +##恸 +##恺 +##恻 +##恼 +##恿 +##悄 +##悅 +##悉 +##悌 +##悍 +##悔 +##悖 +##悚 +##悟 +##悠 +##患 +##悦 +##您 +##悩 +##悪 +##悬 +##悯 +##悱 +##悲 +##悴 +##悵 +##悶 +##悸 +##悻 +##悼 +##悽 +##情 +##惆 +##惇 +##惊 +##惋 +##惑 +##惕 +##惘 +##惚 +##惜 +##惟 +##惠 +##惡 +##惦 +##惧 +##惨 +##惩 +##惫 +##惬 +##惭 +##惮 +##惯 +##惰 +##惱 +##想 +##惴 +##惶 +##惹 +##惺 +##愁 +##愆 +##愈 +##愉 +##愍 +##意 +##愕 +##愚 +##愛 +##愜 +##感 +##愣 +##愤 +##愧 +##愫 +##愷 +##愿 +##慄 +##慈 +##態 +##慌 +##慎 +##慑 +##慕 +##慘 +##慚 +##慟 +##慢 +##慣 +##慧 +##慨 +##慫 +##慮 +##慰 +##慳 +##慵 +##慶 +##慷 +##慾 +##憂 +##憊 +##憋 +##憎 +##憐 +##憑 +##憔 +##憚 +##憤 +##憧 +##憨 +##憩 +##憫 +##憬 +##憲 +##憶 +##憾 +##懂 +##懇 +##懈 +##應 +##懊 +##懋 +##懑 +##懒 +##懦 +##懲 +##懵 +##懶 +##懷 +##懸 +##懺 +##懼 +##懾 +##懿 +##戀 +##戈 +##戊 +##戌 +##戍 +##戎 +##戏 +##成 +##我 +##戒 +##戕 +##或 +##战 +##戚 +##戛 +##戟 +##戡 +##戦 +##截 +##戬 +##戮 +##戰 +##戲 +##戳 +##戴 +##戶 +##户 +##戸 +##戻 +##戾 +##房 +##所 +##扁 +##扇 +##扈 +##扉 +##手 +##才 +##扎 +##扑 +##扒 +##打 +##扔 +##払 +##托 +##扛 +##扣 +##扦 +##执 +##扩 +##扪 +##扫 +##扬 +##扭 +##扮 +##扯 +##扰 +##扱 +##扳 +##扶 +##批 +##扼 +##找 +##承 +##技 +##抄 +##抉 +##把 +##抑 +##抒 +##抓 +##投 +##抖 +##抗 +##折 +##抚 +##抛 +##抜 +##択 +##抟 +##抠 +##抡 +##抢 +##护 
+##报 +##抨 +##披 +##抬 +##抱 +##抵 +##抹 +##押 +##抽 +##抿 +##拂 +##拄 +##担 +##拆 +##拇 +##拈 +##拉 +##拋 +##拌 +##拍 +##拎 +##拐 +##拒 +##拓 +##拔 +##拖 +##拗 +##拘 +##拙 +##拚 +##招 +##拜 +##拟 +##拡 +##拢 +##拣 +##拥 +##拦 +##拧 +##拨 +##择 +##括 +##拭 +##拮 +##拯 +##拱 +##拳 +##拴 +##拷 +##拼 +##拽 +##拾 +##拿 +##持 +##挂 +##指 +##挈 +##按 +##挎 +##挑 +##挖 +##挙 +##挚 +##挛 +##挝 +##挞 +##挟 +##挠 +##挡 +##挣 +##挤 +##挥 +##挨 +##挪 +##挫 +##振 +##挲 +##挹 +##挺 +##挽 +##挾 +##捂 +##捅 +##捆 +##捉 +##捋 +##捌 +##捍 +##捎 +##捏 +##捐 +##捕 +##捞 +##损 +##捡 +##换 +##捣 +##捧 +##捨 +##捩 +##据 +##捱 +##捲 +##捶 +##捷 +##捺 +##捻 +##掀 +##掂 +##掃 +##掇 +##授 +##掉 +##掌 +##掏 +##掐 +##排 +##掖 +##掘 +##掙 +##掛 +##掠 +##採 +##探 +##掣 +##接 +##控 +##推 +##掩 +##措 +##掬 +##掰 +##掲 +##掳 +##掴 +##掷 +##掸 +##掺 +##揀 +##揃 +##揄 +##揆 +##揉 +##揍 +##描 +##提 +##插 +##揖 +##揚 +##換 +##握 +##揣 +##揩 +##揪 +##揭 +##揮 +##援 +##揶 +##揸 +##揹 +##揽 +##搀 +##搁 +##搂 +##搅 +##損 +##搏 +##搐 +##搓 +##搔 +##搖 +##搗 +##搜 +##搞 +##搡 +##搪 +##搬 +##搭 +##搵 +##搶 +##携 +##搽 +##摀 +##摁 +##摄 +##摆 +##摇 +##摈 +##摊 +##摒 +##摔 +##摘 +##摞 +##摟 +##摧 +##摩 +##摯 +##摳 +##摸 +##摹 +##摺 +##摻 +##撂 +##撃 +##撅 +##撇 +##撈 +##撐 +##撑 +##撒 +##撓 +##撕 +##撚 +##撞 +##撤 +##撥 +##撩 +##撫 +##撬 +##播 +##撮 +##撰 +##撲 +##撵 +##撷 +##撸 +##撻 +##撼 +##撿 +##擀 +##擁 +##擂 +##擄 +##擅 +##擇 +##擊 +##擋 +##操 +##擎 +##擒 +##擔 +##擘 +##據 +##擞 +##擠 +##擡 +##擢 +##擦 +##擬 +##擰 +##擱 +##擲 +##擴 +##擷 +##擺 +##擼 +##擾 +##攀 +##攏 +##攒 +##攔 +##攘 +##攙 +##攜 +##攝 +##攞 +##攢 +##攣 +##攤 +##攥 +##攪 +##攫 +##攬 +##支 +##收 +##攸 +##改 +##攻 +##放 +##政 +##故 +##效 +##敌 +##敍 +##敎 +##敏 +##救 +##敕 +##敖 +##敗 +##敘 +##教 +##敛 +##敝 +##敞 +##敢 +##散 +##敦 +##敬 +##数 +##敲 +##整 +##敵 +##敷 +##數 +##斂 +##斃 +##文 +##斋 +##斌 +##斎 +##斐 +##斑 +##斓 +##斗 +##料 +##斛 +##斜 +##斟 +##斡 +##斤 +##斥 +##斧 +##斩 +##斫 +##斬 +##断 +##斯 +##新 +##斷 +##方 +##於 +##施 +##旁 +##旃 +##旅 +##旋 +##旌 +##旎 +##族 +##旖 +##旗 +##无 +##既 +##日 +##旦 +##旧 +##旨 +##早 +##旬 +##旭 +##旮 +##旱 +##时 +##旷 +##旺 +##旻 +##昀 +##昂 +##昆 +##昇 +##昉 +##昊 +##昌 +##明 +##昏 +##易 +##昔 +##昕 +##昙 +##星 +##映 +##春 +##昧 +##昨 +##昭 +##是 +##昱 +##昴 +##昵 +##昶 +##昼 +##显 +##晁 +##時 +##晃 +##晉 +##晋 +##晌 +##晏 +##晒 +##晓 +##晔 +##晕 +##晖 +##晗 +##晚 +##晝 +##晞 +##晟 
+##晤 +##晦 +##晨 +##晩 +##普 +##景 +##晰 +##晴 +##晶 +##晷 +##智 +##晾 +##暂 +##暄 +##暇 +##暈 +##暉 +##暌 +##暐 +##暑 +##暖 +##暗 +##暝 +##暢 +##暧 +##暨 +##暫 +##暮 +##暱 +##暴 +##暸 +##暹 +##曄 +##曆 +##曇 +##曉 +##曖 +##曙 +##曜 +##曝 +##曠 +##曦 +##曬 +##曰 +##曲 +##曳 +##更 +##書 +##曹 +##曼 +##曾 +##替 +##最 +##會 +##月 +##有 +##朋 +##服 +##朐 +##朔 +##朕 +##朗 +##望 +##朝 +##期 +##朦 +##朧 +##木 +##未 +##末 +##本 +##札 +##朮 +##术 +##朱 +##朴 +##朵 +##机 +##朽 +##杀 +##杂 +##权 +##杆 +##杈 +##杉 +##李 +##杏 +##材 +##村 +##杓 +##杖 +##杜 +##杞 +##束 +##杠 +##条 +##来 +##杨 +##杭 +##杯 +##杰 +##東 +##杳 +##杵 +##杷 +##杼 +##松 +##板 +##极 +##构 +##枇 +##枉 +##枋 +##析 +##枕 +##林 +##枚 +##果 +##枝 +##枢 +##枣 +##枪 +##枫 +##枭 +##枯 +##枰 +##枱 +##枳 +##架 +##枷 +##枸 +##柄 +##柏 +##某 +##柑 +##柒 +##染 +##柔 +##柘 +##柚 +##柜 +##柞 +##柠 +##柢 +##查 +##柩 +##柬 +##柯 +##柱 +##柳 +##柴 +##柵 +##査 +##柿 +##栀 +##栃 +##栄 +##栅 +##标 +##栈 +##栉 +##栋 +##栎 +##栏 +##树 +##栓 +##栖 +##栗 +##校 +##栩 +##株 +##样 +##核 +##根 +##格 +##栽 +##栾 +##桀 +##桁 +##桂 +##桃 +##桅 +##框 +##案 +##桉 +##桌 +##桎 +##桐 +##桑 +##桓 +##桔 +##桜 +##桠 +##桡 +##桢 +##档 +##桥 +##桦 +##桧 +##桨 +##桩 +##桶 +##桿 +##梁 +##梅 +##梆 +##梏 +##梓 +##梗 +##條 +##梟 +##梢 +##梦 +##梧 +##梨 +##梭 +##梯 +##械 +##梳 +##梵 +##梶 +##检 +##棂 +##棄 +##棉 +##棋 +##棍 +##棒 +##棕 +##棗 +##棘 +##棚 +##棟 +##棠 +##棣 +##棧 +##森 +##棱 +##棲 +##棵 +##棹 +##棺 +##椁 +##椅 +##椋 +##植 +##椎 +##椒 +##検 +##椪 +##椭 +##椰 +##椹 +##椽 +##椿 +##楂 +##楊 +##楓 +##楔 +##楚 +##楝 +##楞 +##楠 +##楣 +##楨 +##楫 +##業 +##楮 +##極 +##楷 +##楸 +##楹 +##楼 +##楽 +##概 +##榄 +##榆 +##榈 +##榉 +##榔 +##榕 +##榖 +##榛 +##榜 +##榨 +##榫 +##榭 +##榮 +##榱 +##榴 +##榷 +##榻 +##槁 +##槃 +##構 +##槌 +##槍 +##槎 +##槐 +##槓 +##様 +##槛 +##槟 +##槤 +##槭 +##槲 +##槳 +##槻 +##槽 +##槿 +##樁 +##樂 +##樊 +##樑 +##樓 +##標 +##樞 +##樟 +##模 +##樣 +##権 +##横 +##樫 +##樯 +##樱 +##樵 +##樸 +##樹 +##樺 +##樽 +##樾 +##橄 +##橇 +##橋 +##橐 +##橘 +##橙 +##機 +##橡 +##橢 +##橫 +##橱 +##橹 +##橼 +##檀 +##檄 +##檎 +##檐 +##檔 +##檗 +##檜 +##檢 +##檬 +##檯 +##檳 +##檸 +##檻 +##櫃 +##櫚 +##櫛 +##櫥 +##櫸 +##櫻 +##欄 +##權 +##欒 +##欖 +##欠 +##次 +##欢 +##欣 +##欧 +##欲 +##欸 +##欺 +##欽 +##款 +##歆 +##歇 +##歉 +##歌 +##歎 +##歐 +##歓 +##歙 +##歛 +##歡 +##止 +##正 +##此 +##步 +##武 +##歧 +##歩 +##歪 +##歯 +##歲 +##歳 +##歴 +##歷 
+##歸 +##歹 +##死 +##歼 +##殁 +##殃 +##殆 +##殇 +##殉 +##殊 +##残 +##殒 +##殓 +##殖 +##殘 +##殞 +##殡 +##殤 +##殭 +##殯 +##殲 +##殴 +##段 +##殷 +##殺 +##殼 +##殿 +##毀 +##毁 +##毂 +##毅 +##毆 +##毋 +##母 +##毎 +##每 +##毒 +##毓 +##比 +##毕 +##毗 +##毘 +##毙 +##毛 +##毡 +##毫 +##毯 +##毽 +##氈 +##氏 +##氐 +##民 +##氓 +##气 +##氖 +##気 +##氙 +##氛 +##氟 +##氡 +##氢 +##氣 +##氤 +##氦 +##氧 +##氨 +##氪 +##氫 +##氮 +##氯 +##氰 +##氲 +##水 +##氷 +##永 +##氹 +##氾 +##汀 +##汁 +##求 +##汆 +##汇 +##汉 +##汎 +##汐 +##汕 +##汗 +##汙 +##汛 +##汝 +##汞 +##江 +##池 +##污 +##汤 +##汨 +##汩 +##汪 +##汰 +##汲 +##汴 +##汶 +##汹 +##決 +##汽 +##汾 +##沁 +##沂 +##沃 +##沅 +##沈 +##沉 +##沌 +##沏 +##沐 +##沒 +##沓 +##沖 +##沙 +##沛 +##沟 +##没 +##沢 +##沣 +##沥 +##沦 +##沧 +##沪 +##沫 +##沭 +##沮 +##沱 +##河 +##沸 +##油 +##治 +##沼 +##沽 +##沾 +##沿 +##況 +##泄 +##泉 +##泊 +##泌 +##泓 +##法 +##泗 +##泛 +##泞 +##泠 +##泡 +##波 +##泣 +##泥 +##注 +##泪 +##泫 +##泮 +##泯 +##泰 +##泱 +##泳 +##泵 +##泷 +##泸 +##泻 +##泼 +##泽 +##泾 +##洁 +##洄 +##洋 +##洒 +##洗 +##洙 +##洛 +##洞 +##津 +##洩 +##洪 +##洮 +##洱 +##洲 +##洵 +##洶 +##洸 +##洹 +##活 +##洼 +##洽 +##派 +##流 +##浃 +##浄 +##浅 +##浆 +##浇 +##浊 +##测 +##济 +##浏 +##浑 +##浒 +##浓 +##浔 +##浙 +##浚 +##浜 +##浣 +##浦 +##浩 +##浪 +##浬 +##浮 +##浯 +##浴 +##海 +##浸 +##涂 +##涅 +##涇 +##消 +##涉 +##涌 +##涎 +##涓 +##涔 +##涕 +##涙 +##涛 +##涝 +##涞 +##涟 +##涠 +##涡 +##涣 +##涤 +##润 +##涧 +##涨 +##涩 +##涪 +##涮 +##涯 +##液 +##涵 +##涸 +##涼 +##涿 +##淀 +##淄 +##淅 +##淆 +##淇 +##淋 +##淌 +##淑 +##淒 +##淖 +##淘 +##淙 +##淚 +##淞 +##淡 +##淤 +##淦 +##淨 +##淩 +##淪 +##淫 +##淬 +##淮 +##深 +##淳 +##淵 +##混 +##淹 +##淺 +##添 +##淼 +##清 +##済 +##渉 +##渊 +##渋 +##渍 +##渎 +##渐 +##渔 +##渗 +##渙 +##渚 +##減 +##渝 +##渠 +##渡 +##渣 +##渤 +##渥 +##渦 +##温 +##測 +##渭 +##港 +##渲 +##渴 +##游 +##渺 +##渾 +##湃 +##湄 +##湊 +##湍 +##湖 +##湘 +##湛 +##湟 +##湧 +##湫 +##湮 +##湯 +##湳 +##湾 +##湿 +##満 +##溃 +##溅 +##溉 +##溏 +##源 +##準 +##溜 +##溝 +##溟 +##溢 +##溥 +##溧 +##溪 +##溫 +##溯 +##溱 +##溴 +##溶 +##溺 +##溼 +##滁 +##滂 +##滄 +##滅 +##滇 +##滋 +##滌 +##滑 +##滓 +##滔 +##滕 +##滙 +##滚 +##滝 +##滞 +##滟 +##满 +##滢 +##滤 +##滥 +##滦 +##滨 +##滩 +##滬 +##滯 +##滲 +##滴 +##滷 +##滸 +##滾 +##滿 +##漁 +##漂 +##漆 +##漉 +##漏 +##漓 +##演 +##漕 +##漠 +##漢 +##漣 +##漩 +##漪 +##漫 +##漬 +##漯 +##漱 +##漲 +##漳 +##漸 +##漾 +##漿 +##潆 
+##潇 +##潋 +##潍 +##潑 +##潔 +##潘 +##潛 +##潜 +##潞 +##潟 +##潢 +##潤 +##潦 +##潧 +##潭 +##潮 +##潰 +##潴 +##潸 +##潺 +##潼 +##澀 +##澄 +##澆 +##澈 +##澍 +##澎 +##澗 +##澜 +##澡 +##澤 +##澧 +##澱 +##澳 +##澹 +##激 +##濁 +##濂 +##濃 +##濑 +##濒 +##濕 +##濘 +##濛 +##濟 +##濠 +##濡 +##濤 +##濫 +##濬 +##濮 +##濯 +##濱 +##濺 +##濾 +##瀅 +##瀆 +##瀉 +##瀋 +##瀏 +##瀑 +##瀕 +##瀘 +##瀚 +##瀛 +##瀝 +##瀞 +##瀟 +##瀧 +##瀨 +##瀬 +##瀰 +##瀾 +##灌 +##灏 +##灑 +##灘 +##灝 +##灞 +##灣 +##火 +##灬 +##灭 +##灯 +##灰 +##灵 +##灶 +##灸 +##灼 +##災 +##灾 +##灿 +##炀 +##炁 +##炅 +##炉 +##炊 +##炎 +##炒 +##炔 +##炕 +##炖 +##炙 +##炜 +##炫 +##炬 +##炭 +##炮 +##炯 +##炳 +##炷 +##炸 +##点 +##為 +##炼 +##炽 +##烁 +##烂 +##烃 +##烈 +##烊 +##烏 +##烘 +##烙 +##烛 +##烟 +##烤 +##烦 +##烧 +##烨 +##烩 +##烫 +##烬 +##热 +##烯 +##烷 +##烹 +##烽 +##焉 +##焊 +##焕 +##焖 +##焗 +##焘 +##焙 +##焚 +##焜 +##無 +##焦 +##焯 +##焰 +##焱 +##然 +##焼 +##煅 +##煉 +##煊 +##煌 +##煎 +##煒 +##煖 +##煙 +##煜 +##煞 +##煤 +##煥 +##煦 +##照 +##煨 +##煩 +##煮 +##煲 +##煸 +##煽 +##熄 +##熊 +##熏 +##熒 +##熔 +##熙 +##熟 +##熠 +##熨 +##熬 +##熱 +##熵 +##熹 +##熾 +##燁 +##燃 +##燄 +##燈 +##燉 +##燊 +##燎 +##燒 +##燔 +##燕 +##燙 +##燜 +##營 +##燥 +##燦 +##燧 +##燭 +##燮 +##燴 +##燻 +##燼 +##燿 +##爆 +##爍 +##爐 +##爛 +##爪 +##爬 +##爭 +##爰 +##爱 +##爲 +##爵 +##父 +##爷 +##爸 +##爹 +##爺 +##爻 +##爽 +##爾 +##牆 +##片 +##版 +##牌 +##牍 +##牒 +##牙 +##牛 +##牝 +##牟 +##牠 +##牡 +##牢 +##牦 +##牧 +##物 +##牯 +##牲 +##牴 +##牵 +##特 +##牺 +##牽 +##犀 +##犁 +##犄 +##犊 +##犍 +##犒 +##犢 +##犧 +##犬 +##犯 +##状 +##犷 +##犸 +##犹 +##狀 +##狂 +##狄 +##狈 +##狎 +##狐 +##狒 +##狗 +##狙 +##狞 +##狠 +##狡 +##狩 +##独 +##狭 +##狮 +##狰 +##狱 +##狸 +##狹 +##狼 +##狽 +##猎 +##猕 +##猖 +##猗 +##猙 +##猛 +##猜 +##猝 +##猥 +##猩 +##猪 +##猫 +##猬 +##献 +##猴 +##猶 +##猷 +##猾 +##猿 +##獄 +##獅 +##獎 +##獐 +##獒 +##獗 +##獠 +##獣 +##獨 +##獭 +##獰 +##獲 +##獵 +##獷 +##獸 +##獺 +##獻 +##獼 +##獾 +##玄 +##率 +##玉 +##王 +##玑 +##玖 +##玛 +##玟 +##玠 +##玥 +##玩 +##玫 +##玮 +##环 +##现 +##玲 +##玳 +##玷 +##玺 +##玻 +##珀 +##珂 +##珅 +##珈 +##珉 +##珊 +##珍 +##珏 +##珐 +##珑 +##珙 +##珞 +##珠 +##珣 +##珥 +##珩 +##珪 +##班 +##珮 +##珲 +##珺 +##現 +##球 +##琅 +##理 +##琇 +##琉 +##琊 +##琍 +##琏 +##琐 +##琛 +##琢 +##琥 +##琦 +##琨 +##琪 +##琬 +##琮 +##琰 +##琲 +##琳 +##琴 +##琵 +##琶 +##琺 +##琼 +##瑀 +##瑁 +##瑄 +##瑋 +##瑕 +##瑗 +##瑙 
+##瑚 +##瑛 +##瑜 +##瑞 +##瑟 +##瑠 +##瑣 +##瑤 +##瑩 +##瑪 +##瑯 +##瑰 +##瑶 +##瑾 +##璀 +##璁 +##璃 +##璇 +##璉 +##璋 +##璎 +##璐 +##璜 +##璞 +##璟 +##璧 +##璨 +##環 +##璽 +##璿 +##瓊 +##瓏 +##瓒 +##瓜 +##瓢 +##瓣 +##瓤 +##瓦 +##瓮 +##瓯 +##瓴 +##瓶 +##瓷 +##甄 +##甌 +##甕 +##甘 +##甙 +##甚 +##甜 +##生 +##產 +##産 +##甥 +##甦 +##用 +##甩 +##甫 +##甬 +##甭 +##甯 +##田 +##由 +##甲 +##申 +##电 +##男 +##甸 +##町 +##画 +##甾 +##畀 +##畅 +##界 +##畏 +##畑 +##畔 +##留 +##畜 +##畝 +##畢 +##略 +##畦 +##番 +##畫 +##異 +##畲 +##畳 +##畴 +##當 +##畸 +##畹 +##畿 +##疆 +##疇 +##疊 +##疏 +##疑 +##疔 +##疖 +##疗 +##疙 +##疚 +##疝 +##疟 +##疡 +##疣 +##疤 +##疥 +##疫 +##疮 +##疯 +##疱 +##疲 +##疳 +##疵 +##疸 +##疹 +##疼 +##疽 +##疾 +##痂 +##病 +##症 +##痈 +##痉 +##痊 +##痍 +##痒 +##痔 +##痕 +##痘 +##痙 +##痛 +##痞 +##痠 +##痢 +##痣 +##痤 +##痧 +##痨 +##痪 +##痫 +##痰 +##痱 +##痴 +##痹 +##痺 +##痼 +##痿 +##瘀 +##瘁 +##瘋 +##瘍 +##瘓 +##瘘 +##瘙 +##瘟 +##瘠 +##瘡 +##瘢 +##瘤 +##瘦 +##瘧 +##瘩 +##瘪 +##瘫 +##瘴 +##瘸 +##瘾 +##療 +##癇 +##癌 +##癒 +##癖 +##癜 +##癞 +##癡 +##癢 +##癣 +##癥 +##癫 +##癬 +##癮 +##癱 +##癲 +##癸 +##発 +##登 +##發 +##白 +##百 +##皂 +##的 +##皆 +##皇 +##皈 +##皋 +##皎 +##皑 +##皓 +##皖 +##皙 +##皚 +##皮 +##皰 +##皱 +##皴 +##皺 +##皿 +##盂 +##盃 +##盅 +##盆 +##盈 +##益 +##盎 +##盏 +##盐 +##监 +##盒 +##盔 +##盖 +##盗 +##盘 +##盛 +##盜 +##盞 +##盟 +##盡 +##監 +##盤 +##盥 +##盧 +##盪 +##目 +##盯 +##盱 +##盲 +##直 +##相 +##盹 +##盼 +##盾 +##省 +##眈 +##眉 +##看 +##県 +##眙 +##眞 +##真 +##眠 +##眦 +##眨 +##眩 +##眯 +##眶 +##眷 +##眸 +##眺 +##眼 +##眾 +##着 +##睁 +##睇 +##睏 +##睐 +##睑 +##睛 +##睜 +##睞 +##睡 +##睢 +##督 +##睥 +##睦 +##睨 +##睪 +##睫 +##睬 +##睹 +##睽 +##睾 +##睿 +##瞄 +##瞅 +##瞇 +##瞋 +##瞌 +##瞎 +##瞑 +##瞒 +##瞓 +##瞞 +##瞟 +##瞠 +##瞥 +##瞧 +##瞩 +##瞪 +##瞬 +##瞭 +##瞰 +##瞳 +##瞻 +##瞼 +##瞿 +##矇 +##矍 +##矗 +##矚 +##矛 +##矜 +##矢 +##矣 +##知 +##矩 +##矫 +##短 +##矮 +##矯 +##石 +##矶 +##矽 +##矾 +##矿 +##码 +##砂 +##砌 +##砍 +##砒 +##研 +##砖 +##砗 +##砚 +##砝 +##砣 +##砥 +##砧 +##砭 +##砰 +##砲 +##破 +##砷 +##砸 +##砺 +##砼 +##砾 +##础 +##硅 +##硐 +##硒 +##硕 +##硝 +##硫 +##硬 +##确 +##硯 +##硼 +##碁 +##碇 +##碉 +##碌 +##碍 +##碎 +##碑 +##碓 +##碗 +##碘 +##碚 +##碛 +##碟 +##碣 +##碧 +##碩 +##碰 +##碱 +##碳 +##碴 +##確 +##碼 +##碾 +##磁 +##磅 +##磊 +##磋 +##磐 +##磕 +##磚 +##磡 +##磨 +##磬 +##磯 +##磲 +##磷 +##磺 +##礁 +##礎 +##礙 
+##礡 +##礦 +##礪 +##礫 +##礴 +##示 +##礼 +##社 +##祀 +##祁 +##祂 +##祇 +##祈 +##祉 +##祎 +##祐 +##祕 +##祖 +##祗 +##祚 +##祛 +##祜 +##祝 +##神 +##祟 +##祠 +##祢 +##祥 +##票 +##祭 +##祯 +##祷 +##祸 +##祺 +##祿 +##禀 +##禁 +##禄 +##禅 +##禍 +##禎 +##福 +##禛 +##禦 +##禧 +##禪 +##禮 +##禱 +##禹 +##禺 +##离 +##禽 +##禾 +##禿 +##秀 +##私 +##秃 +##秆 +##秉 +##秋 +##种 +##科 +##秒 +##秘 +##租 +##秣 +##秤 +##秦 +##秧 +##秩 +##秭 +##积 +##称 +##秸 +##移 +##秽 +##稀 +##稅 +##程 +##稍 +##税 +##稔 +##稗 +##稚 +##稜 +##稞 +##稟 +##稠 +##稣 +##種 +##稱 +##稲 +##稳 +##稷 +##稹 +##稻 +##稼 +##稽 +##稿 +##穀 +##穂 +##穆 +##穌 +##積 +##穎 +##穗 +##穢 +##穩 +##穫 +##穴 +##究 +##穷 +##穹 +##空 +##穿 +##突 +##窃 +##窄 +##窈 +##窍 +##窑 +##窒 +##窓 +##窕 +##窖 +##窗 +##窘 +##窜 +##窝 +##窟 +##窠 +##窥 +##窦 +##窨 +##窩 +##窪 +##窮 +##窯 +##窺 +##窿 +##竄 +##竅 +##竇 +##竊 +##立 +##竖 +##站 +##竜 +##竞 +##竟 +##章 +##竣 +##童 +##竭 +##端 +##競 +##竹 +##竺 +##竽 +##竿 +##笃 +##笆 +##笈 +##笋 +##笏 +##笑 +##笔 +##笙 +##笛 +##笞 +##笠 +##符 +##笨 +##第 +##笹 +##笺 +##笼 +##筆 +##等 +##筊 +##筋 +##筍 +##筏 +##筐 +##筑 +##筒 +##答 +##策 +##筛 +##筝 +##筠 +##筱 +##筲 +##筵 +##筷 +##筹 +##签 +##简 +##箇 +##箋 +##箍 +##箏 +##箐 +##箔 +##箕 +##算 +##箝 +##管 +##箩 +##箫 +##箭 +##箱 +##箴 +##箸 +##節 +##篁 +##範 +##篆 +##篇 +##築 +##篑 +##篓 +##篙 +##篝 +##篠 +##篡 +##篤 +##篩 +##篪 +##篮 +##篱 +##篷 +##簇 +##簌 +##簍 +##簡 +##簦 +##簧 +##簪 +##簫 +##簷 +##簸 +##簽 +##簾 +##簿 +##籁 +##籃 +##籌 +##籍 +##籐 +##籟 +##籠 +##籤 +##籬 +##籮 +##籲 +##米 +##类 +##籼 +##籽 +##粄 +##粉 +##粑 +##粒 +##粕 +##粗 +##粘 +##粟 +##粤 +##粥 +##粧 +##粪 +##粮 +##粱 +##粲 +##粳 +##粵 +##粹 +##粼 +##粽 +##精 +##粿 +##糅 +##糊 +##糍 +##糕 +##糖 +##糗 +##糙 +##糜 +##糞 +##糟 +##糠 +##糧 +##糬 +##糯 +##糰 +##糸 +##系 +##糾 +##紀 +##紂 +##約 +##紅 +##紉 +##紊 +##紋 +##納 +##紐 +##紓 +##純 +##紗 +##紘 +##紙 +##級 +##紛 +##紜 +##素 +##紡 +##索 +##紧 +##紫 +##紮 +##累 +##細 +##紳 +##紹 +##紺 +##終 +##絃 +##組 +##絆 +##経 +##結 +##絕 +##絞 +##絡 +##絢 +##給 +##絨 +##絮 +##統 +##絲 +##絳 +##絵 +##絶 +##絹 +##綁 +##綏 +##綑 +##經 +##継 +##続 +##綜 +##綠 +##綢 +##綦 +##綫 +##綬 +##維 +##綱 +##網 +##綴 +##綵 +##綸 +##綺 +##綻 +##綽 +##綾 +##綿 +##緊 +##緋 +##総 +##緑 +##緒 +##緘 +##線 +##緝 +##緞 +##締 +##緣 +##編 +##緩 +##緬 +##緯 +##練 +##緹 +##緻 +##縁 +##縄 +##縈 +##縛 +##縝 +##縣 +##縫 +##縮 +##縱 +##縴 +##縷 +##總 
+##績 +##繁 +##繃 +##繆 +##繇 +##繋 +##織 +##繕 +##繚 +##繞 +##繡 +##繩 +##繪 +##繫 +##繭 +##繳 +##繹 +##繼 +##繽 +##纂 +##續 +##纍 +##纏 +##纓 +##纔 +##纖 +##纜 +##纠 +##红 +##纣 +##纤 +##约 +##级 +##纨 +##纪 +##纫 +##纬 +##纭 +##纯 +##纰 +##纱 +##纲 +##纳 +##纵 +##纶 +##纷 +##纸 +##纹 +##纺 +##纽 +##纾 +##线 +##绀 +##练 +##组 +##绅 +##细 +##织 +##终 +##绊 +##绍 +##绎 +##经 +##绑 +##绒 +##结 +##绔 +##绕 +##绘 +##给 +##绚 +##绛 +##络 +##绝 +##绞 +##统 +##绡 +##绢 +##绣 +##绥 +##绦 +##继 +##绩 +##绪 +##绫 +##续 +##绮 +##绯 +##绰 +##绳 +##维 +##绵 +##绶 +##绷 +##绸 +##绻 +##综 +##绽 +##绾 +##绿 +##缀 +##缄 +##缅 +##缆 +##缇 +##缈 +##缉 +##缎 +##缓 +##缔 +##缕 +##编 +##缘 +##缙 +##缚 +##缜 +##缝 +##缠 +##缢 +##缤 +##缥 +##缨 +##缩 +##缪 +##缭 +##缮 +##缰 +##缱 +##缴 +##缸 +##缺 +##缽 +##罂 +##罄 +##罌 +##罐 +##网 +##罔 +##罕 +##罗 +##罚 +##罡 +##罢 +##罩 +##罪 +##置 +##罰 +##署 +##罵 +##罷 +##罹 +##羁 +##羅 +##羈 +##羊 +##羌 +##美 +##羔 +##羚 +##羞 +##羟 +##羡 +##羣 +##群 +##羥 +##羧 +##羨 +##義 +##羯 +##羲 +##羸 +##羹 +##羽 +##羿 +##翁 +##翅 +##翊 +##翌 +##翎 +##習 +##翔 +##翘 +##翟 +##翠 +##翡 +##翦 +##翩 +##翰 +##翱 +##翳 +##翹 +##翻 +##翼 +##耀 +##老 +##考 +##耄 +##者 +##耆 +##耋 +##而 +##耍 +##耐 +##耒 +##耕 +##耗 +##耘 +##耙 +##耦 +##耨 +##耳 +##耶 +##耷 +##耸 +##耻 +##耽 +##耿 +##聂 +##聆 +##聊 +##聋 +##职 +##聒 +##联 +##聖 +##聘 +##聚 +##聞 +##聪 +##聯 +##聰 +##聲 +##聳 +##聴 +##聶 +##職 +##聽 +##聾 +##聿 +##肃 +##肄 +##肅 +##肆 +##肇 +##肉 +##肋 +##肌 +##肏 +##肓 +##肖 +##肘 +##肚 +##肛 +##肝 +##肠 +##股 +##肢 +##肤 +##肥 +##肩 +##肪 +##肮 +##肯 +##肱 +##育 +##肴 +##肺 +##肽 +##肾 +##肿 +##胀 +##胁 +##胃 +##胄 +##胆 +##背 +##胍 +##胎 +##胖 +##胚 +##胛 +##胜 +##胝 +##胞 +##胡 +##胤 +##胥 +##胧 +##胫 +##胭 +##胯 +##胰 +##胱 +##胳 +##胴 +##胶 +##胸 +##胺 +##能 +##脂 +##脅 +##脆 +##脇 +##脈 +##脉 +##脊 +##脍 +##脏 +##脐 +##脑 +##脓 +##脖 +##脘 +##脚 +##脛 +##脣 +##脩 +##脫 +##脯 +##脱 +##脲 +##脳 +##脸 +##脹 +##脾 +##腆 +##腈 +##腊 +##腋 +##腌 +##腎 +##腐 +##腑 +##腓 +##腔 +##腕 +##腥 +##腦 +##腩 +##腫 +##腭 +##腮 +##腰 +##腱 +##腳 +##腴 +##腸 +##腹 +##腺 +##腻 +##腼 +##腾 +##腿 +##膀 +##膈 +##膊 +##膏 +##膑 +##膘 +##膚 +##膛 +##膜 +##膝 +##膠 +##膦 +##膨 +##膩 +##膳 +##膺 +##膻 +##膽 +##膾 +##膿 +##臀 +##臂 +##臃 +##臆 +##臉 +##臊 +##臍 +##臓 +##臘 +##臟 +##臣 +##臥 +##臧 +##臨 +##自 +##臬 +##臭 +##至 +##致 +##臺 +##臻 +##臼 +##臾 +##舀 +##舂 +##舅 +##舆 
+##與 +##興 +##舉 +##舊 +##舌 +##舍 +##舎 +##舐 +##舒 +##舔 +##舖 +##舗 +##舛 +##舜 +##舞 +##舟 +##航 +##舫 +##般 +##舰 +##舱 +##舵 +##舶 +##舷 +##舸 +##船 +##舺 +##舾 +##艇 +##艋 +##艘 +##艙 +##艦 +##艮 +##良 +##艰 +##艱 +##色 +##艳 +##艷 +##艹 +##艺 +##艾 +##节 +##芃 +##芈 +##芊 +##芋 +##芍 +##芎 +##芒 +##芙 +##芜 +##芝 +##芡 +##芥 +##芦 +##芩 +##芪 +##芫 +##芬 +##芭 +##芮 +##芯 +##花 +##芳 +##芷 +##芸 +##芹 +##芻 +##芽 +##芾 +##苁 +##苄 +##苇 +##苋 +##苍 +##苏 +##苑 +##苒 +##苓 +##苔 +##苕 +##苗 +##苛 +##苜 +##苞 +##苟 +##苡 +##苣 +##若 +##苦 +##苫 +##苯 +##英 +##苷 +##苹 +##苻 +##茁 +##茂 +##范 +##茄 +##茅 +##茉 +##茎 +##茏 +##茗 +##茜 +##茧 +##茨 +##茫 +##茬 +##茭 +##茯 +##茱 +##茲 +##茴 +##茵 +##茶 +##茸 +##茹 +##茼 +##荀 +##荃 +##荆 +##草 +##荊 +##荏 +##荐 +##荒 +##荔 +##荖 +##荘 +##荚 +##荞 +##荟 +##荠 +##荡 +##荣 +##荤 +##荥 +##荧 +##荨 +##荪 +##荫 +##药 +##荳 +##荷 +##荸 +##荻 +##荼 +##荽 +##莅 +##莆 +##莉 +##莊 +##莎 +##莒 +##莓 +##莖 +##莘 +##莞 +##莠 +##莢 +##莧 +##莪 +##莫 +##莱 +##莲 +##莴 +##获 +##莹 +##莺 +##莽 +##莿 +##菀 +##菁 +##菅 +##菇 +##菈 +##菊 +##菌 +##菏 +##菓 +##菖 +##菘 +##菜 +##菟 +##菠 +##菡 +##菩 +##華 +##菱 +##菲 +##菸 +##菽 +##萁 +##萃 +##萄 +##萊 +##萋 +##萌 +##萍 +##萎 +##萘 +##萝 +##萤 +##营 +##萦 +##萧 +##萨 +##萩 +##萬 +##萱 +##萵 +##萸 +##萼 +##落 +##葆 +##葉 +##著 +##葚 +##葛 +##葡 +##董 +##葦 +##葩 +##葫 +##葬 +##葭 +##葯 +##葱 +##葳 +##葵 +##葷 +##葺 +##蒂 +##蒋 +##蒐 +##蒔 +##蒙 +##蒜 +##蒞 +##蒟 +##蒡 +##蒨 +##蒲 +##蒸 +##蒹 +##蒻 +##蒼 +##蒿 +##蓁 +##蓄 +##蓆 +##蓉 +##蓋 +##蓑 +##蓓 +##蓖 +##蓝 +##蓟 +##蓦 +##蓬 +##蓮 +##蓼 +##蓿 +##蔑 +##蔓 +##蔔 +##蔗 +##蔘 +##蔚 +##蔡 +##蔣 +##蔥 +##蔫 +##蔬 +##蔭 +##蔵 +##蔷 +##蔺 +##蔻 +##蔼 +##蔽 +##蕁 +##蕃 +##蕈 +##蕉 +##蕊 +##蕎 +##蕙 +##蕤 +##蕨 +##蕩 +##蕪 +##蕭 +##蕲 +##蕴 +##蕻 +##蕾 +##薄 +##薅 +##薇 +##薈 +##薊 +##薏 +##薑 +##薔 +##薙 +##薛 +##薦 +##薨 +##薩 +##薪 +##薬 +##薯 +##薰 +##薹 +##藉 +##藍 +##藏 +##藐 +##藓 +##藕 +##藜 +##藝 +##藤 +##藥 +##藩 +##藹 +##藻 +##藿 +##蘆 +##蘇 +##蘊 +##蘋 +##蘑 +##蘚 +##蘭 +##蘸 +##蘼 +##蘿 +##虎 +##虏 +##虐 +##虑 +##虔 +##處 +##虚 +##虛 +##虜 +##虞 +##號 +##虢 +##虧 +##虫 +##虬 +##虱 +##虹 +##虻 +##虽 +##虾 +##蚀 +##蚁 +##蚂 +##蚊 +##蚌 +##蚓 +##蚕 +##蚜 +##蚝 +##蚣 +##蚤 +##蚩 +##蚪 +##蚯 +##蚱 +##蚵 +##蛀 +##蛆 +##蛇 +##蛊 +##蛋 +##蛎 +##蛐 +##蛔 +##蛙 +##蛛 +##蛟 +##蛤 +##蛭 +##蛮 +##蛰 +##蛳 +##蛹 +##蛻 +##蛾 +##蜀 +##蜂 
+##蜃 +##蜆 +##蜇 +##蜈 +##蜊 +##蜍 +##蜒 +##蜓 +##蜕 +##蜗 +##蜘 +##蜚 +##蜜 +##蜡 +##蜢 +##蜥 +##蜱 +##蜴 +##蜷 +##蜻 +##蜿 +##蝇 +##蝈 +##蝉 +##蝌 +##蝎 +##蝕 +##蝗 +##蝙 +##蝟 +##蝠 +##蝦 +##蝨 +##蝴 +##蝶 +##蝸 +##蝼 +##螂 +##螃 +##融 +##螞 +##螢 +##螨 +##螯 +##螳 +##螺 +##蟀 +##蟄 +##蟆 +##蟋 +##蟎 +##蟑 +##蟒 +##蟠 +##蟬 +##蟲 +##蟹 +##蟻 +##蟾 +##蠅 +##蠍 +##蠔 +##蠕 +##蠛 +##蠟 +##蠡 +##蠢 +##蠣 +##蠱 +##蠶 +##蠹 +##蠻 +##血 +##衄 +##衅 +##衆 +##行 +##衍 +##術 +##衔 +##街 +##衙 +##衛 +##衝 +##衞 +##衡 +##衢 +##衣 +##补 +##表 +##衩 +##衫 +##衬 +##衮 +##衰 +##衲 +##衷 +##衹 +##衾 +##衿 +##袁 +##袂 +##袄 +##袅 +##袈 +##袋 +##袍 +##袒 +##袖 +##袜 +##袞 +##袤 +##袪 +##被 +##袭 +##袱 +##裁 +##裂 +##装 +##裆 +##裊 +##裏 +##裔 +##裕 +##裘 +##裙 +##補 +##裝 +##裟 +##裡 +##裤 +##裨 +##裱 +##裳 +##裴 +##裸 +##裹 +##製 +##裾 +##褂 +##複 +##褐 +##褒 +##褓 +##褔 +##褚 +##褥 +##褪 +##褫 +##褲 +##褶 +##褻 +##襁 +##襄 +##襟 +##襠 +##襪 +##襬 +##襯 +##襲 +##西 +##要 +##覃 +##覆 +##覇 +##見 +##規 +##覓 +##視 +##覚 +##覦 +##覧 +##親 +##覬 +##観 +##覷 +##覺 +##覽 +##觀 +##见 +##观 +##规 +##觅 +##视 +##览 +##觉 +##觊 +##觎 +##觐 +##觑 +##角 +##觞 +##解 +##觥 +##触 +##觸 +##言 +##訂 +##計 +##訊 +##討 +##訓 +##訕 +##訖 +##託 +##記 +##訛 +##訝 +##訟 +##訣 +##訥 +##訪 +##設 +##許 +##訳 +##訴 +##訶 +##診 +##註 +##証 +##詆 +##詐 +##詔 +##評 +##詛 +##詞 +##詠 +##詡 +##詢 +##詣 +##試 +##詩 +##詫 +##詬 +##詭 +##詮 +##詰 +##話 +##該 +##詳 +##詹 +##詼 +##誅 +##誇 +##誉 +##誌 +##認 +##誓 +##誕 +##誘 +##語 +##誠 +##誡 +##誣 +##誤 +##誥 +##誦 +##誨 +##說 +##説 +##読 +##誰 +##課 +##誹 +##誼 +##調 +##諄 +##談 +##請 +##諏 +##諒 +##論 +##諗 +##諜 +##諡 +##諦 +##諧 +##諫 +##諭 +##諮 +##諱 +##諳 +##諷 +##諸 +##諺 +##諾 +##謀 +##謁 +##謂 +##謄 +##謊 +##謎 +##謐 +##謔 +##謗 +##謙 +##講 +##謝 +##謠 +##謨 +##謬 +##謹 +##謾 +##譁 +##證 +##譎 +##譏 +##識 +##譙 +##譚 +##譜 +##警 +##譬 +##譯 +##議 +##譲 +##譴 +##護 +##譽 +##讀 +##變 +##讓 +##讚 +##讞 +##计 +##订 +##认 +##讥 +##讧 +##讨 +##让 +##讪 +##讫 +##训 +##议 +##讯 +##记 +##讲 +##讳 +##讴 +##讶 +##讷 +##许 +##讹 +##论 +##讼 +##讽 +##设 +##访 +##诀 +##证 +##诃 +##评 +##诅 +##识 +##诈 +##诉 +##诊 +##诋 +##词 +##诏 +##译 +##试 +##诗 +##诘 +##诙 +##诚 +##诛 +##话 +##诞 +##诟 +##诠 +##诡 +##询 +##诣 +##诤 +##该 +##详 +##诧 +##诩 +##诫 +##诬 +##语 +##误 +##诰 +##诱 +##诲 +##说 +##诵 +##诶 +##请 +##诸 +##诺 +##读 +##诽 +##课 +##诿 +##谀 +##谁 +##调 
+##谄 +##谅 +##谆 +##谈 +##谊 +##谋 +##谌 +##谍 +##谎 +##谏 +##谐 +##谑 +##谒 +##谓 +##谔 +##谕 +##谗 +##谘 +##谙 +##谚 +##谛 +##谜 +##谟 +##谢 +##谣 +##谤 +##谥 +##谦 +##谧 +##谨 +##谩 +##谪 +##谬 +##谭 +##谯 +##谱 +##谲 +##谴 +##谶 +##谷 +##豁 +##豆 +##豇 +##豈 +##豉 +##豊 +##豌 +##豎 +##豐 +##豔 +##豚 +##象 +##豢 +##豪 +##豫 +##豬 +##豹 +##豺 +##貂 +##貅 +##貌 +##貓 +##貔 +##貘 +##貝 +##貞 +##負 +##財 +##貢 +##貧 +##貨 +##販 +##貪 +##貫 +##責 +##貯 +##貰 +##貳 +##貴 +##貶 +##買 +##貸 +##費 +##貼 +##貽 +##貿 +##賀 +##賁 +##賂 +##賃 +##賄 +##資 +##賈 +##賊 +##賑 +##賓 +##賜 +##賞 +##賠 +##賡 +##賢 +##賣 +##賤 +##賦 +##質 +##賬 +##賭 +##賴 +##賺 +##購 +##賽 +##贅 +##贈 +##贊 +##贍 +##贏 +##贓 +##贖 +##贛 +##贝 +##贞 +##负 +##贡 +##财 +##责 +##贤 +##败 +##账 +##货 +##质 +##贩 +##贪 +##贫 +##贬 +##购 +##贮 +##贯 +##贰 +##贱 +##贲 +##贴 +##贵 +##贷 +##贸 +##费 +##贺 +##贻 +##贼 +##贾 +##贿 +##赁 +##赂 +##赃 +##资 +##赅 +##赈 +##赊 +##赋 +##赌 +##赎 +##赏 +##赐 +##赓 +##赔 +##赖 +##赘 +##赚 +##赛 +##赝 +##赞 +##赠 +##赡 +##赢 +##赣 +##赤 +##赦 +##赧 +##赫 +##赭 +##走 +##赳 +##赴 +##赵 +##赶 +##起 +##趁 +##超 +##越 +##趋 +##趕 +##趙 +##趟 +##趣 +##趨 +##足 +##趴 +##趵 +##趸 +##趺 +##趾 +##跃 +##跄 +##跆 +##跋 +##跌 +##跎 +##跑 +##跖 +##跚 +##跛 +##距 +##跟 +##跡 +##跤 +##跨 +##跩 +##跪 +##路 +##跳 +##践 +##跷 +##跹 +##跺 +##跻 +##踉 +##踊 +##踌 +##踏 +##踐 +##踝 +##踞 +##踟 +##踢 +##踩 +##踪 +##踮 +##踱 +##踴 +##踵 +##踹 +##蹂 +##蹄 +##蹇 +##蹈 +##蹉 +##蹊 +##蹋 +##蹑 +##蹒 +##蹙 +##蹟 +##蹣 +##蹤 +##蹦 +##蹩 +##蹬 +##蹭 +##蹲 +##蹴 +##蹶 +##蹺 +##蹼 +##蹿 +##躁 +##躇 +##躉 +##躊 +##躋 +##躍 +##躏 +##躪 +##身 +##躬 +##躯 +##躲 +##躺 +##軀 +##車 +##軋 +##軌 +##軍 +##軒 +##軟 +##転 +##軸 +##軼 +##軽 +##軾 +##較 +##載 +##輒 +##輓 +##輔 +##輕 +##輛 +##輝 +##輟 +##輩 +##輪 +##輯 +##輸 +##輻 +##輾 +##輿 +##轄 +##轅 +##轆 +##轉 +##轍 +##轎 +##轟 +##车 +##轧 +##轨 +##轩 +##转 +##轭 +##轮 +##软 +##轰 +##轲 +##轴 +##轶 +##轻 +##轼 +##载 +##轿 +##较 +##辄 +##辅 +##辆 +##辇 +##辈 +##辉 +##辊 +##辍 +##辐 +##辑 +##输 +##辕 +##辖 +##辗 +##辘 +##辙 +##辛 +##辜 +##辞 +##辟 +##辣 +##辦 +##辨 +##辩 +##辫 +##辭 +##辮 +##辯 +##辰 +##辱 +##農 +##边 +##辺 +##辻 +##込 +##辽 +##达 +##迁 +##迂 +##迄 +##迅 +##过 +##迈 +##迎 +##运 +##近 +##返 +##还 +##这 +##进 +##远 +##违 +##连 +##迟 +##迢 +##迤 +##迥 +##迦 +##迩 +##迪 +##迫 +##迭 +##述 +##迴 +##迷 +##迸 +##迹 +##迺 +##追 +##退 +##送 +##适 
+##逃 +##逅 +##逆 +##选 +##逊 +##逍 +##透 +##逐 +##递 +##途 +##逕 +##逗 +##這 +##通 +##逛 +##逝 +##逞 +##速 +##造 +##逢 +##連 +##逮 +##週 +##進 +##逵 +##逶 +##逸 +##逻 +##逼 +##逾 +##遁 +##遂 +##遅 +##遇 +##遊 +##運 +##遍 +##過 +##遏 +##遐 +##遑 +##遒 +##道 +##達 +##違 +##遗 +##遙 +##遛 +##遜 +##遞 +##遠 +##遢 +##遣 +##遥 +##遨 +##適 +##遭 +##遮 +##遲 +##遴 +##遵 +##遶 +##遷 +##選 +##遺 +##遼 +##遽 +##避 +##邀 +##邁 +##邂 +##邃 +##還 +##邇 +##邈 +##邊 +##邋 +##邏 +##邑 +##邓 +##邕 +##邛 +##邝 +##邢 +##那 +##邦 +##邨 +##邪 +##邬 +##邮 +##邯 +##邰 +##邱 +##邳 +##邵 +##邸 +##邹 +##邺 +##邻 +##郁 +##郅 +##郊 +##郎 +##郑 +##郜 +##郝 +##郡 +##郢 +##郤 +##郦 +##郧 +##部 +##郫 +##郭 +##郴 +##郵 +##郷 +##郸 +##都 +##鄂 +##鄉 +##鄒 +##鄔 +##鄙 +##鄞 +##鄢 +##鄧 +##鄭 +##鄰 +##鄱 +##鄲 +##鄺 +##酉 +##酊 +##酋 +##酌 +##配 +##酐 +##酒 +##酗 +##酚 +##酝 +##酢 +##酣 +##酥 +##酩 +##酪 +##酬 +##酮 +##酯 +##酰 +##酱 +##酵 +##酶 +##酷 +##酸 +##酿 +##醃 +##醇 +##醉 +##醋 +##醍 +##醐 +##醒 +##醚 +##醛 +##醜 +##醞 +##醣 +##醪 +##醫 +##醬 +##醮 +##醯 +##醴 +##醺 +##釀 +##釁 +##采 +##釉 +##释 +##釋 +##里 +##重 +##野 +##量 +##釐 +##金 +##釗 +##釘 +##釜 +##針 +##釣 +##釦 +##釧 +##釵 +##鈀 +##鈉 +##鈍 +##鈎 +##鈔 +##鈕 +##鈞 +##鈣 +##鈦 +##鈪 +##鈴 +##鈺 +##鈾 +##鉀 +##鉄 +##鉅 +##鉉 +##鉑 +##鉗 +##鉚 +##鉛 +##鉤 +##鉴 +##鉻 +##銀 +##銃 +##銅 +##銑 +##銓 +##銖 +##銘 +##銜 +##銬 +##銭 +##銮 +##銳 +##銷 +##銹 +##鋁 +##鋅 +##鋒 +##鋤 +##鋪 +##鋰 +##鋸 +##鋼 +##錄 +##錐 +##錘 +##錚 +##錠 +##錢 +##錦 +##錨 +##錫 +##錮 +##錯 +##録 +##錳 +##錶 +##鍊 +##鍋 +##鍍 +##鍛 +##鍥 +##鍰 +##鍵 +##鍺 +##鍾 +##鎂 +##鎊 +##鎌 +##鎏 +##鎔 +##鎖 +##鎗 +##鎚 +##鎧 +##鎬 +##鎮 +##鎳 +##鏈 +##鏖 +##鏗 +##鏘 +##鏞 +##鏟 +##鏡 +##鏢 +##鏤 +##鏽 +##鐘 +##鐮 +##鐲 +##鐳 +##鐵 +##鐸 +##鐺 +##鑄 +##鑊 +##鑑 +##鑒 +##鑣 +##鑫 +##鑰 +##鑲 +##鑼 +##鑽 +##鑾 +##鑿 +##针 +##钉 +##钊 +##钎 +##钏 +##钒 +##钓 +##钗 +##钙 +##钛 +##钜 +##钝 +##钞 +##钟 +##钠 +##钡 +##钢 +##钣 +##钤 +##钥 +##钦 +##钧 +##钨 +##钩 +##钮 +##钯 +##钰 +##钱 +##钳 +##钴 +##钵 +##钺 +##钻 +##钼 +##钾 +##钿 +##铀 +##铁 +##铂 +##铃 +##铄 +##铅 +##铆 +##铉 +##铎 +##铐 +##铛 +##铜 +##铝 +##铠 +##铡 +##铢 +##铣 +##铤 +##铨 +##铩 +##铬 +##铭 +##铮 +##铰 +##铲 +##铵 +##银 +##铸 +##铺 +##链 +##铿 +##销 +##锁 +##锂 +##锄 +##锅 +##锆 +##锈 +##锉 +##锋 +##锌 +##锏 +##锐 +##锑 +##错 +##锚 +##锟 +##锡 +##锢 +##锣 +##锤 +##锥 +##锦 +##锭 +##键 +##锯 +##锰 +##锲 
+##锵 +##锹 +##锺 +##锻 +##镀 +##镁 +##镂 +##镇 +##镉 +##镌 +##镍 +##镐 +##镑 +##镕 +##镖 +##镗 +##镛 +##镜 +##镣 +##镭 +##镯 +##镰 +##镳 +##镶 +##長 +##长 +##門 +##閃 +##閉 +##開 +##閎 +##閏 +##閑 +##閒 +##間 +##閔 +##閘 +##閡 +##関 +##閣 +##閥 +##閨 +##閩 +##閱 +##閲 +##閹 +##閻 +##閾 +##闆 +##闇 +##闊 +##闌 +##闍 +##闔 +##闕 +##闖 +##闘 +##關 +##闡 +##闢 +##门 +##闪 +##闫 +##闭 +##问 +##闯 +##闰 +##闲 +##间 +##闵 +##闷 +##闸 +##闹 +##闺 +##闻 +##闽 +##闾 +##阀 +##阁 +##阂 +##阅 +##阆 +##阇 +##阈 +##阉 +##阎 +##阐 +##阑 +##阔 +##阕 +##阖 +##阙 +##阚 +##阜 +##队 +##阡 +##阪 +##阮 +##阱 +##防 +##阳 +##阴 +##阵 +##阶 +##阻 +##阿 +##陀 +##陂 +##附 +##际 +##陆 +##陇 +##陈 +##陋 +##陌 +##降 +##限 +##陕 +##陛 +##陝 +##陞 +##陟 +##陡 +##院 +##陣 +##除 +##陨 +##险 +##陪 +##陰 +##陲 +##陳 +##陵 +##陶 +##陷 +##陸 +##険 +##陽 +##隅 +##隆 +##隈 +##隊 +##隋 +##隍 +##階 +##随 +##隐 +##隔 +##隕 +##隘 +##隙 +##際 +##障 +##隠 +##隣 +##隧 +##隨 +##險 +##隱 +##隴 +##隶 +##隸 +##隻 +##隼 +##隽 +##难 +##雀 +##雁 +##雄 +##雅 +##集 +##雇 +##雉 +##雋 +##雌 +##雍 +##雎 +##雏 +##雑 +##雒 +##雕 +##雖 +##雙 +##雛 +##雜 +##雞 +##離 +##難 +##雨 +##雪 +##雯 +##雰 +##雲 +##雳 +##零 +##雷 +##雹 +##電 +##雾 +##需 +##霁 +##霄 +##霆 +##震 +##霈 +##霉 +##霊 +##霍 +##霎 +##霏 +##霑 +##霓 +##霖 +##霜 +##霞 +##霧 +##霭 +##霰 +##露 +##霸 +##霹 +##霽 +##霾 +##靂 +##靄 +##靈 +##青 +##靓 +##靖 +##静 +##靚 +##靛 +##靜 +##非 +##靠 +##靡 +##面 +##靥 +##靦 +##革 +##靳 +##靴 +##靶 +##靼 +##鞅 +##鞋 +##鞍 +##鞏 +##鞑 +##鞘 +##鞠 +##鞣 +##鞦 +##鞭 +##韆 +##韋 +##韌 +##韓 +##韜 +##韦 +##韧 +##韩 +##韬 +##韭 +##音 +##韵 +##韶 +##韻 +##響 +##頁 +##頂 +##頃 +##項 +##順 +##須 +##頌 +##預 +##頑 +##頒 +##頓 +##頗 +##領 +##頜 +##頡 +##頤 +##頫 +##頭 +##頰 +##頷 +##頸 +##頹 +##頻 +##頼 +##顆 +##題 +##額 +##顎 +##顏 +##顔 +##願 +##顛 +##類 +##顧 +##顫 +##顯 +##顱 +##顴 +##页 +##顶 +##顷 +##项 +##顺 +##须 +##顼 +##顽 +##顾 +##顿 +##颁 +##颂 +##预 +##颅 +##领 +##颇 +##颈 +##颉 +##颊 +##颌 +##颍 +##颐 +##频 +##颓 +##颔 +##颖 +##颗 +##题 +##颚 +##颛 +##颜 +##额 +##颞 +##颠 +##颡 +##颢 +##颤 +##颦 +##颧 +##風 +##颯 +##颱 +##颳 +##颶 +##颼 +##飄 +##飆 +##风 +##飒 +##飓 +##飕 +##飘 +##飙 +##飚 +##飛 +##飞 +##食 +##飢 +##飨 +##飩 +##飪 +##飯 +##飲 +##飼 +##飽 +##飾 +##餃 +##餅 +##餉 +##養 +##餌 +##餐 +##餒 +##餓 +##餘 +##餚 +##餛 +##餞 +##餡 +##館 +##餮 +##餵 +##餾 +##饅 +##饈 +##饋 +##饌 +##饍 +##饑 +##饒 +##饕 +##饗 +##饞 
+##饥 +##饨 +##饪 +##饬 +##饭 +##饮 +##饯 +##饰 +##饱 +##饲 +##饴 +##饵 +##饶 +##饷 +##饺 +##饼 +##饽 +##饿 +##馀 +##馁 +##馄 +##馅 +##馆 +##馈 +##馋 +##馍 +##馏 +##馒 +##馔 +##首 +##馗 +##香 +##馥 +##馨 +##馬 +##馭 +##馮 +##馳 +##馴 +##駁 +##駄 +##駅 +##駆 +##駐 +##駒 +##駕 +##駛 +##駝 +##駭 +##駱 +##駿 +##騁 +##騎 +##騏 +##験 +##騙 +##騨 +##騰 +##騷 +##驀 +##驅 +##驊 +##驍 +##驒 +##驕 +##驗 +##驚 +##驛 +##驟 +##驢 +##驥 +##马 +##驭 +##驮 +##驯 +##驰 +##驱 +##驳 +##驴 +##驶 +##驷 +##驸 +##驹 +##驻 +##驼 +##驾 +##驿 +##骁 +##骂 +##骄 +##骅 +##骆 +##骇 +##骈 +##骊 +##骋 +##验 +##骏 +##骐 +##骑 +##骗 +##骚 +##骛 +##骜 +##骞 +##骠 +##骡 +##骤 +##骥 +##骧 +##骨 +##骯 +##骰 +##骶 +##骷 +##骸 +##骼 +##髂 +##髅 +##髋 +##髏 +##髒 +##髓 +##體 +##髖 +##高 +##髦 +##髪 +##髮 +##髯 +##髻 +##鬃 +##鬆 +##鬍 +##鬓 +##鬚 +##鬟 +##鬢 +##鬣 +##鬥 +##鬧 +##鬱 +##鬼 +##魁 +##魂 +##魄 +##魅 +##魇 +##魍 +##魏 +##魔 +##魘 +##魚 +##魯 +##魷 +##鮑 +##鮨 +##鮪 +##鮭 +##鮮 +##鯉 +##鯊 +##鯖 +##鯛 +##鯨 +##鯰 +##鯽 +##鰍 +##鰓 +##鰭 +##鰲 +##鰻 +##鰾 +##鱈 +##鱉 +##鱔 +##鱗 +##鱷 +##鱸 +##鱼 +##鱿 +##鲁 +##鲈 +##鲍 +##鲑 +##鲛 +##鲜 +##鲟 +##鲢 +##鲤 +##鲨 +##鲫 +##鲱 +##鲲 +##鲶 +##鲷 +##鲸 +##鳃 +##鳄 +##鳅 +##鳌 +##鳍 +##鳕 +##鳖 +##鳗 +##鳝 +##鳞 +##鳥 +##鳩 +##鳳 +##鳴 +##鳶 +##鴉 +##鴕 +##鴛 +##鴦 +##鴨 +##鴻 +##鴿 +##鵑 +##鵜 +##鵝 +##鵡 +##鵬 +##鵰 +##鵲 +##鶘 +##鶩 +##鶯 +##鶴 +##鷗 +##鷲 +##鷹 +##鷺 +##鸚 +##鸞 +##鸟 +##鸠 +##鸡 +##鸢 +##鸣 +##鸥 +##鸦 +##鸨 +##鸪 +##鸭 +##鸯 +##鸳 +##鸵 +##鸽 +##鸾 +##鸿 +##鹂 +##鹃 +##鹄 +##鹅 +##鹈 +##鹉 +##鹊 +##鹌 +##鹏 +##鹑 +##鹕 +##鹘 +##鹜 +##鹞 +##鹤 +##鹦 +##鹧 +##鹫 +##鹭 +##鹰 +##鹳 +##鹵 +##鹹 +##鹼 +##鹽 +##鹿 +##麂 +##麋 +##麒 +##麓 +##麗 +##麝 +##麟 +##麥 +##麦 +##麩 +##麴 +##麵 +##麸 +##麺 +##麻 +##麼 +##麽 +##麾 +##黃 +##黄 +##黍 +##黎 +##黏 +##黑 +##黒 +##黔 +##默 +##黛 +##黜 +##黝 +##點 +##黠 +##黨 +##黯 +##黴 +##鼋 +##鼎 +##鼐 +##鼓 +##鼠 +##鼬 +##鼹 +##鼻 +##鼾 +##齁 +##齊 +##齋 +##齐 +##齒 +##齡 +##齢 +##齣 +##齦 +##齿 +##龄 +##龅 +##龈 +##龊 +##龋 +##龌 +##龍 +##龐 +##龔 +##龕 +##龙 +##龚 +##龛 +##龜 +##龟 +##︰ +##︱ +##︶ +##︿ +##﹁ +##﹂ +##﹍ +##﹏ +##﹐ +##﹑ +##﹒ +##﹔ +##﹕ +##﹖ +##﹗ +##﹙ +##﹚ +##﹝ +##﹞ +##﹡ +##﹣ +##! +##" +### +##$ +##% +##& +##' +##( +##) +##* +##, +##- +##. +##/ +##: +##; +##< +##? 
+##@ +##[ +##\ +##] +##^ +##_ +##` +##f +##h +##j +##u +##w +##z +##{ +##} +##。 +##「 +##」 +##、 +##・ +##ッ +##ー +##イ +##ク +##シ +##ス +##ト +##ノ +##フ +##ラ +##ル +##ン +##゙ +##゚ +## ̄ +##¥ +##👍 +##🔥 +##😂 +##😎 diff --git a/example/resnet101_imagenet2012/README.md b/example/resnet101_imagenet2012/README.md new file mode 100644 index 0000000000..8514b8759d --- /dev/null +++ b/example/resnet101_imagenet2012/README.md @@ -0,0 +1,135 @@ +# ResNet101 Example + +## Description + +This is an example of training ResNet101 with ImageNet dataset in MindSpore. + +## Requirements + +- Install [MindSpore](https://www.mindspore.cn/install/en). + +- Download the dataset [ImageNet](http://image-net.org/download). + +> Unzip the ImageNet dataset to any path you want, the folder should include train and eval dataset as follows: + +``` +. +└─dataset + ├─ilsvrc + │ + └─validation_preprocess +``` + +## Example structure + +```shell +. +├── crossentropy.py # CrossEntropy loss function +├── config.py # parameter configuration +├── dataset.py # data preprocessing +├── eval.py # eval net +├── lr_generator.py # generate learning rate +├── run_distribute_train.sh # launch distributed training(8p) +├── run_infer.sh # launch evaluating +├── run_standalone_train.sh # launch standalone training(1p) +└── train.py # train net +``` + +## Parameter configuration + +Parameters for both training and evaluating can be set in config.py. + +``` +"class_num": 1001, # dataset class number +"batch_size": 32, # batch size of input tensor +"loss_scale": 1024, # loss scale +"momentum": 0.9, # momentum optimizer +"weight_decay": 1e-4, # weight decay +"epoch_size": 120, # epoch sizes for training +"buffer_size": 1000, # number of queue size in data preprocessing +"image_height": 224, # image height +"image_width": 224, # image width +"save_checkpoint": True, # whether save checkpoint or not +"save_checkpoint_epochs": 1, # the epoch interval between two checkpoints. 
By default, the last checkpoint will be saved after the last epoch +"keep_checkpoint_max": 10, # only keep the last keep_checkpoint_max checkpoints +"save_checkpoint_path": "./", # path to save checkpoint relative to the executed path +"warmup_epochs": 0, # number of warmup epochs +"lr_decay_mode": "cosine", # decay mode for generating learning rate +"label_smooth": 1, # label_smooth +"label_smooth_factor": 0.1, # label_smooth_factor +"lr": 0.1 # base learning rate +``` + +## Running the example + +### Train + +#### Usage + +``` +# distributed training +sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH] + +# standalone training +sh run_standalone_train.sh [DATASET_PATH] +``` + +#### Launch + +```bash +# distributed training example(8p) +sh run_distribute_train.sh rank_table_8p.json dataset/ilsvrc + +# standalone training example(1p) +sh run_standalone_train.sh dataset/ilsvrc +``` + +> For details about rank_table.json, refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). + +#### Result + +Training results will be stored in the example path, in a folder whose name begins with "train" or "train_parallel". There you can find the checkpoint files, together with results like the following in the log. + + +``` +# distributed training result(8p) +epoch: 1 step: 5004, loss is 4.805483 +epoch: 2 step: 5004, loss is 3.2121816 +epoch: 3 step: 5004, loss is 3.429647 +epoch: 4 step: 5004, loss is 3.3667371 +epoch: 5 step: 5004, loss is 3.1718972 +... +epoch: 67 step: 5004, loss is 2.2768745 +epoch: 68 step: 5004, loss is 1.7223864 +epoch: 69 step: 5004, loss is 2.0665488 +epoch: 70 step: 5004, loss is 1.8717369 +...
+``` + +### Infer + +#### Usage + +``` +# infer +sh run_infer.sh [VALIDATION_DATASET_PATH] [CHECKPOINT_PATH] +``` + +#### Launch + +```bash +# infer with checkpoint +sh run_infer.sh dataset/validation_preprocess/ train_parallel0/resnet-120_5004.ckpt + +``` + +> The checkpoint is produced during the training process. + + +#### Result + +Inference results will be stored in the example path, in a folder named "infer". There you can find results like the following in the log. + +``` +result: {'top_5_accuracy': 0.9429417413572343, 'top_1_accuracy': 0.7853513124199744} ckpt=train_parallel0/resnet-120_5004.ckpt +``` diff --git a/example/resnet101_imagenet2012/config.py b/example/resnet101_imagenet2012/config.py new file mode 100755 index 0000000000..0b9f16b504 --- /dev/null +++ b/example/resnet101_imagenet2012/config.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================ +""" +network config setting, will be used in train.py and eval.py +""" +from easydict import EasyDict as ed + +config = ed({ + "class_num": 1001, + "batch_size": 32, + "loss_scale": 1024, + "momentum": 0.9, + "weight_decay": 1e-4, + "epoch_size": 120, + "buffer_size": 1000, + "image_height": 224, + "image_width": 224, + "save_checkpoint": True, + "save_checkpoint_epochs": 1, + "keep_checkpoint_max": 10, + "save_checkpoint_path": "./", + "warmup_epochs": 0, + "lr_decay_mode": "cosine", + "label_smooth": 1, + "label_smooth_factor": 0.1, + "lr": 0.1 +}) diff --git a/example/resnet101_imagenet2012/crossentropy.py b/example/resnet101_imagenet2012/crossentropy.py new file mode 100755 index 0000000000..1145a41804 --- /dev/null +++ b/example/resnet101_imagenet2012/crossentropy.py @@ -0,0 +1,36 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""define loss function for network""" +from mindspore.nn.loss.loss import _Loss +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore import Tensor +from mindspore.common import dtype as mstype +import mindspore.nn as nn + +class CrossEntropy(_Loss): + """the redefined loss function with SoftmaxCrossEntropyWithLogits""" + def __init__(self, smooth_factor=0., num_classes=1001): + super(CrossEntropy, self).__init__() + self.onehot = P.OneHot() + self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) + self.off_value = Tensor(1.0 * smooth_factor / (num_classes -1), mstype.float32) + self.ce = nn.SoftmaxCrossEntropyWithLogits() + self.mean = P.ReduceMean(False) + def construct(self, logit, label): + one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) + loss = self.ce(logit, one_hot_label) + loss = self.mean(loss, 0) + return loss diff --git a/example/resnet101_imagenet2012/dataset.py b/example/resnet101_imagenet2012/dataset.py new file mode 100755 index 0000000000..27d93dc086 --- /dev/null +++ b/example/resnet101_imagenet2012/dataset.py @@ -0,0 +1,89 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +create train or eval dataset. 
+""" +import os +import mindspore.common.dtype as mstype +import mindspore.dataset.engine as de +import mindspore.dataset.transforms.vision.c_transforms as C +import mindspore.dataset.transforms.c_transforms as C2 +from config import config + +def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): + """ + create a train or evaluate dataset + Args: + dataset_path(string): the path of dataset. + do_train(bool): whether dataset is used for train or eval. + repeat_num(int): the repeat times of dataset. Default: 1 + batch_size(int): the batch size of dataset. Default: 32 + + Returns: + dataset + """ + device_num = int(os.getenv("RANK_SIZE")) + rank_id = int(os.getenv("RANK_ID")) + + if device_num == 1: + ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True) + else: + ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, + num_shards=device_num, shard_id=rank_id) + resize_height = 224 + rescale = 1.0 / 255.0 + shift = 0.0 + + # define map operations + decode_op = C.Decode() + + random_resize_crop_op = C.RandomResizedCrop(resize_height, (0.08, 1.0), (0.75, 1.33), max_attempts=100) + horizontal_flip_op = C.RandomHorizontalFlip(rank_id / (rank_id + 1)) + resize_op_256 = C.Resize((256, 256)) + center_crop = C.CenterCrop(224) + rescale_op = C.Rescale(rescale, shift) + normalize_op = C.Normalize((0.475, 0.451, 0.392), (0.275, 0.267, 0.278)) + changeswap_op = C.HWC2CHW() + + trans = [] + if do_train: + trans = [decode_op, + random_resize_crop_op, + horizontal_flip_op, + rescale_op, + normalize_op, + changeswap_op] + + else: + trans = [decode_op, + resize_op_256, + center_crop, + rescale_op, + normalize_op, + changeswap_op] + + type_cast_op = C2.TypeCast(mstype.int32) + + ds = ds.map(input_columns="image", operations=trans) + ds = ds.map(input_columns="label", operations=type_cast_op) + + # apply shuffle operations + ds = ds.shuffle(buffer_size=config.buffer_size) + # apply batch operations + ds = 
ds.batch(batch_size, drop_remainder=True) + # apply dataset repeat operation + ds = ds.repeat(repeat_num) + + return ds diff --git a/example/resnet101_imagenet2012/eval.py b/example/resnet101_imagenet2012/eval.py new file mode 100755 index 0000000000..bdf6e89ca8 --- /dev/null +++ b/example/resnet101_imagenet2012/eval.py @@ -0,0 +1,78 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +eval. 
+""" +import os +import argparse +import random +import numpy as np +from dataset import create_dataset +from config import config +from mindspore import context +from mindspore.model_zoo.resnet import resnet101 +from mindspore.parallel._auto_parallel_context import auto_parallel_context +from mindspore.train.model import Model, ParallelMode +from mindspore.train.serialization import load_checkpoint, load_param_into_net +import mindspore.dataset.engine as de +from mindspore.communication.management import init +from crossentropy import CrossEntropy + +random.seed(1) +np.random.seed(1) +de.config.set_seed(1) + +parser = argparse.ArgumentParser(description='Image classification') +parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') +parser.add_argument('--device_num', type=int, default=1, help='Device num.') +parser.add_argument('--do_train', type=bool, default=False, help='Do train or not.') +parser.add_argument('--do_eval', type=bool, default=True, help='Do eval or not.') +parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') +parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') +args_opt = parser.parse_args() + +device_id = int(os.getenv('DEVICE_ID')) + +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id) +context.set_context(enable_task_sink=True) +context.set_context(enable_loop_sink=True) +context.set_context(enable_mem_reuse=True) + +if __name__ == '__main__': + if not args_opt.do_eval and args_opt.run_distribute: + context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True, parameter_broadcast=True) + auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313]) + init() + + epoch_size = config.epoch_size + net = resnet101(class_num=config.class_num) + + if not config.label_smooth: + config.label_smooth_factor = 0.0 + 
loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) + + if args_opt.do_eval: + dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size) + step_size = dataset.get_dataset_size() + + if args_opt.checkpoint_path: + param_dict = load_checkpoint(args_opt.checkpoint_path) + load_param_into_net(net, param_dict) + net.set_train(False) + + model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'}) + res = model.eval(dataset) + print("result:", res, "ckpt=", args_opt.checkpoint_path) diff --git a/example/resnet101_imagenet2012/lr_generator.py b/example/resnet101_imagenet2012/lr_generator.py new file mode 100755 index 0000000000..88cb85cc5b --- /dev/null +++ b/example/resnet101_imagenet2012/lr_generator.py @@ -0,0 +1,52 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""learning rate generator""" +import math +import numpy as np + +def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr): + lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps) + lr = float(init_lr) + lr_inc * current_step + return lr + +def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch): + """ + generate learning rate array with cosine + + Args: + lr(float): base learning rate + steps_per_epoch(int): steps size of one epoch + warmup_epochs(int): number of warmup epochs + max_epoch(int): total epochs of training + Returns: + np.array, learning rate array + """ + base_lr = lr + warmup_init_lr = 0 + total_steps = int(max_epoch * steps_per_epoch) + warmup_steps = int(warmup_epochs * steps_per_epoch) + decay_steps = total_steps - warmup_steps + + lr_each_step = [] + for i in range(total_steps): + if i < warmup_steps: + lr = linear_warmup_lr(i + 1, warmup_steps, base_lr, warmup_init_lr) + else: + linear_decay = (total_steps - i) / decay_steps + cosine_decay = 0.5 * (1 + math.cos(math.pi * 2 * 0.47 * i / decay_steps)) + decayed = linear_decay * cosine_decay + 0.00001 + lr = base_lr * decayed + lr_each_step.append(lr) + return np.array(lr_each_step).astype(np.float32) diff --git a/example/resnet101_imagenet2012/run_distribute_train.sh b/example/resnet101_imagenet2012/run_distribute_train.sh new file mode 100755 index 0000000000..ecdcd66859 --- /dev/null +++ b/example/resnet101_imagenet2012/run_distribute_train.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] +then + echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) +echo $PATH1 +echo $PATH2 + +if [ ! -f $PATH1 ] +then + echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file" +exit 1 +fi + +if [ ! -d $PATH2 ] +then + echo "error: DATASET_PATH=$PATH2 is not a directory" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=8 +export RANK_SIZE=8 +export MINDSPORE_HCCL_CONFIG_PATH=$PATH1 +export RANK_TABLE_FILE=$PATH1 + +for((i=0; i<${DEVICE_NUM}; i++)) +do + export DEVICE_ID=$i + export RANK_ID=$i + rm -rf ./train_parallel$i + mkdir ./train_parallel$i + cp *.py ./train_parallel$i + cp *.sh ./train_parallel$i + cd ./train_parallel$i || exit + echo "start training for rank $RANK_ID, device $DEVICE_ID" + env > env.log + python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log & + cd .. 
+done diff --git a/example/resnet101_imagenet2012/run_infer.sh b/example/resnet101_imagenet2012/run_infer.sh new file mode 100755 index 0000000000..b82427e15f --- /dev/null +++ b/example/resnet101_imagenet2012/run_infer.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] +then + echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) +echo $PATH1 +echo $PATH2 + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ ! -f $PATH2 ] +then + echo "error: CHECKPOINT_PATH=$PATH2 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_SIZE=$DEVICE_NUM +export RANK_ID=0 + +if [ -d "infer" ]; +then + rm -rf ./infer +fi +mkdir ./infer +cp *.py ./infer +cp *.sh ./infer +cd ./infer || exit +env > env.log +echo "start infering for device $DEVICE_ID" +python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log & +cd .. 
diff --git a/example/resnet101_imagenet2012/run_standalone_train.sh b/example/resnet101_imagenet2012/run_standalone_train.sh new file mode 100755 index 0000000000..dde018b8eb --- /dev/null +++ b/example/resnet101_imagenet2012/run_standalone_train.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 1 ] +then + echo "Usage: sh run_standalone_train.sh [DATASET_PATH]" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} +PATH1=$(get_real_path $1) +echo $PATH1 + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_ID=0 +export RANK_SIZE=1 + +if [ -d "train" ]; +then + rm -rf ./train +fi +mkdir ./train +cp *.py ./train +cp *.sh ./train +cd ./train || exit +echo "start training for device $DEVICE_ID" +env > env.log +python train.py --do_train=True --dataset_path=$PATH1 &> log & +cd .. 
diff --git a/example/resnet101_imagenet2012/train.py b/example/resnet101_imagenet2012/train.py new file mode 100755 index 0000000000..365a859395 --- /dev/null +++ b/example/resnet101_imagenet2012/train.py @@ -0,0 +1,98 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""train_imagenet.""" +import os +import argparse +import random +import numpy as np +from dataset import create_dataset +from lr_generator import warmup_cosine_annealing_lr +from config import config +from mindspore import context +from mindspore import Tensor +from mindspore.model_zoo.resnet import resnet101 +from mindspore.parallel._auto_parallel_context import auto_parallel_context +from mindspore.nn.optim.momentum import Momentum +from mindspore.train.model import Model, ParallelMode +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor +from mindspore.train.loss_scale_manager import FixedLossScaleManager +import mindspore.dataset.engine as de +from mindspore.communication.management import init +import mindspore.nn as nn +import mindspore.common.initializer as weight_init +from crossentropy import CrossEntropy + +random.seed(1) +np.random.seed(1) +de.config.set_seed(1) + +parser = argparse.ArgumentParser(description='Image classification') +parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') 
+parser.add_argument('--device_num', type=int, default=1, help='Device num.') +parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.') +parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.') +parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') +args_opt = parser.parse_args() + +device_id = int(os.getenv('DEVICE_ID')) + +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id) +context.set_context(enable_task_sink=True) +context.set_context(enable_loop_sink=True) +context.set_context(enable_mem_reuse=True) + +if __name__ == '__main__': + if not args_opt.do_eval and args_opt.run_distribute: + context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True, parameter_broadcast=True) + auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313]) + init() + + epoch_size = config.epoch_size + net = resnet101(class_num=config.class_num) + # weight init + for _, cell in net.cells_and_names(): + if isinstance(cell, nn.Conv2d): + cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(), + cell.weight.default_input.shape(), + cell.weight.default_input.dtype()) + if isinstance(cell, nn.Dense): + cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(), + cell.weight.default_input.shape(), + cell.weight.default_input.dtype()) + if not config.label_smooth: + config.label_smooth_factor = 0.0 + loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) + if args_opt.do_train: + dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, + repeat_num=epoch_size, batch_size=config.batch_size) + step_size = dataset.get_dataset_size() + loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) + + # learning rate strategy with cosine + lr = 
Tensor(warmup_cosine_annealing_lr(config.lr, step_size, config.warmup_epochs, config.epoch_size)) + opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, + config.weight_decay, config.loss_scale) + model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', keep_batchnorm_fp32=False, + loss_scale_manager=loss_scale, metrics={'acc'}) + time_cb = TimeMonitor(data_size=step_size) + loss_cb = LossMonitor() + cb = [time_cb, loss_cb] + if config.save_checkpoint: + config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs*step_size, + keep_checkpoint_max=config.keep_checkpoint_max) + ckpt_cb = ModelCheckpoint(prefix="resnet", directory=config.save_checkpoint_path, config=config_ck) + cb += [ckpt_cb] + model.train(epoch_size, dataset, callbacks=cb) diff --git a/example/resnet50_cifar10/README.md b/example/resnet50_cifar10/README.md new file mode 100644 index 0000000000..afa8519bee --- /dev/null +++ b/example/resnet50_cifar10/README.md @@ -0,0 +1,125 @@ +# ResNet-50 Example + +## Description + +This is an example of training ResNet-50 with CIFAR-10 dataset in MindSpore. + +## Requirements + +- Install [MindSpore](https://www.mindspore.cn/install/en). + +- Download the dataset [CIFAR-10](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz). + +> Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows: +> ``` +> . +> ├── cifar-10-batches-bin # train dataset +> └── cifar-10-verify-bin # infer dataset +> ``` + + +## Example structure + +```shell +. 
+├── config.py # parameter configuration +├── dataset.py # data preprocessing +├── eval.py # infer script +├── lr_generator.py # generate learning rate for each step +├── run_distribute_train.sh # launch distributed training +├── run_infer.sh # launch inference +├── run_standalone_train.sh # launch standalone training +└── train.py # train script +``` + + +## Parameter configuration + +Parameters for both training and inference can be set in config.py. + +``` +"class_num": 10, # dataset class num +"batch_size": 32, # batch size of input tensor +"loss_scale": 1024, # loss scale +"momentum": 0.9, # momentum +"weight_decay": 1e-4, # weight decay +"epoch_size": 90, # only valid for training, which is always 1 for inference +"buffer_size": 100, # number of queue size in data preprocessing +"image_height": 224, # image height +"image_width": 224, # image width +"save_checkpoint": True, # whether save checkpoint or not +"save_checkpoint_steps": 195, # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step +"keep_checkpoint_max": 10, # only keep the last keep_checkpoint_max checkpoint +"save_checkpoint_path": "./", # path to save checkpoint +"lr_init": 0.01, # initial learning rate +"lr_end": 0.00001, # final learning rate +"lr_max": 0.1, # maximum learning rate +"warmup_epochs": 5, # number of warmup epoch +"lr_decay_mode": "poly" # decay mode can be selected in steps, poly and default +``` + +## Running the example + +### Train + +#### Usage + +``` +# distribute training +Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH] + +# standalone training +Usage: sh run_standalone_train.sh [DATASET_PATH] +``` + + +#### Launch + +``` +# distribute training example +sh run_distribute_train.sh rank_table.json ~/cifar-10-batches-bin + +# standalone training example +sh run_standalone_train.sh ~/cifar-10-batches-bin +``` + +> About rank_table.json, you can refer to the [distributed training
tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). + +#### Result + +Training results will be stored in the example path, in a folder whose name begins with "train" or "train_parallel". In that folder, you can find the checkpoint files together with results like the following in the log. + +``` +# distribute training result(8p) +epoch: 1 step: 195, loss is 1.9601055 +epoch: 2 step: 195, loss is 1.8555021 +epoch: 3 step: 195, loss is 1.6707983 +epoch: 4 step: 195, loss is 1.8162166 +epoch: 5 step: 195, loss is 1.393667 +``` + +### Infer + +#### Usage + +``` +# infer +Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH] +``` + +#### Launch + +``` +# infer example +sh run_infer.sh ~/cifar-10-verify-bin ~/resnet50_cifar10/train_parallel0/resnet-90_195.ckpt +``` + +> The checkpoint is produced during the training process. + +#### Result + +Inference results will be stored in the example path, in a folder named "infer". In that folder, you can find results like the following in the log.
+ +``` +result: {'acc': 0.91446314102564111} ckpt=~/resnet50_cifar10/train_parallel0/resnet-90_195.ckpt +``` diff --git a/example/resnet50_cifar10/eval.py b/example/resnet50_cifar10/eval.py index 243dc2a332..1134d0bd2e 100755 --- a/example/resnet50_cifar10/eval.py +++ b/example/resnet50_cifar10/eval.py @@ -51,17 +51,11 @@ context.set_context(enable_loop_sink=True) context.set_context(enable_mem_reuse=True) if __name__ == '__main__': - if args_opt.do_eval: - context.set_context(enable_hccl=False) - else: - if args_opt.run_distribute: - context.set_context(enable_hccl=True) - context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, - mirror_mean=True) - auto_parallel_context().set_all_reduce_fusion_split_indices([140]) - init() - else: - context.set_context(enable_hccl=False) + if not args_opt.do_eval and args_opt.run_distribute: + context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True) + auto_parallel_context().set_all_reduce_fusion_split_indices([140]) + init() epoch_size = config.epoch_size net = resnet50(class_num=config.class_num) diff --git a/example/resnet50_cifar10/run_distribute_train.sh b/example/resnet50_cifar10/run_distribute_train.sh index e78e2bf104..e4bdd775b3 100755 --- a/example/resnet50_cifar10/run_distribute_train.sh +++ b/example/resnet50_cifar10/run_distribute_train.sh @@ -20,22 +20,33 @@ then exit 1 fi -if [ ! -f $1 ] +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) + +if [ ! -f "$PATH1" ] then - echo "error: DMINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" + echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file" exit 1 fi -if [ ! -d $2 ] +if [ ! 
-d "$PATH2" ] then - echo "error: DATASET_PATH=$2 is not a directory" + echo "error: DATASET_PATH=$PATH2 is not a directory" exit 1 fi ulimit -u unlimited export DEVICE_NUM=8 export RANK_SIZE=8 -export MINDSPORE_HCCL_CONFIG_PATH=$1 +export MINDSPORE_HCCL_CONFIG_PATH=$PATH1 for((i=0; i<${DEVICE_NUM}; i++)) do @@ -48,6 +59,6 @@ do cd ./train_parallel$i || exit echo "start training for rank $RANK_ID, device $DEVICE_ID" env > env.log - python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$2 &> log & + python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log & cd .. done diff --git a/example/resnet50_cifar10/run_standalone_train.sh b/example/resnet50_cifar10/run_standalone_train.sh index 90423630aa..cb08cde6c9 100755 --- a/example/resnet50_cifar10/run_standalone_train.sh +++ b/example/resnet50_cifar10/run_standalone_train.sh @@ -20,9 +20,19 @@ then exit 1 fi -if [ ! -d $1 ] +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) + +if [ ! -d "$PATH1" ] then - echo "error: DATASET_PATH=$1 is not a directory" + echo "error: DATASET_PATH=$PATH1 is not a directory" exit 1 fi @@ -41,5 +51,5 @@ cp *.sh ./train cd ./train || exit echo "start training for device $DEVICE_ID" env > env.log -python train.py --do_train=True --dataset_path=$1 &> log & +python train.py --do_train=True --dataset_path=$PATH1 &> log & cd .. 
diff --git a/example/resnet50_cifar10/train.py b/example/resnet50_cifar10/train.py index b18c3778de..c39d1bcf88 100755 --- a/example/resnet50_cifar10/train.py +++ b/example/resnet50_cifar10/train.py @@ -54,21 +54,15 @@ context.set_context(enable_loop_sink=True) context.set_context(enable_mem_reuse=True) if __name__ == '__main__': - if args_opt.do_eval: - context.set_context(enable_hccl=False) - else: - if args_opt.run_distribute: - context.set_context(enable_hccl=True) - context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, - mirror_mean=True) - auto_parallel_context().set_all_reduce_fusion_split_indices([140]) - init() - else: - context.set_context(enable_hccl=False) + if not args_opt.do_eval and args_opt.run_distribute: + context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True) + auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160]) + init() epoch_size = config.epoch_size net = resnet50(class_num=config.class_num) - loss = SoftmaxCrossEntropyWithLogits(sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') if args_opt.do_train: diff --git a/example/vgg16_cifar10/README.md b/example/vgg16_cifar10/README.md new file mode 100644 index 0000000000..d41f373a8b --- /dev/null +++ b/example/vgg16_cifar10/README.md @@ -0,0 +1,106 @@ +# VGG16 Example + +## Description + +This example is for VGG16 model training and evaluation. + +## Requirements + +- Install [MindSpore](https://www.mindspore.cn/install/en). + +- Download the dataset [CIFAR-10](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz). + +> Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows: +> ``` +> . 
+> ├── cifar-10-batches-bin # train dataset +> └── cifar-10-verify-bin # infer dataset +> ``` + +## Running the Example + +### Training + +``` +python train.py --data_path=your_data_path --device_id=6 > out.train.log 2>&1 & +``` +The python command above will run in the background, you can view the results through the file `out.train.log`. + +After training, you'll get some checkpoint files under the script folder by default. + +You will get the loss value as follows: +``` +# grep "loss is " out.train.log +epoch: 1 step: 781, loss is 2.093086 +epoch: 2 step: 781, loss is 1.827582 +... +``` + +### Evaluation + +``` +python eval.py --data_path=your_data_path --device_id=6 --checkpoint_path=./train_vgg_cifar10-70-781.ckpt > out.eval.log 2>&1 & +``` +The above python command will run in the background, you can view the results through the file `out.eval.log`. + +You will get the accuracy as follows: +``` +# grep "result: " out.eval.log +result: {'acc': 0.92} +``` + +### Distribute Training +``` +sh run_distribute_train.sh rank_table.json your_data_path +``` +The above shell script will run distribute training in the background, you can view the results through the file `train_parallel[X]/log`. + +You will get the loss value as follows: +``` +# grep "loss is " train_parallel*/log +train_parallel0/log:epoch: 1 step: 97, loss is 1.9060308 +train_parallel0/log:epoch: 2 step: 97, loss is 1.6003821 +... +train_parallel1/log:epoch: 1 step: 97, loss is 1.7095519 +train_parallel1/log:epoch: 2 step: 97, loss is 1.7133579 +... +... +``` +> About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). + +## Usage: + +### Training +``` +usage: train.py [--device_target TARGET][--data_path DATA_PATH] + [--device_id DEVICE_ID] + +parameters/options: + --device_target the training backend type, default is Ascend.
+ --data_path the storage path of dataset + --device_id the device which is used to train the model. + +``` + +### Evaluation + +``` +usage: eval.py [--device_target TARGET][--data_path DATA_PATH] + [--device_id DEVICE_ID][--checkpoint_path CKPT_PATH] + +parameters/options: + --device_target the evaluation backend type, default is Ascend. + --data_path the storage path of dataset + --device_id the device which is used to evaluate the model. + --checkpoint_path the checkpoint file path used to evaluate the model. +``` + +### Distribute Training + +``` +Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH] + +parameters/options: + MINDSPORE_HCCL_CONFIG_PATH HCCL configuration file path. + DATA_PATH the storage path of dataset. +``` diff --git a/example/vgg16_cifar10/dataset.py b/example/vgg16_cifar10/dataset.py index 4e82beb2e3..e8dfd777e6 100644 --- a/example/vgg16_cifar10/dataset.py +++ b/example/vgg16_cifar10/dataset.py @@ -28,7 +28,11 @@ def create_dataset(data_home, repeat_num=1, training=True): data_dir = os.path.join(data_home, "cifar-10-batches-bin") if not training: data_dir = os.path.join(data_home, "cifar-10-verify-bin") - data_set = ds.Cifar10Dataset(data_dir) + + rank_size = int(os.environ.get("RANK_SIZE")) if os.environ.get("RANK_SIZE") else None + rank_id = int(os.environ.get("RANK_ID")) if os.environ.get("RANK_ID") else None + data_set = ds.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id) + resize_height = cfg.image_height resize_width = cfg.image_width rescale = 1.0 / 255.0 diff --git a/example/vgg16_cifar10/eval.py b/example/vgg16_cifar10/eval.py index b034183373..68c23d250f 100644 --- a/example/vgg16_cifar10/eval.py +++ b/example/vgg16_cifar10/eval.py @@ -37,9 +37,9 @@ if __name__ == '__main__': context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) context.set_context(device_id=args_opt.device_id) - context.set_context(enable_mem_reuse=True, enable_hccl=False) + 
context.set_context(enable_mem_reuse=True) - net = vgg16(batch_size=cfg.batch_size, num_classes=cfg.num_classes) + net = vgg16(num_classes=cfg.num_classes) opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, cfg.momentum, weight_decay=cfg.weight_decay) loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) diff --git a/example/vgg16_cifar10/run_distribute_train.sh b/example/vgg16_cifar10/run_distribute_train.sh new file mode 100755 index 0000000000..c9b8dfc48f --- /dev/null +++ b/example/vgg16_cifar10/run_distribute_train.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] +then + echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]" +exit 1 +fi + +if [ ! -f $1 ] +then + echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" +exit 1 +fi + +if [ ! 
-d $2 ] +then + echo "error: DATA_PATH=$2 is not a directory" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=8 +export RANK_SIZE=8 +export MINDSPORE_HCCL_CONFIG_PATH=$1 + +for((i=0; i<${DEVICE_NUM}; i++)) +do + export DEVICE_ID=$i + export RANK_ID=$i + rm -rf ./train_parallel$i + mkdir ./train_parallel$i + cp *.py ./train_parallel$i + cp *.sh ./train_parallel$i + cd ./train_parallel$i || exit + echo "start training for rank $RANK_ID, device $DEVICE_ID" + env > env.log + python train.py --data_path=$2 --device_id=$i &> log & + cd .. +done diff --git a/example/vgg16_cifar10/train.py b/example/vgg16_cifar10/train.py index 32cd344d50..52ba0ecdf4 100644 --- a/example/vgg16_cifar10/train.py +++ b/example/vgg16_cifar10/train.py @@ -17,16 +17,18 @@ python train.py --data_path=$DATA_HOME --device_id=$DEVICE_ID """ import argparse +import os import random import numpy as np import mindspore.nn as nn from mindspore import Tensor +from mindspore.communication.management import init from mindspore.nn.optim.momentum import Momentum -from mindspore.train.model import Model +from mindspore.train.model import Model, ParallelMode from mindspore import context -from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor from mindspore.model_zoo.vgg import vgg16 -import dataset +from dataset import create_dataset from config import cifar_cfg as cfg random.seed(1) np.random.seed(1) @@ -62,17 +64,31 @@ if __name__ == '__main__': context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) context.set_context(device_id=args_opt.device_id) - context.set_context(enable_mem_reuse=True, enable_hccl=False) + context.set_context(enable_task_sink=True) + context.set_context(enable_loop_sink=True) + context.set_context(enable_mem_reuse=True) - net = vgg16(batch_size=cfg.batch_size, num_classes=cfg.num_classes) - lr = lr_steps(0, 
lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=50000 // cfg.batch_size) + device_num = int(os.environ.get("DEVICE_NUM", 1)) + if device_num > 1: + context.reset_auto_parallel_context() + context.set_context(enable_hccl=True) + context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True) + init() + + dataset = create_dataset(args_opt.data_path, cfg.epoch_size) + batch_num = dataset.get_dataset_size() + + net = vgg16(num_classes=cfg.num_classes) + lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num) opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay) loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) - model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) + model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}, + amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None) - dataset = dataset.create_dataset(args_opt.data_path, cfg.epoch_size) - batch_num = dataset.get_dataset_size() config_ck = CheckpointConfig(save_checkpoint_steps=batch_num * 5, keep_checkpoint_max=cfg.keep_checkpoint_max) + time_cb = TimeMonitor(data_size=batch_num) ckpoint_cb = ModelCheckpoint(prefix="train_vgg_cifar10", directory="./", config=config_ck) loss_cb = LossMonitor() - model.train(cfg.epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb]) + model.train(cfg.epoch_size, dataset, callbacks=[time_cb, ckpoint_cb, loss_cb]) + print("train success") diff --git a/example/yolov3_coco2017/README.md b/example/yolov3_coco2017/README.md new file mode 100644 index 0000000000..b35d22f4b3 --- /dev/null +++ b/example/yolov3_coco2017/README.md @@ -0,0 +1,94 @@ +# YOLOv3 Example + +## Description + +YOLOv3 network based on ResNet-18, with support for training and evaluation. 
+ +## Requirements + +- Install [MindSpore](https://www.mindspore.cn/install/en). + +- Dataset + + We use coco2017 as training dataset. + + 1. Download coco2017: [train2017](http://images.cocodataset.org/zips/train2017.zip), [val2017](http://images.cocodataset.org/zips/val2017.zip), [test2017](http://images.cocodataset.org/zips/test2017.zip), [annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip). The directory structure is as follows: + > ``` + > . + > ├── annotations # annotation jsons + > ├── train2017 # train dataset + > └── val2017 # infer dataset + > ``` + + 2. Organize the dataset information into a TXT file, each row in the file is as follows: + + ``` + train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2 + ``` + + Each row is an image annotation which is split by spaces, the first column is a relative path of image, the others are box and class information of the format [xmin,ymin,xmax,ymax,class]. `dataset.py` is the parsing script, we read image from an image path joined by the `image_dir`(dataset directory) and the relative path in `anno_path`(the TXT file path), `image_dir` and `anno_path` are external inputs. + + +## Running the Example + +### Training + +To train the model, run `train.py` with the dataset `image_dir`, `anno_path` and `mindrecord_dir`. If the `mindrecord_dir` is empty, it will generate [mindrecord](https://www.mindspore.cn/tutorial/en/master/use/data_preparation/converting_datasets.html) file by `image_dir` and `anno_path`(the absolute image path is joined by the `image_dir` and the relative path in `anno_path`). **Note if `mindrecord_dir` isn't empty, it will use `mindrecord_dir` rather than `image_dir` and `anno_path`.** + +- Stand alone mode + + ``` + sh run_standalone_train.sh 0 50 ./Mindrecord_train ./dataset ./dataset/train.txt + + ``` + + The input variables are device id, epoch size, mindrecord directory path, dataset directory path and train TXT file path. 
+ + +- Distributed mode + + ``` + sh run_distribute_train.sh 8 150 /data/Mindrecord_train /data /data/train.txt /data/hccl.json + ``` + + The input variables are device numbers, epoch size, mindrecord directory path, dataset directory path, train TXT file path and [hccl json configuration file](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). **It is better to use absolute path.** + +You will get the loss value and time of each step as follows: + +``` +epoch: 145 step: 156, loss is 12.202981 +epoch time: 25599.22742843628, per step time: 164.0976117207454 +epoch: 146 step: 156, loss is 16.91706 +epoch time: 23199.971675872803, per step time: 148.7177671530308 +epoch: 147 step: 156, loss is 13.04007 +epoch time: 23801.95164680481, per step time: 152.57661312054364 +epoch: 148 step: 156, loss is 10.431475 +epoch time: 23634.241580963135, per step time: 151.50154859591754 +epoch: 149 step: 156, loss is 14.665991 +epoch time: 24118.8325881958, per step time: 154.60790120638333 +epoch: 150 step: 156, loss is 10.779521 +epoch time: 25319.57221031189, per step time: 162.30495006610187 +``` + +Note the results are for two-classification (person and face) using our own annotations with coco2017; you can change `num_classes` in `config.py` to train your dataset. And we will support 80 classifications in coco2017 in the near future. + +### Evaluation + +To eval, run `eval.py` with the dataset `image_dir`, `anno_path`(eval txt), `mindrecord_dir` and `ckpt_path`. `ckpt_path` is the path of [checkpoint](https://www.mindspore.cn/tutorial/en/master/use/saving_and_loading_model_parameters.html) file. + +``` +sh run_eval.sh 0 yolo.ckpt ./Mindrecord_eval ./dataset ./dataset/eval.txt +``` + +The input variables are device id, checkpoint path, mindrecord directory path, dataset directory path and eval TXT file path. 
+ +You will get the precision and recall value of each class: + +``` +class 0 precision is 88.18%, recall is 66.00% +class 1 precision is 85.34%, recall is 79.13% +``` + +Note the precision and recall values are results of two-classification(person and face) used our own annotations with coco2017. + + diff --git a/example/yolov3_coco2017/dataset.py b/example/yolov3_coco2017/dataset.py index 9c6a0f362d..23d34e0f4f 100644 --- a/example/yolov3_coco2017/dataset.py +++ b/example/yolov3_coco2017/dataset.py @@ -18,8 +18,8 @@ from __future__ import division import os import numpy as np -from PIL import Image from matplotlib.colors import rgb_to_hsv, hsv_to_rgb +from PIL import Image import mindspore.dataset as de from mindspore.mindrecord import FileWriter import mindspore.dataset.transforms.vision.c_transforms as C diff --git a/example/yolov3_coco2017/train.py b/example/yolov3_coco2017/train.py index c7d28a8350..bccc66d996 100644 --- a/example/yolov3_coco2017/train.py +++ b/example/yolov3_coco2017/train.py @@ -90,13 +90,11 @@ if __name__ == '__main__': if args_opt.distribute: device_num = args_opt.device_num context.reset_auto_parallel_context() - context.set_context(enable_hccl=True) context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, device_num=device_num) init() rank = args_opt.device_id % device_num else: - context.set_context(enable_hccl=False) rank = 0 device_num = 1 diff --git a/mindspore/_akg/__init__.py b/mindspore/_akg/__init__.py index e3dceaf35e..d0c1f0ffe4 100644 --- a/mindspore/_akg/__init__.py +++ b/mindspore/_akg/__init__.py @@ -13,51 +13,6 @@ # limitations under the License. 
"""__init__""" -from __future__ import absolute_import as _abs -import sys -import os - -def AKGAddPath(): - """_akg add path.""" - pwd = os.path.dirname(os.path.realpath(__file__)) - tvm_path = os.path.realpath(pwd) - if tvm_path not in sys.path: - sys.path.insert(0, tvm_path) - else: - sys.path.remove(tvm_path) - sys.path.insert(0, tvm_path) - - -class AKGMetaPathFinder: - """class AKGMetaPath finder.""" - - def find_module(self, fullname, path=None): - """method _akg find module.""" - if fullname.startswith("_akg.tvm"): - rname = fullname[5:] - return AKGMetaPathLoader(rname) - if fullname.startswith("_akg.topi"): - rname = fullname[5:] - return AKGMetaPathLoader(rname) - return None - - -class AKGMetaPathLoader: - """class AKGMetaPathLoader loader.""" - def __init__(self, rname): - self.__rname = rname - - def load_module(self, fullname): - if self.__rname in sys.modules: - sys.modules.pop(self.__rname) - AKGAddPath() - __import__(self.__rname, globals(), locals()) - self.__target_module = sys.modules[self.__rname] - sys.modules[fullname] = self.__target_module - return self.__target_module - - -sys.meta_path.insert(0, AKGMetaPathFinder()) - +from . import add_path from .op_build import op_build from .message import compilewithjson diff --git a/mindspore/_akg/add_path.py b/mindspore/_akg/add_path.py new file mode 100644 index 0000000000..a9fd0d4a09 --- /dev/null +++ b/mindspore/_akg/add_path.py @@ -0,0 +1,61 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""add tvm path""" +import sys +import os + + +def AKGAddPath(): + """_akg add path.""" + pwd = os.path.dirname(os.path.realpath(__file__)) + tvm_path = os.path.realpath(pwd) + if tvm_path not in sys.path: + sys.path.insert(0, tvm_path) + else: + sys.path.remove(tvm_path) + sys.path.insert(0, tvm_path) + + +class AKGMetaPathFinder: + """class AKGMetaPath finder.""" + + def find_module(self, fullname, path=None): + """method _akg find module.""" + if fullname.startswith("_akg.tvm"): + rname = fullname[5:] + return AKGMetaPathLoader(rname) + if fullname.startswith("_akg.topi"): + rname = fullname[5:] + return AKGMetaPathLoader(rname) + return None + + +class AKGMetaPathLoader: + """class AKGMetaPathLoader loader.""" + + def __init__(self, rname): + self.__rname = rname + + def load_module(self, fullname): + if self.__rname in sys.modules: + sys.modules.pop(self.__rname) + AKGAddPath() + __import__(self.__rname, globals(), locals()) + self.__target_module = sys.modules[self.__rname] + sys.modules[fullname] = self.__target_module + return self.__target_module + + +sys.meta_path.insert(0, AKGMetaPathFinder()) diff --git a/mindspore/_akg/gpu/squeeze_grad.py b/mindspore/_akg/gpu/squeeze_grad.py index 8180ff9638..ae31de8e84 100644 --- a/mindspore/_akg/gpu/squeeze_grad.py +++ b/mindspore/_akg/gpu/squeeze_grad.py @@ -15,14 +15,14 @@ """squeeze grad""" import _akg.topi as topi -def SqueezeGrad(y_grad, x_shape, axis=None): + +def SqueezeGrad(y_grad, x_shape): """ Computes gradients for squeeze op. Args: y_grad (tvm.tensor.Tensor): the gradient needed to be propagation. x_shape (Union[list, tuple]): output Tensor shape. - axis (Union[list, tuple, int, None], optional): eliminated axis by squeeze. Returns: tvm.tensor.Tensor: output gradient. 
diff --git a/mindspore/_akg/message.py b/mindspore/_akg/message.py index 4528771848..3d1f81f914 100644 --- a/mindspore/_akg/message.py +++ b/mindspore/_akg/message.py @@ -46,7 +46,8 @@ def compilewithjson(json_str): impl_path = os.path.realpath(kernel_info['impl_path']) if os.path.isfile(impl_path): custom_mod_name = Path(impl_path).resolve().stem - mod_spec = importlib.util.spec_from_file_location(custom_mod_name, impl_path) + mod_spec = importlib.util.spec_from_file_location( + custom_mod_name, impl_path) custom_mod = importlib.util.module_from_spec(mod_spec) mod_spec.loader.exec_module(custom_mod) op_func = getattr(custom_mod, op_name, None) @@ -57,7 +58,8 @@ def compilewithjson(json_str): op_func = getattr(gpu, op_name, None) if op_func is None: - logging.error("this op not supported, please check op name %s", str(op_name)) + logging.error( + "this op not supported, please check op name %s", str(op_name)) return False args = {} @@ -87,25 +89,16 @@ def compilewithjson(json_str): output = op_func(**args) - schedule_func = None - attrs = {} if isinstance(output, (list, tuple)): from inspect import isfunction tmp_outputs = [] for elem in output: - if isfunction(elem): - schedule_func = elem - elif isinstance(elem, dict): - for key, value in elem.items(): - if key not in attrs or not attrs[key]: - attrs[key] = value - else: + if not isfunction(elem) or isinstance(elem, dict): tmp_outputs.append(elem) output = tmp_outputs else: output = [output] - tsr = tsr + [i for i in output if TensorUtils.is_output_value(i)] - return op_build([op_name], output, tsr, schedule_func, processor, kernel_info['op'], attrs) + return op_build([op_name], output, tsr, processor, kernel_info['op']) diff --git a/mindspore/_akg/op_build.py b/mindspore/_akg/op_build.py index 44a250bd9e..92101f657e 100644 --- a/mindspore/_akg/op_build.py +++ b/mindspore/_akg/op_build.py @@ -24,13 +24,13 @@ import _akg from _akg import save_gpu_param as gpu_utils from _akg.utils import validation_check as 
vc_util -MS_CUDA_KERNEL_PATH = "/tmp/cuda_meta/" -@vc_util.check_input_type(list, (list, tuple), (list, tuple), (types.FunctionType, type(None)), str, str, dict) -def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attrs): +@vc_util.check_input_type(list, (list, tuple), (list, tuple), str, str) +def op_build(opnames, computes, args, device, kernel_name): """op_build""" + kernel_meta_path = "./cuda_meta_" + str(os.getpid()) + "/" if device == "cuda": - cuda_path = os.path.realpath(MS_CUDA_KERNEL_PATH) + cuda_path = os.path.realpath(kernel_meta_path) if not os.path.isdir(cuda_path): os.makedirs(cuda_path) if not opnames: @@ -43,7 +43,7 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr logging.error("no schedule func found %s", str(schedule_name)) return None - ptx_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".ptx") + ptx_file = os.path.realpath(kernel_meta_path + kernel_name + ".ptx") if os.path.exists(ptx_file): os.chmod(ptx_file, 0o600) try: @@ -55,11 +55,12 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr foo = _akg.tvm.build(s, args, device, name=kernel_name) ptx_code = foo.imported_modules[0].get_source("ptx") file.write(ptx_code) - json_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".json") + json_file = os.path.realpath( + kernel_meta_path + kernel_name + ".json") kernel_info = (ptx_code, json_file, kernel_name) gpu_utils.save_gpu_params(s, args, kernel_info) os.chmod(ptx_file, 0o400) - except Exception: + except IOError: logging.error(traceback.format_exc()) return None return True diff --git a/mindspore/_akg/ops/math/mean.py b/mindspore/_akg/ops/math/mean.py index 8764387d33..e8300f22fc 100644 --- a/mindspore/_akg/ops/math/mean.py +++ b/mindspore/_akg/ops/math/mean.py @@ -17,7 +17,7 @@ import _akg.topi import _akg.tvm from _akg.utils import format_transform as ft_util from _akg.utils import validation_check as vc_util -from _akg.ops.math 
import sum +from _akg.ops.math import sum_value @vc_util.check_input_type(_akg.tvm.tensor.Tensor, (list, tuple, int, type(None)), (bool, type(None))) @@ -41,7 +41,7 @@ def mean(data, axis=None, keepdims=False): count = 1 for i in axis: count *= shape[i] - output, _ = sum.sum_value(data, axis, keepdims) + output, _ = sum_value.sum_value(data, axis, keepdims) res = _akg.topi.divide(output, count) return res diff --git a/mindspore/_akg/ops/math/sum.py b/mindspore/_akg/ops/math/sum_value.py similarity index 100% rename from mindspore/_akg/ops/math/sum.py rename to mindspore/_akg/ops/math/sum_value.py diff --git a/mindspore/_checkparam.py b/mindspore/_checkparam.py index 707ca748b4..0c101bf1a8 100644 --- a/mindspore/_checkparam.py +++ b/mindspore/_checkparam.py @@ -128,7 +128,7 @@ class Validator: @staticmethod def check_number(arg_name, arg_value, value, rel, prim_name): - """Integer value judgment.""" + """Number value judgment.""" rel_fn = Rel.get_fns(rel) if not rel_fn(arg_value, value): rel_str = Rel.get_strs(rel).format(value) @@ -210,7 +210,7 @@ class Validator: type_names = [] for t in valid_values: type_names.append(str(t)) - types_info = '[' + ", ".join(type_names) + ']' + types_info = '[' + ', '.join(type_names) + ']' raise TypeError(f'For \'{prim_name}\' type of `{arg_key}` should be in {types_info},' f' but got {elem_type}.') return (arg_key, elem_type) @@ -653,30 +653,6 @@ def check_output_data(data): raise RuntimeError('Executor return data ' + str(data) + ', please check your net or input data.') -def check_axis_type_int(axis): - """Check axis type.""" - if not isinstance(axis, int): - raise TypeError('Wrong type for axis, should be int.') - - -def check_axis_range(axis, rank): - """Check axis range.""" - if not -rank <= axis < rank: - raise ValueError('The axis should be in range [{}, {}),'' but got {}.'.format(-rank, rank, axis)) - - -def check_attr_int(attr_name, attr): - """Check int type.""" - if not isinstance(attr, int): - raise TypeError("The 
attr {} should be int, but got {}.".format(attr_name, type(attr))) - - -def check_t_in_range(t): - """Check input range.""" - if t not in (mstype.float16, mstype.float32, mstype.float64, mstype.int32, mstype.int64): - raise ValueError("The param T should be (float16, float32, float64, int32, int64).") - - once = _expand_tuple(1) twice = _expand_tuple(2) triple = _expand_tuple(3) diff --git a/mindspore/_extends/builtin_operations.py b/mindspore/_extends/builtin_operations.py index 6fea07425e..a423fe6395 100644 --- a/mindspore/_extends/builtin_operations.py +++ b/mindspore/_extends/builtin_operations.py @@ -86,7 +86,7 @@ def identity(x): def zeros_like_tensor(x): """Implement `zeros_like_tensor`.""" x = x.asnumpy() - value = Tensor(np.zeros(x.shape)) + value = Tensor(np.zeros(x.shape).astype(np.float32)) return value diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/common.py b/mindspore/_extends/parallel_compile/tbe_compiler/common.py index 6258cf8d45..39866d2bac 100644 --- a/mindspore/_extends/parallel_compile/tbe_compiler/common.py +++ b/mindspore/_extends/parallel_compile/tbe_compiler/common.py @@ -122,10 +122,12 @@ def get_args(op_info, arg_type): elif arg_type == 'attrs': for item in op_info[arg_type]: - if 'value' not in item: - raise ValueError("Json string Errors, attr key:value not found.") - if item["name"] != "isRef": - args.append(item['value']) + if item["valid"]: + if 'value' not in item: + raise ValueError("Json string Errors, attr key:value not found.") + if item["name"] != "isRef": + args.append(item['value']) + return args diff --git a/mindspore/_extends/parse/__init__.py b/mindspore/_extends/parse/__init__.py index f8a34057c5..62ba2e5406 100644 --- a/mindspore/_extends/parse/__init__.py +++ b/mindspore/_extends/parse/__init__.py @@ -18,15 +18,15 @@ Interfaces for parser module in c++. 
from .parser import (Parser, create_obj_instance, generate_scope, get_bprop_method_of_class, get_class_instance_type, - get_class_member_namespace_symbol, + get_class_member_namespace_symbol, create_slice_obj, get_dataclass_attributes, get_dataclass_methods, get_module_namespace, get_obj_type, get_object_key, get_parse_method_of_class, get_scope_name, - is_class_member, parse_cb, resolve_symbol) + is_class_member, parse_cb, resolve_symbol, create_ellipsis_obj) from .serialize import * __all__ = ['parse_cb', 'get_parse_method_of_class', 'get_bprop_method_of_class', 'resolve_symbol', 'get_object_key', 'get_class_instance_type', 'is_class_member', 'get_obj_type', 'create_obj_instance', 'get_module_namespace', 'get_class_member_namespace_symbol', 'Parser', 'get_dataclass_attributes', 'get_dataclass_methods', 'dump_obj', 'load_obj', - 'get_dataclass_methods', 'get_scope_name'] + 'get_dataclass_methods', 'get_scope_name', 'create_slice_obj', 'create_ellipsis_obj'] diff --git a/mindspore/_extends/parse/parser.py b/mindspore/_extends/parse/parser.py index e88c9c15e9..34a3a6c59e 100644 --- a/mindspore/_extends/parse/parser.py +++ b/mindspore/_extends/parse/parser.py @@ -29,6 +29,7 @@ from mindspore.common.dtype import pytype_to_dtype from mindspore.common.api import _MindSporeFunction from .namespace import CellNamespace, ClosureNamespace, ClassMemberNamespace from .resources import parse_object_map, convert_object_map, trope_ns, SYMBOL_UNDEFINE, NO_IMPLEMENT +from ..utils import Slice, Ellipsis_ # define return value RET_SUCCESS = 0 @@ -69,6 +70,15 @@ parse_expr_statement_white_list = ( "append", ) +def create_ellipsis_obj(): + """Create Slice object""" + return Ellipsis_() + + +def create_slice_obj(start, end, step): + """Create Slice object""" + return Slice(start, end, step) + def parse_cb(func, parse_method=None): """Implements the function of parse.""" diff --git a/mindspore/_extends/utils.py b/mindspore/_extends/utils.py index 8469ddda8b..fecbf546f5 100644 --- 
a/mindspore/_extends/utils.py +++ b/mindspore/_extends/utils.py @@ -19,6 +19,7 @@ import logging import os import inspect from functools import wraps +from dataclasses import dataclass def cal_sha256(file_path): @@ -99,3 +100,20 @@ def cell_attr_register(fn=None, attrs=None): if fn is not None: return wrap_cell(fn) return wrap_cell + + +@dataclass +class Slice: + """ + Slice class + """ + start: int + end: int + step: int + + +@dataclass +class Ellipsis_: + """ + Ellipsis class + """ diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index eb33de1c4b..4c6ceb38e1 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -1,14 +1,11 @@ +## common setting include_directories(${CMAKE_CURRENT_SOURCE_DIR}) -if(ENABLE_CPU) - include(ExternalProject) - add_compile_definitions(CPUSESSION) - file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "device/cpu/*.cc" - ) - if (CMAKE_SYSTEM_NAME MATCHES "Windows") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes -DHAVE_SNPRINTF") - add_compile_definitions(BUILDING_DLL) - endif() +include_directories(${CMAKE_BINARY_DIR}) +link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine) + +if (CMAKE_SYSTEM_NAME MATCHES "Windows") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes -DHAVE_SNPRINTF") + add_compile_definitions(BUILDING_DLL) endif() if(ENABLE_GPU) @@ -20,7 +17,7 @@ if(ENABLE_GPU) enable_language(CUDA) if(NOT CUDA_PATH OR CUDA_PATH STREQUAL "") if(DEFINED ENV{CUDA_HOME}) - set(CUDA_PATH $ENV{CUDA_HOME}) + set(CUDA_PATH $ENV{CUDA_HOME}) else() set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR}) endif() @@ -41,261 +38,101 @@ if(ENABLE_GPU) "kernel/akg/akgkernelbuild.cc" "kernel/akg/akg_kernel_attrs_process.cc" ) - file(GLOB_RECURSE GPU_KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "kernel/gpu/*.cc" - ) + list(APPEND CUDA_NVCC_FLAGS -arch=sm_53) list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc") - 
add_library(gpu_queue SHARED "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc") - target_link_libraries(gpu_queue ${CMAKE_THREAD_LIBS_INIT} ${CUDA_PATH}/lib64/libcudart.so) - - - file(GLOB_RECURSE MS_STEPS_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "session/gpu_session.cc" - ) list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/mpi/mpi_initializer.cc" "device/gpu/distribution/collective_wrapper.cc" "device/gpu/distribution/mpi_wrapper.cc" "device/gpu/distribution/nccl_wrapper.cc" ) - list(REMOVE_ITEM GPU_KERNEL_SRC_LIST "device/gpu/mpi/mpi_initializer.cc" - "kernel/gpu/nccl/nccl_gpu_kernel.cc" - ) set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REPLACE "-std=c++17" "-std=c++11" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST}) set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS}) +endif () - if(ENABLE_MPI) - include(ExternalProject) - - file(GLOB_RECURSE GPU_NCCL_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "kernel/gpu/nccl/*.cc" - ) - file(GLOB_RECURSE GPU_MPI_PYTHON_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "device/gpu/mpi/mpi_initializer.cc" - ) - add_library(gpu_collective SHARED "device/gpu/distribution/collective_wrapper.cc" - "device/gpu/distribution/mpi_wrapper.cc" - "device/gpu/distribution/nccl_wrapper.cc" - ) - endif() -endif() - +## make flatuffer files include_directories("${CMAKE_BINARY_DIR}/predict/schema/inner") file(GLOB_RECURSE FLATBUFFER_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/schema/*.fbs") set(FLATBUFFER_OU "${CMAKE_BINARY_DIR}/predict/schema/inner") -ms_build_flatbuffers("${FLATBUFFER_IN}" "${FLATBUFFER_IN}" GENERATED_OUTPUT_DIR "${FLATBUFFER_OU}") +ms_build_flatbuffers("${FLATBUFFER_IN}" "${FLATBUFFER_IN}" flat_input "${FLATBUFFER_OU}") -file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "ir/*.cc" - "ir/dtype/*.cc" - "utils/context/ms_context.cc" - "utils/symbolic.cc" - "utils/tensorprint_utils.cc" - "utils/convert_utils.cc" - "utils/graph_utils.cc" - 
"utils/misc.cc" - "utils/callbacks.cc" - "utils/profile.cc" - "utils/base_ref.cc" - "utils/summary/event_writer.cc" - "utils/log_adapter.cc" - "utils/comm_manager.cc" - "utils/any.cc" - "utils/config_manager.cc" - "utils/system/file_system.cc" - "utils/system/crc32c.cc" - "common/*.cc" - "parallel/*.cc" - "pipeline/pipeline.cc" - "pipeline/resource.cc" - "pipeline/pass.cc" - "pipeline/action.cc" - "pipeline/validator.cc" - "pipeline/remove_value_node_dup.cc" - "pipeline/parse/*.cc" - "pipeline/static_analysis/*.cc" - "optimizer/*.cc" - "debug/*.cc" - "onnx/onnx_exporter.cc" - "operator/*.cc" - "session/kernel_graph.cc" - "utils/node_utils.cc" - "session/session_basic.cc" - "session/session_factory.cc" - "session/anf_runtime_algorithm.cc" - "vm/*.cc" - "pynative/base.cc" - "pynative/pynative_execute.cc" - "pybind_api/*.cc" - "device/common/*.cc" - "kernel/kernel_query.cc" - "kernel/kernel_build_info.cc" - "kernel/kash/*.cc" - "device/kernel_info.cc" - "device/kernel_runtime.cc" - "device/memory_manager.cc" - "device/kernel_runtime_manager.cc" - "device/convert_tensor_utils.cc" - "pre_activate/common/*.cc" - "pre_activate/pass/*.cc" - "pre_activate/gpu/*.cc" - "pre_activate/mem_reuse/*.cc" - "predict/predict.cc" - "predict/generator/utils/ir_model_util.cc" - "predict/converter/*.cc" - "predict/converter/attr_utils/*.cc" - "predict/converter/lite_model/*.cc" - "predict/converter/lite_model/operations/*.cc" - "kernel/common_utils.cc" - "kernel/oplib/*.cc" - "kernel/kash/*.cc" - "device/gpu/distribution/collective_init.cc" - ) -if (ENABLE_CPU) - list(REMOVE_ITEM MINDSPORE_SRC_LIST "device/gpu/distribution/collective_init.cc") - if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") - list(REMOVE_ITEM MINDSPORE_SRC_LIST "kernel/kernel_query.cc") - endif() -endif() -if (NOT ENABLE_GPU) - list(APPEND MINDSPORE_SRC_LIST "device/gpu/distribution/collective_fake_init.cc") -endif() -file(GLOB_RECURSE MEM_REUSE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "pre_activate/mem_reuse/*.cc" 
- ) -if(NOT ENABLE_DUMP_E2E) - list(REMOVE_ITEM MINDSPORE_SRC_LIST "debug/e2e_dump.cc") -endif() -file(COPY "${ms_onnx_INC}/onnx/onnx.proto" DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}) -file(GLOB_RECURSE ONNX_PROTO RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/onnx.proto") -message("onnx proto path is : ${ONNX_PROTO}") +## make protobuf files +file(COPY "${ms_onnx_INC}/onnx/onnx.proto" DESTINATION ${CMAKE_BINARY_DIR}/proto) +file(GLOB ONNX_PROTO "" ${CMAKE_BINARY_DIR}/proto/onnx.proto) +message("onnx proto path is :" ${ONNX_PROTO}) ms_protobuf_generate(ONNX_PROTO_SRCS ONNX_PROTO_HDRS ${ONNX_PROTO}) list(APPEND MINDSPORE_PROTO_LIST ${ONNX_PROTO_SRCS}) -if(ENABLE_DUMP_PROTO) +if (ENABLE_DUMP_PROTO) include_directories(${CMAKE_BINARY_DIR}) - file(GLOB_RECURSE PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "utils/node_strategy.proto" - ) + file(GLOB_RECURSE PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "utils/node_strategy.proto") ms_protobuf_generate(PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) file(GLOB_RECURSE PROTO_PY RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "utils/anf_ir.proto" - "utils/summary.proto" - "utils/checkpoint.proto" - ) + "utils/anf_ir.proto" + "utils/summary.proto" + "utils/checkpoint.proto" + ) ms_protobuf_generate_py(PY_SRCS PY_HDRS PY_PYS ${PROTO_PY}) - list(APPEND MINDSPORE_PROTO_DUMP_LIST ${PROTO_SRCS}) - list(APPEND MINDSPORE_PROTO_DUMP_LIST ${PY_SRCS}) - list(APPEND MINDSPORE_SRC_LIST "debug/dump_proto.cc") - list(APPEND MINDSPORE_SRC_LIST "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") - add_compile_definitions(ENABLE_DUMP_PROTO) -endif() - -if(ENABLE_GE) - file(GLOB_RECURSE GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "transform/*.cc" - "pynative/pynative_execute_ge.cc" - "utils/callbacks_ge.cc" - "pipeline/pipeline_ge.cc" - ) - list(APPEND MINDSPORE_SRC_LIST ${GE_SRC_LIST}) -endif() + list(APPEND MINDSPORE_PROTO_LIST ${PROTO_SRCS}) + list(APPEND MINDSPORE_PROTO_LIST ${PY_SRCS}) +endif () -if(ENABLE_D) +if 
(ENABLE_D) include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu") - file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "kernel/aicpu/proto/*.proto" - ) - ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN}) - include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir") - file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "predict/proto/*.proto" - ) + file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/aicpu/proto/*.proto") + ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN}) + + file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto") ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER}) - file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "device/ascend/*.cc" - "device/ascend/profiling/*.cc" - "device/ascend/tasksink/*.cc" - "device/kernel_adjust.cc" - "kernel/kernel_fusion.cc" - "kernel/tbe/*.cc" - "pre_activate/ascend/*.cc" - "transform/*.cc" - "pipeline/pipeline_ge.cc" - ) - list(APPEND MINDSPORE_SRC_LIST ${D_SRC_LIST}) - list(APPEND MINDSPORE_PROTO_AICPU_LIST ${PROTOSRCS}) - list(APPEND MINDSPORE_PROTO_PREDICT_LIST ${PREDICT_PROTOSRCS}) + list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS}) + list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS}) - file(GLOB_RECURSE MS_STEPS_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "session/ascend_session.cc" - ) - file(GLOB_RECURSE MS_TASKINFO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "device/ascend/tasksink/taskinfo/*.cc") - file(GLOB_RECURSE MS_AICPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "kernel/aicpu/*.cc" - ) - file(GLOB_RECURSE MS_RT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "kernel/mng/*.cc" - ) - file(GLOB_RECURSE MS_HCCL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "kernel/hccl/*.cc" - ) - file(GLOB_RECURSE MS_PREDICT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "predict/generator/ir/*.cc" - ) add_compile_definitions(ENABLE_D) -endif() - -file(GLOB_RECURSE 
MS_GVAR_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "gvar/*.cc" - ) - -add_library(mindspore_gvar SHARED ${MS_GVAR_SRC_LIST}) -add_library(mindspore STATIC ${MINDSPORE_SRC_LIST}) -add_dependencies(mindspore GENERATED_OUTPUT_DIR) +endif () -if(ENABLE_D) - list(APPEND MINDSPORE_PROTO_LIST ${MINDSPORE_PROTO_AICPU_LIST}) -endif() -if(ENABLE_DUMP_PROTO) - list(APPEND MINDSPORE_PROTO_LIST ${MINDSPORE_PROTO_DUMP_LIST}) -endif() -list(APPEND MINDSPORE_PROTO_LIST ${MINDSPORE_PROTO_PREDICT_LIST}) -if(MINDSPORE_PROTO_LIST) +if (MINDSPORE_PROTO_LIST) add_library(proto_input STATIC ${MINDSPORE_PROTO_LIST}) set_target_properties(proto_input PROPERTIES COMPILE_FLAGS "-Wno-unused-variable") - target_link_libraries(mindspore proto_input) endif() -if(APPLE) - set_target_properties(mindspore_gvar PROPERTIES MACOSX_RPATH ON) -endif() +## make sub objects +set(SUB_COMP + transform pre_activate parallel pipeline device kernel common debug gvar ir onnx operator optimizer predict + pybind_api pynative session utils vm +) -link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine) +foreach (_comp ${SUB_COMP}) + add_subdirectory(${_comp}) + if (TARGET _mindspore_${_comp}_obj) + list(APPEND SUB_OBJECTS_SRC $) + add_dependencies(_mindspore_${_comp}_obj proto_input flat_input) + endif () +endforeach () + +add_library(mindspore STATIC ${SUB_OBJECTS_SRC}) +target_link_libraries(mindspore proto_input) +target_link_libraries(mindspore securec mindspore::flatbuffers) +if (NOT WIN32) + target_link_libraries(mindspore dl) +endif() if (ENABLE_GE) if(ENABLE_TRAIN) - target_link_libraries(mindspore graph ge_client_train) - else() - target_link_libraries(mindspore graph ge_client) - endif() - target_link_libraries(mindspore tsdclient) -elseif(ENABLE_D) - add_compile_definitions(NO_GE_CLIENT) - target_link_libraries(mindspore graph) -else() - add_compile_definitions(NO_GE_CLIENT) + target_link_libraries(mindspore ge_client_train) + else () + target_link_libraries(mindspore ge_client) + endif () 
+ target_link_libraries(mindspore graph tsdclient) endif() -if(ENABLE_D) +if (ENABLE_D) if (DEFINED ENV{D_LINK_PATH}) if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") MESSAGE("system processor matches aarch64") @@ -306,13 +143,13 @@ if(ENABLE_D) else () MESSAGE("system ${CMAKE_HOST_SYSTEM_PROCESSOR} not support") endif() - else() + else () MESSAGE("use system default lib") - if(DEFINED ENV{ASCEND_CUSTOM_PATH}) + if (DEFINED ENV{ASCEND_CUSTOM_PATH}) set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) - else() + else () set(ASCEND_PATH /usr/local/Ascend) - endif() + endif () set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) set(ASCEND_DRIVER_BACK_PATH ${ASCEND_PATH}/driver/lib64/driver) set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64) @@ -327,37 +164,14 @@ if(ENABLE_D) target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${PROFILING} ${HCCL} ${TSDCLIENT}) endif() -target_link_libraries(mindspore securec) -if (NOT WIN32) - target_link_libraries(mindspore dl) -endif() -target_link_libraries(mindspore mindspore::flatbuffers) # link protobuf if (ENABLE_D) target_link_libraries(mindspore mindspore::protobuf) endif() -if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") - target_link_libraries(mindspore ${PYTHON_LIBRARIES} mindspore_gvar) -endif() - # set c_expression building -if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") - set(PYTHON_MODULE_SOURCE ${MS_GVAR_SRC_LIST} - pipeline/init.cc - kernel/oplib/oplib.cc - ${MINDSPORE_SRC_LIST} ${MS_STEPS_SRC_LIST} ${MS_CCE_SRC_LIST} ${MS_AICPU_SRC_LIST} ${MS_TASKINFO_LIST} ${MS_RT_SRC_LIST} - ${GPU_NCCL_LIST} ${MS_HCCL_SRC_LIST} ${MS_PREDICT_SRC_LIST} ${CPU_SRC_LIST} ${MEM_REUSE_SRC_LIST} ${GPU_KERNEL_SRC_LIST}) -else() - set(PYTHON_MODULE_SOURCE - pipeline/init.cc - kernel/oplib/oplib.cc - ${MS_STEPS_SRC_LIST} ${MS_CCE_SRC_LIST} ${MS_AICPU_SRC_LIST} ${MS_TASKINFO_LIST} ${MS_RT_SRC_LIST} - ${GPU_NCCL_LIST} ${MS_HCCL_SRC_LIST} ${MS_PREDICT_SRC_LIST} ${CPU_SRC_LIST} ${MEM_REUSE_SRC_LIST} 
${GPU_KERNEL_SRC_LIST}) -endif() - set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) -pybind11_add_module(_c_expression ${PYTHON_MODULE_SOURCE}) +pybind11_add_module(_c_expression "pipeline/init.cc") MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}") if (CMAKE_SYSTEM_NAME MATCHES "Linux") @@ -372,55 +186,41 @@ else () MESSAGE(FATAL_ERROR "other platform: ${CMAKE_SYSTEM_NAME}") endif () - set(ORIGIN_PATH ${ORIGIN_PATH}/lib) set_target_properties(_c_expression PROPERTIES INSTALL_RPATH ${ORIGIN_PATH}) -if (WIN32) - target_link_libraries(_c_expression PRIVATE - mindspore::pybind11_module - securec - proto_input - mindspore::flatbuffers - ) -else() - target_link_libraries(_c_expression PRIVATE - mindspore::pybind11_module - mindspore - mindspore_gvar - ) -endif() -if(USE_GLOG) +if (CMAKE_SYSTEM_NAME MATCHES "Windows") + target_link_libraries(mindspore mindspore::pybind11_module) + target_link_libraries(mindspore mindspore_gvar) + target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive) +else () + target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive) + target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module) + target_link_libraries(_c_expression PRIVATE mindspore_gvar) +endif () + +if (USE_GLOG) target_link_libraries(_c_expression PRIVATE mindspore::glog) -endif() +endif () -if(ENABLE_DUMP_PROTO) +if (ENABLE_DUMP_PROTO) target_link_libraries(_c_expression PRIVATE mindspore::protobuf) -endif() +endif () -if(ENABLE_GPU) +if (ENABLE_GPU) message("add gpu lib to c_expression") - target_link_libraries(_c_expression PRIVATE - gpu_cuda_lib - gpu_queue - cublas + target_link_libraries(_c_expression PRIVATE gpu_cuda_lib gpu_queue cublas ${CUDA_PATH}/lib64/libcurand.so ${CUDNN_PATH}/lib64/libcudnn.so ${CUDA_PATH}/lib64/libcudart.so ${CUDA_PATH}/lib64/stubs/libcuda.so) - if(ENABLE_MPI) - pybind11_add_module(_ms_mpi ${GPU_MPI_PYTHON_LIST}) - target_link_libraries(_ms_mpi PRIVATE 
mindspore::pybind11_module mindspore::ompi) - target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl) - endif() -endif() - +endif () -if(ENABLE_CPU) +if (ENABLE_CPU) target_link_libraries(_c_expression PRIVATE mindspore::dnnl mindspore::mkldnn) -endif() +endif () -if(ENABLE_MINDDATA) +if (ENABLE_MINDDATA) add_subdirectory(mindrecord) add_subdirectory(dataset) -endif() +endif () diff --git a/mindspore/ccsrc/common/CMakeLists.txt b/mindspore/ccsrc/common/CMakeLists.txt index 1a1a5ae9e6..3d9634280d 100644 --- a/mindspore/ccsrc/common/CMakeLists.txt +++ b/mindspore/ccsrc/common/CMakeLists.txt @@ -1,2 +1,2 @@ - -add_library(_mindspore_common_obj OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/*.cc) \ No newline at end of file +file(GLOB_RECURSE _COMMON_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +add_library(_mindspore_common_obj OBJECT ${_COMMON_ALL_SRC_FILES}) diff --git a/mindspore/ccsrc/common/trans.cc b/mindspore/ccsrc/common/trans.cc index 1174be1f48..3e8d922971 100644 --- a/mindspore/ccsrc/common/trans.cc +++ b/mindspore/ccsrc/common/trans.cc @@ -101,13 +101,20 @@ const std::map, DataTypeTransMode> mode_map{ {std::pair(kNumberTypeInt64, kNumberTypeInt32), FROM_INT64_TO_INT32}, {std::pair(kNumberTypeUInt16, kNumberTypeInt32), FROM_UINT16_TO_INT32}}; -template -void TransDataSrc2Dst(const TypeIdArgs &args, void *dst, const size_t data_size) { - auto src_id = TypeIdSize(args.src_type); - auto dst_id = TypeIdSize(args.dst_type); - if (args.src_size / src_id != args.src_shape_size || args.dst_size / dst_id != args.dst_shape_size) { +void CheckMemSize(const TypeIdArgs &args) { + auto src_type_size = TypeIdSize(args.host_data_type); + auto dst_type_size = TypeIdSize(args.device_data_type); + if (src_type_size < 1 || dst_type_size < 1) { + MS_LOG(EXCEPTION) << "Invalid src or dst data type."; + } + if (args.data_size / src_type_size != args.host_shape_size) { MS_LOG(EXCEPTION) << "Invalid src or dst data size."; } +} + +template +void 
TransDataSrc2Dst(const TypeIdArgs &args, void *dst, const size_t data_size) { + CheckMemSize(args); for (size_t idx = 0; idx != data_size; idx++) { SrcT src_data = static_cast(args.data)[idx]; static_cast(dst)[idx] = static_cast(src_data); @@ -116,11 +123,7 @@ void TransDataSrc2Dst(const TypeIdArgs &args, void *dst, const size_t data_size) template void TransDataSrc2Fp16(const TypeIdArgs &args, void *dst, const size_t data_size) { - auto src_id = TypeIdSize(args.src_type); - auto dst_id = TypeIdSize(args.dst_type); - if (args.src_size / src_id != args.src_shape_size || args.dst_size / dst_id != args.dst_shape_size) { - MS_LOG(EXCEPTION) << "Invalid src or dst data size."; - } + CheckMemSize(args); auto src_data = static_cast(args.data); auto half_data = static_cast(dst); for (size_t i = 0; i < data_size; i++) { @@ -394,27 +397,18 @@ bool CheckArgs(const FormatArgs &args, size_t *size, size_t *total_size) { } bool TransDataType(const TypeIdArgs &args, void *result) { - MS_LOG(DEBUG) << "Begin trans datatype from " << TypeIdLabel(args.src_type) << " to " << TypeIdLabel(args.dst_type); + MS_LOG(DEBUG) << "Begin trans datatype from " << TypeIdLabel(args.host_data_type) << " to " + << TypeIdLabel(args.device_data_type); MS_EXCEPTION_IF_NULL(result); - std::pair type_info(args.src_type, args.dst_type); + std::pair type_info(args.host_data_type, args.device_data_type); auto iter = mode_map.find(type_info); if (iter == mode_map.end()) { - MS_LOG(ERROR) << "Unsupported datatype trans. src_type :" << TypeIdLabel(args.src_type) - << ", dst_type:" << TypeIdLabel(args.dst_type); + MS_LOG(ERROR) << "Unsupported datatype trans. 
src_type :" << TypeIdLabel(args.host_data_type) + << ", dst_type:" << TypeIdLabel(args.device_data_type); return false; } auto trans_mode = iter->second; - auto src_id = TypeIdSize(args.src_type); - auto dst_id = TypeIdSize(args.dst_type); - if (src_id < 1 || dst_id < 1) { - MS_LOG(ERROR) << "Invalid src or dst data type."; - return false; - } - if (args.src_size / src_id != args.src_shape_size || args.dst_size / dst_id != args.dst_shape_size) { - MS_LOG(ERROR) << "Invalid src or dst data size."; - return false; - } - if (!CastKernel(args, result, args.dst_shape_size, trans_mode)) { + if (!CastKernel(args, result, args.host_shape_size, trans_mode)) { MS_LOG(ERROR) << "Failed to trans datatype.."; return false; } diff --git a/mindspore/ccsrc/common/trans.h b/mindspore/ccsrc/common/trans.h index e6e81ed359..0593466c38 100644 --- a/mindspore/ccsrc/common/trans.h +++ b/mindspore/ccsrc/common/trans.h @@ -31,12 +31,10 @@ namespace mindspore { namespace trans { struct TypeIdArgs { const void *data; - size_t src_size; - size_t dst_size; - TypeId src_type; - TypeId dst_type; - size_t src_shape_size; - size_t dst_shape_size; + size_t host_shape_size; // Multiply each dimension elements. 
[a, b, c, d] => a*b*c*d + TypeId host_data_type; + TypeId device_data_type; + size_t data_size; }; struct FormatArgs { diff --git a/mindspore/ccsrc/dataset/CMakeLists.txt b/mindspore/ccsrc/dataset/CMakeLists.txt index 8e9b2664dc..c60ff64604 100644 --- a/mindspore/ccsrc/dataset/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/CMakeLists.txt @@ -74,7 +74,6 @@ else () add_library(_c_dataengine SHARED ${submodules}) endif () - set_target_properties(_c_dataengine PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" SUFFIX "${PYTHON_MODULE_EXTENSION}" @@ -113,5 +112,10 @@ endif() if (USE_GLOG) target_link_libraries(_c_dataengine PRIVATE mindspore::glog) +else() + if (CMAKE_SYSTEM_NAME MATCHES "Linux") + target_link_options(_c_dataengine PRIVATE -Wl,-init,mindspore_log_init) + elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") + set_target_properties(_c_dataengine PROPERTIES MACOSX_RPATH ON) + endif () endif() - diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.cc b/mindspore/ccsrc/dataset/api/de_pipeline.cc index c3dfeafe48..be133ea7a9 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.cc +++ b/mindspore/ccsrc/dataset/api/de_pipeline.cc @@ -28,6 +28,7 @@ #include "dataset/engine/datasetops/source/manifest_op.h" #include "dataset/engine/datasetops/source/cifar_op.h" #include "dataset/engine/datasetops/source/celeba_op.h" +#include "dataset/engine/datasetops/source/random_data_op.h" #include "dataset/engine/datasetops/source/text_file_op.h" #include "dataset/engine/datasetops/filter_op.h" #include "mindrecord/include/shard_category.h" @@ -65,6 +66,7 @@ static std::unordered_map g_parse_op_func_ = {{kStorage, &D {kCifar10, &DEPipeline::ParseCifar10Op}, {kCifar100, &DEPipeline::ParseCifar100Op}, {kCelebA, &DEPipeline::ParseCelebAOp}, + {kRandomData, &DEPipeline::ParseRandomDataOp}, {kTextFile, &DEPipeline::ParseTextFileOp}}; DEPipeline::DEPipeline() : iterator_(nullptr) { @@ -972,6 +974,45 @@ Status DEPipeline::ParseCifar100Op(const py::dict &args, std::shared_ptr *ptr) { + // Required 
arguments + RandomDataOp::Builder builder; + + if (args["num_samples"].is_none()) { + std::string err_msg = "Error: num_samples is a required argument"; + RETURN_STATUS_UNEXPECTED(err_msg); + } + std::vector columns_to_load; + bool schema_exists = false; + // Optional arguments + for (auto arg : args) { + std::string key = py::str(arg.first); + py::handle value = arg.second; + if (key == "num_parallel_workers") { + (void)builder.SetNumWorkers(ToInt(value)); + } else if (key == "schema_file_path" || key == "schema_json_string") { + schema_exists = true; + } else if (key == "num_samples") { + (void)builder.SetTotalRows(ToInt(value)); + } else if (key == "columns_list") { + columns_to_load = ToStringVector(value); + } + } + if (schema_exists) { + std::unique_ptr schema = std::make_unique(); + if (args.contains("schema_file_path")) { + RETURN_IF_NOT_OK(schema->LoadSchemaFile(ToString(args["schema_file_path"]), columns_to_load)); + } else { + RETURN_IF_NOT_OK(schema->LoadSchemaString(ToString(args["schema_json_string"]), columns_to_load)); + } + (void)builder.SetDataSchema(std::move(schema)); + } + std::shared_ptr op; + RETURN_IF_NOT_OK(builder.Build(&op)); + *ptr = op; + return Status::OK(); +} + int32_t DEPipeline::GetNumClasses() const { return num_classes_; } Status DEPipeline::ParseMnistOp(const py::dict &args, std::shared_ptr *ptr) { diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.h b/mindspore/ccsrc/dataset/api/de_pipeline.h index 7f9c6c459a..699348f157 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.h +++ b/mindspore/ccsrc/dataset/api/de_pipeline.h @@ -60,6 +60,7 @@ enum OpName { kCifar10, kCifar100, kCelebA, + kRandomData, kTextFile }; @@ -142,6 +143,8 @@ class DEPipeline { Status ParseCifar100Op(const py::dict &args, std::shared_ptr *ptr); + Status ParseRandomDataOp(const py::dict &args, std::shared_ptr *ptr); + void PrintTree(); int32_t GetNumClasses() const; diff --git a/mindspore/ccsrc/dataset/api/python_bindings.cc 
b/mindspore/ccsrc/dataset/api/python_bindings.cc index ea2e8352da..41a4143283 100644 --- a/mindspore/ccsrc/dataset/api/python_bindings.cc +++ b/mindspore/ccsrc/dataset/api/python_bindings.cc @@ -47,6 +47,7 @@ #include "dataset/engine/datasetops/source/mnist_op.h" #include "dataset/engine/datasetops/source/manifest_op.h" #include "dataset/engine/datasetops/source/mindrecord_op.h" +#include "dataset/engine/datasetops/source/random_data_op.h" #include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" #include "dataset/engine/datasetops/source/sampler/pk_sampler.h" #include "dataset/engine/datasetops/source/sampler/random_sampler.h" @@ -435,12 +436,12 @@ void bindSamplerOps(py::module *m) { .def(py::init, uint32_t>(), py::arg("indices"), py::arg("seed") = GetSeed()); (void)py::class_>( *m, "MindrecordPkSampler") - .def(py::init([](int64_t kVal, bool shuffle) { + .def(py::init([](int64_t kVal, std::string kColumn, bool shuffle) { if (shuffle == true) { - return std::make_shared("label", kVal, std::numeric_limits::max(), + return std::make_shared(kColumn, kVal, std::numeric_limits::max(), GetSeed()); } else { - return std::make_shared("label", kVal); + return std::make_shared(kColumn, kVal); } })); @@ -489,6 +490,7 @@ PYBIND11_MODULE(_c_dataengine, m) { .value("VOC", OpName::kVoc) .value("CIFAR10", OpName::kCifar10) .value("CIFAR100", OpName::kCifar100) + .value("RANDOMDATA", OpName::kRandomData) .value("CELEBA", OpName::kCelebA) .value("TEXTFILE", OpName::kTextFile); diff --git a/mindspore/ccsrc/dataset/engine/data_schema.cc b/mindspore/ccsrc/dataset/engine/data_schema.cc index 4fe5d665c6..db2fed41bd 100644 --- a/mindspore/ccsrc/dataset/engine/data_schema.cc +++ b/mindspore/ccsrc/dataset/engine/data_schema.cc @@ -466,5 +466,23 @@ Status DataSchema::PreLoadExceptionCheck(const nlohmann::json &js) { "\"columns\" node is required in the schema json file."); return Status::OK(); } + +// Loops through all columns in the schema and returns a map with the column 
+// name to column index number. +Status DataSchema::GetColumnNameMap(std::unordered_map *out_column_name_map) { + if (out_column_name_map == nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "unexpected null output column name map."); + } + + for (int32_t i = 0; i < col_descs_.size(); ++i) { + if (col_descs_[i].name().empty()) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Constructing column name map from schema, but found empty column name."); + } + (*out_column_name_map)[col_descs_[i].name()] = i; + } + + return Status::OK(); +} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/data_schema.h b/mindspore/ccsrc/dataset/engine/data_schema.h index 4b2be76f07..9debd2d466 100644 --- a/mindspore/ccsrc/dataset/engine/data_schema.h +++ b/mindspore/ccsrc/dataset/engine/data_schema.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include "dataset/core/constants.h" @@ -180,6 +181,12 @@ class DataSchema { static const char DEFAULT_DATA_SCHEMA_FILENAME[]; + // Loops through all columns in the schema and returns a map with the column + // name to column index number. + // @param out_column_name_map - The output map of columns names to column index + // @return Status - The error code return + Status GetColumnNameMap(std::unordered_map *out_column_name_map); + private: // Internal helper function. Parses the json schema file in any order and produces a schema that // does not follow any particular order (json standard does not enforce any ordering protocol). diff --git a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc index b0ea7dbd07..3d5b682155 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ #include "dataset/engine/datasetops/barrier_op.h" +#include #include #include "dataset/core/constants.h" #include "dataset/engine/data_buffer.h" @@ -65,8 +66,8 @@ Status BarrierOp::operator()() { TaskManager::FindMe()->Post(); // create child iterator, right now this barrier is a pipeline operator - int32_t worker_id = 0; - int32_t child_idx = 0; + const int32_t worker_id = 0; + const int32_t child_idx = 0; child_iterator_ = std::make_unique(this, worker_id, child_idx); // Loop until eof is true @@ -214,10 +215,19 @@ Status BarrierOp::getNextTensorRow(TensorRow *new_row) { // A function that prints info about the Operator void BarrierOp::Print(std::ostream &out, bool show_all) const { - // Call base class printer first - PipelineOp::Print(out, show_all); - out << "\nBarrierOp:\n" - << "\nCondition " << condition_name_ << "\n\n"; + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nCondition: " << condition_name_ << "\n\n"; + } } // overwrite function and handle eof diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc index c80078cb44..ad8b95b625 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc @@ -15,6 +15,7 @@ */ #include "dataset/engine/datasetops/batch_op.h" #include +#include #include "common/utils.h" #include "dataset/engine/data_buffer.h" #include "dataset/engine/db_connector.h" @@ -102,10 +103,19 @@ Status BatchOp::operator()() { } void 
BatchOp::Print(std::ostream &out, bool show_all) const { - ParallelOp::Print(out, show_all); - out << "\nBatchOp:\n" - << "number of parallel workers: " << num_workers_ << "\nBatch size: " << start_batch_size_ - << "\nDrop remainder: " << (drop_ ? "yes" : "no") << "\n\n"; + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << " [batch size: " << start_batch_size_ << "]\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nStart batch size: " << start_batch_size_ << "\nDrop remainder: " << (drop_ ? "yes" : "no") << "\n\n"; + } } Status BatchOp::BatchRows(const std::unique_ptr *source_table, diff --git a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc index 5e3ea3dc44..adbf42487e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc @@ -92,18 +92,24 @@ void DatasetOp::CreateConnector(int32_t num_producers, int32_t num_consumers) { // A print method typically used for debugging. showAll of true will recursively descend to child prints void DatasetOp::Print(std::ostream &out, bool show_all) const { + // When show_all is false, we display a 1 liner piece of text for the op. + // When show_all is true, we display more detailed output for the op. + // Derived printers should show their own header info, then call base class printer, followed by + // derived-specific items. + // For now, the base class doesn't have any summary info to show so it's a no-op in that case. 
if (show_all) { + // The detailed display will show common base class info of the op. Allow the derived class to print + // it's own id and name though as the first line. + out << "\nNumber of children : " << child_.size(); for (size_t i = 0; i < child_.size(); i++) { - child_[i]->Print(out, show_all); + out << "\n Child[" << i << "] id: " << child_[i]->id(); } - } - out << "\n-------------------------" - << "\nOperator # : " << operator_id_ << "\nNumber of children : " << child_.size() - << "\nNumber of parents : " << parent_.size() << "\nConnector queue size : " << oc_queue_size_ - << "\nOperator control flags : 0x" << std::hex << std::setw(8) << std::setfill('0') << op_ctrl_flags_ << std::dec - << std::setfill(' ') << "\nHas parents:\n"; - for (size_t i = 0; i < parent_.size(); i++) { - out << "Parent[" << i << "] id: " << parent_[i]->id() << "\n"; + out << "\nNumber of parents : " << parent_.size(); + for (size_t i = 0; i < parent_.size(); i++) { + out << "\n Parent[" << i << "] id: " << parent_[i]->id(); + } + out << "\nConnector queue size : " << oc_queue_size_ << "\nOperator control flags : 0x" << std::hex + << std::setw(8) << std::setfill('0') << op_ctrl_flags_ << std::dec << std::setfill(' '); } } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc index 71e4ce64a4..2c91d36259 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #include "dataset/engine/datasetops/device_queue_op.h" - +#include #include #include @@ -246,9 +246,19 @@ Status DeviceQueueOp::SendDataToCPU() { } void DeviceQueueOp::Print(std::ostream &out, bool show_all) const { - PipelineOp::Print(out, show_all); - - out << "DeviceQueueOp: channelName: " << channel_name_ << ", prefetchSize: " << prefetch_size_ << '\n'; + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nChannel name: " << channel_name_ << "\nPrefetch size: " << prefetch_size_ << "\n\n"; + } } } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc index ce312ce3d9..5ede8ad6f4 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc @@ -16,6 +16,7 @@ #include "dataset/engine/datasetops/filter_op.h" #include #include +#include #include #include #include @@ -88,14 +89,22 @@ Status FilterOp::ValidateInColumns(const std::unordered_map:"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nInput column names:"; + for (size_t i = 0; i < in_columns_.size(); i++) { + out << " " << in_columns_[i]; + 
} + out << "\n\n"; } } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc index b6d603bac9..4cbe2ac603 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc @@ -15,6 +15,7 @@ */ #include "dataset/engine/datasetops/map_op.h" #include +#include #include #include #include @@ -81,20 +82,27 @@ int32_t MapOp::num_consumers() const { // A print method typically used for debugging void MapOp::Print(std::ostream &out, bool show_all) const { - // Call base class printer first - ParallelOp::Print(out, show_all); - - // Then display our own stuff - out << "\nMapOp:"; - out << "\n Input column names:"; - for (size_t i = 0; i < in_columns_.size(); i++) { - out << " " << in_columns_[i]; - } - out << "\n TensorOps:"; - for (size_t i = 0; i < tfuncs_.size(); i++) { - out << " " << tfuncs_[i]; + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nInput column names:"; + for (size_t i = 0; i < in_columns_.size(); i++) { + out << " " << in_columns_[i]; + } + out << "\n TensorOps:"; + for (size_t i = 0; i < tfuncs_.size(); i++) { + out << " " << tfuncs_[i]; + } + out << "\n\n"; } - out << "\n"; } // This class functor will provide the master loop that drives the logic for performing the work diff --git a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc index 4b2af2250a..2eeb931554 100644 --- 
a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc @@ -55,12 +55,16 @@ Status ParallelOp::CreateWorkerConnector(int32_t worker_connector_size) { // A print method typically used for debugging void ParallelOp::Print(std::ostream &out, bool show_all) const { - // Call base class printer first - DatasetOp::Print(out, show_all); - - // Then show our own stuff - out << "ParallelOp:"; - out << "\n Num workers : " << num_workers_ << "\n"; + // Summary 1-liner print + if (!show_all) { + out << " [workers: " << num_workers_ << "]"; + // Call super class printer + DatasetOp::Print(out, show_all); + } else { + // Detailed print + DatasetOp::Print(out, show_all); + out << "\nNum workers: " << num_workers_; + } } // Override base class reset to provide reset actions specific to the ParallelOp class. diff --git a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc index 56fc24883a..69ace1ed9a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include #include #include "dataset/engine/datasetops/pipeline_op.h" @@ -23,11 +24,26 @@ PipelineOp::PipelineOp(int32_t op_connector_size) : DatasetOp(op_connector_size) // A print method typically used for debugging void PipelineOp::Print(std::ostream &out, bool show_all) const { - // Call base class printer first - DatasetOp::Print(out, show_all); - - // Then display our own stuff for the pipeline op - // out << "This is a pipeline op print. 
nothing to display here at the moment.\n"; + // Summary 1-liner print + if (!show_all) { + out << " [workers: "; + if (this->inlined()) { + out << "0 (inlined)]"; + } else { + out << "1]"; // Pipeline ops only have 1 worker + } + // Call super class printer + DatasetOp::Print(out, show_all); + } else { + // Detailed print + DatasetOp::Print(out, show_all); + out << "\nNum workers: "; + if (this->inlined()) { + out << "0 (inlined)"; + } else { + out << "1"; // Pipeline ops only have 1 worker + } + } } } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc index b87967dde8..128d3e68e5 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc @@ -16,6 +16,7 @@ #include "dataset/engine/datasetops/project_op.h" #include +#include #include #include #include @@ -49,12 +50,23 @@ ProjectOp::ProjectOp(const std::vector &columns_to_project) : PipelineOp(0), columns_to_project_(columns_to_project) {} void ProjectOp::Print(std::ostream &out, bool show_all) const { - PipelineOp::Print(out, show_all); - out << "ProjectOp: columns that are projected: "; - for (size_t i = 0; i < columns_to_project_.size(); i++) { - out << columns_to_project_[i] << " "; + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nColumns that are projected:"; + for (size_t i = 0; i < columns_to_project_.size(); i++) { + out << "\n" << 
columns_to_project_[i]; + } + out << "\n\n"; } - out << '\n'; } // Gets a buffer from the child operator and projects the buffer. diff --git a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc index 725476bf91..5f354abb04 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "dataset/engine/datasetops/rename_op.h" - +#include #include #include #include @@ -138,11 +138,25 @@ Status RenameOp::RenameBuffer(std::unique_ptr *input_buffer) { // prints rename void RenameOp::Print(std::ostream &out, // In: The output stream to print to bool show_all) const { // In: T/F if it should print everything - // Call base class printer first - PipelineOp::Print(out, show_all); - out << "\nRenameOp:\n"; - for (size_t i = 0; i < in_columns_.size(); ++i) { - out << "\nin Columns: " << in_columns_[i] << "\nOut Columns: " << out_columns_[i] << "\n\n"; + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nIn columns:"; + for (size_t i = 0; i < in_columns_.size(); ++i) { + out << "\n " << in_columns_[i]; + } + for (size_t i = 0; i < out_columns_.size(); ++i) { + out << "\n " << out_columns_[i]; + } + out << "\n\n"; } } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc index 33c731c400..065631eb31 100644 --- 
a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include #include #include @@ -51,22 +52,28 @@ RepeatOp::~RepeatOp() {} // A print method typically used for debugging void RepeatOp::Print(std::ostream &out, bool show_all) const { - // Call base class printer first - PipelineOp::Print(out, show_all); - - // Then display our own stuff - out << "RepeatOp:" - << "\nCurrent repeat count: " << repeat_count_ << "\nMax repeat count: " << max_repeats_ - << "\nLeaf Nodes in my execution path:"; - if (!eoe_ops_.empty()) { - out << "\n"; - for (size_t i = 0; i < eoe_ops_.size(); i++) { - out << " Operator: " << eoe_ops_[i]->id() << "\n"; - } + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << " [repeats: " << max_repeats_ << "]\n"; } else { - out << " kNone."; + // Call the super class for displaying any common detailed info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nCurrent repeat count: " << repeat_count_ << "\nMax repeat count: " << max_repeats_ + << "\nLeaf Nodes in execution path:"; + if (!eoe_ops_.empty()) { + for (size_t i = 0; i < eoe_ops_.size(); i++) { + out << "\n Operator: " << eoe_ops_[i]->id(); + } + } else { + out << " None."; + } + out << "\n\n"; } - out << "\n-------------------------\n\n"; // End the display with this line } // Base-class override for executing specific RepeatOp configurations. 
This code will be called diff --git a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc index 422c38f2f2..7b09bcef4d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -108,13 +109,20 @@ Status ShuffleOp::SelfReset() { // A print method typically used for debugging void ShuffleOp::Print(std::ostream &out, bool show_all) const { - // Call base class printer first - PipelineOp::Print(out, show_all); - - // Then display our own stuff - out << "ShuffleOp:\n Shuffle size: " << shuffle_size_ << "\n rows_per_buffer_: " << rows_per_buffer_ - << "\n shuffle_buffer_state_: " << shuffle_buffer_state_ << "\n shuffle_seed_: " << shuffle_seed_; - out << "\n-------------------------\n\n"; // End the display with this line + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << " [shuffle size: " << shuffle_size_ << "]\n"; + } else { + // Call the super class for displaying any common detailed info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nShuffle size: " << shuffle_size_ << "\nRows per buffer: " << rows_per_buffer_ + << "\nShuffle buffer state: " << shuffle_buffer_state_ << "\nShuffle seed: " << shuffle_seed_ << "\n\n"; + } } // Private function to add a new row to the shuffle buffer. 
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc index d851f2c699..f6eec1cf94 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc @@ -13,9 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include #include #include +#include "dataset/core/config_manager.h" #include "dataset/engine/data_buffer.h" #include "dataset/engine/datasetops/skip_op.h" #include "dataset/engine/db_connector.h" @@ -26,7 +28,10 @@ namespace mindspore { namespace dataset { // Builder constructor. Creates the builder object. -SkipOp::Builder::Builder(int32_t count) : build_max_skips_(count) {} +SkipOp::Builder::Builder(int32_t count) : build_max_skips_(count) { + std::shared_ptr cfg = GlobalContext::config_manager(); + builder_op_connector_size_ = cfg->op_connector_size(); +} Status SkipOp::Builder::SanityCheck() const { if (build_max_skips_ < 0) { @@ -39,67 +44,32 @@ Status SkipOp::Builder::SanityCheck() const { // The builder "build" method creates the final object. Status SkipOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); - *ptr = std::make_shared(build_max_skips_); + *ptr = std::make_shared(build_max_skips_, builder_op_connector_size_); return Status::OK(); } // Constructor of the SkipOp. 
-SkipOp::SkipOp(int32_t count) : PipelineOp(0), max_skips_(count), skip_count_(0) {} +SkipOp::SkipOp(int32_t count, int32_t op_connector_size) + : PipelineOp(op_connector_size), max_skips_(count), skip_count_(0) {} // Destructor SkipOp::~SkipOp() {} // A print method typically used for debugging void SkipOp::Print(std::ostream &out, bool show_all) const { - // Call base class printer first - PipelineOp::Print(out, show_all); - - // Then display our own stuff - out << "SkipOp:" - << "\nCurrent skip count: " << skip_count_ << "\nMax skip count: " << max_skips_; -} - -// Since the buffer may contain multi rows, this function will drop the rows -// that need to skip in it, and then return the buffer. -Status SkipOp::GetNextBuffer(std::unique_ptr *p_buffer, int32_t worker_id, bool retry_if_eoe) { - if (child_.empty()) { - RETURN_STATUS_UNEXPECTED("SkipOp can't be the leaf node."); - } - - std::unique_ptr buf; - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf, worker_id, true)); - - // Drop first max_skips_ rows - while (skip_count_ < max_skips_) { - if (buf->eoe() || buf->eof()) { - break; - } - - // Consider the rows of buffer more than 1 - TensorRow drop_row; - int row_num = buf->NumRows(); - int drop_num = row_num + skip_count_ < max_skips_ ? 
row_num : max_skips_ - skip_count_; - skip_count_ += drop_num; - for (int i = 0; i < drop_num; i++) { - RETURN_IF_NOT_OK(buf->PopRow(&drop_row)); - } - if (buf->NumRows() == 0) { - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf, worker_id, true)); - } + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << " [skips: " << max_skips_ << "]\n"; + } else { + // Call the super class for displaying any common detailed info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nSkip count: " << skip_count_ << "\nMax skips: " << max_skips_ << "\n\n"; } - - // Handling eoe - if (buf->eoe()) { - RETURN_IF_NOT_OK(EoeReceived(worker_id)); - } - - // Handling eof - if (buf->eof()) { - RETURN_IF_NOT_OK(EofReceived(worker_id)); - } - - *p_buffer = std::move(buf); - return Status::OK(); } // Base-class override for handling cases when an eoe is received. @@ -109,13 +79,45 @@ Status SkipOp::EoeReceived(int32_t worker_id) { return Status::OK(); } -// Class functor operator () override. -// Most dataset ops operate by launching a thread (see ExecutionTree). -// However, the SkipOp is defined as a inlined operator, so it is invalid to -// launch the functor since this op runs inlined inside another operator. The -// function is overloaded to ensure that it is not called by mistake (it will -// generate an error). -Status SkipOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. 
SkipOp is an inlined operator."); } +// main entry point for skip +Status SkipOp::operator()() { + TaskManager::FindMe()->Post(); + std::unique_ptr curr_buffer; + RETURN_IF_NOT_OK(GetNextInput(&curr_buffer)); + while (curr_buffer->eof() == false) { + // Reset count + skip_count_ = 0; + while (curr_buffer->eoe() == false) { + // Drop first count rows + while (skip_count_ < max_skips_) { + if (curr_buffer->eoe() || curr_buffer->eof()) { + break; + } + // Consider the rows of buffer more than one + TensorRow drop_row; + int row_num = curr_buffer->NumRows(); + int drop_num = row_num + skip_count_ < max_skips_ ? row_num : max_skips_ - skip_count_; + skip_count_ += drop_num; + for (int i = 0; i < drop_num; i++) { + RETURN_IF_NOT_OK(curr_buffer->PopRow(&drop_row)); + } + if (curr_buffer->NumRows() == 0) { + RETURN_IF_NOT_OK(GetNextInput(&curr_buffer)); + } + } + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(curr_buffer))); + RETURN_IF_NOT_OK(GetNextInput(&curr_buffer)); + } + // we got eoe, now try again until we got eof + MS_LOG(DEBUG) << "Skip operator EOE Received."; + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOE)))); + RETURN_IF_NOT_OK(GetNextInput(&curr_buffer)); + } + + MS_LOG(DEBUG) << "Skip operator EOF Received."; + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique(0, DataBuffer::kDeBFlagEOF)))); + return Status::OK(); +} // Base-class override for handling cases when an eof is received. 
Status SkipOp::EofReceived(int32_t worker_id) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h index 0ae520c3ad..a16b82ed21 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h @@ -42,6 +42,7 @@ class SkipOp : public PipelineOp { private: int32_t build_max_skips_; + int32_t builder_op_connector_size_; Status SanityCheck() const; }; @@ -49,7 +50,7 @@ class SkipOp : public PipelineOp { // Constructor of the SkipOp. // @note The builder class should be used to call it // @param count - The number of skips to do - explicit SkipOp(int32_t count); + explicit SkipOp(int32_t count, int32_t op_connector_size); // Destructor ~SkipOp(); @@ -60,23 +61,11 @@ class SkipOp : public PipelineOp { void Print(std::ostream &out, bool show_all) const override; // Class functor operator () override. - // Most dataset ops operate by launching a thread (see ExecutionTree). - // However, the SkipOp is defined as a inlined operator, so it is invalid to launch the - // functor since this op runs inlined inside another operator. The function is overloaded to - // ensure that it is not called by mistake (it will generate an error). + // All dataset ops operate by launching a thread (see ExecutionTree). This class functor will + // provide the master loop that drives the logic for performing the work // @return Status - The error code return Status operator()() override; - // This function returns the buffer that is at the top of our output connector. The caller is - // typically our parent node, when the parent is asking us to provide the next buffer of data. - // Since SkipOp is an inlined op, getting a buffer from us will simply bounce you to get - // a buffer from our child. - // @param p_buffer - output pointer to the buffer that it will fetch. 
- // @param worker_id - The worker id - // @param retry_if_eoe Set this flag to true to allow calling pop() again after the first pop() returns EOE. - // @return Status - The error code return - Status GetNextBuffer(std::unique_ptr *p_buffer, int32_t worker_id, bool retry_if_eoe) override; - // Base-class override for handling cases when an eoe is received. // @param worker_id - The worker id Status EoeReceived(int32_t worker_id) override; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt index 8801205f6c..b29c11226d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt @@ -17,6 +17,7 @@ add_library(engine-datasetops-source OBJECT ${FEATURE_SRCS} manifest_op.cc cifar_op.cc + random_data_op.cc celeba_op.cc text_file_op.cc ) diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc index 87a7b3c687..896cf94044 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc @@ -16,6 +16,7 @@ #include "dataset/engine/datasetops/source/celeba_op.h" #include +#include #include "dataset/core/config_manager.h" #include "dataset/util/path.h" #include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" @@ -33,8 +34,8 @@ CelebAOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) } Status CelebAOp::Builder::Build(std::shared_ptr *op) { - MS_LOG(INFO) << "Celeba dataset directory is " << builder_dir_.c_str() << "."; - MS_LOG(INFO) << "Celeba dataset type is " << builder_dataset_type_.c_str() << "."; + MS_LOG(DEBUG) << "Celeba dataset directory is " << builder_dir_.c_str() << "."; + MS_LOG(DEBUG) << "Celeba dataset type is " << builder_dataset_type_.c_str() << "."; RETURN_IF_NOT_OK(SanityCheck()); if 
(builder_sampler_ == nullptr) { builder_sampler_ = std::make_shared(); @@ -94,7 +95,7 @@ Status CelebAOp::LaunchThreadsAndInitOp() { RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(attr_info_queue_->Register(tree_->AllTasks())); - wp_.Register(tree_->AllTasks()); + RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask("Walking attr file", std::bind(&CelebAOp::ParseAttrFile, this))); RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CelebAOp::WorkerEntry, this, std::placeholders::_1))); @@ -240,9 +241,11 @@ Status CelebAOp::ParseImageAttrInfo() { num_rows_exact_ = image_labels_vec_.size(); num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_exact_) ? num_rows_exact_ : num_samples_; if (num_rows_exact_ == 0) { - RETURN_STATUS_UNEXPECTED("Number of rows in celeba dataset is zero"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API CelebADataset.Please check file path or dataset API " + "validation first."); } - MS_LOG(INFO) << "Celeba dataset rows number is " << num_rows_exact_ << "."; + MS_LOG(DEBUG) << "Celeba dataset rows number is " << num_rows_exact_ << "."; return Status::OK(); } @@ -267,7 +270,9 @@ std::vector CelebAOp::Split(const std::string &line) { // Derived from RandomAccessOp Status CelebAOp::GetNumSamples(int64_t *num) const { if (num == nullptr || num_samples_ == 0) { - RETURN_STATUS_UNEXPECTED("NumSample not set"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API CelebADataset.Please check file path or dataset API " + "validation first."); } (*num) = num_samples_; return Status::OK(); @@ -275,7 +280,9 @@ Status CelebAOp::GetNumSamples(int64_t *num) const { Status CelebAOp::GetNumRowsInDataset(int64_t *num) const { if (num == nullptr || num_rows_exact_ == 0) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "NumRow not set"); + RETURN_STATUS_UNEXPECTED( + "There is no valid 
data matching the dataset API CelebADataset.Please check file path or dataset API " + "validation first."); } *num = num_rows_exact_; @@ -428,9 +435,19 @@ Status CelebAOp::LoadTensorRow(const std::pair } void CelebAOp::Print(std::ostream &out, bool show_all) const { - DatasetOp::Print(out, show_all); - out << "\nnumber of parallel workers:" << num_workers_ << "\nNumber of rows:" << num_rows_exact_ - << "\nceleba dir: " << folder_path_ << "\n-------------------------\n"; + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nNumber of rows:" << num_rows_exact_ << "\nceleba dir: " << folder_path_ << "\n\n"; + } } // Reset Sampler and wakeup Master thread (functor) diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc index 60de5a6bdf..7e880dd51c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include "common/utils.h" @@ -149,7 +150,7 @@ Status CifarOp::LaunchThreadsAndInitOp() { RETURN_STATUS_UNEXPECTED("tree_ not set"); } RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); - wp_.Register(tree_->AllTasks()); + RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK( tree_->AllTasks()->CreateAsyncTask("Get cifar data block", std::bind(&CifarOp::ReadCifarBlockDataAsync, this))); RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, 
std::bind(&CifarOp::WorkerEntry, this, std::placeholders::_1))); @@ -225,9 +226,19 @@ Status CifarOp::LoadBuffer(const std::vector &keys, std::unique_ptr:"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nNumber of rows:" << num_rows_ << "\nCifar directory: " << folder_path_ << "\n\n"; + } } // Reset Sampler and wakeup Master thread (functor) @@ -247,7 +258,10 @@ Status CifarOp::InitSampler() { // Derived from RandomAccessOp Status CifarOp::GetNumSamples(int64_t *num) const { if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED("NumRow not set"); + std::string api = cifar_type_ == kCifar10 ? "Cifar10Dataset" : "Cifar100Dataset"; + std::string err_msg = "There is no valid data matching the dataset API " + api + + ".Please check file path or dataset API validation first."; + RETURN_STATUS_UNEXPECTED(err_msg); } (*num) = num_samples_; return Status::OK(); @@ -256,7 +270,10 @@ Status CifarOp::GetNumSamples(int64_t *num) const { // Derived from RandomAccessOp Status CifarOp::GetNumRowsInDataset(int64_t *num) const { if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED("NumRow not set"); + std::string api = cifar_type_ == kCifar10 ? "Cifar10Dataset" : "Cifar100Dataset"; + std::string err_msg = "There is no valid data matching the dataset API " + api + + ".Please check file path or dataset API validation first."; + RETURN_STATUS_UNEXPECTED(err_msg); } (*num) = num_rows_; return Status::OK(); @@ -389,7 +406,10 @@ Status CifarOp::ParseCifarData() { num_rows_ = cifar_image_label_pairs_.size(); num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_) ? 
num_rows_ : num_samples_; if (num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED("Init Cifar failed, not a single row read from dataset!"); + std::string api = cifar_type_ == kCifar10 ? "Cifar10Dataset" : "Cifar100Dataset"; + std::string err_msg = "There is no valid data matching the dataset API " + api + + ".Please check file path or dataset API validation first."; + RETURN_STATUS_UNEXPECTED(err_msg); } cifar_raw_data_block_->Reset(); return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc index 37a74f019a..a3d3eb5cce 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc @@ -13,8 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/core/global_context.h" #include "dataset/engine/datasetops/source/generator_op.h" +#include +#include "dataset/core/global_context.h" #include "dataset/engine/db_connector.h" #include "dataset/engine/data_buffer.h" #include "dataset/engine/execution_tree.h" @@ -58,6 +59,26 @@ GeneratorOp::GeneratorOp(py::function generator_function, std::vectorDealloc(); } +void GeneratorOp::Print(std::ostream &out, bool show_all) const { + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nColumn names:\n"; + for (int i = 0; i < column_names_.size(); ++i) { + out << "\n " << column_names_[i]; + } + out << "\n\n"; + 
} +} + void GeneratorOp::Dealloc() noexcept { // Setup GIL state PyGILState_STATE gstate; @@ -168,7 +189,7 @@ Status GeneratorOp::FillBuffer(TensorQTable *tt) { Status GeneratorOp::operator()() { // Handshake with TaskManager to synchronize thread creation TaskManager::FindMe()->Post(); - wp_.Register(tree_->AllTasks()); + RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); std::unique_ptr fetched_buffer; bool eof = false; while (!eof) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h index a5407a9b09..8165fed970 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h @@ -95,6 +95,11 @@ class GeneratorOp : public PipelineOp { ~GeneratorOp(); + // A print method typically used for debugging + // @param out - The output stream to write output to + // @param show_all - A bool to control if you want to show all info or just a summary + void Print(std::ostream &out, bool show_all) const override; + // << Stream output operator overload // @notes This allows you to write the debug print info using stream operators // @param out - reference to the output stream being overloaded diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc index 0ac579a865..81bac3aee7 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc @@ -14,9 +14,8 @@ * limitations under the License. 
*/ #include "dataset/engine/datasetops/source/image_folder_op.h" - #include - +#include #include "common/utils.h" #include "dataset/core/config_manager.h" #include "dataset/core/tensor_shape.h" @@ -243,9 +242,19 @@ Status ImageFolderOp::LoadBuffer(const std::vector &keys, std::unique_p } void ImageFolderOp::Print(std::ostream &out, bool show_all) const { - DatasetOp::Print(out, show_all); - out << "\nnumber of parallel workers:" << num_workers_ << "\nNumber of rows:" << num_rows_ - << "\nImageFolder Directory: " << folder_path_ << "\n-------------------------\n"; + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nNumber of rows:" << num_rows_ << "\nImageFolder directory: " << folder_path_ << "\n\n"; + } } // Reset Sampler and wakeup Master thread (functor) @@ -265,7 +274,9 @@ Status ImageFolderOp::InitSampler() { // Derived from RandomAccessOp Status ImageFolderOp::GetNumSamples(int64_t *num) const { if (num == nullptr || num_samples_ == 0) { - RETURN_STATUS_UNEXPECTED("NumRow not set"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API ImageFolderDatasetV2.Please check file path or dataset API " + "validation first."); } (*num) = num_samples_; return Status::OK(); @@ -274,7 +285,9 @@ Status ImageFolderOp::GetNumSamples(int64_t *num) const { // Derived from RandomAccessOp Status ImageFolderOp::GetNumRowsInDataset(int64_t *num) const { if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED("NumRow not set"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data 
matching the dataset API ImageFolderDatasetV2.Please check file path or dataset API " + "validation first."); } (*num) = num_rows_; return Status::OK(); @@ -382,7 +395,7 @@ Status ImageFolderOp::LaunchThreadsAndInitOp() { RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(folder_name_queue_->Register(tree_->AllTasks())); RETURN_IF_NOT_OK(image_name_queue_->Register(tree_->AllTasks())); - wp_.Register(tree_->AllTasks()); + RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); // The following code launch 3 threads group // 1) A thread that walks all folders and push the folder names to a util:Queue mFoldernameQueue. // 2) Workers that pull foldername from mFoldernameQueue, walk it and return the sorted images to mImagenameQueue diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc index 0139af4d9d..065162c095 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include "common/utils.h" @@ -140,7 +141,7 @@ Status ManifestOp::LaunchThreadsAndInitOp() { RETURN_STATUS_UNEXPECTED("tree_ not set"); } RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); - wp_.Register(tree_->AllTasks()); + RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK( tree_->LaunchWorkers(num_workers_, std::bind(&ManifestOp::WorkerEntry, this, std::placeholders::_1))); @@ -239,9 +240,19 @@ Status ManifestOp::LoadBuffer(const std::vector &keys, std::unique_ptr< } void ManifestOp::Print(std::ostream &out, bool show_all) const { - DatasetOp::Print(out, show_all); - out << "\nnumber of parallel workers:" << num_workers_ << "\nNumber of rows:" << num_rows_ - << "\nManifest file: " << file_ << "\n-------------------------\n"; + // Always show the id and name as first line regardless if this summary or detailed print + 
out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nNumber of rows:" << num_rows_ << "\nManifest file: " << file_ << "\n\n"; + } } // Reset Sampler and wakeup Master thread (functor) @@ -261,7 +272,9 @@ Status ManifestOp::InitSampler() { // Derived from RandomAccessOp Status ManifestOp::GetNumSamples(int64_t *num) const { if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED("NumRow not set"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API ManifestDataset.Please check file path or dataset API " + "validation first."); } (*num) = num_samples_; return Status::OK(); @@ -270,7 +283,9 @@ Status ManifestOp::GetNumSamples(int64_t *num) const { // Derived from RandomAccessOp Status ManifestOp::GetNumRowsInDataset(int64_t *num) const { if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED("NumRow not set"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API ManifestDataset.Please check file path or dataset API " + "validation first."); } (*num) = num_rows_; return Status::OK(); @@ -279,7 +294,7 @@ Status ManifestOp::GetNumRowsInDataset(int64_t *num) const { // Derived from RandomAccessOp Status ManifestOp::GetClassIds(std::map> *cls_ids) const { if (cls_ids == nullptr || !cls_ids->empty() || image_labelname_.empty()) { - RETURN_STATUS_UNEXPECTED("Number rows is 0"); + RETURN_STATUS_UNEXPECTED("Class indexing is invalid."); } for (size_t i = 0; i < image_labelname_.size(); i++) { @@ -395,7 +410,9 @@ Status ManifestOp::CountDatasetInfo() { num_rows_ = static_cast(image_labelname_.size()); num_samples_ = (num_samples_ == 
0 || num_samples_ > num_rows_) ? num_rows_ : num_samples_; if (num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED("Number of rows is 0"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API ManifestDataset.Please check file path or dataset API " + "validation first."); } return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc index 72dee6f2e6..171ad49fa7 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -179,18 +180,21 @@ MindRecordOp::~MindRecordOp() {} // A print method typically used for debugging void MindRecordOp::Print(std::ostream &out, bool show_all) const { - // Call base class printer first - ParallelOp::Print(out, show_all); - - // Then display our own stuff - out << "\nMindRecordOp:"; - out << "\n 1 Dataset file : " << dataset_file_; - out << "\n Number of rows : " << num_rows_; - out << "\n Rows per buffer : " << rows_per_buffer_; - out << "\n Number of buffers : " << buffers_needed_; - out << "\n Number of ShardReader workers : " << num_mind_record_workers_; - - out << "\n\n"; + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\n1 Dataset file : " << dataset_file_ << "\nNumber of rows : " << num_rows_ + << "\nRows per buffer : " << rows_per_buffer_ << "\nNumber of buffers : " << 
buffers_needed_ + << "\nNumber of ShardReader workers : " << num_mind_record_workers_ << "\n\n"; + } } template @@ -644,7 +648,7 @@ Status MindRecordOp::LaunchThreadAndInitOp() { } RETURN_IF_NOT_OK(io_blk_queues_.Register(tree_->AllTasks())); - shard_reader_wait_post_.Register(tree_->AllTasks()); + RETURN_IF_NOT_OK(shard_reader_wait_post_.Register(tree_->AllTasks())); if (shard_reader_->Launch(!block_reader_) == MSRStatus::FAILED) { RETURN_STATUS_UNEXPECTED("MindRecordOp launch failed."); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc index 71900f8a91..c2abc129c8 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc @@ -16,7 +16,7 @@ #include "dataset/engine/datasetops/source/mnist_op.h" #include - +#include #include "common/utils.h" #include "dataset/core/config_manager.h" #include "dataset/core/tensor_shape.h" @@ -190,9 +190,19 @@ Status MnistOp::LoadBuffer(const std::vector &keys, std::unique_ptr:"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nNumber of rows:" << num_rows_ << "\nMNIST Directory: " << folder_path_ << "\n\n"; + } } // Reset Sampler and wakeup Master thread (functor) @@ -212,7 +222,9 @@ Status MnistOp::InitSampler() { // Derived from RandomAccessOp Status MnistOp::GetNumSamples(int64_t *num) const { if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED("NumRow not set"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API MnistDataset.Please check file path or dataset API " + "validation first."); } (*num) = 
num_samples_; return Status::OK(); @@ -393,7 +405,7 @@ Status MnistOp::LaunchThreadsAndInitOp() { RETURN_STATUS_UNEXPECTED("tree_ not set"); } RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); - wp_.Register(tree_->AllTasks()); + RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&MnistOp::WorkerEntry, this, std::placeholders::_1))); TaskManager::FindMe()->Post(); RETURN_IF_NOT_OK(this->WalkAllFiles()); @@ -436,7 +448,9 @@ Status MnistOp::CountTotalRows(const std::string &dir, int64_t numSamples, int64 // Derived from RandomAccessOp Status MnistOp::GetNumRowsInDataset(int64_t *num) const { if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED("NumRow not set"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API MnistDataset.Please check file path or dataset API " + "validation first."); } (*num) = num_rows_; return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc new file mode 100644 index 0000000000..306f74ad6f --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc @@ -0,0 +1,411 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "dataset/engine/datasetops/source/random_data_op.h" +#include +#include +#include "dataset/engine/execution_tree.h" +#include "dataset/core/config_manager.h" +#include "dataset/util/random.h" +#include "dataset/util/wait_post.h" + +namespace mindspore { +namespace dataset { +// Builder constructor. Creates the builder object. +RandomDataOp::Builder::Builder() + : builder_data_schema_(nullptr), + builder_num_workers_(0), + builder_op_connector_size_(0), + builder_rows_per_buffer_(0), + builder_total_rows_(0) { + // Some arguments to the RandomDataOp have a default argument that is taken from the config. + // The user may override these defaults by using the builder set methods. + std::shared_ptr cfg = GlobalContext::config_manager(); + builder_rows_per_buffer_ = cfg->rows_per_buffer(); + builder_num_workers_ = cfg->num_parallel_workers(); + builder_op_connector_size_ = cfg->op_connector_size(); +} + +// The build method that produces the instantiated RandomDataOp as a shared pointer +Status RandomDataOp::Builder::Build(std::shared_ptr *out_op) { + RETURN_IF_NOT_OK(SanityCheck()); + + *out_op = std::make_shared(builder_num_workers_, builder_op_connector_size_, builder_rows_per_buffer_, + builder_total_rows_, std::move(builder_data_schema_)); + + // If the user did not provide a schema, then we will ask the op to generate a pseudo-random + // schema. + // See details of generateSchema function to learn what type of schema it will create. + if ((*out_op)->data_schema_ == nullptr) { + RETURN_IF_NOT_OK((*out_op)->GenerateSchema()); + } + + // Extract the column name mapping from the schema and save it in the class. + // This will be needed when constructing buffers. + RETURN_IF_NOT_OK((*out_op)->data_schema_->GetColumnNameMap(&((*out_op)->column_name_map_))); + + return Status::OK(); +} + +// Check if the required parameters are set by the builder. 
+Status RandomDataOp::Builder::SanityCheck() const { + // There actually is no required arguments for the random data op at all. + // Some arguments are preset with global values from config, and if they are not given by the user + // then we create them randomly. Leaving this function here for consistency with other operators. + return Status::OK(); +} + +// Constructor for RandomDataOp +RandomDataOp::RandomDataOp(int32_t num_workers, int32_t op_connector_size, int64_t rows_per_buffer, int64_t total_rows, + std::unique_ptr data_schema) + : ParallelOp(num_workers, op_connector_size), + buffer_id_(0), + rows_per_buffer_(rows_per_buffer), + total_rows_(total_rows), + epoch_buffers_sent_(0), + guys_in_(0), + guys_out_(num_workers_), + eoe_worker_id_(0), + data_schema_(std::move(data_schema)) { + rand_gen_.seed(GetSeed()); // seed the random generator + // If total rows was not given, then randomly pick a number + if (total_rows_ == 0) { + total_rows_ = GenRandomInt(1, kMaxTotalRows); + } + // Everyone is already out from the sync area. 
+ all_out_.Set(); +} + +// A print method typically used for debugging +void RandomDataOp::Print(std::ostream &out, bool show_all) const { + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << " [total rows: " << total_rows_ << "]\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nTotal_rows: " << total_rows_ << "\nRows per buffer: " << rows_per_buffer_ << "\nSchema:\n" + << *data_schema_ << "\n\n"; + } +} + +// Helper function to produce a default/random schema if one didn't exist +Status RandomDataOp::GenerateSchema() { + if (data_schema_ != nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Generating a schema but one already exists!"); + } + + // To randomly create a schema, we need to choose: + // a) how many columns + // b) the type of each column + // c) the shape of each column (number of dimensions i.e. rank) + // d) the shape of each column (dimension values) + data_schema_ = std::make_unique(); + std::unique_ptr newShape; + std::unique_ptr newCol; + + // Loop over the number of chosen columns + int32_t numColumns = GenRandomInt(1, kMaxNumColumns); + for (int32_t i = 0; i < numColumns; i++) { + // For each column: + // - choose a datatype + // - generate a shape that randomly chooses the number of dimensions and the dimension values. + DataType::Type newType = static_cast(GenRandomInt(0, kMaxDataType)); + int32_t rank = GenRandomInt(1, kMaxRank); + std::vector dims; + for (int32_t d = 0; d < rank; d++) { + // 0 is not a valid dimension value. 
however, we can support "*" or unknown, so map the random + // 0 value to the unknown attribute if 0 is chosen + dsize_t dim_value = static_cast(GenRandomInt(0, kMaxDimValue)); + if (dim_value == 0) dim_value = TensorShape::kDimUnknown; + dims.push_back(dim_value); + } + newShape = std::make_unique(dims); + + // Create the column descriptor + std::string colName = "c" + std::to_string(i); + newCol = std::make_unique(colName, DataType(newType), TensorImpl::kFlexible, rank, newShape.get()); + + data_schema_->AddColumn(*newCol); + } + + return Status::OK(); +} + +// Class functor operator () override. +// All DatasetOps operate by launching a thread (see ExecutionTree). This class functor will +// provide the master loop that drives the logic for performing the work. +Status RandomDataOp::operator()() { + // First, compute how many buffers we'll need to satisfy the total row count. + // The only reason we do this is for the purpose of throttling worker count if needed. + int64_t buffers_needed = total_rows_ / rows_per_buffer_; + if (total_rows_ % rows_per_buffer_ != 0) { + buffers_needed++; + } + + // If the amount of workers we have exceeds the number of buffers to produce, then we'll have + // idle workers doing nothing. In that case, let's throttle the worker count. + if (num_workers_ > buffers_needed) { + MS_LOG(INFO) << "RandomDataOp throttling worker count from " << num_workers_ << " to " << buffers_needed; + num_workers_ = buffers_needed; + num_producers_ = num_workers_; + guys_out_ = num_workers_; + // The output connector was already created with a different worker count. We have to drop and recreate + // that connector. + DatasetOp::CreateConnector(num_producers_, num_workers_); + } + + // Assign the number of rows to each worker in a round robin fashion. + worker_max_rows_.reserve(num_workers_); + worker_rows_packed_.reserve(num_workers_); + // init the counts to zero to start.
+ for (int32_t w = 0; w < num_workers_; w++) { + worker_max_rows_.push_back(0); + worker_rows_packed_.push_back(0); + } + // then assign round robin row counts + int32_t currentWorker = 0; + for (int64_t r = 0; r < total_rows_; r++) { + worker_max_rows_[currentWorker]++; + currentWorker = (currentWorker + 1) % num_workers_; + } + + // Next, compute the total buffer count. This stat is needed during reset logic + for (int32_t w = 0; w < num_workers_; w++) { + int64_t worker_buffers = 0; + worker_buffers = worker_max_rows_[w] / rows_per_buffer_; + if (worker_max_rows_[w] % rows_per_buffer_ != 0) worker_buffers++; + epoch_buffers_sent_ += worker_buffers; + } + + // For the connector to work, we need to target the correct worker channel for the eoe. + // This will initialize it for the first one. reset() handles for the rest of the epochs. + eoe_worker_id_ = epoch_buffers_sent_ % num_workers_; + epoch_buffers_sent_++; // Add the eoe buffer to the count for subsequent epochs + + // RandomDataOp doesn't need the master thread to stay around. Kick off the workers and then master exits. + RETURN_IF_NOT_OK( + tree_->LaunchWorkers(num_workers_, std::bind(&RandomDataOp::WorkerEntry, this, std::placeholders::_1))); + + // required task group setup after launching workers + TaskManager::FindMe()->Post(); + RETURN_IF_NOT_OK(epoch_sync_wait_post_.Register(tree_->AllTasks())); + + return Status::OK(); +} + +// Performs a synchronization between workers at the end of an epoch +Status RandomDataOp::EpochSync(int32_t worker_id, bool *quitting) { + MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " syncing at end of epoch"; + + // Sync on the guys_in counter + // We have to wait until the last guy is out; a failed wait is returned to the caller instead of being dropped. + RETURN_IF_NOT_OK(all_out_.Wait()); + // If we are not in a repeat loop, or that was the last repeat already, then setup our exit + // condition from the master loop.
+ if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { + *quitting = true; + } + + auto prev = guys_in_.fetch_add(1); + bool last_guy_in = (prev + 1) == num_workers_; + // If we are the last worker to hit this sync point, we have some extra tasks + if (last_guy_in) { + MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " is the last one to sync. eoe sent as worker " + << eoe_worker_id_; + // Prepare for sync + all_out_.Clear(); + // Always flow eoe at the end + std::unique_ptr eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); + RETURN_IF_NOT_OK(out_connector_->Add(eoe_worker_id_, std::move(eoe_buffer))); + // If we're done then also flow the eof + if (*quitting) { + // The eof needs to be sent from the next sender in the round robin, so +1 + int32_t eof_worker_id = (eoe_worker_id_ + 1) % num_workers_; + MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " has no more epochs. sending eof as worker " + << eof_worker_id; + std::unique_ptr eof_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); + RETURN_IF_NOT_OK(out_connector_->Add(eof_worker_id, std::move(eof_buffer))); + } + } + + // Wait for the reset to wake us up if we're not quitting + if (!(*quitting)) { + MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " entering sync wait."; + RETURN_IF_NOT_OK(epoch_sync_wait_post_.Wait()); + prev = guys_out_.fetch_add(1); + bool last_guy_out = (prev + 1) == num_workers_; + // Last guy out will clear the wait post and set the row counts + if (last_guy_out) { + MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " last guy out clearing wait post."; + epoch_sync_wait_post_.Clear(); + guys_in_ = 0; + all_out_.Set(); + } + } + + MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " epoch sync complete."; + return Status::OK(); +} + +// The entry point code for when workers are launched +Status RandomDataOp::WorkerEntry(int32_t worker_id) { + MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " entry"; + + 
// handshake with the master first to tell it we're alive + TaskManager::FindMe()->Post(); + + bool quitting = false; + std::unique_ptr new_tensor_table = nullptr; + + // Loop until the quitting variable gets set to true + do { + // If we have not yet reached the row count for this worker then produce another record + if (worker_rows_packed_[worker_id] < worker_max_rows_[worker_id]) { + TensorRow new_row; + + // Start a new tensor table if needed + if (new_tensor_table == nullptr) { + new_tensor_table = std::make_unique(); + } + + // Create the data for the row + RETURN_IF_NOT_OK(CreateRandomRow(worker_id, &new_row)); + + // Add the row to our table + new_tensor_table->push_back(std::move(new_row)); + worker_rows_packed_[worker_id]++; + + // If the tensor table is at capacity then it's time to send it to output + if (new_tensor_table->size() == rows_per_buffer_) { + RETURN_IF_NOT_OK(PackAndSend(worker_id, std::move(new_tensor_table))); + } + } else { + // We've reached the total row count for this worker, so it's time for epoch sync. 
+ // There is likely some records built but not sent yet, so take care of those first + // (this buffer will be smaller than rows_per_buffer) + if (new_tensor_table != nullptr && new_tensor_table->size() > 0) { + RETURN_IF_NOT_OK(PackAndSend(worker_id, std::move(new_tensor_table))); + } + + // Now, let's enter the epoch sync + RETURN_IF_NOT_OK(EpochSync(worker_id, &quitting)); + } + } while (!quitting); + + MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " is now quitting."; + + return Status::OK(); +} + +// A helper function to stuff the tensor table into a buffer and send it to output connector +Status RandomDataOp::PackAndSend(int32_t worker_id, std::unique_ptr in_table) { + auto new_buffer = std::make_unique(GetNextBufferId(), DataBuffer::kDeBFlagNone); + new_buffer->set_tensor_table(std::move(in_table)); + new_buffer->set_column_name_map(column_name_map_); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(new_buffer))); + return Status::OK(); +} + +// A helper function to create random data for the row +Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) { + if (new_row == nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Missing tensor row output"); + } + + // Create a tensor for each column, then add the tensor to the row + for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { + const ColDescriptor current_col = data_schema_->column(i); + std::vector current_shape = current_col.shape().AsVector(); + std::unique_ptr new_shape = nullptr; + std::unique_ptr buf = nullptr; + std::shared_ptr new_tensor = nullptr; + + // We need to resolve the shape to fill in any unknown dimensions with random + // values, then use that as our shape for this tensor. 
+ for (size_t j = 0; j < current_shape.size(); ++j) { + if (current_shape[j] == TensorShape::kDimUnknown) { + current_shape[j] = static_cast(GenRandomInt(1, kMaxDimValue)); + } + } + + new_shape = std::make_unique(current_shape); + int64_t size_in_bytes = new_shape->NumOfElements() * current_col.type().SizeInBytes(); + + // Generate a random byte of data. This may cause some funny data for things like doubles,floats, bools + // however the random data op is not too concerned about the physical data itself. + std::uniform_int_distribution uniDist(0, 255); + uint8_t random_byte = uniDist(rand_gen_);  // NOTE(review): rand_gen_ is one member shared by all workers, no lock here - confirm + + // Now, create a chunk of memory for the entire tensor and copy this byte in repeatedly. + buf = std::make_unique(size_in_bytes); + int ret_code = memset_s(buf.get(), size_in_bytes, random_byte, size_in_bytes); + if (ret_code != 0) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor."); + } + + RETURN_IF_NOT_OK( + Tensor::CreateTensor(&new_tensor, current_col.tensorImpl(), *new_shape, current_col.type(), buf.get())); + + // Add this tensor to the tensor row for output + (*new_row).push_back(std::move(new_tensor)); + } + return Status::OK(); +} + +// Overrides base class reset method. When an operator does a reset, it cleans up any state +// info from its previous execution and then initializes itself so that it can be executed +// again.
+Status RandomDataOp::Reset() { + MS_LOG(INFO) << "RandomDataOp resetting."; + + // Ensure all guys are in the waitpost + if (guys_in_ != num_workers_) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Issuing a reset, but some workers are missing from epochSync!"); + } + + // reset the row counters for all workers + for (int32_t w = 0; w < num_workers_; w++) { + worker_rows_packed_[w] = 0; + worker_max_rows_[w] = 0; + } + buffer_id_ = 0; + + // Re-assign round robin row counts, starting from the worker after the one that gave + // the eoe last time + int32_t currentWorker = (eoe_worker_id_ + 1) % num_workers_; + for (int64_t r = 0; r < total_rows_; r++) { + worker_max_rows_[currentWorker]++; + currentWorker = (currentWorker + 1) % num_workers_; + } + + // Compute which worker should get the eoe for the next epoch + eoe_worker_id_ = ((epoch_buffers_sent_ % num_workers_) + eoe_worker_id_) % num_workers_; + + // Wake up the workers to get them going again in a new epoch + guys_out_ = 0; + epoch_sync_wait_post_.Set(); + + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h new file mode 100644 index 0000000000..84e4c42702 --- /dev/null +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h @@ -0,0 +1,271 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_ENGINE_DATASETOPS_SOURCE_RANDOM_DATA_OP_ +#define DATASET_ENGINE_DATASETOPS_SOURCE_RANDOM_DATA_OP_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "dataset/util/status.h" +#include "dataset/core/tensor.h" +#include "dataset/core/data_type.h" +#include "dataset/engine/data_schema.h" +#include "dataset/engine/datasetops/parallel_op.h" +#include "dataset/util/wait_post.h" + +namespace mindspore { +namespace dataset { +// The RandomDataOp is a leaf node storage operator that generates random data based +// on the schema specifications. Typically, it's used for testing and demonstrating +// various dataset operator pipelines. It is not "real" data to train with. +// The data that is random created is just random and repeated bytes, there is no +// "meaning" behind what these bytes are. +class RandomDataOp : public ParallelOp { + public: + // Some constants to provide limits to random generation. + static constexpr int32_t kMaxNumColumns = 4; + static constexpr int32_t kMaxRank = 4; + static constexpr int32_t kMaxDimValue = 2048; + static constexpr int32_t kMaxDataType = (DataType::DE_UNKNOWN - 1); + static constexpr int32_t kMaxTotalRows = 1024; + + // A nested builder class to aid in the construction of a RandomDataOp + class Builder { + public: + /** + * Builder constructor. Creates the builder object. + * @note No default args. + * @return This is a constructor. 
+ */ + Builder(); + + /** + * Default destructor + */ + ~Builder() = default; + + /** + * The build method that produces the instantiated RandomDataOp as a shared pointer + * @param out_op - The output RandomDataOperator that was constructed + * @return Status - The error code return + */ + Status Build(std::shared_ptr *out_op); + + /** + * Builder set method + * @param data_schema - A user-provided schema + * @return Builder - The modified builder by reference + */ + Builder &SetDataSchema(std::unique_ptr data_schema) { + builder_data_schema_ = std::move(data_schema); + return *this; + } + + /** + * Builder set method + * @param num_workers - The number of workers + * @return Builder - The modified builder by reference + */ + Builder &SetNumWorkers(int32_t num_workers) { + builder_num_workers_ = num_workers; + return *this; + } + + /** + * Builder set method + * @param op_connector_size - The size of the output connector + * @return Builder - The modified builder by reference + */ + Builder &SetOpConnectorSize(int32_t op_connector_size) { + builder_op_connector_size_ = op_connector_size; + return *this; + } + + /** + * Builder set method + * @param rows_per_buffer - The number of rows in each DataBuffer + * @return Builder - The modified builder by reference + */ + Builder &SetRowsPerBuffer(int64_t rows_per_buffer) { + builder_rows_per_buffer_ = rows_per_buffer; + return *this; + } + + /** + * Builder set method + * @param total_rows - The total number of rows in the dataset + * @return Builder - The modified builder by reference + */ + Builder &SetTotalRows(int64_t total_rows) { + builder_total_rows_ = total_rows; + return *this; + } + + private: + /** + * Check if the required parameters are set by the builder. 
+ * @return Status - The error code return + */ + Status SanityCheck() const; + + std::unique_ptr builder_data_schema_; + int32_t builder_num_workers_; + int32_t builder_op_connector_size_; + int64_t builder_rows_per_buffer_; + int64_t builder_total_rows_; + }; // class Builder + + /** + * Constructor for RandomDataOp + * @note Declared public; the Builder is the intended way to construct this op. + * @param num_workers - The number of workers + * @param op_connector_size - The size of the output connector + * @param rows_per_buffer - The number of rows in each DataBuffer + * @param total_rows - The total number of rows in the dataset + * (a value of 0 makes the constructor pick a random count up to kMaxTotalRows) + * @param data_schema - A user-provided schema + */ + RandomDataOp(int32_t num_workers, int32_t op_connector_size, int64_t rows_per_buffer, int64_t total_rows, + std::unique_ptr data_schema); + + /** + * Destructor + */ + ~RandomDataOp() = default; + + /** + * A print method typically used for debugging + * @param out - The output stream to write output to + * @param show_all - A bool to control if you want to show all info or just a summary + */ + void Print(std::ostream &out, bool show_all) const override; + + /** + * << Stream output operator overload + * @notes This allows you to write the debug print info using stream operators (summary form, show_all = false) + * @param out - reference to the output stream being overloaded + * @param op - reference to the RandomDataOp to display + * @return - the output stream must be returned + */ + friend std::ostream &operator<<(std::ostream &out, const RandomDataOp &op) { + op.Print(out, false); + return out; + } + + /** + * Class functor operator () override. + * All DatasetOps operate by launching a thread (see ExecutionTree). This class functor will + * provide the master loop that drives the logic for performing the work. + * @return Status - The error code return + */ + Status operator()() override; + + /** + * Overrides base class reset method.
When an operator does a reset, it cleans up any state + * info from its previous execution and then initializes itself so that it can be executed + * again. + * @return Status - The error code return + */ + Status Reset() override; + + /** + * Quick getter for total rows. + */ + int64_t GetTotalRows() const { return total_rows_; } + + private: + /** + * The entry point code for when workers are launched + * @param worker_id - The worker id + * @return Status - The error code return + */ + Status WorkerEntry(int32_t worker_id) override; + + /** + * Helper function to produce a default/random schema if one didn't exist + * @return Status - The error code return (an error if a schema already exists) + */ + Status GenerateSchema(); + + /** + * Performs a synchronization between workers at the end of an epoch; sets *quitting when no further epoch follows + * @param worker_id - The worker id + * @return Status - The error code return + */ + Status EpochSync(int32_t worker_id, bool *quitting); + + /** + * A helper function to stuff the tensor table into a buffer and send it to output connector + * @param worker_id - The worker id + * @param in_table - The tensor table to pack and send + * @return Status - The error code return + */ + Status PackAndSend(int32_t worker_id, std::unique_ptr in_table); + + /** + * A helper function to create random data for the row + * @param worker_id - The worker id (not read by the current implementation) + * @param new_row - The output row to produce + * @return Status - The error code return + */ + Status CreateRandomRow(int32_t worker_id, TensorRow *new_row); + + /** + * A quick inline for producing a random number between (and including) min/max; takes no lock around rand_gen_ + * @param min - minimum number that can be generated + * @param max - maximum number that can be generated + * @return - The generated random number + */ + inline int32_t GenRandomInt(int32_t min, int32_t max) { + std::uniform_int_distribution uniDist(min, max); + return uniDist(rand_gen_); + } + + /** + * A quick inline for producing the next buffer id in sequence, threadsafe + * @return - The next buffer id.
+ */ + inline int32_t GetNextBufferId() { + std::unique_lock lock(buffer_id_mutex_); + return ++buffer_id_; + } + + int32_t buffer_id_; + int64_t rows_per_buffer_; + int64_t total_rows_; + int64_t epoch_buffers_sent_; + std::atomic guys_in_; + std::atomic guys_out_; + int32_t eoe_worker_id_; + std::unique_ptr data_schema_; + std::vector worker_max_rows_; + std::vector worker_rows_packed_; + std::unordered_map column_name_map_; + std::mt19937 rand_gen_; + WaitPost epoch_sync_wait_post_; + WaitPost all_out_; + std::mutex buffer_id_mutex_; +}; // class RandomDataOp +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_DATASETOPS_SOURCE_RANDOM_DATA_OP_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc index de8cde409f..e0efda6e53 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc @@ -53,6 +53,7 @@ Status RandomSampler::InitSampler() { num_samples_ = (user_num_samples_ < num_samples_) ? user_num_samples_ : num_samples_; CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0 && num_rows_ > 0, "both num_samples & num_rows need to be positive"); samples_per_buffer_ = samples_per_buffer_ > num_samples_ ? 
num_samples_ : samples_per_buffer_; + rnd_.seed(seed_++); if (replacement_ == false) { shuffled_ids_.reserve(num_rows_); for (int64_t i = 0; i < num_rows_; i++) { @@ -62,7 +63,6 @@ Status RandomSampler::InitSampler() { } else { dist = std::make_unique>(0, num_rows_ - 1); } - rnd_.seed(seed_++); return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc index 2ca957ae6d..f310a097ee 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -319,31 +320,18 @@ StorageOp::~StorageOp() {} // A print method typically used for debugging void StorageOp::Print(std::ostream &out, bool show_all) const { - // Call base class printer first - ParallelOp::Print(out, show_all); - - // Then display our own stuff - out << "\nStorageOp:"; - out << "\n Dataset files dir : " << dataset_files_dir_ << "\n Dataset schema file : " << schema_file_; - if (!dataset_file_list_.empty()) { - out << "\n Dataset Files List:\n"; - for (auto filename : dataset_file_list_) { - out << " " << filename << "\n"; - } - } - out << "\n\n"; - if (!data_buffers_.empty()) { - out << std::boolalpha << " Number of DataBuffers inside StorageOp: " << data_buffers_.size() - << "\n Number of rows: " << num_rows_ << "\n Rows per buffer: " << rows_per_buffer_ << "\n\n DataBuffers:\n"; - - // Iterate over each DataBuffer and display the buffer id and the buffer - int32_t i = 0; - for (i = 0; i < data_buffers_.size(); i++) { - out << " " << i << ")\n"; - data_buffers_[i]->Print(out, show_all); - } + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, 
show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; } else { - out << "DataCache is empty!\n"; + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nDetailed operator printing has not been implemented for this op.\n\n"; } } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc index 2b62616366..16f2e29824 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -90,6 +91,30 @@ TextFileOp::TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num worker_connector_size_ = worker_connector_size; } +// A print method typically used for debugging +void TextFileOp::Print(std::ostream &out, bool show_all) const { + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nRows per buffer: " << rows_per_buffer_ << "\nSample count: " << num_samples_ + << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ + << "\nShuffle files: " << ((shuffle_files_) ? 
"yes" : "no") << "\nText files list:\n"; + for (int i = 0; i < text_files_list_.size(); ++i) { + out << " " << text_files_list_[i]; + } + out << "\nData Schema:\n"; + out << *data_schema_ << "\n\n"; + } +} + Status TextFileOp::Init() { RETURN_IF_NOT_OK(filename_index_->insert(text_files_list_)); @@ -143,6 +168,9 @@ Status TextFileOp::LoadFile(const std::string &file, const int64_t start_offset, std::unique_ptr tensor_table = std::make_unique(); while (getline(handle, line)) { + if (line.empty()) { + continue; + } // If read to the end offset of this file, break. if (rows_total >= end_offset) { break; @@ -367,7 +395,7 @@ Status TextFileOp::operator()() { // must be called after launching workers. TaskManager::FindMe()->Post(); - io_block_queue_wait_post_.Register(tree_->AllTasks()); + RETURN_IF_NOT_OK(io_block_queue_wait_post_.Register(tree_->AllTasks())); NotifyToFillIOBlockQueue(); while (!finished_reading_dataset_) { int64_t buffer_id = 0; @@ -425,7 +453,9 @@ int64_t TextFileOp::CountTotalRows(const std::string &file) { std::string line; int64_t count = 0; while (getline(handle, line)) { - count++; + if (!line.empty()) { + count++; + } } return count; @@ -438,7 +468,9 @@ Status TextFileOp::CalculateNumRowsPerShard() { all_num_rows_ += count; } if (all_num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED("Number of rows can not be zero"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API TextFileDataset.Please check file path or dataset API " + "validation first."); } num_rows_per_shard_ = static_cast(std::ceil(all_num_rows_ * 1.0 / num_devices_)); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h index 49f224ffc3..305b2596fa 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h @@ -144,6 +144,11 @@ class TextFileOp : public ParallelOp { // Default destructor 
~TextFileOp() = default; + // A print method typically used for debugging + // @param out - The output stream to write output to + // @param show_all - A bool to control if you want to show all info or just a summary + void Print(std::ostream &out, bool show_all) const override; + // Instantiates the internal queues and connectors // @return Status - the error code returned Status Init(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.h b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.h index 5745ff8071..389f4a76d9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.h @@ -21,7 +21,7 @@ #include #include #include "dataset/engine/data_buffer.h" -#include "./example.pb.h" +#include "proto/example.pb.h" #include "dataset/engine/datasetops/source/tf_client.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.cc index d41ff121af..b6e68aafb9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.cc @@ -24,7 +24,7 @@ #include #include "common/utils.h" -#include "./example.pb.h" +#include "proto/example.pb.h" #include "dataset/engine/datasetops/source/storage_client.h" #include "dataset/util/path.h" #include "dataset/util/status.h" diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.h b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.h index 6ff76e202a..3602f93351 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.h @@ -25,7 +25,7 @@ #include #include #include -#include "./example.pb.h" +#include "proto/example.pb.h" #include "dataset/engine/datasetops/source/storage_client.h" #include "dataset/util/status.h" diff --git 
a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc index 6132f628d7..a2985b7656 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc @@ -18,12 +18,13 @@ #include #include #include +#include #include #include #include #include -#include "./example.pb.h" +#include "proto/example.pb.h" #include "./securec.h" #include "common/utils.h" #include "dataset/core/config_manager.h" @@ -155,6 +156,36 @@ TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64 worker_connector_size_ = worker_connector_size; } +// A print method typically used for debugging +void TFReaderOp::Print(std::ostream &out, bool show_all) const { + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nRows per buffer: " << rows_per_buffer_ << "\nTotal rows: " << total_rows_ << "\nDevice id: " << device_id_ + << "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? 
"yes" : "no") + << "\nDataset files list:\n"; + for (int i = 0; i < dataset_files_list_.size(); ++i) { + out << " " << dataset_files_list_[i]; + } + if (!columns_to_load_.empty()) { + out << "\nColumns to load:\n"; + for (int i = 0; i < columns_to_load_.size(); ++i) { + out << " " << columns_to_load_[i]; + } + } + out << "\nData Schema:\n"; + out << *data_schema_ << "\n\n"; + } +} + Status TFReaderOp::Init() { if (data_schema_->Empty()) { RETURN_IF_NOT_OK(CreateSchema(dataset_files_list_[0], columns_to_load_)); @@ -198,7 +229,9 @@ Status TFReaderOp::CalculateNumRowsPerShard() { } num_rows_per_shard_ = static_cast(std::ceil(num_rows_ * 1.0 / num_devices_)); if (num_rows_per_shard_ == 0) { - RETURN_STATUS_UNEXPECTED("Number of rows can not be zero"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API TFRecordDataset.Please check file path or dataset API " + "validation first."); } return Status::OK(); } @@ -220,7 +253,7 @@ Status TFReaderOp::operator()() { // so workers have to be kept alive until the end of the program TaskManager::FindMe()->Post(); - io_block_queue_wait_post_.Register(tree_->AllTasks()); + RETURN_IF_NOT_OK(io_block_queue_wait_post_.Register(tree_->AllTasks())); NotifyToFillIOBlockQueue(); while (!finished_reading_dataset_) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h index 560cff114f..f0f08c7971 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h @@ -188,6 +188,11 @@ class TFReaderOp : public ParallelOp { // Default destructor ~TFReaderOp() = default; + // A print method typically used for debugging + // @param out - The output stream to write output to + // @param show_all - A bool to control if you want to show all info or just a summary + void Print(std::ostream &out, bool show_all) const override; + // Instantiates the 
internal queues and connectors. // @return Status - the error code returned. Status Init(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc index 1731ed14ba..c8fc6edbf9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc @@ -16,7 +16,7 @@ #include "dataset/engine/datasetops/source/voc_op.h" #include - +#include #include "common/utils.h" #include "dataset/core/config_manager.h" #include "dataset/core/tensor_shape.h" @@ -133,9 +133,19 @@ Status VOCOp::operator()() { } void VOCOp::Print(std::ostream &out, bool show_all) const { - DatasetOp::Print(out, show_all); - out << "\nnumber of parallel workers:" << num_workers_ << "\nNumber of rows:" << num_rows_ - << "\nVOC Directory: " << folder_path_ << "\n-------------------\n"; + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nNumber of rows: " << num_rows_ << "\nVOC Directory: " << folder_path_ << "\n\n"; + } } Status VOCOp::Reset() { @@ -147,7 +157,9 @@ Status VOCOp::Reset() { Status VOCOp::GetNumSamples(int64_t *num) const { if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED("NumRow not set"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API VOCDataset.Please check file path or dataset API " + "validation first."); } (*num) = num_samples_; return Status::OK(); @@ -229,7 +241,7 @@ Status VOCOp::LaunchThreadsAndInitOp() { 
RETURN_STATUS_UNEXPECTED("tree_ not set"); } RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); - wp_.Register(tree_->AllTasks()); + RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&VOCOp::WorkerEntry, this, std::placeholders::_1))); TaskManager::FindMe()->Post(); RETURN_IF_NOT_OK(this->ParseImageIds()); @@ -261,7 +273,9 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co // Derived from RandomAccessOp Status VOCOp::GetNumRowsInDataset(int64_t *num) const { if (num == nullptr || num_rows_ == 0) { - RETURN_STATUS_UNEXPECTED("NumRow not set"); + RETURN_STATUS_UNEXPECTED( + "There is no valid data matching the dataset API VOCDataset.Please check file path or dataset API " + "validation first."); } (*num) = num_rows_; return Status::OK(); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc index 5d7df58153..7e6055027e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc @@ -13,10 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - +#include #include #include "common/utils.h" +#include "dataset/core/config_manager.h" #include "dataset/engine/data_buffer.h" #include "dataset/engine/datasetops/take_op.h" #include "dataset/engine/db_connector.h" @@ -25,7 +26,10 @@ namespace mindspore { namespace dataset { // Builder constructor. Creates the builder object. 
-TakeOp::Builder::Builder(int32_t count) : build_max_takes_(count) {} +TakeOp::Builder::Builder(int32_t count) : build_max_takes_(count) { + std::shared_ptr cfg = GlobalContext::config_manager(); + builder_op_connector_size_ = cfg->op_connector_size(); +} Status TakeOp::Builder::SanityCheck() const { if (build_max_takes_ <= 0) { @@ -38,76 +42,66 @@ Status TakeOp::Builder::SanityCheck() const { // The builder "build" method creates the final object. Status TakeOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); - *ptr = std::make_shared(build_max_takes_); + *ptr = std::make_shared(build_max_takes_, builder_op_connector_size_); return Status::OK(); } // Constructor of the TakeOp. -TakeOp::TakeOp(int32_t count) : PipelineOp(0), max_takes_(count), take_count_(0) {} +TakeOp::TakeOp(int32_t count, int32_t op_connector_size) + : PipelineOp(op_connector_size), max_takes_(count), take_count_(0) {} // A print method typically used for debugging void TakeOp::Print(std::ostream &out, bool show_all) const { - // Call base class printer first - PipelineOp::Print(out, show_all); - - // Then display our own stuff - out << "TakeOp:" - << "\nCurrent take count: " << take_count_ << "\nMax take count: " << max_takes_; -} - -// This function will be call muti times to returns the buffer, when meet required max take count or meet -// EOF buffer then this will stop. 
-Status TakeOp::GetNextBuffer(std::unique_ptr *p_buffer, int32_t worker_id, bool retry_if_eoe) { - if (child_.empty()) { - RETURN_STATUS_UNEXPECTED("TakeOp can't be the leaf node."); + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << " [takes: " << max_takes_ << "]\n"; + } else { + // Call the super class for displaying any common detailed info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nTake count: " << take_count_ << "\nMax takes: " << max_takes_ << "\n\n"; } +} +// Main entry point for Take +Status TakeOp::operator()() { + TaskManager::FindMe()->Post(); std::unique_ptr buf; + RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf)); - bool last_repeat = !BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat); - if (take_count_ == max_takes_) { - if (state_ == OpState::kDeOpRunning) { - MS_LOG(DEBUG) << "Meet max count and push-back eoe buffer."; - auto eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); - *p_buffer = std::move(eoe_buffer); - state_ = OpState::kDeOpIdle; - - // Reset the count and drain - if (!last_repeat) { - take_count_ = 0; - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf, worker_id, true)); - while (!buf->eoe() && !buf->eof()) { - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf, worker_id, true)); - } + while (buf->eof() == false) { + if (take_count_ == max_takes_) { + // Do drain Operation + while (!buf->eoe() && !buf->eof()) { + RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf)); } - } else if (state_ == OpState::kDeOpIdle) { - MS_LOG(DEBUG) << "Meet max count and push-back eof buffer."; - auto eof_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); - *p_buffer = 
std::move(eof_buffer); + } + + // Loop until non EOE is received + if (buf->eoe()) { take_count_ = 0; - } else { - MS_LOG(WARNING) << "Invalid OpState: " << state_; + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(buf))); + RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf)); + continue; } - return Status::OK(); - } - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf, worker_id, true)); - // Loop until non EOE is received - if (buf->eoe()) { - take_count_ = 0; - *p_buffer = std::move(buf); - return Status::OK(); - } - // Check if the last buf is next eof - if (buf->eof()) { - *p_buffer = std::move(buf); - return Status::OK(); + // Get buffer and push back when take_count is still small + if (take_count_ < max_takes_) { + std::unique_ptr p_buffer; + RETURN_IF_NOT_OK(FillBuffer(&buf, &p_buffer)); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(p_buffer))); + } + RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf)); } - // Get buffer and push back when take_count is still small - if (take_count_ < max_takes_) { - RETURN_IF_NOT_OK(FillBuffer(&buf, p_buffer)); - } + take_count_ = 0; + MS_LOG(DEBUG) << "Meet the end and push-back eof buffer."; + auto eof_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eof_buffer))); return Status::OK(); } @@ -132,13 +126,6 @@ Status TakeOp::FillBuffer(std::unique_ptr *buffer, std::unique_ptrAddToRepeatStack(shared_from_this()); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/take_op.h b/mindspore/ccsrc/dataset/engine/datasetops/take_op.h index 02218cf610..f70a1e91a3 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/take_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/take_op.h @@ -45,6 +45,7 @@ class TakeOp : public PipelineOp { private: int32_t build_max_takes_; + int32_t builder_op_connector_size_; Status SanityCheck() const; }; @@ -52,7 +53,7 @@ class TakeOp : public PipelineOp { // Constructor of the TakeOp. 
// @note The builder class should be used to call it // @param count - The number of takes to do - explicit TakeOp(int32_t count); + explicit TakeOp(int32_t count, int32_t op_connector_size); // Destructor ~TakeOp() = default; @@ -72,23 +73,11 @@ class TakeOp : public PipelineOp { return out; } - // Class functor operator () override. - // Most dataset ops operate by launching a thread (see ExecutionTree). - // However, the TakeOp is defined as a inlined operator, so it is invalid to launch the - // functor since this op runs inlined inside another operator. The function is overloaded to - // ensure that it is not called by mistake (it will generate an error). + // All dataset ops operate by launching a thread (see ExecutionTree). This class functor will + // provide the master loop that drives the logic for performing the work // @return Status - The error code return Status operator()() override; - // Gets a buffer from the child node. The caller is typically our parent node. - // @note This function sets the `retryIfEoe` flag when popping from the child connector. This way, - // this function will retry to pop the connector again and will get the non-EOE buffer if any. - // @param p_buffer - output pointer to the buffer that it will fetch. - // @param worker_id - The worker id - // @param retry_if_eoe Set this flag to true to allow calling pop() again after the first pop() returns EOE. - // @return Status - The error code return - Status GetNextBuffer(std::unique_ptr *p_buffer, int32_t worker_id, bool retry_if_eoe) override; - // During tree prepare phase, operators may have specific post-operations to perform depending on // their role. 
// @notes Derived versions of this function should always call it's superclass version first diff --git a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc index ec771740c1..bb8bddcc09 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc @@ -15,6 +15,7 @@ */ #include "dataset/engine/datasetops/zip_op.h" #include +#include #include "dataset/core/constants.h" #include "dataset/engine/data_buffer.h" #include "dataset/engine/db_connector.h" @@ -224,10 +225,19 @@ Status ZipOp::drainPipeline() { // A function that prints info about the Operator void ZipOp::Print(std::ostream &out, // In: The output stream to print to bool show_all) const { // In: T/F if it should print everything - // Call base class printer first - PipelineOp::Print(out, show_all); - out << "\nZipOp:\n" - << "\nDatasets: " << children_num_ << "\n\n"; + // Always show the id and name as first line regardless if this is summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + PipelineOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nDatasets: " << children_num_ << "\n\n"; + } } // overwrite function and handle eof diff --git a/mindspore/ccsrc/dataset/engine/execution_tree.cc b/mindspore/ccsrc/dataset/engine/execution_tree.cc index ebfa532195..dbcc201d48 100644 --- a/mindspore/ccsrc/dataset/engine/execution_tree.cc +++ b/mindspore/ccsrc/dataset/engine/execution_tree.cc @@ -81,13 +81,29 @@ Status ExecutionTree::AssignRoot(const std::shared_ptr &op) { } // A print method typically used for debugging -void ExecutionTree::Print(std::ostream &out, 
bool show_all) const { - out << "Total number of nodes in the ExecutionTree (may or may not be connected nodes): " << id_count_ - << "\nTree state: " << static_cast(tree_state_) << "\n"; - if (root_ != nullptr) { - // Just call the printer on the root node. Each node descends to it's children to print them if - // showAll is true. - root_->Print(out, show_all); +void ExecutionTree::Print(std::ostream &out) const { + out << "Execution tree summary:\n" + << "-----------------------\n"; + this->PrintNode(out, root_, "", true, false); + out << "\nExecution tree operator details:\n" + << "--------------------------------\n"; + this->PrintNode(out, root_, "", true, true); +} + +// A helper functions for doing the recursive printing +void ExecutionTree::PrintNode(std::ostream &out, const std::shared_ptr &dataset_op, std::string indent, + bool last, bool detailed) const { + // Decide which printer to use based on detailed arg. + if (!detailed) { + out << indent << "+- " << *dataset_op; + indent += (last ? " " : "| "); + } else { + dataset_op->Print(out, detailed); + } + + // Descend to children + for (int32_t i = 0; i < dataset_op->child_.size(); ++i) { + this->PrintNode(out, dataset_op->child_[i], indent, (i == (dataset_op->child_.size() - 1)), detailed); } } @@ -100,6 +116,9 @@ Status ExecutionTree::Launch() { " Expected state: " + std::to_string(static_cast(kDeTStateReady)); RETURN_STATUS_UNEXPECTED(err_msg); } + std::ostringstream ss; + ss << *this; + MS_LOG(INFO) << "Printing the tree before launch tasks:\n" << ss.str(); for (auto itr = this->begin(); itr != this->end(); ++itr) { // An inlined operator is one that has an output connector size of 0, and it does not // require a thread to execute. 
Instead, the work of this operator is executed inlined diff --git a/mindspore/ccsrc/dataset/engine/execution_tree.h b/mindspore/ccsrc/dataset/engine/execution_tree.h index 0f6cdfc165..838eb3a014 100644 --- a/mindspore/ccsrc/dataset/engine/execution_tree.h +++ b/mindspore/ccsrc/dataset/engine/execution_tree.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "dataset/engine/datasetops/dataset_op.h" #include "dataset/util/status.h" @@ -114,8 +115,7 @@ class ExecutionTree { // A print method typically used for debugging // @param out - The output stream to write output to - // @param show_all - A bool to control if you want to show all info or just a summary - void Print(std::ostream &out, bool show_all) const; + void Print(std::ostream &out) const; // Returns an iterator positioned at the start // @return Iterator - The iterator @@ -133,7 +133,7 @@ class ExecutionTree { // @param exe_tree - reference to the execution tree to display // @return - the output stream must be returned friend std::ostream &operator<<(std::ostream &out, ExecutionTree &exe_tree) { - exe_tree.Print(out, false); + exe_tree.Print(out); return out; } @@ -178,6 +178,14 @@ class ExecutionTree { TaskGroup *AllTasks() const { return tg_.get(); } private: + // A helper functions for doing the recursive printing + // @param dataset_op - The dataset op to print + // @param indent - an indent string for aligning child levels in output + // @param last - an indicator if it's the last child or not + // @param detailed - should it display the detailed node output or the summary line + void PrintNode(std::ostream &out, const std::shared_ptr &dataset_op, std::string indent, bool last, + bool detailed) const; + std::unique_ptr tg_; // Class for worker management std::shared_ptr root_; // The root node of the tree int32_t id_count_; // Counter for generating operator id's diff --git a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc 
b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc index 5725c10908..cbc5aaa2e5 100644 --- a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc +++ b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc @@ -25,51 +25,41 @@ UniformAugOp::UniformAugOp(py::list op_list, int32_t num_ops) : num_ops_(num_ops std::shared_ptr tensor_op; // iterate over the op list, cast them to TensorOp and add them to tensor_op_list_ for (auto op : op_list) { - if (py::isinstance(op)) { - // python op - tensor_op = std::make_shared(op.cast()); - } else if (py::isinstance(op)) { - // C++ op - tensor_op = op.cast>(); - } + // only C++ op is accepted + tensor_op = op.cast>(); tensor_op_list_.insert(tensor_op_list_.begin(), tensor_op); } rnd_.seed(GetSeed()); } + // compute method to apply uniformly random selected augmentations from a list Status UniformAugOp::Compute(const std::vector> &input, std::vector> *output) { IO_CHECK_VECTOR(input, output); - // variables to generate random number to select ops from the list - std::vector random_indexes; - // variables to copy the result to output if it is not already std::vector> even_out; std::vector> *even_out_ptr = &even_out; int count = 1; - // select random indexes for candidates to be applied - for (int i = 0; i < num_ops_; ++i) { - random_indexes.insert(random_indexes.end(), - std::uniform_int_distribution(0, tensor_op_list_.size() - 1)(rnd_)); - } + // randomly select ops to be applied + std::vector> selected_tensor_ops; + std::sample(tensor_op_list_.begin(), tensor_op_list_.end(), std::back_inserter(selected_tensor_ops), num_ops_, rnd_); - for (auto it = random_indexes.begin(); it != random_indexes.end(); ++it) { + for (auto tensor_op = selected_tensor_ops.begin(); tensor_op != selected_tensor_ops.end(); ++tensor_op) { // Do NOT apply the op, if second random generator returned zero if (std::uniform_int_distribution(0, 1)(rnd_)) { continue; } - std::shared_ptr tensor_op = tensor_op_list_[*it]; - // apply python/C++ op + // 
apply C++ ops (note: python OPs are not accepted) if (count == 1) { - (*tensor_op).Compute(input, output); + (**tensor_op).Compute(input, output); } else if (count % 2 == 0) { - (*tensor_op).Compute(*output, even_out_ptr); + (**tensor_op).Compute(*output, even_out_ptr); } else { - (*tensor_op).Compute(even_out, output); + (**tensor_op).Compute(even_out, output); } count++; } diff --git a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h index 336bc8c859..a70edc2777 100644 --- a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h +++ b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h @@ -36,7 +36,7 @@ class UniformAugOp : public TensorOp { static const int kDefNumOps; // Constructor for UniformAugOp - // @param list op_list: list of candidate python operations + // @param list op_list: list of candidate C++ operations // @param list num_ops: number of augemtation operations to applied UniformAugOp(py::list op_list, int32_t num_ops); diff --git a/mindspore/ccsrc/dataset/util/CMakeLists.txt b/mindspore/ccsrc/dataset/util/CMakeLists.txt index ff14d772ca..9ae93618ab 100644 --- a/mindspore/ccsrc/dataset/util/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/util/CMakeLists.txt @@ -3,7 +3,6 @@ add_library(utils OBJECT circular_pool.cc memory_pool.cc cond_var.cc - semaphore.cc intrp_service.cc task.cc task_manager.cc diff --git a/mindspore/ccsrc/dataset/util/semaphore.cc b/mindspore/ccsrc/dataset/util/semaphore.cc deleted file mode 100644 index 983c387df5..0000000000 --- a/mindspore/ccsrc/dataset/util/semaphore.cc +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "dataset/util/semaphore.h" -#include "dataset/util/task_manager.h" - -namespace mindspore { -namespace dataset { -Status Semaphore::P() { - std::unique_lock lck(mutex_); - return (wait_cond_.Wait(&lck, [this]() { return value_ != 0; })); -} - -void Semaphore::V() { - std::unique_lock lck(mutex_); - ++value_; - wait_cond_.NotifyOne(); -} - -void Semaphore::Register(TaskGroup *vg) { (void)wait_cond_.Register(vg->GetIntrpService()); } - -Status Semaphore::Deregister() { return (wait_cond_.Deregister()); } - -void Semaphore::ResetIntrpState() { wait_cond_.ResetIntrpState(); } -} // namespace dataset -} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/util/task_manager.cc b/mindspore/ccsrc/dataset/util/task_manager.cc index a9f509385e..06340e90ea 100644 --- a/mindspore/ccsrc/dataset/util/task_manager.cc +++ b/mindspore/ccsrc/dataset/util/task_manager.cc @@ -53,7 +53,7 @@ Status TaskManager::CreateAsyncTask(const std::string &my_name, const std::funct LockGuard lck(&tg_lock_); this->grp_list_.insert(vg); } - (*task)->wp_.Register(vg); + RETURN_IF_NOT_OK((*task)->wp_.Register(vg)); RETURN_IF_NOT_OK((*task)->Run()); // Wait for the thread to initialize successfully. 
RETURN_IF_NOT_OK((*task)->Wait()); diff --git a/mindspore/ccsrc/dataset/util/wait_post.cc b/mindspore/ccsrc/dataset/util/wait_post.cc index 99ee0cb77f..204f203d9a 100644 --- a/mindspore/ccsrc/dataset/util/wait_post.cc +++ b/mindspore/ccsrc/dataset/util/wait_post.cc @@ -36,7 +36,7 @@ void WaitPost::Clear() { value_ = 0; } -void WaitPost::Register(TaskGroup *vg) { (void)wait_cond_.Register(vg->GetIntrpService()); } +Status WaitPost::Register(TaskGroup *vg) { return wait_cond_.Register(vg->GetIntrpService()); } void WaitPost::ResetIntrpState() { wait_cond_.ResetIntrpState(); } diff --git a/mindspore/ccsrc/dataset/util/wait_post.h b/mindspore/ccsrc/dataset/util/wait_post.h index bac43f7a4e..4e60995bd9 100644 --- a/mindspore/ccsrc/dataset/util/wait_post.h +++ b/mindspore/ccsrc/dataset/util/wait_post.h @@ -36,7 +36,7 @@ class WaitPost { void Clear(); - void Register(TaskGroup *vg); + Status Register(TaskGroup *vg); Status Deregister(); diff --git a/mindspore/ccsrc/debug/CMakeLists.txt b/mindspore/ccsrc/debug/CMakeLists.txt index a88745b864..c8c50dd471 100644 --- a/mindspore/ccsrc/debug/CMakeLists.txt +++ b/mindspore/ccsrc/debug/CMakeLists.txt @@ -1,12 +1,16 @@ - set(_DEBUG_SRC_LIST - "${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_dump.cc" - "${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_utils.cc" - "${CMAKE_CURRENT_SOURCE_DIR}/draw.cc" - "${CMAKE_CURRENT_SOURCE_DIR}/dump_proto.cc") + "${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_dump.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_utils.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/draw.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/dump_proto.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/info.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/label.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/trace_info.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/trace.cc" +) -if(ENABLE_DUMP_E2E) +if (ENABLE_DUMP_E2E) list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc") -endif(ENABLE_DUMP_E2E) +endif (ENABLE_DUMP_E2E) -add_library(_mindspore_debug_obj OBJECT ${_DEBUG_SRC_LIST}) \ No newline at end of file 
+add_library(_mindspore_debug_obj OBJECT ${_DEBUG_SRC_LIST}) diff --git a/mindspore/ccsrc/debug/anf_ir_dump.cc b/mindspore/ccsrc/debug/anf_ir_dump.cc index e977084ab8..1fd3096e7c 100644 --- a/mindspore/ccsrc/debug/anf_ir_dump.cc +++ b/mindspore/ccsrc/debug/anf_ir_dump.cc @@ -91,6 +91,14 @@ void PrintNodeInputType(std::ostringstream &buffer, const AnfNodePtr &nd) { } } +void PrintInputAndOutputInferType(std::ostringstream &buffer, const AnfNodePtr &nd) { + buffer << " : ("; + PrintNodeInputType(buffer, nd); + buffer << ") -> ("; + PrintNodeOutputType(buffer, nd); + buffer << ")"; +} + struct SubGraphIRInfo { int32_t local_var; std::ostringstream buffer; diff --git a/mindspore/ccsrc/debug/anf_ir_dump.h b/mindspore/ccsrc/debug/anf_ir_dump.h index a53888348d..9fa447046f 100644 --- a/mindspore/ccsrc/debug/anf_ir_dump.h +++ b/mindspore/ccsrc/debug/anf_ir_dump.h @@ -18,12 +18,14 @@ #include #include +#include "ir/dtype/type.h" #include "ir/anf.h" namespace mindspore { constexpr char PARALLEL_STRATEGY[] = "strategy"; void DumpIR(const std::string &filename, const FuncGraphPtr &func_graph, bool dump_full_name = false); - +void PrintInputAndOutputInferType(std::ostringstream &buffer, const AnfNodePtr &nd); +const std::string ToShortString(const TypeId &typeId); } // namespace mindspore #endif // MINDSPORE_CCSRC_DEBUG_ANF_IR_DUMP_H_ diff --git a/mindspore/ccsrc/debug/dump_proto.cc b/mindspore/ccsrc/debug/dump_proto.cc index 83ab1e4505..ab2ce1322a 100644 --- a/mindspore/ccsrc/debug/dump_proto.cc +++ b/mindspore/ccsrc/debug/dump_proto.cc @@ -23,7 +23,7 @@ #include #include "debug/anf_ir_utils.h" -#include "utils/anf_ir.pb.h" +#include "proto/anf_ir.pb.h" #include "utils/graph_utils.h" #include "utils/symbolic.h" diff --git a/mindspore/ccsrc/debug/info.h b/mindspore/ccsrc/debug/info.h index a34d6e3df5..e8d02827d8 100644 --- a/mindspore/ccsrc/debug/info.h +++ b/mindspore/ccsrc/debug/info.h @@ -134,7 +134,7 @@ class DebugInfo : public Base { explicit DebugInfo(const LocationPtr 
&loc); - virtual ~DebugInfo() = default; + ~DebugInfo() override = default; MS_DECLARE_PARENT(DebugInfo, Base); int64_t debug_id(); int64_t unique_id() const { return unique_id_; } diff --git a/mindspore/ccsrc/debug/trace.cc b/mindspore/ccsrc/debug/trace.cc index 16ce77725e..a78d8446d8 100644 --- a/mindspore/ccsrc/debug/trace.cc +++ b/mindspore/ccsrc/debug/trace.cc @@ -231,10 +231,10 @@ std::string AnalyzedFuncGraphExporter::GetNodeType(const AnfNodePtr &node) { auto engine = node_cfg_->engine(); auto cfg = engine->MakeConfig(node, ctx); auto abs = engine->cache().GetValue(cfg); - if (abs == nullptr) { return "Undefined"; } + auto dtype = abs->BuildType(); auto shape = abs->BuildShape(); std::ostringstream oss; diff --git a/mindspore/ccsrc/debug/trace_info.h b/mindspore/ccsrc/debug/trace_info.h index e7a8c83dad..85eae0e958 100644 --- a/mindspore/ccsrc/debug/trace_info.h +++ b/mindspore/ccsrc/debug/trace_info.h @@ -321,7 +321,7 @@ class TraceTransform : public TraceInfo { std::string full_name() override { return full_name_ + transform_name_; } MS_DECLARE_PARENT(TraceTransform, TraceInfo); - virtual std::string symbol() { + std::string symbol() override { if (transform_name_.empty()) { return ""; } diff --git a/mindspore/ccsrc/device/CMakeLists.txt b/mindspore/ccsrc/device/CMakeLists.txt index 93ef7adc84..0a6514f65a 100644 --- a/mindspore/ccsrc/device/CMakeLists.txt +++ b/mindspore/ccsrc/device/CMakeLists.txt @@ -1,34 +1,50 @@ -file(GLOB_RECURSE _DEVICE_ALL_SRC_FILES *.cc) -add_library(_mindspore_device_obj OBJECT ${_DEVICE_ALL_SRC_FILES}) - -if(ENABLE_CPU) - target_compile_definitions(_mindspore_device_obj PRIVATE CPUSESSION) - file(GLOB_RECURSE _CPU_SRC_LIST cpu/*.cc) - add_library(_c_expression_cpu_device_obj OBJECT ${_CPU_SRC_LIST}) -endif() - -if(ENABLE_GPU) - file(GLOB_RECURSE _GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "gpu/*.cc" - "gpu/*.cu" - ) - list(REMOVE_ITEM _GPU_SRC_LIST "gpu/blocking_queue.cc" - "gpu/gpu_buffer_mgr.cc" - 
"gpu/mpi/mpi_initializer.cc" +file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "common/*.cc" + "kernel_info.cc" "kernel_runtime.cc" "memory_manager.cc" "kernel_runtime_manager.cc" "convert_tensor_utils.cc" +) + +if (ENABLE_GPU) + list(APPEND DEVICE_SRC_LIST "gpu/distribution/collective_init.cc") +else () + list(APPEND DEVICE_SRC_LIST "gpu/distribution/collective_fake_init.cc") +endif () + +if (ENABLE_D) + file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "ascend/*.cc" "kernel_adjust.cc") +endif () + +if (ENABLE_CPU) + file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cpu/*.cc") +endif () + +# gpu +if (ENABLE_GPU) + file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc" "gpu/*.cu") + + # gpu_queue + list(REMOVE_ITEM CUDA_SRC_LIST "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc") + add_library(gpu_queue SHARED "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc") + target_link_libraries(gpu_queue ${CMAKE_THREAD_LIBS_INIT} ${CUDA_PATH}/lib64/libcudart.so) + + list(REMOVE_ITEM CUDA_SRC_LIST "gpu/mpi/mpi_initializer.cc" "gpu/distribution/collective_wrapper.cc" "gpu/distribution/mpi_wrapper.cc" - "gpu/distribution/nccl_wrapper.cc") - add_library(_cuda_gpu_device_obj OBJECT ${_GPU_SRC_LIST}) -endif() - -if(ENABLE_D) - file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "ascend/*.cc" - "ascend/profiling/*.cc" - "ascend/tasksink/*.cc" - "kernel_adjust.cc" - "ascend/tasksink/taskinfo/*.cc" - ) - target_sources(_mindspore_device_obj PRIVATE ${_D_SRC_LIST}) -endif() + "gpu/distribution/nccl_wrapper.cc" + ) + + if (ENABLE_MPI) + include(ExternalProject) + # gpu_collective + add_library(gpu_collective SHARED "gpu/distribution/collective_wrapper.cc" + "gpu/distribution/mpi_wrapper.cc" + "gpu/distribution/nccl_wrapper.cc" + ) + # _ms_mpi + pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc") + target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi) + 
target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl) + endif () + + # add_library(_mindspore_device_cuda_obj OBJECT ${CUDA_SRC_LIST}) +endif () +add_library(_mindspore_device_obj OBJECT ${DEVICE_SRC_LIST} ${D_SRC_LIST} ${CPU_SRC_LIST}) diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/device/ascend/ascend_device_address.cc index df49400341..1f452ce9e2 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/device/ascend/ascend_device_address.cc @@ -104,10 +104,10 @@ bool AscendDeviceAddress::SyncDeviceToHost(const std::vector &shape, size_t } else if (type_id_ == kNumberTypeFloat32 && type == kNumberTypeFloat64) { sync_ok = SyncDeviceToHostAndFloatToFloat64(host_ptr, size, ptr_, size_); } else { - auto host_size = trans::ShapeSize(host_shape); + auto shape_size = trans::ShapeSize(host_shape); auto host = std::vector(size_); SyncMemory(host.data(), ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST); - const trans::TypeIdArgs type_args{host.data(), size_, size, type_id_, type, host_size, host_size}; + const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type, size}; sync_ok = trans::TransDataType(type_args, host_ptr); if (!sync_ok) { MS_LOG(ERROR) << "trans data type failed."; @@ -156,9 +156,8 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormat(const std::vector &shape, size_t } else if (type_id_ == kNumberTypeFloat32 && type == kNumberTypeFloat64) { sync_ok = Float64ToFloatAndSyncHostToDevice(ptr_, size_, host_ptr, size); } else { - auto host_size = trans::ShapeSize(host_shape); - const trans::TypeIdArgs type_args{host_ptr, size, size_, type, type_id_, host_size, host_size}; + auto shape_size = trans::ShapeSize(host_shape); + const trans::TypeIdArgs type_args{host_ptr, shape_size, type, type_id_, size}; auto host_tmp = std::vector(size_); sync_ok = trans::TransDataType(type_args, host_tmp.data()); if (!sync_ok) { @@ -235,9 +234,8 @@ bool 
AscendDeviceAddress::ConvertFormatAndSyncHostToDevice(const std::vector(size_); sync_ok = trans::TransDataType(type_args, host_tmp.data()); if (!sync_ok) { diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc index 44cf3f8fa8..d9b3e6ebe4 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc @@ -343,6 +343,22 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) { return true; } +void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) { + auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(graph_id); + auto graph_task_names = ProfilingUtils::graph_kernel_name(); + auto iter = graph_task_names.find(graph_id); + if (iter != graph_task_names.end()) { + const auto &task_names = iter->second; + if (task_ids.size() != task_names.size()) { + MS_LOG(WARNING) << "Task_ids and task_names size not match"; + return; + } + for (size_t i = 0; i < task_ids.size(); ++i) { + MS_LOG(INFO) << "Task_id:" << task_ids[i] << " task_name:" << task_names[i]; + } + } +} + bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); MS_LOG(INFO) << "RunTask start. 
GraphId:" << graph->graph_id(); @@ -363,7 +379,8 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) { bool status = ge::model_runner::ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); if (!status) { - MS_LOG(INFO) << "run task failed"; + MS_LOG(ERROR) << "run task failed"; + DebugTaskIdName(graph->graph_id()); return false; } return true; @@ -453,25 +470,26 @@ bool AscendKernelRuntime::HcclInit() { } MS_LOG(INFO) << "do hcom init"; - std::string path; const char *config_path_str = std::getenv("MINDSPORE_HCCL_CONFIG_PATH"); if (config_path_str == nullptr) { MS_LOG(ERROR) << "get hccl json config failed, please set env MINDSPORE_HCCL_CONFIG_PATH"; return false; } - path = config_path_str; - char fullPath[PATH_MAX] = {0}; - if (path.size() > PATH_MAX || realpath(path.c_str(), fullPath) == nullptr) { - MS_LOG(ERROR) << "file " << path << " is not exist"; + auto full_path = realpath(config_path_str, nullptr); + if (full_path == nullptr) { + MS_LOG(ERROR) << "file path " << config_path_str << " does not exist"; return false; } + const char *identify = std::getenv("RANK_ID"); if (identify == nullptr) { MS_LOG(ERROR) << "get hccl rankid failed, please set env RANK_ID"; + free(full_path); return false; } - MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << fullPath << ", RANK_ID: " << identify; - hcclResult_t res = hcom_init(fullPath, identify); + MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << identify; + hcclResult_t res = hcom_init(full_path, identify); + free(full_path); if (res != HCCL_SUCCESS) { MS_LOG(ERROR) << "hcom init failed, res is " << static_cast(res); return false; diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h index 5d0f61d0a6..b3097c7031 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h @@ -57,6 +57,7 @@ class 
AscendKernelRuntime : public KernelRuntime { void ReleaseDeviceRes() override; bool GraphWithEmptyTaskList(const session::KernelGraph *graph) const; bool CheckGraphIdValid(GraphId graph_id) const; + static void DebugTaskIdName(GraphId graph_id); rtContext_t rt_context_{nullptr}; bool initialized_{false}; diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc index 42830f54fa..c2373d3c7e 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc @@ -22,7 +22,7 @@ namespace mindspore { namespace device { namespace ascend { const uint64_t kAscendDeviceMemGB = 20; -const uint64_t kAscendMemPoolGB = 5; +const uint64_t kAscendMemPoolGB = 10; const uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << 30); const uint64_t kAscendMemPoolSize = (kAscendMemPoolGB << 30); diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc index 36c622cbc5..9e54adc635 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc +++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc @@ -18,14 +18,15 @@ #include #include #include -#include -#include +#include +#include #include "kernel/oplib/oplib.h" #include "kernel/kernel_query.h" #include "session/anf_runtime_algorithm.h" #include "kernel/kernel_build_info.h" #include "utils/context/ms_context.h" #include "operator/ops.h" +#include "debug/anf_ir_dump.h" namespace mindspore { namespace device { @@ -180,6 +181,7 @@ void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, co } void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector *support_index) { + MS_EXCEPTION_IF_NULL(support_index); int index = kUnSupportMixedDataTypeIndex; switch (data_type) { case kNumberTypeFloat16: @@ -197,6 +199,7 @@ void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector *s void 
AddKernelInputSupportDataType(const kernel::KernelBuildInfo &kernel_build_info, size_t input_index, std::vector *support_datatype_index, std::vector *support_datatype) { + MS_EXCEPTION_IF_NULL(support_datatype); auto data_type = kernel_build_info.GetInputDeviceType(input_index); support_datatype->push_back(data_type); AddSupportMixedPrecisionDataTypeIndex(data_type, support_datatype_index); @@ -204,6 +207,7 @@ void AddKernelInputSupportDataType(const kernel::KernelBuildInfo &kernel_build_i void AddKernelOutputSupportDataType(const kernel::KernelBuildInfo &kernel_build_info, size_t output_index, std::vector *support_datatype_index, std::vector *support_datatype) { + MS_EXCEPTION_IF_NULL(support_datatype); auto data_type = kernel_build_info.GetOutputDeviceType(output_index); support_datatype->push_back(data_type); AddSupportMixedPrecisionDataTypeIndex(data_type, support_datatype_index); @@ -214,16 +218,7 @@ void AddNodeInputDataType(const CNodePtr &kernel_node, size_t input_index, std::vector *node_mix_precision_datatype) { AnfNodePtr cur_input = AnfAlgo::GetInputNode(kernel_node, input_index); MS_EXCEPTION_IF_NULL(cur_input); - TypeId input_origin_type; - if (cur_input->isa() && AnfAlgo::IsParameterWeight(cur_input->cast())) { - // weight - input_origin_type = AnfAlgo::GetOutputDeviceDataType(cur_input, 0); - } else if (cur_input->isa()) { - input_origin_type = AnfAlgo::GetOutputDeviceDataType(cur_input, 0); - } else { - // feature map - input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index); - } + TypeId input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index); AddSupportMixedPrecisionDataTypeIndex(input_origin_type, node_mix_precision_datatype_index); node_mix_precision_datatype->push_back(input_origin_type); } @@ -238,8 +233,8 @@ void AddNodeOutputDataType(const CNodePtr &kernel_node, size_t output_index, void CheckDataTypeInputs(const std::vector &node_mix_precision_datatype_index, const std::vector 
&node_mix_precision_datatype, - const std::unordered_map> &kernel_support_datatypes, - std::unordered_map> *kernel_match_datatype_idx) { + const std::map> &kernel_support_datatypes, + std::map> *kernel_match_datatype_idx) { if (node_mix_precision_datatype_index.size() != node_mix_precision_datatype.size()) { MS_LOG(EXCEPTION) << "node datatype index size " << node_mix_precision_datatype_index.size() << " != datatype size " << node_mix_precision_datatype.size(); @@ -251,10 +246,11 @@ void CheckDataTypeInputs(const std::vector &node_mix_precision_datatype_ind } } -int RaiseDataTypePrecisionSelect(const std::vector &node_mix_precision_datatype_index, - const std::vector &node_mix_precision_datatype, - const std::unordered_map> &kernel_support_datatypes, - std::unordered_map> *kernel_match_datatype_idx) { +bool RaiseDataTypePrecisionSelect(const std::vector &node_mix_precision_datatype_index, + const std::vector &node_mix_precision_datatype, + const std::map> &kernel_support_datatypes, + std::map> *kernel_match_datatype_idx) { + MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx); CheckDataTypeInputs(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatypes, kernel_match_datatype_idx); for (size_t i = 0; i < node_mix_precision_datatype_index.size(); ++i) { @@ -289,40 +285,22 @@ int RaiseDataTypePrecisionSelect(const std::vector &node_mix_precision_data } } } - - if (kernel_match_datatype_idx->size() >= 1) { - return SizeToInt(kernel_match_datatype_idx->begin()->first); - } - return -1; + return !kernel_match_datatype_idx->empty(); } -int GetMinReducePrecisionCountIndex(std::unordered_map> *kernel_match_datatype_idx, - const std::unordered_map &precision_reduce_count) { - int selected_index = -1; - size_t min_reduce_precision_count = kMaxCount; - auto iter = kernel_match_datatype_idx->begin(); - while (iter != kernel_match_datatype_idx->end()) { - auto find_iter = precision_reduce_count.find(iter->first); - if (find_iter == 
precision_reduce_count.end()) { - continue; - } - if (min_reduce_precision_count > find_iter->second) { - selected_index = SizeToInt(iter->first); - min_reduce_precision_count = find_iter->second; - } - ++iter; - } - return selected_index; +bool CanDataTypeReduce(const std::vector &datatype_indexes, int check_index, + const std::vector &node_mix_precision_datatype_index) { + return datatype_indexes[check_index] != kUnSupportMixedDataTypeIndex && + datatype_indexes[check_index] <= node_mix_precision_datatype_index[check_index]; } -int RaiseOrReduceDataTypePrecisionSelect( - const std::vector &node_mix_precision_datatype_index, const std::vector &node_mix_precision_datatype, - const std::unordered_map> &kernel_support_datatypes, - std::unordered_map> *kernel_match_datatype_idx) { +bool RaiseOrReduceDataTypePrecisionSelect(const std::vector &node_mix_precision_datatype_index, + const std::vector &node_mix_precision_datatype, + const std::map> &kernel_support_datatypes, + std::map> *kernel_match_datatype_idx) { + MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx); CheckDataTypeInputs(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatypes, kernel_match_datatype_idx); - // reduce / raise - std::unordered_map precision_reduce_count; for (size_t i = 0; i < node_mix_precision_datatype_index.size(); ++i) { if (node_mix_precision_datatype[i] == kTypeUnknown) { continue; @@ -348,31 +326,23 @@ int RaiseOrReduceDataTypePrecisionSelect( if (i >= datatype_indexes.size()) { MS_LOG(EXCEPTION) << "index " << i << "> kernel datatype indexes size " << datatype_indexes.size(); } - if (datatype_indexes[i] == kUnSupportMixedDataTypeIndex) { + if (!CanDataTypeReduce(datatype_indexes, i, node_mix_precision_datatype_index)) { iter = kernel_match_datatype_idx->erase(iter); } else { - if (datatype_indexes[i] < node_mix_precision_datatype_index[i]) { - auto count_iter = precision_reduce_count.find(iter->first); - if (count_iter != precision_reduce_count.end()) { - 
count_iter->second++; - } else { - precision_reduce_count[iter->first] = 1; - } - } ++iter; } } } - - return GetMinReducePrecisionCountIndex(kernel_match_datatype_idx, precision_reduce_count); + return !kernel_match_datatype_idx->empty(); } void AddNodeAndKernelDataType(const CNodePtr &kernel_node, const kernel::KernelBuildInfo &kernel_build_info, std::vector *support_indexes, std::vector *node_mix_precision_datatype, std::vector *support_datatypes, std::vector *node_mix_precision_datatype_index) { + MS_EXCEPTION_IF_NULL(node_mix_precision_datatype); bool add_node_datatype_flag = false; - if (node_mix_precision_datatype->size() == 0) { + if (node_mix_precision_datatype->empty()) { add_node_datatype_flag = true; } for (size_t input_index = 0; input_index < kernel_build_info.GetInputNum(); ++input_index) { @@ -390,104 +360,59 @@ void AddNodeAndKernelDataType(const CNodePtr &kernel_node, const kernel::KernelB } } -int PrecisionReduce(const std::vector &node_mix_precision_datatype_index, - const std::vector &node_mix_precision_datatype, - const std::unordered_map> &kernel_support_datatype, - std::unordered_map> *kernel_match_datatype_idx, bool *precision_reduce) { +void PrecisionReduce(const std::vector &node_mix_precision_datatype_index, + const std::vector &node_mix_precision_datatype, + const std::map> &kernel_support_datatype, + std::map> *kernel_match_datatype_idx, bool *precision_reduce) { + MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx); auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); MS_EXCEPTION_IF_NULL(precision_reduce); - std::unordered_map> kernel_match_datatype_idx_copy = *kernel_match_datatype_idx; + std::map> kernel_match_datatype_idx_copy = *kernel_match_datatype_idx; // raise precision - int selected_index = RaiseDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype, - kernel_support_datatype, kernel_match_datatype_idx); - if (selected_index != -1) { - int max_match = 0; - auto iter = 
kernel_match_datatype_idx->begin(); - int match_count = 0; - while (iter != kernel_match_datatype_idx->end()) { - auto kernel_datatypes = kernel_support_datatype.find(iter->first); - if (kernel_datatypes == kernel_support_datatype.end()) { - MS_LOG(EXCEPTION) << "Can not find kernel index" << iter->first << "'s datatype."; - } - if (kernel_datatypes->second.size() < node_mix_precision_datatype.size()) { - MS_LOG(EXCEPTION) << "Kernel datatype size is not equal to node datatype size!"; - } - for (size_t i = 0; i < node_mix_precision_datatype.size(); ++i) { - if (node_mix_precision_datatype[i] == kernel_datatypes->second[i]) { - ++match_count; - } - } - if (match_count > max_match) { - selected_index = SizeToInt(iter->first); - } - ++iter; - } + bool selected_ret = RaiseDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype, + kernel_support_datatype, kernel_match_datatype_idx); + if (selected_ret) { + *precision_reduce = false; + return; } - if (selected_index == -1 && context_ptr->enable_reduce_precision()) { - selected_index = - RaiseOrReduceDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype, - kernel_support_datatype, &kernel_match_datatype_idx_copy); - if (selected_index != -1) { - *precision_reduce = true; - } + if (context_ptr->enable_reduce_precision()) { + selected_ret = RaiseOrReduceDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype, + kernel_support_datatype, &kernel_match_datatype_idx_copy); + } + if (selected_ret) { + *precision_reduce = true; + *kernel_match_datatype_idx = kernel_match_datatype_idx_copy; } - return selected_index; } -void SelectKernel(const CNodePtr &kernel_node, bool precision_reduce, const std::vector &node_datatype, - const std::shared_ptr &selected_kernel_info_ptr) { - MS_EXCEPTION_IF_NULL(selected_kernel_info_ptr); +void PrintRaiseOrReducePrecisionSelectedInfo(const CNodePtr &cnode, + const std::shared_ptr 
&selected_kernel_build_info, + bool precision_reduce) { + MS_EXCEPTION_IF_NULL(selected_kernel_build_info); + MS_EXCEPTION_IF_NULL(cnode); + std::ostringstream buffer; + buffer << cnode->DebugString(); if (precision_reduce) { - std::ostringstream datatype; - size_t input_num = selected_kernel_info_ptr->GetInputNum(); - size_t i = 0; - datatype << "("; - for (; i < input_num && i < node_datatype.size(); ++i) { - datatype << static_cast(node_datatype[i]); - if (i < input_num - 1) { - datatype << ", "; - } - } - datatype << ") -> ("; - for (; i < node_datatype.size(); ++i) { - datatype << static_cast(node_datatype[i]); - if (i < node_datatype.size() - 1) { - datatype << ", "; - } - } - datatype << ")"; - MS_LOG(WARNING) << kernel_node->DebugString() << " reduce precision, node datatype: " << datatype.str() - << ", select kernel: %s" << selected_kernel_info_ptr->ToString(); + buffer << " reduce precision, node datatype: "; + } else { + buffer << " raise precision, node datatype: "; } - AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info_ptr, kernel_node.get()); - // Set format and data type for input tensor. 
- SetTensorDeviceInfo(*selected_kernel_info_ptr, kernel_node); + PrintInputAndOutputInferType(buffer, cnode); + buffer << ", select kernel:" << selected_kernel_build_info->ToString(); + MS_LOG(INFO) << buffer.str(); } -} // namespace -void SelectKernelInfo(const CNodePtr &kernel_node) { - std::vector> kernel_info_list; - MS_EXCEPTION_IF_NULL(kernel_node); - kernel::KernelQuery(kernel_node, &kernel_info_list); +std::shared_ptr ChooseMatchedKernelInfo( + const CNodePtr &kernel_node, const std::vector> &kernel_info_list) { + if (kernel_info_list.empty()) { + return nullptr; + } std::vector most_match_counts = {-1, -1, -1, -1}; - int selected_index = -1; - std::unordered_map> kernel_match_datatype_idx; - std::unordered_map> kernel_support_datatype; - std::vector node_mix_precision_datatype_index; - std::vector node_mix_precision_datatype; + size_t selected_index = 0; for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) { std::vector cur_kernel_info_match_counts = {0, 0, 0, 0}; auto kernel_build_info = *(kernel_info_list[info_index]); - std::vector support_indexes; - std::vector support_datatypes; - AddNodeAndKernelDataType(kernel_node, kernel_build_info, &support_indexes, &node_mix_precision_datatype, - &support_datatypes, &node_mix_precision_datatype_index); - kernel_match_datatype_idx[info_index] = support_indexes; - kernel_support_datatype[info_index] = support_datatypes; - if (!MatchInferOutputDataType(kernel_node, kernel_build_info)) { - continue; - } std::shared_ptr kernel_info_ptr = kernel_info_list[info_index]; UpdateCurMatchCounts(*kernel_info_ptr, kernel_node, &cur_kernel_info_match_counts); // Currently the selection policy is the match format count first, and then is datatype counts. 
@@ -495,22 +420,80 @@ void SelectKernelInfo(const CNodePtr &kernel_node) { selected_index = SizeToInt(info_index); } } + return kernel_info_list[selected_index]; +} - bool precision_reduce = false; - if (selected_index == -1) { - selected_index = PrecisionReduce(node_mix_precision_datatype_index, node_mix_precision_datatype, - kernel_support_datatype, &kernel_match_datatype_idx, &precision_reduce); +std::vector> GetAllMatchedFilteredKernelInfo( + const CNodePtr &cnode, const std::vector> &kernel_info_list) { + std::vector> result; + for (const auto &kernel_build_info : kernel_info_list) { + MS_EXCEPTION_IF_NULL(kernel_build_info); + if (!MatchInferOutputDataType(cnode, *kernel_build_info)) { + continue; + } + result.push_back(kernel_build_info); } - if (selected_index == -1) { - MS_LOG(EXCEPTION) << kernel_node->DebugString() << "Cannot find valid kernel Info !"; + return result; +} + +std::vector> FilterRaisedOrReducePrecisionMatchedKernelInfo( + const CNodePtr &cnode, const std::vector> &kernel_info_list, + bool *precision_reduce) { + std::vector> filtered_kernel_info_list; + std::map> kernel_match_datatype_idx; + std::map> kernel_support_datatype; + std::vector node_mix_precision_datatype_index; + std::vector node_mix_precision_datatype; + for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) { + std::vector support_indexes; + std::vector support_datatypes; + MS_EXCEPTION_IF_NULL(kernel_info_list[info_index]); + AddNodeAndKernelDataType(cnode, *kernel_info_list[info_index], &support_indexes, &node_mix_precision_datatype, + &support_datatypes, &node_mix_precision_datatype_index); + kernel_match_datatype_idx[info_index] = support_indexes; + kernel_support_datatype[info_index] = support_datatypes; } - auto index = IntToSize(selected_index); - if (index >= kernel_info_list.size()) { - MS_LOG(EXCEPTION) << "index outof range"; + PrecisionReduce(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatype, + 
&kernel_match_datatype_idx, precision_reduce); + std::transform( + kernel_match_datatype_idx.begin(), kernel_match_datatype_idx.end(), std::back_inserter(filtered_kernel_info_list), + [&](const std::pair> &matched_idx) -> std::shared_ptr { + return kernel_info_list[matched_idx.first]; + }); + return filtered_kernel_info_list; +} +} // namespace + +int SelectKernelInfo(const CNodePtr &kernel_node) { + std::vector> kernel_info_list; + int status = kStatusAllMatched; + MS_EXCEPTION_IF_NULL(kernel_node); + bool precision_reduce = false; + std::shared_ptr selected_kernel_info = nullptr; + kernel::KernelQuery(kernel_node, &kernel_info_list); + // filter kernel info matched with me infered type + auto filtered_kernel_info_list = GetAllMatchedFilteredKernelInfo(kernel_node, kernel_info_list); + if (!filtered_kernel_info_list.empty()) { + selected_kernel_info = ChooseMatchedKernelInfo(kernel_node, filtered_kernel_info_list); + } else { + // selected kernel info using raised precision or reduce precision + filtered_kernel_info_list = + FilterRaisedOrReducePrecisionMatchedKernelInfo(kernel_node, kernel_info_list, &precision_reduce); + selected_kernel_info = ChooseMatchedKernelInfo(kernel_node, filtered_kernel_info_list); + if (selected_kernel_info == nullptr) { + std::ostringstream buffer; + PrintInputAndOutputInferType(buffer, kernel_node); + MS_EXCEPTION(TypeError) << "The node [" << kernel_node->DebugString() + << "] cannot find valid kernel info, not supported the type" << buffer.str(); + } else { + PrintRaiseOrReducePrecisionSelectedInfo(kernel_node, selected_kernel_info, precision_reduce); + status = precision_reduce ? 
kStatusReducePrecision : kStatusRaisePrecision; + } } - std::shared_ptr selected_kernel_info_ptr = kernel_info_list[index]; - MS_EXCEPTION_IF_NULL(selected_kernel_info_ptr); - SelectKernel(kernel_node, precision_reduce, node_mix_precision_datatype, selected_kernel_info_ptr); + AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info, kernel_node.get()); + // Set format and data type for input tensor. + SetTensorDeviceInfo(*selected_kernel_info, kernel_node); + return status; } bool CheckKernelAccuracySupported(const CNodePtr &kernel_node, diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.h b/mindspore/ccsrc/device/ascend/kernel_select_ascend.h index 100cd8e1e1..af353815bf 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.h +++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.h @@ -21,7 +21,7 @@ namespace mindspore { namespace device { namespace ascend { -void SelectKernelInfo(const CNodePtr &kernel_node); +int SelectKernelInfo(const CNodePtr &kernel_node); bool CheckKernelAccuracySupported(const CNodePtr &kernel_node, const kernel::KernelBuildInfoPtr &new_kernel_build_info); } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc index 7960a08938..1f87bf7bfa 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc @@ -33,7 +33,7 @@ constexpr char kIterEndNode[] = "PROFILING_ITER_END"; std::unordered_map> ProfilingUtils::graph_kernel_name_; uint32_t ProfilingUtils::custom_node_index_ = 1; -ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull graph_ptr) { +ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull graph_ptr) { MS_LOG(INFO) << "get env start"; custom_node_index_ = 1; auto &cnode_exec_order = graph_ptr->execution_order(); @@ -73,9 +73,45 @@ void ProfilingUtils::GetTraceHccl(const std::vector 
&cnode_exec_order, std::string ProfilingUtils::GetTraceBegin(const std::vector &cnode_exec_order) { const char *trace_begin = std::getenv(kFpStartNode); - auto &first_cnode = cnode_exec_order.front(); - MS_EXCEPTION_IF_NULL(first_cnode); - return trace_begin == nullptr ? first_cnode->fullname_with_scope() : std::string(trace_begin); + if (trace_begin != nullptr) { + return std::string(trace_begin); + } + + std::string fp_start_str = ""; + std::set getnext_outputs; + GetCNodeOutputRealNode(kGetNextOpName, cnode_exec_order, NOT_NULL(&getnext_outputs)); + if (getnext_outputs.empty()) { + auto first_node = cnode_exec_order.front(); + MS_EXCEPTION_IF_NULL(first_node); + fp_start_str = first_node->fullname_with_scope(); + } else { + for (auto &cnode : cnode_exec_order) { + if (getnext_outputs.count(cnode->fullname_with_scope()) != 0) { + fp_start_str = cnode->fullname_with_scope(); + break; + } + } + } + return fp_start_str; +} + +void ProfilingUtils::GetCNodeOutputRealNode(const std::string &node_name, const std::vector &cnode_exec_order, + NotNull *> getnext_outputs) { + for (auto cnode : cnode_exec_order) { + for (auto input : cnode->inputs()) { + auto prev_cnode = AnfAlgo::VisitKernel(input, 0); + if (!prev_cnode.first->isa()) { + continue; + } + if (AnfAlgo::GetCNodeName(prev_cnode.first) == node_name) { + getnext_outputs->insert(cnode->fullname_with_scope()); + MS_LOG(INFO) << "Find GetNext Output CNode:" << cnode->fullname_with_scope(); + } + } + } + if (getnext_outputs->empty()) { + MS_LOG(WARNING) << "GetNext not found"; + } } std::string ProfilingUtils::GetTraceBpEnd(const std::vector &cnode_exec_order) { @@ -112,18 +148,29 @@ std::string ProfilingUtils::GetTraceBpEnd(const std::vector &cnode_exe } if (bp_end_str.empty()) { - auto last_cnode = cnode_exec_order.back(); - MS_EXCEPTION_IF_NULL(last_cnode); - bp_end_str = last_cnode->fullname_with_scope(); + bp_end_str = GetGraphLastTbeKernelName(cnode_exec_order); } return bp_end_str; } +std::string 
ProfilingUtils::GetGraphLastTbeKernelName(const std::vector &cnode_exec_order) { + std::string last_tbe_kernel_name = ""; + // find last tbe_kernel + for (auto iter = cnode_exec_order.rbegin(); iter != cnode_exec_order.rend(); ++iter) { + if (AnfAlgo::GetKernelType(*iter) == TBE_KERNEL) { + last_tbe_kernel_name = (*iter)->fullname_with_scope(); + break; + } + } + if (last_tbe_kernel_name.empty()) { + MS_LOG(WARNING) << "tbe kernel not found in graph"; + } + return last_tbe_kernel_name; +} + std::string ProfilingUtils::GetTraceNetoutput(const std::vector &cnode_exec_order) { const char *trace_netoutput = std::getenv(kIterEndNode); - auto &last_cnode = cnode_exec_order.back(); - MS_EXCEPTION_IF_NULL(last_cnode); - return trace_netoutput == nullptr ? last_cnode->fullname_with_scope() : std::string(trace_netoutput); + return trace_netoutput == nullptr ? GetGraphLastTbeKernelName(cnode_exec_order) : std::string(trace_netoutput); } NotNull ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content, @@ -158,17 +205,22 @@ void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node NotNull *> kernel_list) { if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) { MS_LOG(INFO) << "Profiling Match FpStart:" << profiling_trace_info.trace_begin; - auto job_id = ProfilingManager::GetInstance().GetJobId(); - ProfilingContent job_profiling_context = {false, job_id, 0}; - auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr); - kernel_list->emplace_back(job_profiling_node); - + ProfilingTraceJobId(anf_node, graph_ptr, kernel_list); ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0}; auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr); kernel_list->emplace_back(fp_profiling_node); } } +void ProfilingUtils::ProfilingTraceJobId(const AnfNodePtr &anf_node, NotNull graph_ptr, + NotNull *> kernel_list) { + 
MS_LOG(INFO) << "Profiling Match start"; + auto job_id = ProfilingManager::GetInstance().GetJobId(); + ProfilingContent job_profiling_context = {false, job_id, 0}; + auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr); + kernel_list->emplace_back(job_profiling_node); +} + CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node, const ProfilingContent &profiling_content, NotNull graph_ptr) { diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h index f9f08c9d3f..6986eaab54 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h @@ -65,6 +65,9 @@ class ProfilingUtils { NotNull graph_ptr, NotNull *> kernel_list); + static void ProfilingTraceJobId(const AnfNodePtr &anf_node, NotNull graph_ptr, + NotNull *> kernel_list); + // Insert net output profiling node, which tells the device to stop profiling. // The notify in struct ProfilingContent should be 'true', which tells the device to send data to host. static void ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, @@ -91,13 +94,15 @@ class ProfilingUtils { // And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode' // GetNext, export PROFIFLING_CUSTOM_2='full name fo GetNext cnode' // The variable i in PROFILING_CUSTOM_i should start from 1 without interruption. 
- static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull graph_ptr); + static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull graph_ptr); // Insert two profiling trace points, one in front and one behind static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, NotNull graph_ptr, NotNull *> kernel_list); + static std::unordered_map> graph_kernel_name() { return graph_kernel_name_; } + inline static constexpr char kProfiling[] = "Profiling"; inline static constexpr char kNotify[] = "notify"; inline static constexpr char kProfilerTraceId[] = "profiler_trace_id"; @@ -111,8 +116,11 @@ class ProfilingUtils { static std::string GetTraceBegin(const std::vector &cnode_exec_order); static std::string GetTraceBpEnd(const std::vector &cnode_exec_order); static std::string GetTraceNetoutput(const std::vector &cnode_exec_order); + static std::string GetGraphLastTbeKernelName(const std::vector &cnode_exec_order); static void GetTraceHccl(const std::vector &cnode_exec_order, NotNull profiling_trace); + static void GetCNodeOutputRealNode(const std::string &node_name, const std::vector &cnode_exec_order, + NotNull *> getnext_outputs); // graph id --> (kernel name list) static std::unordered_map> graph_kernel_name_; diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc index 62cf809c21..bdcc178b54 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc +++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc @@ -121,8 +121,10 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i LaunchAddrCleanKernel(anf_node_ptr, &kernel_inputs); } - std::vector task_info_ptrs = dynamic_cast(kernel_mod) - ->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id); + auto ascend_kernel_mod = dynamic_cast(kernel_mod); + MS_EXCEPTION_IF_NULL(ascend_kernel_mod); + std::vector task_info_ptrs = + 
ascend_kernel_mod->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id); task_info_list->insert(task_info_list->end(), task_info_ptrs.begin(), task_info_ptrs.end()); return true; } @@ -145,9 +147,7 @@ bool TaskGenerator::LaunchAllKernel(const std::vector &anf_node_list, } current_op_index++; } - if (ProfilingManager::GetInstance().IsProfiling()) { - ProfilingUtils::SetGraphKernelName(graph_id, kernel_name_list); - } + ProfilingUtils::SetGraphKernelName(graph_id, kernel_name_list); return true; } } // namespace tasksink diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc index 2a2a2be065..f9d2cb878f 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc @@ -28,11 +28,7 @@ namespace gpu { namespace py = pybind11; void GpuBuild(const KernelGraphPtr &kernel_graph) { kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance(); - if (!bin_map->ReadIndex(kernel::kGpuKernelMeta)) { - MS_LOG(INFO) << "kernel cache miss, cache directory will be created later."; - } else { - MS_LOG(INFO) << "cache initialize to[" << kernel::kGpuKernelMeta << "]."; - } + bin_map->Initialize(); MS_EXCEPTION_IF_NULL(kernel_graph); auto kernels = kernel_graph->execution_order(); for (const auto &kernel : kernels) { diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc index 5dd4facb25..17817ebeba 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc @@ -27,6 +27,7 @@ #include "device/gpu/gpu_common.h" #include "common/utils.h" #include "device/gpu/gpu_memory_manager.h" +#include "kernel/common_utils.h" namespace mindspore { namespace device { @@ -104,6 +105,7 @@ void GPUKernelRuntime::ReleaseDeviceRes() { if (mem_manager_ != nullptr) { mem_manager_->FreeDeviceMemory(); } + kernel::KernelMeta::GetInstance()->RemoveKernelCache(); } void 
GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) { @@ -223,23 +225,24 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod MS_EXCEPTION_IF_NULL(input); input->addr = device_address->ptr_; input->size = device_address->size_; - kernel_inputs->push_back(input); + kernel_inputs->emplace_back(input); } - auto output_sizes = kernel_mod.GetOutputSizeList(); for (size_t i = 0; i < output_sizes.size(); ++i) { auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i); MS_EXCEPTION_IF_NULL(device_address); if (device_address->ptr_ == nullptr) { - mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); + auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); + if (!ret) { + MS_LOG(EXCEPTION) << "Malloc device memory failed."; + } } kernel::AddressPtr output = std::make_shared(); MS_EXCEPTION_IF_NULL(output); output->addr = device_address->ptr_; output->size = output_sizes[i]; - kernel_outputs->push_back(output); + kernel_outputs->emplace_back(output); } - auto workspace_sizes = kernel_mod.GetWorkspaceSizeList(); for (size_t i = 0; i < workspace_sizes.size(); ++i) { if (workspace_sizes[i] == 0) { @@ -247,12 +250,14 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod continue; } auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]); - MS_EXCEPTION_IF_NULL(device_ptr); + if (!device_ptr) { + MS_LOG(EXCEPTION) << "Malloc device memory failed."; + } kernel::AddressPtr workspace = std::make_shared(); MS_EXCEPTION_IF_NULL(workspace); workspace->addr = device_ptr; workspace->size = workspace_sizes[i]; - kernel_workspaces->push_back(workspace); + kernel_workspaces->emplace_back(workspace); } } @@ -261,8 +266,7 @@ void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph auto &kernels = graph->execution_order(); for (auto &kernel : kernels) { MS_EXCEPTION_IF_NULL(kernel); - auto kernel_name = AnfAlgo::GetCNodeName(kernel); - if 
(kernel_name == kAllReduceOpName) { + if (AnfAlgo::IsCommunicationOp(kernel)) { AllocCommunicationOpInputDynamicRes(kernel); AllocCommunicationOpOutputDynamicRes(kernel); } @@ -272,27 +276,31 @@ void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(kernel); MS_EXCEPTION_IF_NULL(mem_manager_); + bool is_need_alloc_memory = false; + bool is_need_free_memory = false; size_t total_size = 0; std::vector size_list; DeviceAddressPtrList addr_list; for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) { auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i); MS_EXCEPTION_IF_NULL(device_address); - // The inputs of communication kernel are not released. - if (device_address->ptr_ != nullptr) { - MS_LOG(INFO) << "The inputs of communication kernel are not released."; - mem_manager_->FreeMemFromMemPool(device_address); + if (device_address->ptr_ == nullptr) { + is_need_alloc_memory = true; + } else { + is_need_free_memory = true; } total_size += device_address->size_; size_list.emplace_back(device_address->size_); addr_list.emplace_back(device_address); } - mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list); + AllocCommunicationOpMemory(is_need_alloc_memory, is_need_free_memory, addr_list, total_size, size_list); } void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(kernel); MS_EXCEPTION_IF_NULL(mem_manager_); + bool is_need_alloc_memory = false; + bool is_need_free_memory = false; size_t total_size = 0; std::vector size_list; DeviceAddressPtrList addr_list; @@ -302,16 +310,37 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf for (size_t i = 0; i < output_sizes.size(); ++i) { auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i); MS_EXCEPTION_IF_NULL(device_address); - 
// The outputs of communication kernel are not released. - if (device_address->ptr_ != nullptr) { - MS_LOG(INFO) << "The outputs of communication kernel are not released."; - mem_manager_->FreeMemFromMemPool(device_address); + if (device_address->ptr_ == nullptr) { + is_need_alloc_memory = true; + } else { + is_need_free_memory = true; } total_size += output_sizes[i]; size_list.emplace_back(output_sizes[i]); addr_list.emplace_back(device_address); } - mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list); + AllocCommunicationOpMemory(is_need_alloc_memory, is_need_free_memory, addr_list, total_size, size_list); +} + +void GPUKernelRuntime::AllocCommunicationOpMemory(bool is_need_alloc_memory, bool is_need_free_memory, + const DeviceAddressPtrList addr_list, size_t total_size, + std::vector size_list) { + if (!is_need_alloc_memory) { + return; + } + if (is_need_free_memory) { + for (const auto &iter : addr_list) { + MS_EXCEPTION_IF_NULL(iter); + // Free the inputs/outputs of communication kernel which are not released. + if (iter->ptr_ != nullptr) { + mem_manager_->FreeMemFromMemPool(iter); + } + } + } + auto ret = mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list); + if (!ret) { + MS_LOG(EXCEPTION) << "Malloc device memory failed."; + } } void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, @@ -322,6 +351,9 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); auto cnode = kernel->cast(); MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetCNodeName(kernel) == kAllReduceOpName) { + return; + } // Free the input of kernel by reference count. 
for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) { auto kernel_ref_count_ptr = mem_reuse_util_ptr->GetKernelInputRef(cnode, i); diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h index 33d4b4be70..6f0eefc27a 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h @@ -58,6 +58,9 @@ class GPUKernelRuntime : public KernelRuntime { void AllocCommunicationOpDynamicRes(const session::KernelGraph *graph); void AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel); void AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel); + void AllocCommunicationOpMemory(bool is_need_alloc_memory, bool is_need_free_memory, + const DeviceAddressPtrList addr_list, size_t total_size, + std::vector size_list); void FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, const AddressPtrList &kernel_workspaces, uint32_t graph_id); std::unordered_map mem_reuse_util_map_; diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc index 6e81130b9c..9a63921add 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc +++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc @@ -40,7 +40,7 @@ void GPUMemoryManager::MallocDeviceMemory() { if (context_ptr->enable_dynamic_mem_pool()) { auto device_addr = MallocMemFromMemPool(1); if (!device_addr) { - MS_LOG(ERROR) << "Dynamic memory pool init error."; + MS_LOG(EXCEPTION) << "Dynamic memory pool init error."; } } else { // Need to reserve 20% space for dynamic memory diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/device/kernel_adjust.cc index b557436db9..596cf6790d 100644 --- a/mindspore/ccsrc/device/kernel_adjust.cc +++ b/mindspore/ccsrc/device/kernel_adjust.cc @@ -55,6 +55,24 @@ void KernelAdjust::Reorder(const std::shared_ptr &kernel_g kernel_graph_ptr->set_execution_order(new_order_list); } +void 
KernelAdjust::ReorderGetNext(const std::shared_ptr &kernel_graph_ptr) { + MS_EXCEPTION_IF_NULL(kernel_graph_ptr); + const std::vector &origin_cnode_list = kernel_graph_ptr->execution_order(); + std::vector getnext_list; + std::vector other_list; + for (const auto &cnode : origin_cnode_list) { + if (AnfAlgo::GetCNodeName(cnode) == kGetNextOpName) { + getnext_list.emplace_back(cnode); + } else { + other_list.emplace_back(cnode); + } + } + std::vector new_order_list; + new_order_list.insert(new_order_list.end(), getnext_list.begin(), getnext_list.end()); + new_order_list.insert(new_order_list.end(), other_list.begin(), other_list.end()); + kernel_graph_ptr->set_execution_order(new_order_list); +} + bool KernelAdjust::NeedInsertSwitch() { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); @@ -124,6 +142,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr return; } MS_EXCEPTION_IF_NULL(kernel_graph_ptr); + ReorderGetNext(kernel_graph_ptr); std::map switch_loop_input; CreateSwitchOpParameters(kernel_graph_ptr, &switch_loop_input); @@ -464,10 +483,13 @@ void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_tra } std::vector new_cnode_list; std::vector cnode_ptr_list = kernel_graph_ptr->execution_order(); + if (cnode_ptr_list.empty()) { + MS_LOG(ERROR) << "No CNode in graph"; + return; + } for (const auto &cnode_ptr : cnode_ptr_list) { ProfilingUtils::ProfilingTraceFpStart(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); new_cnode_list.emplace_back(cnode_ptr); - ProfilingUtils::ProfilingCustomOp(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); ProfilingUtils::ProfilingTraceBpEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); ProfilingUtils::ProfilingTraceEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); diff --git a/mindspore/ccsrc/device/kernel_adjust.h 
b/mindspore/ccsrc/device/kernel_adjust.h index 3dced257c1..4c69641a34 100644 --- a/mindspore/ccsrc/device/kernel_adjust.h +++ b/mindspore/ccsrc/device/kernel_adjust.h @@ -63,6 +63,7 @@ class KernelAdjust { KernelAdjust() = default; ~KernelAdjust() = default; + void ReorderGetNext(const std::shared_ptr &kernel_graph_ptr); CNodePtr CreateRecvApplyKernel(const std::shared_ptr &graph_ptr, uint32_t event_id); CNodePtr CreateSendApplyKernel(const std::shared_ptr &graph_ptr, uint32_t event_id); uint32_t FindFirstStreamSwitchLabel(const std::shared_ptr &kernel_graph_ptr); diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc index d1a068b584..e77d348630 100644 --- a/mindspore/ccsrc/device/kernel_runtime.cc +++ b/mindspore/ccsrc/device/kernel_runtime.cc @@ -180,7 +180,10 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector auto device_address = CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocMemFromMemPool(device_address, tensor_size); + auto ret = mem_manager_->MallocMemFromMemPool(device_address, tensor_size); + if (!ret) { + MS_LOG(EXCEPTION) << "Malloc device memory failed."; + } AnfAlgo::SetOutputAddr(device_address, index, item.get()); } } @@ -198,6 +201,7 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { if (AnfAlgo::GetCNodeName(kernel) == "ApplyMomentum") { auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0); AnfAlgo::SetOutputAddr(device_address, 0, kernel.get()); + AnfAlgo::SetOutputAddr(device_address, 1, kernel.get()); return; } @@ -209,7 +213,10 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); MS_EXCEPTION_IF_NULL(device_address); - 
mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); + auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); + if (!ret) { + MS_LOG(EXCEPTION) << "Malloc device memory failed."; + } AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); } } @@ -224,7 +231,10 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { for (size_t i = 0; i < workspace_lists.size(); ++i) { auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); + auto ret = mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); + if (!ret) { + MS_LOG(EXCEPTION) << "Malloc device memory failed."; + } AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get()); } } diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/device/memory_manager.cc index dce54495b0..d2a38038c6 100644 --- a/mindspore/ccsrc/device/memory_manager.cc +++ b/mindspore/ccsrc/device/memory_manager.cc @@ -141,11 +141,14 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { } } -void MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) { +bool MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) { auto device_ptr = MallocMemFromMemPool(size); - MS_EXCEPTION_IF_NULL(device_ptr); + if (!device_ptr) { + return false; + } address->ptr_ = device_ptr; address->from_mem_pool_ = true; + return true; } void *MemoryManager::MallocMemFromMemPool(size_t size) { @@ -168,11 +171,14 @@ void MemoryManager::FreeMemFromMemPool(void *device_ptr) { } } -void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, +bool MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, std::vector size_list) { auto device_ptr_list = 
MallocContinuousMemFromMemPool(total_size, size_list); + if (device_ptr_list.size() == 0) { + return false; + } if (addr_list.size() != device_ptr_list.size()) { - MS_LOG(EXCEPTION) << "The size of device list is not equal to the size of address list."; + MS_LOG(EXCEPTION) << "The size of device list is not equal to the size of address list."; } for (size_t i = 0; i < addr_list.size(); i++) { MS_EXCEPTION_IF_NULL(device_ptr_list[i]); @@ -180,6 +186,7 @@ void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList ad addr_list[i]->ptr_ = device_ptr_list[i]; addr_list[i]->from_mem_pool_ = true; } + return true; } std::vector MemoryManager::MallocContinuousMemFromMemPool(size_t total_size, std::vector size_list) { diff --git a/mindspore/ccsrc/device/memory_manager.h b/mindspore/ccsrc/device/memory_manager.h index dae0861506..be250e0f3f 100644 --- a/mindspore/ccsrc/device/memory_manager.h +++ b/mindspore/ccsrc/device/memory_manager.h @@ -46,11 +46,11 @@ class MemoryManager { uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size); virtual uint8_t *MallocMem(int flag, size_t size); - virtual void MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); + virtual bool MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); virtual void *MallocMemFromMemPool(size_t size); virtual void FreeMemFromMemPool(const DeviceAddressPtr address); virtual void FreeMemFromMemPool(void *device_ptr); - virtual void MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, + virtual bool MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, std::vector size_list); virtual std::vector MallocContinuousMemFromMemPool(size_t total_size, std::vector size_list); diff --git a/mindspore/ccsrc/gvar/CMakeLists.txt b/mindspore/ccsrc/gvar/CMakeLists.txt new file mode 100644 index 0000000000..552ba742f1 --- /dev/null +++ b/mindspore/ccsrc/gvar/CMakeLists.txt @@ -0,0 
+1,5 @@ +file(GLOB_RECURSE MS_GVAR_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cc) +add_library(mindspore_gvar SHARED ${MS_GVAR_SRC_LIST}) +if (APPLE) + set_target_properties(mindspore_gvar PROPERTIES MACOSX_RPATH ON) +endif () diff --git a/mindspore/ccsrc/ir/CMakeLists.txt b/mindspore/ccsrc/ir/CMakeLists.txt index 278ad492e2..2b17eecbed 100644 --- a/mindspore/ccsrc/ir/CMakeLists.txt +++ b/mindspore/ccsrc/ir/CMakeLists.txt @@ -1,5 +1,2 @@ -file(GLOB_RECURSE _IR_ALL_SRC_FILES - ./*.cc - dtype/*.cc) - -add_library(_mindspore_ir_obj OBJECT ${_IR_ALL_SRC_FILES}) \ No newline at end of file +file(GLOB_RECURSE _IR_SRC_LIST ./*.cc dtype/*.cc) +add_library(_mindspore_ir_obj OBJECT ${_IR_SRC_LIST}) diff --git a/mindspore/ccsrc/ir/dtype.cc b/mindspore/ccsrc/ir/dtype.cc index 97291a3dc0..968ee9a524 100644 --- a/mindspore/ccsrc/ir/dtype.cc +++ b/mindspore/ccsrc/ir/dtype.cc @@ -345,7 +345,7 @@ TypePtr StringToNumberType(const std::string &type_name, const std::string &num_ auto bits = std::stoi(type_name.substr(num_type_name.size())); type = std::make_shared(bits); } catch (const std::exception &e) { - MS_LOG(EXCEPTION) << "" << num_type_name << " convert from string error " << e.what(); + MS_LOG(EXCEPTION) << num_type_name << " convert from string error " << e.what(); } } return type; @@ -389,7 +389,7 @@ TypePtr TensorStrToType(const std::string &type_name) { } type = std::make_shared(element_type); } catch (const std::exception &e) { - MS_LOG(EXCEPTION) << "" << type_name << " convert from string error " << e.what(); + MS_LOG(EXCEPTION) << type_name << " convert from string error " << e.what(); } } @@ -416,7 +416,7 @@ TypePtr ListStrToType(const std::string &type_name) { } type = std::make_shared(element_types); } catch (const std::exception &e) { - MS_LOG(EXCEPTION) << "" << type_name << " convert from string error " << e.what(); + MS_LOG(EXCEPTION) << type_name << " convert from string error " << e.what(); } } @@ -443,7 +443,7 @@ TypePtr TupleStrToType(const std::string 
&type_name) { } type = std::make_shared(element_types); } catch (const std::exception &e) { - MS_LOG(EXCEPTION) << "" << type_name << " convert from string error " << e.what(); + MS_LOG(EXCEPTION) << type_name << " convert from string error " << e.what(); } } return type; @@ -484,7 +484,7 @@ TypePtr FunctionStrToType(const std::string &type_name) { } type = std::make_shared(args_type, retval); } catch (const std::exception &e) { - MS_LOG(EXCEPTION) << "" << type_name << " convert from string error " << e.what(); + MS_LOG(EXCEPTION) << type_name << " convert from string error " << e.what(); } } return type; @@ -695,6 +695,7 @@ REGISTER_PYBIND_DEFINE( (void)py::class_>(m_sub, "String").def(py::init()); (void)py::class_>(m_sub, "RefKeyType").def(py::init()); (void)py::class_>(m_sub, "RefType").def(py::init()); + (void)py::class_>(m_sub, "TypeAnything").def(py::init()); })); const TypePtr kTypeExternal = std::make_shared(); diff --git a/mindspore/ccsrc/ir/dtype/type.cc b/mindspore/ccsrc/ir/dtype/type.cc index 30bf0c8e3f..56954495df 100644 --- a/mindspore/ccsrc/ir/dtype/type.cc +++ b/mindspore/ccsrc/ir/dtype/type.cc @@ -87,6 +87,12 @@ const char *MetaIdLabel(const TypeId &v) { return "kMetaTypeExternal"; case kMetaTypeNone: return "kMetaTypeNone"; + case kMetaTypeNull: + return "kMetaTypeNull"; + case kMetaTypeEllipsis: + return "kMetaTypeEllipsis"; + case kMetaTypeEnd: + return "kMetaTypeEnd"; default: return "[Unknown Type Id]"; } diff --git a/mindspore/ccsrc/ir/func_graph.cc b/mindspore/ccsrc/ir/func_graph.cc index 8a58f320f1..40417a33da 100644 --- a/mindspore/ccsrc/ir/func_graph.cc +++ b/mindspore/ccsrc/ir/func_graph.cc @@ -263,18 +263,15 @@ const FuncGraphSet &FuncGraph::func_graphs_used_total() { return used; } -const FuncGraphCounterMap &FuncGraph::func_graph_users() { - auto mng = manager_.lock(); - MS_EXCEPTION_IF_NULL(mng); - auto &users = mng->func_graph_users(); - return users[shared_from_base()]; -} - -const AnfNodeCounterMap 
&FuncGraph::func_graph_user_cnodes() { +const CNodeIndexCounterMap &FuncGraph::func_graph_cnodes_index() { auto mng = manager_.lock(); + if (mng == nullptr) { + MS_LOG(EXCEPTION) << "BUG: no manager for this func graph: " << ToString() + << " NodeInfo: " << trace::GetDebugInfo(debug_info()); + } MS_EXCEPTION_IF_NULL(mng); - auto &users = mng->func_graph_user_cnodes(); - return users[shared_from_base()]; + auto &cnode = mng->func_graph_cnodes_index(); + return cnode[shared_from_base()]; } FuncGraphPtr FuncGraph::parent() { diff --git a/mindspore/ccsrc/ir/func_graph.h b/mindspore/ccsrc/ir/func_graph.h index 9c3752cd81..bca5759807 100644 --- a/mindspore/ccsrc/ir/func_graph.h +++ b/mindspore/ccsrc/ir/func_graph.h @@ -37,6 +37,7 @@ namespace mindspore { using BaseRefCounterMap = OrderedMap; using FuncGraphCounterMap = OrderedMap; using AnfNodeCounterMap = OrderedMap; +using CNodeIndexCounterMap = OrderedMap; const char FUNC_GRAPH_FLAG_IGNORE_VALUES[] = "ignore_values"; const char FUNC_GRAPH_FLAG_DEFER_INLINE[] = "defer_inline"; @@ -203,11 +204,8 @@ class FuncGraph : public FuncGraphBase { // get all func graphs nested used by this func graph const FuncGraphSet &func_graphs_used_total(); - // get all users of this func graph - const FuncGraphCounterMap &func_graph_users(); - - // get all user cnodes of this func graph - const AnfNodeCounterMap &func_graph_user_cnodes(); + // get all user value nodes of this func graph + const CNodeIndexCounterMap &func_graph_cnodes_index(); // Return the parent of this graph. 
FuncGraphPtr parent(); diff --git a/mindspore/ccsrc/ir/func_graph_cloner.cc b/mindspore/ccsrc/ir/func_graph_cloner.cc index c086b8d7d1..c8012276f1 100644 --- a/mindspore/ccsrc/ir/func_graph_cloner.cc +++ b/mindspore/ccsrc/ir/func_graph_cloner.cc @@ -182,9 +182,11 @@ void Cloner::CloneFuncGraphValueNodes(const FuncGraphPtr &func_graph, const Func } target_func_graph->set_return(return_node); - auto &value_nodes = manager_->func_graph_valuenodes()[func_graph]; - for (auto &value_node : value_nodes) { - CloneValueNode(value_node.first, target_func_graph); + auto &cnodes = manager_->func_graph_cnodes_index()[func_graph]; + for (auto &cnode : cnodes) { + auto parent = cnode.first->first->cast(); + auto valuenode = parent->input(cnode.first->second); + CloneValueNode(valuenode, target_func_graph); } } @@ -386,8 +388,8 @@ void Cloner::LiftParameters(const FuncGraphPtr &func_graph_user, const FuncGraph if (lift_params.empty()) { return; } - for (auto &user : func_graph_user->func_graph_users()) { - LiftParameters(user.first, func_graph_user, lift_params); + for (auto &cnode : func_graph_user->func_graph_cnodes_index()) { + LiftParameters(cnode.first->first->func_graph(), func_graph_user, lift_params); } } @@ -395,8 +397,8 @@ void Cloner::Lift() { for (auto &func_graph_params : repl_func_graph_params_) { auto &func_graph = func_graph_params.first; auto ¶ms = func_graph_params.second; - for (auto &user : func_graph->func_graph_users()) { - LiftParameters(user.first, func_graph, params); + for (auto &cnode : func_graph->func_graph_cnodes_index()) { + LiftParameters(cnode.first->first->func_graph(), func_graph, params); } } } diff --git a/mindspore/ccsrc/ir/func_graph_cloner.h b/mindspore/ccsrc/ir/func_graph_cloner.h index 426cf447a3..10b4b0111e 100644 --- a/mindspore/ccsrc/ir/func_graph_cloner.h +++ b/mindspore/ccsrc/ir/func_graph_cloner.h @@ -59,7 +59,7 @@ class Cloner { // Map of replicate nodes and graphs std::unordered_map *cloned_node() { return &repl_node_; } - 
std::unordered_map cloned_func_graph() { return repl_func_graph_; } + std::unordered_map &cloned_func_graph() { return repl_func_graph_; } // Scope of cloned graphs void set_scope(const ScopePtr &scope) { scope_ = scope; } diff --git a/mindspore/ccsrc/ir/manager.cc b/mindspore/ccsrc/ir/manager.cc index a53c9e95ae..1ed747eefd 100644 --- a/mindspore/ccsrc/ir/manager.cc +++ b/mindspore/ccsrc/ir/manager.cc @@ -78,13 +78,16 @@ void FuncGraphManager::Reset() { node_users_ = NodeUsersMap(); signals_ = std::make_shared(); + // FuncGraph --> AnfNode nodes_ = std::make_shared(this); + + // FuncGraph --> {AnfNode, Count} valuenodes_ = std::make_shared(this); free_variables_direct_ = std::make_shared(this); - func_graph_valuenodes_ = std::make_shared(this); + func_graph_cnodes_index_ = std::make_shared(this); + + // FuncGraph --> {FuncGraph, Count} func_graphs_used_ = std::make_shared(this); - func_graph_users_ = std::make_shared(this); - func_graph_user_cnodes_ = std::make_shared(this); func_graph_child_direct_ = std::make_shared(this); func_graph_parents_direct_ = std::make_shared(this); func_graph_j_direct_ = std::make_shared(this); @@ -300,9 +303,9 @@ void FuncGraphManager::MaybeDropFuncGraphs(const FuncGraphSet &func_graphs, bool MS_LOG(DEBUG) << "Cannot drop as roots contains func graph: " << func_graph->ToString(); continue; } - MS_EXCEPTION_IF_NULL(func_graph_users_); - auto &users = func_graph_users_->count_func_graphs_map()[func_graph]; - if (!users.empty() && !ignore_users) { + MS_EXCEPTION_IF_NULL(func_graph_cnodes_index_); + auto &users_cnode_index = func_graph_cnodes_index_->count_nodes_map()[func_graph]; + if (!users_cnode_index.empty() && !ignore_users) { MS_LOG(DEBUG) << "Cannot drop as users not empty: " << func_graph->ToString(); continue; } @@ -472,10 +475,6 @@ void FuncGraphManager::MoveAllCNodeDropGraph(FuncGraphPtr source, FuncGraphPtr t node->set_scope(scope); } } - for (auto &used : source->func_graphs_used()) { - 
(void)func_graph_users_->Inc(used.first, target, used.second); - (void)this->func_graph_users()[used.first].erase(source); - } for (auto &child : this->func_graph_child_direct()[source]) { (void)func_graph_parents_direct_->Inc(child.first, target, child.second); (void)this->func_graph_parents_direct()[child.first].erase(source); @@ -661,7 +660,9 @@ DepCollector::DepCollector(const FuncGraphManager *const manager) : FuncGraphAna void DepCollector::OnDropEdge(AnfNodePtr node, int index, AnfNodePtr inp) { OnModEdge(node, index, inp, kDecEdge); } -bool CounterAnfNodeCollector::Inc(const FuncGraphPtr &func_graph, const AnfNodePtr &key, int count = 1) { +template +bool CounterAnfNodeCollector::Inc(const FuncGraphPtr &func_graph, + const ValueT &key, int count) { auto &d = count_nodes_map_[func_graph]; if (d.count(key) == 0) { d[key] = count; @@ -672,7 +673,9 @@ bool CounterAnfNodeCollector::Inc(const FuncGraphPtr &func_graph, const AnfNodeP return false; } -bool CounterAnfNodeCollector::Dec(const FuncGraphPtr &func_graph, const AnfNodePtr &key, int count = 1) { +template +bool CounterAnfNodeCollector::Dec(const FuncGraphPtr &func_graph, + const ValueT &key, int count) { MS_EXCEPTION_IF_NULL(func_graph); auto &d = count_nodes_map_[func_graph]; if (d.count(key) != 0) { @@ -682,7 +685,7 @@ bool CounterAnfNodeCollector::Dec(const FuncGraphPtr &func_graph, const AnfNodeP } else { d[key] -= count; if (d[key] < 0) { - MS_LOG(EXCEPTION) << "Count of key '" << key->ToString() + MS_LOG(EXCEPTION) << "Count of key '" << key << "' dec from 0. 
NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info()); } } @@ -690,52 +693,15 @@ bool CounterAnfNodeCollector::Dec(const FuncGraphPtr &func_graph, const AnfNodeP return false; } -bool CounterAnfNodeCollector::Mod(const FuncGraphPtr &func_graph, const AnfNodePtr &key, int count) { +template +bool CounterAnfNodeCollector::Mod(const FuncGraphPtr &func_graph, + const ValueT &key, int count) { if (count > 0) { return Inc(func_graph, key, count); } else if (count < 0) { return Dec(func_graph, key, -count); } else { - MS_LOG(EXCEPTION) << "Count of key '" << key->ToString() - << "' cannot be 0. NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info()); - } -} - -bool CounterFuncGraphCollector::Inc(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count = 1) { - auto &d = count_func_graphs_map_[func_graph]; - if (d.count(key) == 0) { - d[key] = count; - return true; - } else { - d[key] += count; - } - return false; -} - -bool CounterFuncGraphCollector::Dec(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count = 1) { - auto &d = count_func_graphs_map_[func_graph]; - if (d.count(key) != 0) { - if (d[key] == count) { - (void)d.erase(key); - return true; - } else { - d[key] -= count; - if (d[key] < 0) { - MS_LOG(EXCEPTION) << "Count of key '" << key->ToString() - << "' dec from 0. NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info()); - } - } - } - return false; -} - -bool CounterFuncGraphCollector::Mod(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count) { - if (count > 0) { - return Inc(func_graph, key, count); - } else if (count < 0) { - return Dec(func_graph, key, -count); - } else { - MS_LOG(EXCEPTION) << "Count of key '" << key->ToString() + MS_LOG(EXCEPTION) << "Count of key '" << key << "' cannot be 0. 
NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info()); } } @@ -754,16 +720,21 @@ void ValueNodesCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) { (void)count_nodes_map_.erase(src); } -// if inp is a graph ValueNode, this graph's FuncGraphValueNodesCollector's value is inp self -void FuncGraphValueNodesCollector::OnModEdge(AnfNodePtr, int, AnfNodePtr inp, EdgeProcessDirection direction) { +void FuncGraphUsersCNodeIndexCollector::OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, + EdgeProcessDirection direction) { + MS_EXCEPTION_IF_NULL(node); if (IsValueNode(inp)) { - (void)Mod(GetValueNode(inp), inp, direction); + (void)Mod(GetValueNode(inp), std::make_shared(std::make_pair(node, index)), + direction); } } -void FuncGraphValueNodesCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) { +void FuncGraphUsersCNodeIndexCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) { for (auto &it : count_nodes_map_[src]) { - (void)Inc(dst, it.first, it.second); + // Ignore the user graph who may own itself. + if (dst != it.first->first->func_graph()) { + (void)Inc(dst, it.first, it.second); + } } (void)count_nodes_map_.erase(src); } @@ -794,6 +765,45 @@ static FuncGraphPtr ParentProxy(const FuncGraphPtr &fg) { return gn; } +bool CounterFuncGraphCollector::Inc(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count = 1) { + auto &d = count_func_graphs_map_[func_graph]; + if (d.count(key) == 0) { + d[key] = count; + return true; + } else { + d[key] += count; + } + return false; +} + +bool CounterFuncGraphCollector::Dec(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count = 1) { + auto &d = count_func_graphs_map_[func_graph]; + if (d.count(key) != 0) { + if (d[key] == count) { + (void)d.erase(key); + return true; + } else { + d[key] -= count; + if (d[key] < 0) { + MS_LOG(EXCEPTION) << "Count of key '" << key->ToString() + << "' dec from 0. 
NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info()); + } + } + } + return false; +} + +bool CounterFuncGraphCollector::Mod(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count) { + if (count > 0) { + return Inc(func_graph, key, count); + } else if (count < 0) { + return Dec(func_graph, key, -count); + } else { + MS_LOG(EXCEPTION) << "Count of key '" << key->ToString() + << "' cannot be 0. NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info()); + } +} + void FuncGraphChildDirect::OnModEdge(AnfNodePtr node, int, AnfNodePtr inp, EdgeProcessDirection direction) { MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(inp); @@ -859,36 +869,10 @@ void FuncGraphsUsedCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) (void)count_func_graphs_map_.erase(src); } -void FuncGraphUsersCollector::OnModEdge(AnfNodePtr node, int, AnfNodePtr inp, EdgeProcessDirection direction) { - MS_EXCEPTION_IF_NULL(node); - if (IsValueNode(inp)) { - (void)Mod(GetValueNode(inp), node->func_graph(), direction); - } -} - -void FuncGraphUsersCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr) { - // all graph use in src need to change to dst, so add dst user - (void)count_func_graphs_map_.erase(src); -} - -void FuncGraphUserNodesCollector::OnModEdge(AnfNodePtr node, int, AnfNodePtr inp, EdgeProcessDirection direction) { - MS_EXCEPTION_IF_NULL(node); - if (IsValueNode(inp)) { - (void)Mod(GetValueNode(inp), node, direction); - } -} - -void FuncGraphUserNodesCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) { - for (auto &it : count_nodes_map_[src]) { - (void)Inc(dst, it.first, it.second); - } - (void)count_nodes_map_.erase(src); -} - void FuncGraphJDirectCollector::OnModEdge(AnfNodePtr node, int, AnfNodePtr inp, EdgeProcessDirection direction) { if (IsValueNode(inp) && IsPrimitiveCNode(node, prim::kPrimJ)) { (void)Mod(node->func_graph(), GetValueNode(inp), direction); - MS_LOG(DEBUG) << "" << node->func_graph()->ToString() << " users func graph " + 
MS_LOG(DEBUG) << node->func_graph()->ToString() << " users func graph " << GetValueNode(inp)->ToString() << " which contains J(func_graph), dir: " << direction; } } @@ -945,7 +929,7 @@ FuncGraphSetPtr FuncGraphParentsTotalComputer::SeekParents(const FuncGraphPtr &f void FuncGraphParentsTotalComputer::RealRecompute(FuncGraphPtr fg) { MS_EXCEPTION_IF_NULL(fg); all_parents_direct_ = &(manager_->func_graph_parents_direct()); - MS_LOG(DEBUG) << "" << fg->ToString() << " total func graph dep size:" << (*all_parents_direct_)[fg].size(); + MS_LOG(DEBUG) << fg->ToString() << " total func graph dep size:" << (*all_parents_direct_)[fg].size(); func_graph_parents_total_analysis_[fg].update(SeekParents(fg)); MS_LOG(DEBUG) << "FuncGraphParentsTotalComputer end: " << func_graph_parents_total_analysis_[fg].size(); } @@ -1074,7 +1058,7 @@ void FuncGraphsUsedTotalComputer::RealRecompute(FuncGraphPtr fg) { if (func_graph_used_total_analysis_[fg].count(used_fg) == 0) { todo_new.push_back(used_fg); } - MS_LOG(DEBUG) << "" << fg->ToString() << " add func graph " << used_fg->ToString(); + MS_LOG(DEBUG) << fg->ToString() << " add func graph " << used_fg->ToString(); func_graph_used_total_analysis_[fg].add(used_fg); } } @@ -1138,7 +1122,7 @@ void RecursiveComputer::CheckRecursiveGraphs(const FuncGraphPtr &fg, std::listcontains(fg)) { - MS_LOG(DEBUG) << "" << fg->ToString() << " had been checked"; + MS_LOG(DEBUG) << fg->ToString() << " had been checked"; return false; } MS_EXCEPTION_IF_NULL(manager_); @@ -1149,7 +1133,7 @@ bool FuncGraphJTotalComputer::SeekJ(const FuncGraphPtr &fg, const FuncGraphSetPt std::find_if(func_graph_counter_map[fg].begin(), func_graph_counter_map[fg].end(), [path](const std::pair iter) { return !path->contains(iter.first); }); if (contains_j != func_graph_counter_map[fg].end()) { - MS_LOG(DEBUG) << "" << fg->ToString() << " contains J(" << contains_j->first->ToString() << ")"; + MS_LOG(DEBUG) << fg->ToString() << " contains J(" << contains_j->first->ToString() << 
")"; return true; } } @@ -1160,12 +1144,11 @@ bool FuncGraphJTotalComputer::SeekJ(const FuncGraphPtr &fg, const FuncGraphSetPt for (auto &item : used[fg]) { auto used_g = item.first; if (SeekJ(used_g, path)) { - MS_LOG(DEBUG) << "" << fg->ToString() << " users func graph " << used_g->ToString() - << " which contains J(func_graph)"; + MS_LOG(DEBUG) << fg->ToString() << " users func graph " << used_g->ToString() << " which contains J(func_graph)"; return true; } } - MS_LOG(DEBUG) << "" << fg->ToString() << " doesn't contain J(func_graph)"; + MS_LOG(DEBUG) << fg->ToString() << " doesn't contain J(func_graph)"; return false; } diff --git a/mindspore/ccsrc/ir/manager.h b/mindspore/ccsrc/ir/manager.h index 54c1e8a692..7f36b53205 100644 --- a/mindspore/ccsrc/ir/manager.h +++ b/mindspore/ccsrc/ir/manager.h @@ -100,8 +100,12 @@ struct Signals { enum EdgeProcessDirection { kDecEdge = -1, kIncEdge = 1 }; +using CNodeIndexPair = std::pair; +using CNodeIndexPairPtr = std::shared_ptr; + using FuncGraphToFuncGraphCounterMap = OrderedMap>; -using FuncGraphToAnfNodeCounterMap = OrderedMap>; +template , class CollectorEqual = std::equal_to> +using FuncGraphToAnfNodeCounterMap = OrderedMap>; // analysis base class class FuncGraphAnalysis { @@ -174,46 +178,56 @@ class NodesCollector final : public DepCollector { void OnDropNode(AnfNodePtr n) override; }; -class CounterFuncGraphCollector : public DepCollector { - public: - explicit CounterFuncGraphCollector(const FuncGraphManager *m) : DepCollector(m) {} - ~CounterFuncGraphCollector() override = default; - FuncGraphToFuncGraphCounterMap &count_func_graphs_map() { return count_func_graphs_map_; } - // inherit from FuncGraphAnalysis - size_t size() const override { return count_func_graphs_map_.size(); } - void OnAddFuncGraph(FuncGraphPtr fg) final { count_func_graphs_map_[fg] = OrderedMap(); } - void OnDropFuncGraph(FuncGraphPtr fg) final { (void)count_func_graphs_map_.erase(fg); } - bool Inc(const FuncGraphPtr &func_graph, const 
FuncGraphPtr &key, int count); - bool Dec(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count); - bool Mod(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count); - - FuncGraphToFuncGraphCounterMap count_func_graphs_map_; +struct CNodeIndexHasher { + std::size_t operator()(const CNodeIndexPairPtr pair) const { + MS_EXCEPTION_IF_NULL(pair); + MS_EXCEPTION_IF_NULL(pair->first); + return hash_combine(pair->first->hash(), std::hash()(pair->second)); + } +}; - protected: - void ExtraReset() override { count_func_graphs_map_.clear(); } +struct CNodeIndexEqual { + bool operator()(const CNodeIndexPairPtr lhs, const CNodeIndexPairPtr rhs) const { + if (lhs == nullptr || rhs == nullptr) { + return false; + } + if (lhs == rhs) { + return true; + } + if (lhs->first != rhs->first) { + return false; + } + if (lhs->second != rhs->second) { + return false; + } + return true; + } }; +template , class CollectorEqual = std::equal_to> class CounterAnfNodeCollector : public DepCollector { public: explicit CounterAnfNodeCollector(const FuncGraphManager *m) : DepCollector(m) {} ~CounterAnfNodeCollector() override = default; - FuncGraphToAnfNodeCounterMap &count_nodes_map() { return count_nodes_map_; } + FuncGraphToAnfNodeCounterMap &count_nodes_map() { return count_nodes_map_; } size_t size() const override { return count_nodes_map_.size(); } - void OnAddFuncGraph(FuncGraphPtr fg) final { count_nodes_map_[fg] = OrderedMap(); } + void OnAddFuncGraph(FuncGraphPtr fg) final { + count_nodes_map_[fg] = OrderedMap(); + } void OnDropFuncGraph(FuncGraphPtr fg) final { (void)count_nodes_map_.erase(fg); } - bool Inc(const FuncGraphPtr &func_graph, const AnfNodePtr &key, int count); - bool Dec(const FuncGraphPtr &func_graph, const AnfNodePtr &key, int count); - bool Mod(const FuncGraphPtr &func_graph, const AnfNodePtr &key, int count); + bool Inc(const FuncGraphPtr &func_graph, const ValueT &key, int count); + bool Dec(const FuncGraphPtr &func_graph, const ValueT &key, 
int count); + bool Mod(const FuncGraphPtr &func_graph, const ValueT &key, int count); - FuncGraphToAnfNodeCounterMap count_nodes_map_; + FuncGraphToAnfNodeCounterMap count_nodes_map_; protected: void ExtraReset() override { count_nodes_map_.clear(); } }; -class ValueNodesCollector final : public CounterAnfNodeCollector { +class ValueNodesCollector final : public CounterAnfNodeCollector { public: explicit ValueNodesCollector(const FuncGraphManager *m) : CounterAnfNodeCollector(m) {} ~ValueNodesCollector() override = default; @@ -223,17 +237,19 @@ class ValueNodesCollector final : public CounterAnfNodeCollector { void OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, EdgeProcessDirection direction) override; }; -class FuncGraphValueNodesCollector final : public CounterAnfNodeCollector { +// Record the CNode and its input index, who points to the function graph. +class FuncGraphUsersCNodeIndexCollector final + : public CounterAnfNodeCollector { public: - explicit FuncGraphValueNodesCollector(const FuncGraphManager *m) : CounterAnfNodeCollector(m) {} - ~FuncGraphValueNodesCollector() override = default; + explicit FuncGraphUsersCNodeIndexCollector(const FuncGraphManager *m) : CounterAnfNodeCollector(m) {} + ~FuncGraphUsersCNodeIndexCollector() override = default; void OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) override; protected: void OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, EdgeProcessDirection direction) override; }; -class FVDirectCollector final : public CounterAnfNodeCollector { +class FVDirectCollector final : public CounterAnfNodeCollector { public: explicit FVDirectCollector(const FuncGraphManager *m) : CounterAnfNodeCollector(m) {} ~FVDirectCollector() override = default; @@ -243,6 +259,25 @@ class FVDirectCollector final : public CounterAnfNodeCollector { void OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, EdgeProcessDirection direction) override; }; +class CounterFuncGraphCollector : public DepCollector { + public: + explicit 
CounterFuncGraphCollector(const FuncGraphManager *m) : DepCollector(m) {} + ~CounterFuncGraphCollector() override = default; + FuncGraphToFuncGraphCounterMap &count_func_graphs_map() { return count_func_graphs_map_; } + // inherit from FuncGraphAnalysis + size_t size() const override { return count_func_graphs_map_.size(); } + void OnAddFuncGraph(FuncGraphPtr fg) final { count_func_graphs_map_[fg] = OrderedMap(); } + void OnDropFuncGraph(FuncGraphPtr fg) final { (void)count_func_graphs_map_.erase(fg); } + bool Inc(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count); + bool Dec(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count); + bool Mod(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count); + + FuncGraphToFuncGraphCounterMap count_func_graphs_map_; + + protected: + void ExtraReset() override { count_func_graphs_map_.clear(); } +}; + class FuncGraphChildDirect final : public CounterFuncGraphCollector { public: explicit FuncGraphChildDirect(const FuncGraphManager *m) : CounterFuncGraphCollector(m) {} @@ -279,28 +314,6 @@ class FuncGraphsUsedCollector final : public CounterFuncGraphCollector { void OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, EdgeProcessDirection direction) override; }; -// graph's all user graphs: key is g, value is graphs who used g -class FuncGraphUsersCollector final : public CounterFuncGraphCollector { - public: - explicit FuncGraphUsersCollector(const FuncGraphManager *m) : CounterFuncGraphCollector(m) {} - void OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) override; - ~FuncGraphUsersCollector() override = default; - - protected: - void OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, EdgeProcessDirection direction) override; -}; - -// graph's all user cnodes: key is g, value is cnodes who used g -class FuncGraphUserNodesCollector final : public CounterAnfNodeCollector { - public: - explicit FuncGraphUserNodesCollector(const FuncGraphManager *m) : CounterAnfNodeCollector(m) {} - 
void OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) override; - ~FuncGraphUserNodesCollector() override = default; - - protected: - void OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, EdgeProcessDirection direction) override; -}; - class FuncGraphJDirectCollector final : public CounterFuncGraphCollector { public: explicit FuncGraphJDirectCollector(const FuncGraphManager *m) : CounterFuncGraphCollector(m) {} @@ -433,7 +446,9 @@ class ScopeComputer final : public DepComputer { using FVTotalMap = OrderedMap>; -class FVTotalComputer final : public DepComputer, public CounterAnfNodeCollector, public CounterFuncGraphCollector { +class FVTotalComputer final : public DepComputer, + public CounterAnfNodeCollector, + public CounterFuncGraphCollector { public: explicit FVTotalComputer(const FuncGraphManager *m) : DepComputer(m), CounterAnfNodeCollector(m), CounterFuncGraphCollector(m) {} @@ -549,18 +564,18 @@ class FuncGraphManager : public std::enable_shared_from_this { FuncGraphToAnfNodeMap &nodes() const { return nodes_->nodes_analysis_; } - FuncGraphToAnfNodeCounterMap &valuenodes() const { return valuenodes_->count_nodes_map_; } + FuncGraphToAnfNodeCounterMap &valuenodes() const { return valuenodes_->count_nodes_map_; } - FuncGraphToAnfNodeCounterMap &free_variables_direct() const { return free_variables_direct_->count_nodes_map_; } + FuncGraphToAnfNodeCounterMap &free_variables_direct() const { + return free_variables_direct_->count_nodes_map_; + } - FuncGraphToAnfNodeCounterMap &func_graph_valuenodes() const { return func_graph_valuenodes_->count_nodes_map_; } + FuncGraphToAnfNodeCounterMap &func_graph_cnodes_index() const { + return func_graph_cnodes_index_->count_nodes_map_; + } FuncGraphToFuncGraphCounterMap &func_graphs_used() const { return func_graphs_used_->count_func_graphs_map_; } - FuncGraphToFuncGraphCounterMap &func_graph_users() const { return func_graph_users_->count_func_graphs_map_; } - - FuncGraphToAnfNodeCounterMap &func_graph_user_cnodes() 
const { return func_graph_user_cnodes_->count_nodes_map_; } - FuncGraphToFuncGraphCounterMap &func_graph_child_direct() const { return func_graph_child_direct_->count_func_graphs_map_; } @@ -598,10 +613,8 @@ class FuncGraphManager : public std::enable_shared_from_this { std::shared_ptr nodes_; std::shared_ptr valuenodes_; std::shared_ptr free_variables_direct_; - std::shared_ptr func_graph_valuenodes_; + std::shared_ptr func_graph_cnodes_index_; std::shared_ptr func_graphs_used_; - std::shared_ptr func_graph_users_; - std::shared_ptr func_graph_user_cnodes_; std::shared_ptr func_graph_child_direct_; std::shared_ptr func_graph_parents_direct_; std::shared_ptr func_graph_j_direct_; diff --git a/mindspore/ccsrc/ir/meta_tensor.cc b/mindspore/ccsrc/ir/meta_tensor.cc index fe41abcef4..8718a82cbc 100644 --- a/mindspore/ccsrc/ir/meta_tensor.cc +++ b/mindspore/ccsrc/ir/meta_tensor.cc @@ -164,11 +164,9 @@ Tensor::Tensor(const py::float_ &input, const TypePtr &data_type) { init(py::arr Tensor::Tensor(const py::int_ &input, const TypePtr &data_type) { init(py::array(input), data_type); } Tensor::Tensor(const Tensor &tensor, const TypePtr &data_type) - : MetaTensor(tensor), dirty_(tensor.dirty_), device_address_(tensor.device_address_) { + : MetaTensor(tensor), device_address_(tensor.device_address_) { init(tensor.data_, data_type); - if (device_address_ != nullptr) { - (void)data_sync(); - } + dirty_ = tensor.is_dirty(); } Tensor &Tensor::operator=(const Tensor &tensor) { @@ -185,14 +183,6 @@ bool Tensor::operator==(const Tensor &tensor) const { return (MetaTensor::operator==(tensor) && data_ == tensor.data_); } -bool Tensor::ValueEqualPy(const py::object &other) const { - if (!py::isinstance(other)) { - MS_LOG(WARNING) << "compare other not a tensor"; - return false; - } - return ValueEqual(py::cast(other)); -} - bool Tensor::ValueEqual(const Tensor &other) const { auto equal = [&other, this]() -> bool { auto np = py::module::import("numpy"); @@ -302,6 +292,7 @@ void 
Tensor::init(const py::array &input, const TypeId &data_type) { } else { data_ = input; } + dirty_ = true; } void Tensor::init(TypeId data_type, const std::vector &shape, py::array *const data) { @@ -542,7 +533,6 @@ REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) { )mydelimiter") .def("__str__", &Tensor::ToString) .def("__repr__", &Tensor::ToStringRepr) - .def("__eq__", &Tensor::ValueEqualPy) .def(py::pickle( [](const Tensor &t) { // __getstate__ /* Return a tuple that fully encodes the state of the object */ diff --git a/mindspore/ccsrc/ir/meta_tensor.h b/mindspore/ccsrc/ir/meta_tensor.h index 1f6c866f11..ff76a1d4f9 100644 --- a/mindspore/ccsrc/ir/meta_tensor.h +++ b/mindspore/ccsrc/ir/meta_tensor.h @@ -329,9 +329,6 @@ class Tensor : public MetaTensor { // It is different from 'operator==' which just compare shape/type/address, it do real value comparison. bool ValueEqual(const Tensor &other) const; - // It is different from 'operator==' which just compare shape/type/address, it do real value comparison. 
- bool ValueEqualPy(const py::object &other) const; - bool operator==(const Value &other) const override { if (other.isa()) { auto other_ = static_cast(other); diff --git a/mindspore/ccsrc/ir/primitive.cc b/mindspore/ccsrc/ir/primitive.cc index d40f8a265d..d848f9c0d8 100644 --- a/mindspore/ccsrc/ir/primitive.cc +++ b/mindspore/ccsrc/ir/primitive.cc @@ -145,14 +145,14 @@ py::function PrimitivePy::GetComputeFunction() { static const char *const compute_func_name = "vm_impl"; if (py::hasattr(python_obj_, compute_func_name)) { - MS_LOG(INFO) << "" << name() << " compute_func_name"; + MS_LOG(INFO) << name() << " compute_func_name"; py::function fn = python_obj_.attr(compute_func_name).cast(); return fn; } static const std::string vm_module = "mindspore.ops.vm_impl_registry"; static const std::string get_vm_impl_fn = "get_vm_impl_fn"; - MS_LOG(INFO) << "" << name() << ": get_vm_impl_fn"; + MS_LOG(INFO) << name() << ": get_vm_impl_fn"; py::function get_fn = parse::python_adapter::GetPyFn(vm_module, get_vm_impl_fn); py::function vm_fn = get_fn(python_obj_); diff --git a/mindspore/ccsrc/ir/primitive.h b/mindspore/ccsrc/ir/primitive.h index 73941c1058..08c6b7dc9b 100644 --- a/mindspore/ccsrc/ir/primitive.h +++ b/mindspore/ccsrc/ir/primitive.h @@ -52,7 +52,11 @@ class Primitive : public Named { : Named(name), signatures_(), prim_type_(prim_type) {} Primitive(const Primitive &prim) - : Named(prim), attrs_(prim.attrs_), signatures_(prim.signatures_), prim_type_(prim.prim_type_) {} + : Named(prim), + attrs_(prim.attrs_), + signatures_(prim.signatures_), + instance_name_(prim.instance_name_), + prim_type_(prim.prim_type_) {} MS_DECLARE_PARENT(Primitive, Named); diff --git a/mindspore/ccsrc/ir/value.h b/mindspore/ccsrc/ir/value.h index c80e22f735..160eac7b5c 100644 --- a/mindspore/ccsrc/ir/value.h +++ b/mindspore/ccsrc/ir/value.h @@ -123,6 +123,9 @@ class ValueSlice : public Value { abstract::AbstractBasePtr ToAbstract() override; std::string DumpText() const override { return 
ToString(); } + ValuePtr start() const { return start_; } + ValuePtr stop() const { return stop_; } + ValuePtr step() const { return step_; } private: ValuePtr start_; diff --git a/mindspore/ccsrc/kernel/CMakeLists.txt b/mindspore/ccsrc/kernel/CMakeLists.txt index 9c5e2c1890..a3a5077a1a 100644 --- a/mindspore/ccsrc/kernel/CMakeLists.txt +++ b/mindspore/ccsrc/kernel/CMakeLists.txt @@ -1,34 +1,46 @@ -file(GLOB_RECURSE _SESSION_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "kernel_query.cc" - "kernel_fusion.cc" - "kernel_build_info.cc" - "kash/*.cc" - "common_utils.cc" - "oplib/*.cc" - ) - -add_library(_mindspore_kernel_obj OBJECT ${_SESSION_ALL_SRC_FILES}) - -if(ENABLE_GPU) - file(GLOB_RECURSE _CUDA_GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "gpu/*.cu" - "akg/gpu/*.cc" - ) - add_library(_cuda_gpu_kernel_obj OBJECT ${_CUDA_GPU_SRC_LIST}) - - file(GLOB_RECURSE _C_EXPRESSION_GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "gpu/*.cc" - ) - list(REMOVE_ITEM _C_EXPRESSION_GPU_SRC_LIST "gpu/nccl/nccl_gpu_kernel.cc") - add_library(_c_expression_gpu_device_obj OBJECT ${_C_EXPRESSION_GPU_SRC_LIST}) -endif() - -if(ENABLE_D) - file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "tbe/*.cc" - "aicpu/*.cc" - "mng/*.cc" - "hccl/*.cc" - ) - target_sources(_mindspore_kernel_obj PRIVATE ${_D_SRC_LIST}) -endif() +file(GLOB_RECURSE KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "kernel_build_info.cc" + "kash/*.cc" + "common_utils.cc" + "oplib/*.cc" +) + +if (ENABLE_D) + file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "kernel_query.cc" + "kernel_fusion.cc" + "tbe/*.cc" + "aicpu/*.cc" + "mng/*.cc" + "hccl/*.cc" + ) + add_compile_definitions(ENABLE_D) +endif () + +if (ENABLE_CPU) + file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "cpu/*.cc" + ) +endif () + +if (ENABLE_GPU) + file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "gpu/*.cu" + "akg/gpu/*.cc" + "akg/akgkernelbuild.cc" + 
"akg/akg_kernel_attrs_process.cc" + ) + + file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc") + list(REMOVE_ITEM GPU_SRC_LIST "gpu/nccl/nccl_gpu_kernel.cc") + + if (ENABLE_MPI) + include(ExternalProject) + file(GLOB_RECURSE GPU_NCCL_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/nccl/*.cc") + list(APPEND GPU_SRC_LIST ${GPU_NCCL_LIST}) + endif () + + # add_library(_mindspore_kernel_cuda_obj OBJECT ${CUDA_SRC_LIST}) +endif() + +add_library(_mindspore_kernel_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST}) diff --git a/mindspore/ccsrc/kernel/common_utils.cc b/mindspore/ccsrc/kernel/common_utils.cc index 5abaff412e..54980c2cb7 100644 --- a/mindspore/ccsrc/kernel/common_utils.cc +++ b/mindspore/ccsrc/kernel/common_utils.cc @@ -114,53 +114,35 @@ bool IsAtomicNode(const CNodePtr &kernel_node) { return atomic_flag; } -bool KernelMeta::ReadIndex(const std::string &bin_dir) { - DIR *dir = opendir(bin_dir.c_str()); - if (dir == nullptr) { +void KernelMeta::Initialize() { + kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(getpid()) + "/"; + // remove old kernel cache + RemoveKernelCache(); + #if defined(_WIN32) || defined(_WIN64) - auto ret = mkdir(bin_dir.c_str()); + auto ret = mkdir(kernel_meta_path_.c_str()); #else - auto ret = mkdir(bin_dir.c_str(), S_IRWXG | S_IRWXU); + auto ret = mkdir(kernel_meta_path_.c_str(), S_IRWXG | S_IRWXU); #endif - if (ret != 0) { - MS_LOG(INFO) << "kernel dir not exist[" << bin_dir << "]."; - return false; - } - dir = opendir(bin_dir.c_str()); + if (ret != 0) { + MS_LOG(INFO) << "kernel dir [" << kernel_meta_path_ << "], will be created later"; } + initialized_ = true; +} +void KernelMeta::RemoveKernelCache() { + DIR *dir = opendir(kernel_meta_path_.c_str()); + if (dir == nullptr) { + return; + } struct dirent *entry; while ((entry = readdir(dir)) != nullptr) { - string bin_dir_tmp = bin_dir; - std::string cce_json = entry->d_name; - if (cce_json.length() <= 5) { - 
continue; - } - - std::string suffix = cce_json.substr(cce_json.length() - 5); - if (suffix != kJsonSuffix) { - continue; - } - - auto sp = cce_json.rfind('/'); - if (sp != std::string::npos) { - continue; - } - - sp = cce_json.rfind('.'); - if (sp == std::string::npos) { - continue; - } - auto kernel_name = cce_json.substr(0, sp); - (void)bin_dir_tmp.append("/"); - (void)bin_dir_tmp.append(cce_json); - kernel_meta_map_[kernel_name] = bin_dir_tmp; + std::string kernel_file = entry->d_name; + std::string kernel_file_realpath = kernel_meta_path_ + kernel_file; + (void)remove(kernel_file_realpath.c_str()); } (void)closedir(dir); - - MS_LOG(INFO) << "Cache kernel initialized, kernel size[" << kernel_meta_map_.size() << "]."; - initialized_ = true; - return true; + (void)rmdir(kernel_meta_path_.c_str()); } std::string KernelMeta::Search(const std::string &kernel_name) const { @@ -176,11 +158,11 @@ std::string KernelMeta::Search(const std::string &kernel_name) const { } } -bool KernelMeta::Insert(const std::string &kernel_name, const std::string &cce_json) { +bool KernelMeta::Insert(const std::string &kernel_name, const std::string &kernel_json) { if (!initialized_) { return false; } - kernel_meta_map_[kernel_name] = cce_json; + kernel_meta_map_[kernel_name] = kernel_json; return true; } @@ -191,8 +173,8 @@ bool CheckCache(const std::string &kernel_name) { MS_LOG(DEBUG) << "kernel cache is invalid."; return false; } - std::string cce_json = bin_map->Search(kernel_name); - bool ret = (!cce_json.empty()); + std::string kernel_json = bin_map->Search(kernel_name); + bool ret = (!kernel_json.empty()); if (ret) { MS_LOG(INFO) << "Kernel name:" << kernel_name << " has registed."; } else { @@ -209,12 +191,12 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &pro return nullptr; } - std::string cce_json = bin_map->Search(kernel_name); - if (!cce_json.empty()) { + std::string kernel_json = bin_map->Search(kernel_name); + if (!kernel_json.empty()) { 
KernelPackPtr kernel_pack = std::make_shared(); // just a tmp solution. - if (!kernel_pack->ReadFromJsonFile(cce_json, processor)) { - MS_LOG(DEBUG) << "Read cache json and bin file failed[" << cce_json << "]."; + if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) { + MS_LOG(DEBUG) << "Read cache json and bin file failed[" << kernel_json << "]."; return nullptr; } else { return kernel_pack; @@ -227,26 +209,26 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &pro KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor) { MS_LOG(INFO) << "kernel name:" << kernel_name << ", processr:" << processor; - std::string cce_json; + KernelMeta *bin_map = KernelMeta::GetInstance(); + std::string kernel_json; if (processor == kProcessorAiCore || processor == kProcessorAiCpu) { - cce_json = kCceKernelMeta; + kernel_json = kCceKernelMeta; } else { - cce_json = kGpuKernelMeta; + kernel_json = bin_map->GetKernelMetaPath(); } - (void)cce_json.append(kernel_name).append(kJsonSuffix); + (void)kernel_json.append(kernel_name).append(kJsonSuffix); KernelPackPtr kernel_pack = std::make_shared(); - if (!kernel_pack->ReadFromJsonFile(cce_json, processor)) { - MS_LOG(DEBUG) << "Read json and bin file failed[" << cce_json << "]."; + if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) { + MS_LOG(DEBUG) << "Read json and bin file failed[" << kernel_json << "]."; return nullptr; } - KernelMeta *bin_map = KernelMeta::GetInstance(); if (bin_map == nullptr) { MS_LOG(DEBUG) << "kernel cache is invalid."; return nullptr; } - if (bin_map->Insert(kernel_name, cce_json)) { - MS_LOG(INFO) << "Insert to cache success[" << cce_json << "], kernelname[" << kernel_name << "]."; + if (bin_map->Insert(kernel_name, kernel_json)) { + MS_LOG(INFO) << "Insert to cache success[" << kernel_json << "], kernelname[" << kernel_name << "]."; } return kernel_pack; } diff --git a/mindspore/ccsrc/kernel/common_utils.h 
b/mindspore/ccsrc/kernel/common_utils.h index 07f191cc7b..47fe96c4c9 100644 --- a/mindspore/ccsrc/kernel/common_utils.h +++ b/mindspore/ccsrc/kernel/common_utils.h @@ -30,7 +30,7 @@ namespace mindspore { namespace kernel { constexpr auto kCceKernelMeta = "./kernel_meta/"; -constexpr auto kGpuKernelMeta = "/tmp/cuda_meta/"; +constexpr auto kGpuKernelMeta = "./cuda_meta"; constexpr auto kProcessorAiCore = "aicore"; constexpr auto kProcessorAiCpu = "aicpu"; constexpr auto kProcessorCuda = "cuda"; @@ -51,9 +51,11 @@ using KernelMetaPtr = std::shared_ptr; class KernelMeta { public: KernelMeta() = default; - bool ReadIndex(const std::string &bin_dir); + void Initialize(); + void RemoveKernelCache(); std::string Search(const std::string &kernel_name) const; - bool Insert(const std::string &kernel_name, const std::string &cce_json); + bool Insert(const std::string &kernel_name, const std::string &kernel_json); + std::string GetKernelMetaPath() { return kernel_meta_path_; } static KernelMeta *GetInstance() { static KernelMeta kernel_meta; @@ -63,6 +65,7 @@ class KernelMeta { private: bool initialized_ = false; + std::string kernel_meta_path_; std::unordered_map kernel_meta_map_; }; diff --git a/mindspore/ccsrc/device/cpu/kernel/apply_momentum_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.cc similarity index 90% rename from mindspore/ccsrc/device/cpu/kernel/apply_momentum_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.cc index 94b6c306ef..3cd6c57413 100644 --- a/mindspore/ccsrc/device/cpu/kernel/apply_momentum_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.cc @@ -13,14 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/apply_momentum_cpu_kernel.h" -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/apply_momentum_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void ApplyMomentumCPUKernel::InitKernel(const CNodePtr & /*kernel_node*/) {} bool ApplyMomentumCPUKernel::Launch(const std::vector &inputs, @@ -44,6 +43,5 @@ bool ApplyMomentumCPUKernel::Launch(const std::vector &input } return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/apply_momentum_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h similarity index 77% rename from mindspore/ccsrc/device/cpu/kernel/apply_momentum_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h index 84ed340430..91e159cf74 100644 --- a/mindspore/ccsrc/device/cpu/kernel/apply_momentum_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_ #include #include -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class ApplyMomentumCPUKernel : public MKLCPUKernel { public: ApplyMomentumCPUKernel() = default; @@ -35,8 +34,7 @@ class ApplyMomentumCPUKernel : public MKLCPUKernel { }; MS_REG_CPU_KERNEL(ApplyMomentum, ApplyMomentumCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/argmax_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/device/cpu/kernel/argmax_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.cc index a492bf7969..ee328df721 100644 --- a/mindspore/ccsrc/device/cpu/kernel/argmax_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.cc @@ -13,12 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/argmax_cpu_kernel.h" +#include "kernel/cpu/argmax_cpu_kernel.h" #include "device/cpu/cpu_device_address.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void ArgmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::vector shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); @@ -64,6 +63,5 @@ bool ArgmaxCPUKernel::Launch(const std::vector &inputs, } return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/argmax_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h similarity index 77% rename from mindspore/ccsrc/device/cpu/kernel/argmax_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h index 993b012c4e..b50b5fc272 100644 --- a/mindspore/ccsrc/device/cpu/kernel/argmax_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_ARGMAX_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_ARGMAX_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_ #include #include -#include "device/cpu/cpu_kernel.h" -#include "device/cpu/cpu_kernel_factory.h" +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class ArgmaxCPUKernel : public CPUKernel { public: ArgmaxCPUKernel() = default; @@ -39,8 +38,7 @@ class ArgmaxCPUKernel : public CPUKernel { }; MS_REG_CPU_KERNEL(Argmax, ArgmaxCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_ARGMAX_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/bias_add_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.cc similarity index 91% rename from mindspore/ccsrc/device/cpu/kernel/bias_add_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.cc index 4661ee73cd..00f3017231 100644 --- a/mindspore/ccsrc/device/cpu/kernel/bias_add_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.cc @@ -14,12 +14,11 @@ * limitations under the License. 
*/ -#include "device/cpu/kernel/bias_add_cpu_kernel.h" +#include "kernel/cpu/bias_add_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { -void BiasAddCpuKernel::InitKernel(const CNodePtr &kernel_node) { +namespace kernel { +void BiasAddCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); bias_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); @@ -41,7 +40,7 @@ void BiasAddCpuKernel::InitKernel(const CNodePtr &kernel_node) { } } -bool BiasAddCpuKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, +bool BiasAddCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { if (inputs.size() != 2 || outputs.size() != 1) { MS_LOG(EXCEPTION) << "inputs outputs size not supoort"; @@ -79,6 +78,5 @@ bool BiasAddCpuKernel::Launch(const std::vector &inputs, const std:: } return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/bias_add_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.h similarity index 65% rename from mindspore/ccsrc/device/cpu/kernel/bias_add_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.h index 45028523bd..9c1a23b4e1 100644 --- a/mindspore/ccsrc/device/cpu/kernel/bias_add_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.h @@ -13,21 +13,20 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_CPU_KERNEL_BIAS_ADD_CPU_KERNEL_H_ -#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_CPU_KERNEL_BIAS_ADD_CPU_KERNEL_H_ +#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_ +#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_ #include #include -#include "device/cpu/cpu_kernel.h" -#include "device/cpu/cpu_kernel_factory.h" +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" namespace mindspore { -namespace device { -namespace cpu { -class BiasAddCpuKernel : public CPUKernel { +namespace kernel { +class BiasAddCPUKernel : public CPUKernel { public: - BiasAddCpuKernel() = default; - ~BiasAddCpuKernel() override = default; + BiasAddCPUKernel() = default; + ~BiasAddCPUKernel() override = default; void InitKernel(const CNodePtr &kernel_node) override; bool Launch(const std::vector &inputs, const std::vector &workspace, @@ -38,8 +37,7 @@ class BiasAddCpuKernel : public CPUKernel { std::vector input_shape_; std::vector bias_shape_; }; -MS_REG_CPU_KERNEL(BiasAdd, BiasAddCpuKernel); -} // namespace cpu -} // namespace device +MS_REG_CPU_KERNEL(BiasAdd, BiasAddCPUKernel); +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_CPU_KERNEL_BIAS_ADD_CPU_KERNEL_H_ +#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/bias_add_grad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.cc similarity index 89% rename from mindspore/ccsrc/device/cpu/kernel/bias_add_grad_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.cc index 6846ca2555..1d9c7d076e 100644 --- a/mindspore/ccsrc/device/cpu/kernel/bias_add_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.cc @@ -14,12 +14,11 @@ * limitations under the License. 
*/ -#include "device/cpu/kernel/bias_add_grad_cpu_kernel.h" +#include "kernel/cpu/bias_add_grad_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { -void BiasAddGradCpuKernel::InitKernel(const CNodePtr &kernel_node) { +namespace kernel { +void BiasAddGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); if (input_shape_.size() != 4 && input_shape_.size() != 2) { @@ -27,7 +26,7 @@ void BiasAddGradCpuKernel::InitKernel(const CNodePtr &kernel_node) { } } -bool BiasAddGradCpuKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, +bool BiasAddGradCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { if (inputs.size() != 1 || outputs.size() != 1) { MS_LOG(EXCEPTION) << "input output size not support"; @@ -65,6 +64,5 @@ bool BiasAddGradCpuKernel::Launch(const std::vector &inputs, const s } return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/bias_add_grad_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.h similarity index 62% rename from mindspore/ccsrc/device/cpu/kernel/bias_add_grad_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.h index 736540b8a3..3c4d6e9a76 100644 --- a/mindspore/ccsrc/device/cpu/kernel/bias_add_grad_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.h @@ -14,21 +14,20 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_CPU_KERNEL_BIASADDGRADCPUKERNEL_H_ -#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_CPU_KERNEL_BIASADDGRADCPUKERNEL_H_ +#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_ +#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_ #include #include -#include "device/cpu/cpu_kernel.h" -#include "device/cpu/cpu_kernel_factory.h" +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" namespace mindspore { -namespace device { -namespace cpu { -class BiasAddGradCpuKernel : public CPUKernel { +namespace kernel { +class BiasAddGradCPUKernel : public CPUKernel { public: - BiasAddGradCpuKernel() = default; - ~BiasAddGradCpuKernel() override = default; + BiasAddGradCPUKernel() = default; + ~BiasAddGradCPUKernel() override = default; void InitKernel(const CNodePtr &kernel_node) override; bool Launch(const std::vector &inputs, const std::vector &workspace, @@ -37,8 +36,7 @@ class BiasAddGradCpuKernel : public CPUKernel { private: std::vector input_shape_; }; -MS_REG_CPU_KERNEL(BiasAddGrad, BiasAddGradCpuKernel); -} // namespace cpu -} // namespace device +MS_REG_CPU_KERNEL(BiasAddGrad, BiasAddGradCPUKernel); +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_CPU_KERNEL_BIASADDGRADCPUKERNEL_H_ +#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/cpu_kernel.cc similarity index 91% rename from mindspore/ccsrc/device/cpu/cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/cpu_kernel.cc index 5f810ff522..7150c06eb5 100644 --- a/mindspore/ccsrc/device/cpu/cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/cpu_kernel.cc @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void CPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); @@ -41,6 +40,5 @@ void CPUKernel::Init(const CNodePtr &kernel_node) { InitInputOutputSize(kernel_node); InitKernel(kernel_node); } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/cpu_kernel.h similarity index 89% rename from mindspore/ccsrc/device/cpu/cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/cpu_kernel.h index ebd182ee49..f9121cb175 100644 --- a/mindspore/ccsrc/device/cpu/cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/cpu_kernel.h @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_ #include #include @@ -28,8 +28,7 @@ using mindspore::kernel::Address; using mindspore::kernel::AddressPtr; namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { const char KSIZE[] = "ksize"; const char STRIDE[] = "stride"; const char STRIDES[] = "strides"; @@ -70,8 +69,7 @@ class CPUKernel : public kernel::KernelMod { std::vector output_size_list_; std::vector workspace_size_list_; }; -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel_factory.cc b/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc similarity index 92% rename from mindspore/ccsrc/device/cpu/cpu_kernel_factory.cc rename to mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc index 77a3345344..66949cb4fa 100644 --- a/mindspore/ccsrc/device/cpu/cpu_kernel_factory.cc +++ b/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc @@ -14,15 +14,14 @@ * limitations under the License. 
*/ -#include "device/cpu/cpu_kernel_factory.h" +#include "kernel/cpu/cpu_kernel_factory.h" #include #include #include namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { CPUKernelFactory &CPUKernelFactory::Get() { static CPUKernelFactory instance; return instance; @@ -45,6 +44,5 @@ std::shared_ptr CPUKernelFactory::Create(const std::string &kernel_na } return nullptr; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel_factory.h b/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.h similarity index 86% rename from mindspore/ccsrc/device/cpu/cpu_kernel_factory.h rename to mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.h index 6a86f94709..f546758632 100644 --- a/mindspore/ccsrc/device/cpu/cpu_kernel_factory.h +++ b/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.h @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_CPU_KERNEL_FACTORY_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_CPU_KERNEL_FACTORY_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_ #include #include @@ -22,10 +22,9 @@ #include #include #include "common/utils.h" -#include "device/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { using CPUKernelCreator = std::function()>; class CPUKernelFactory { public: @@ -51,8 +50,7 @@ class CPUKernelRegistrar { #define MS_REG_CPU_KERNEL(KERNEL_NAME, KERNEL_CLASS) \ static const CPUKernelRegistrar g_cpu_kernel_##KERNEL_NAME##_reg(#KERNEL_NAME, \ []() { return std::make_shared(); }); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_CPU_KERNEL_FACTORY_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/equal_count_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/device/cpu/kernel/equal_count_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.cc index ee6bb9f144..60e7eafa78 100644 --- a/mindspore/ccsrc/device/cpu/kernel/equal_count_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.cc @@ -13,12 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/equal_count_cpu_kernel.h" +#include "kernel/cpu/equal_count_cpu_kernel.h" #include "device/cpu/cpu_device_address.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void EqualCountCPUKernel::InitKernel(const CNodePtr & /*kernel_node*/) {} bool EqualCountCPUKernel::Launch(const std::vector &inputs, @@ -43,6 +42,5 @@ bool EqualCountCPUKernel::Launch(const std::vector &inputs, output[0] = count; return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/equal_count_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.h similarity index 76% rename from mindspore/ccsrc/device/cpu/kernel/equal_count_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.h index 26f343e855..ecfe24a90f 100644 --- a/mindspore/ccsrc/device/cpu/kernel/equal_count_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_EQUAL_COUNT_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_EQUAL_COUNT_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_ #include #include -#include "device/cpu/cpu_kernel.h" -#include "device/cpu/cpu_kernel_factory.h" +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class EqualCountCPUKernel : public CPUKernel { public: EqualCountCPUKernel() = default; @@ -35,8 +34,7 @@ class EqualCountCPUKernel : public CPUKernel { }; MS_REG_CPU_KERNEL(EqualCount, EqualCountCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_EQUAL_COUNT_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.cc index 5d63aee6cd..657c85dc48 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.cc @@ -13,15 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/mkldnn/conv2d_cpu_kernel.h" +#include "kernel/cpu/mkldnn/conv2d_cpu_kernel.h" #include #include "common/utils.h" -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "device/cpu/cpu_device_address.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); @@ -88,6 +87,5 @@ bool Conv2dCPUKernel::Launch(const std::vector &inputs, ExecutePrimitive(); return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.h similarity index 78% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.h index d975b537ca..b91059a0d1 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_ #include #include -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class Conv2dCPUKernel : public MKLCPUKernel { public: Conv2dCPUKernel() = default; @@ -35,8 +34,7 @@ class Conv2dCPUKernel : public MKLCPUKernel { }; MS_REG_CPU_KERNEL(Conv2D, Conv2dCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc index 1a7c10a531..fbfebaf56e 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc @@ -13,15 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.h" +#include "kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h" #include #include "common/utils.h" -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "device/cpu/cpu_device_address.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); @@ -90,6 +89,5 @@ bool Conv2dGradFilterCPUKernel::Launch(const std::vector &in ExecutePrimitive(); return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h similarity index 78% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h index d42c1166f2..b6cd78171a 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ #include #include -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class Conv2dGradFilterCPUKernel : public MKLCPUKernel { public: Conv2dGradFilterCPUKernel() = default; @@ -35,8 +34,7 @@ class Conv2dGradFilterCPUKernel : public MKLCPUKernel { }; MS_REG_CPU_KERNEL(Conv2DBackpropFilter, Conv2dGradFilterCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc index 04dda20acd..ff0b8633d4 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc @@ -13,15 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.h" +#include "kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h" #include -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::vector src_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); @@ -89,6 +88,5 @@ bool Conv2dGradInputCPUKernel::Launch(const std::vector &inp ExecutePrimitive(); return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h similarity index 78% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h index fb6e14688d..c61d8133ee 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ #include #include -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class Conv2dGradInputCPUKernel : public MKLCPUKernel { public: Conv2dGradInputCPUKernel() = default; @@ -35,8 +34,7 @@ class Conv2dGradInputCPUKernel : public MKLCPUKernel { }; MS_REG_CPU_KERNEL(Conv2DBackpropInput, Conv2dGradInputCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/matmul_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/matmul_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.cc index 84d9508c71..28266f2aa0 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/matmul_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.cc @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/mkldnn/matmul_cpu_kernel.h" +#include "kernel/cpu/mkldnn/matmul_cpu_kernel.h" #include #include -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "common/utils.h" #include "device/cpu/cpu_device_address.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void MatMulCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); @@ -68,6 +67,5 @@ bool MatMulCPUKernel::Launch(const std::vector &inputs, (void)dnnl_sgemm(trans_a_, trans_b_, dim_m_, dim_n_, dim_k_, 1.f, input_a, lda, input_b, ldb, 0.f, output, dim_n_); return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/matmul_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.h similarity index 80% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/matmul_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.h index b096e76740..ecca5dec73 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/matmul_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_MATMUL_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_MATMUL_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_ #include #include -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class MatMulCPUKernel : public MKLCPUKernel { public: MatMulCPUKernel() = default; @@ -42,8 +41,7 @@ class MatMulCPUKernel : public MKLCPUKernel { }; MS_REG_CPU_KERNEL(MatMul, MatMulCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_MATMUL_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc index 677df1d0f8..1f7ccf9e41 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" #include #include #include #include "common/utils.h" -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, const std::vector &src_shape, int kernel_size, int stride, std::vector *padding_l, std::vector *padding_r) { @@ -99,6 +98,5 @@ void MKLCPUKernel::SetArgumentHandle(int arg_key, void *ptr) { } void MKLCPUKernel::ExecutePrimitive() { MKLKernelEngine::Get().Execute(primitive_, arguments_); } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h similarity index 82% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h index 0a38de7060..cd06032ff0 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h @@ -13,20 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_MKL_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_MKL_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_ #include #include #include #include #include "dnnl.hpp" -#include "device/cpu/cpu_kernel.h" -#include "device/cpu/cpu_kernel_factory.h" +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class MKLCPUKernel : public CPUKernel { public: MKLCPUKernel() = default; @@ -43,8 +42,7 @@ class MKLCPUKernel : public CPUKernel { std::unordered_map arguments_; std::shared_ptr primitive_{nullptr}; }; -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_MKL_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_kernel_engine.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc similarity index 89% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_kernel_engine.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc index 7025148732..ae4dbb26d8 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_kernel_engine.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc @@ -13,13 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "utils/log_adapter.h" #include "dnnl.hpp" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void MKLKernelEngine::Execute(const std::shared_ptr &primitive, const std::unordered_map &arguments) { MS_EXCEPTION_IF_NULL(primitive); @@ -34,6 +33,5 @@ dnnl::memory MKLKernelEngine::CreateMemory(const dnnl::memory::desc &mem_desc, b return dnnl::memory(mem_desc, engine_, nullptr); } } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_kernel_engine.h b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h similarity index 95% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_kernel_engine.h rename to mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h index ea764359b6..36a3ceff6d 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_kernel_engine.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h @@ -23,8 +23,7 @@ #include "common/utils.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class MKLKernelEngine { public: static MKLKernelEngine &Get() { @@ -46,8 +45,7 @@ class MKLKernelEngine { dnnl::engine engine_; dnnl::stream stream_; }; -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore #endif // MINDSPORE_MKL_KERNEL_ENGINE_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mul_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/mul_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.cc index bdaa85559e..4f77508004 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mul_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.cc @@ -13,14 +13,13 @@ * See the License for the specific language governing 
permissions and * limitations under the License. */ -#include "device/cpu/kernel/mkldnn/mul_cpu_kernel.h" -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/mul_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void MulCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::vector src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); @@ -58,6 +57,5 @@ bool MulCPUKernel::Launch(const std::vector &inputs, ExecutePrimitive(); return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mul_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.h similarity index 78% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/mul_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.h index e666197632..746c2925ec 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mul_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_MUL_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_MUL_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_ #include #include -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class MulCPUKernel : public MKLCPUKernel { public: MulCPUKernel() = default; @@ -35,8 +34,7 @@ class MulCPUKernel : public MKLCPUKernel { }; MS_REG_CPU_KERNEL(Mul, MulCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_MUL_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.cc index 9417105e2f..5225050dc1 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.cc @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/mkldnn/pooling_cpu_kernel.h" +#include "kernel/cpu/mkldnn/pooling_cpu_kernel.h" #include #include #include "common/utils.h" -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "device/cpu/cpu_device_address.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); @@ -66,6 +65,5 @@ bool PoolingCPUKernel::Launch(const std::vector &inputs, ExecutePrimitive(); return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.h similarity index 78% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.h index 50f4ff5da7..a082015137 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_POOLING_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_POOLING_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_ #include #include -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class PoolingCPUKernel : public MKLCPUKernel { public: PoolingCPUKernel() = default; @@ -35,8 +34,7 @@ class PoolingCPUKernel : public MKLCPUKernel { }; MS_REG_CPU_KERNEL(MaxPool, PoolingCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_POOLING_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.cc index 724b78f19f..c0459de790 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.cc @@ -13,17 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.h" +#include "kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h" #include #include #include #include "common/utils.h" -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "device/cpu/cpu_device_address.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void PoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); src_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); @@ -121,6 +120,5 @@ bool PoolingGradCPUKernel::Launch(const std::vector &inputs, } return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h similarity index 82% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h index e557f7f95f..16ca6901d4 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h @@ -13,17 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_POOLING_GRAD_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_POOLING_GRAD_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_ #include #include #include -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class PoolingGradCPUKernel : public MKLCPUKernel { public: PoolingGradCPUKernel() = default; @@ -45,8 +44,7 @@ class PoolingGradCPUKernel : public MKLCPUKernel { }; MS_REG_CPU_KERNEL(MaxPoolGrad, PoolingGradCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_POOLING_GRAD_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.cc similarity index 91% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.cc index c8bf63eaf0..d5ef20a25e 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.cc @@ -13,14 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/mkldnn/relu_cpu_kernel.h" -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/relu_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void ReluCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); @@ -49,6 +48,5 @@ bool ReluCPUKernel::Launch(const std::vector &inputs, ExecutePrimitive(); return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.h similarity index 78% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.h index 8811539f40..b9ccb12f25 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_RELU_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_RELU_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_ #include #include -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class ReluCPUKernel : public MKLCPUKernel { public: ReluCPUKernel() = default; @@ -35,8 +34,7 @@ class ReluCPUKernel : public MKLCPUKernel { }; MS_REG_CPU_KERNEL(ReLU, ReluCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_RELU_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.cc index b831562d10..4a6213ddf2 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.cc @@ -13,14 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.h" -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/relu_grad_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void ReluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); @@ -66,6 +65,5 @@ bool ReluGradCPUKernel::Launch(const std::vector &inputs, } return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.h similarity index 77% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.h index 81b84916ba..1ff9184b2e 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_RELU_GRAD_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_RELU_GRAD_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_ #include #include -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class ReluGradCPUKernel : public MKLCPUKernel { public: ReluGradCPUKernel() = default; @@ -35,8 +34,7 @@ class ReluGradCPUKernel : public MKLCPUKernel { }; MS_REG_CPU_KERNEL(ReluGrad, ReluGradCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_RELU_GRAD_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/softmax_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.cc similarity index 91% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/softmax_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.cc index 495f833c16..7fa740cfc0 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/softmax_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.cc @@ -13,14 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/mkldnn/softmax_cpu_kernel.h" -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/softmax_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void SoftmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); @@ -51,6 +50,5 @@ bool SoftmaxCPUKernel::Launch(const std::vector &inputs, ExecutePrimitive(); return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/softmax_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.h similarity index 78% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/softmax_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.h index 8f4ccae1b2..de51247493 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/softmax_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_SOFTMAX_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_SOFTMAX_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SOFTMAX_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_SOFTMAX_CPU_KERNEL_H_ #include #include -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class SoftmaxCPUKernel : public MKLCPUKernel { public: SoftmaxCPUKernel() = default; @@ -35,8 +34,7 @@ class SoftmaxCPUKernel : public MKLCPUKernel { }; MS_REG_CPU_KERNEL(Softmax, SoftmaxCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_SOFTMAX_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_SOFTMAX_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc index ca06b4a617..c33fcd246f 100644 --- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc @@ -13,17 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h" +#include "kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h" #include #include #include -#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" #include "device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { CPUKernel::InitInputOutputSize(kernel_node); MS_EXCEPTION_IF_NULL(kernel_node); @@ -126,6 +125,5 @@ bool SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Launch(const std::vector #include -#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h" +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public MKLCPUKernel { public: SparseSoftmaxCrossEntropyWithLogitsCPUKernel() = default; @@ -45,8 +44,7 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public MKLCPUKernel { }; MS_REG_CPU_KERNEL(SparseSoftmaxCrossEntropyWithLogits, SparseSoftmaxCrossEntropyWithLogitsCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/one_hot_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/device/cpu/kernel/one_hot_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.cc index e4b3f03f58..00dfe73f28 100644 --- a/mindspore/ccsrc/device/cpu/kernel/one_hot_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.cc @@ -13,12 +13,11 @@ * See the 
License for the specific language governing permissions and * limitations under the License. */ -#include "device/cpu/kernel/one_hot_cpu_kernel.h" +#include "kernel/cpu/one_hot_cpu_kernel.h" #include "device/cpu/cpu_device_address.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void OneHotCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); @@ -69,6 +68,5 @@ bool OneHotCPUKernel::Launch(const std::vector &inputs, return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/one_hot_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.h similarity index 77% rename from mindspore/ccsrc/device/cpu/kernel/one_hot_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.h index f41ac63265..bb69236123 100644 --- a/mindspore/ccsrc/device/cpu/kernel/one_hot_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_ONE_HOT_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_ONE_HOT_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_ONE_HOT_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_ONE_HOT_CPU_KERNEL_H_ #include #include -#include "device/cpu/cpu_kernel.h" -#include "device/cpu/cpu_kernel_factory.h" +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class OneHotCPUKernel : public CPUKernel { public: OneHotCPUKernel() = default; @@ -40,8 +39,7 @@ class OneHotCPUKernel : public CPUKernel { }; MS_REG_CPU_KERNEL(OneHot, OneHotCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_ONE_HOT_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_ONE_HOT_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/device/cpu/kernel/reshape_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/device/cpu/kernel/reshape_cpu_kernel.cc rename to mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.cc index a742e3a550..7342a19e99 100644 --- a/mindspore/ccsrc/device/cpu/kernel/reshape_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.cc @@ -13,12 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/kernel/reshape_cpu_kernel.h" +#include "kernel/cpu/reshape_cpu_kernel.h" #include "device/cpu/cpu_device_address.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { void ReshapeCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); } bool ReshapeCPUKernel::Launch(const std::vector &inputs, @@ -43,6 +42,5 @@ bool ReshapeCPUKernel::Launch(const std::vector &inputs, } return true; } -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/device/cpu/kernel/reshape_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.h similarity index 78% rename from mindspore/ccsrc/device/cpu/kernel/reshape_cpu_kernel.h rename to mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.h index d371e3a7ac..837873d48c 100644 --- a/mindspore/ccsrc/device/cpu/kernel/reshape_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.h @@ -13,16 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_DEVICE_CPU_RESHAPE_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_DEVICE_CPU_RESHAPE_CPU_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_RESHAPE_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_RESHAPE_CPU_KERNEL_H_ #include #include -#include "device/cpu/cpu_kernel.h" -#include "device/cpu/cpu_kernel_factory.h" +#include "kernel/cpu/cpu_kernel.h" +#include "kernel/cpu/cpu_kernel_factory.h" namespace mindspore { -namespace device { -namespace cpu { +namespace kernel { class ReshapeCPUKernel : public CPUKernel { public: ReshapeCPUKernel() = default; @@ -37,8 +36,7 @@ class ReshapeCPUKernel : public CPUKernel { MS_REG_CPU_KERNEL(Reshape, ReshapeCPUKernel); MS_REG_CPU_KERNEL(Flatten, ReshapeCPUKernel); MS_REG_CPU_KERNEL(ExpandDims, ReshapeCPUKernel); -} // namespace cpu -} // namespace device +} // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_CPU_RESHAPE_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_KERNEL_CPU_RESHAPE_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h index 96e899da60..091a150fcb 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h @@ -129,10 +129,10 @@ class SliceGpuFwdKernel : public GpuKernel { } begin_ = GetAttr>(kernel_node, "begin"); for (size_t i = 0; i < input_shape.size(); i++) { - if ((begin_[i] > 0 && (begin_[i] >= SizeToInt(input_shape[i]))) || + if ((begin_[i] > 0 && (begin_[i] > SizeToInt(input_shape[i]))) || (begin_[i] < 0 && (std::abs(begin_[i]) > SizeToInt(input_shape[i])))) { - MS_LOG(ERROR) << "Error input, out of bounds " << input_shape[i] << " in axis " << i << "."; - return false; + MS_LOG(INFO) << "Input out of bounds " << input_shape[i] << " in axis " << i << "."; + begin_[i] = 0; } } return true; diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu 
b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu new file mode 100644 index 0000000000..f8377fd721 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu @@ -0,0 +1,205 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh" + +constexpr int NUM_PER_THREAD_REDUCE = 4; +constexpr int WARP_SIZE = 32; + +template +inline __device__ void GammaAndBetaThreadReduce(const int& col, const int& row_dim, const int& col_dim, + const T& epsilon, const T* dy, const T* x, const T* mean, const T* var, + T* dg, T* db) { + int loop_num = (row_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE; + for (int i = threadIdx.x; i < loop_num; i += blockDim.x) { + for (int j = 0; j < NUM_PER_THREAD_REDUCE; j++) { + int row = NUM_PER_THREAD_REDUCE * i + j; + if (row >= row_dim) { + return; + } + + int pos = row * col_dim + col; + dg[0] += dy[pos] * pow(var[row] + epsilon, -0.5) * (x[pos] - mean[row]); + db[0] += dy[pos]; + } + } +} + +template +inline __device__ void GammaAndBetaWarpReduce(T* dg, T* db) { + for (int delta = (WARP_SIZE >> 1); delta > 0; delta >>= 1) { + dg[0] += __shfl_down_sync(0xffffffff, dg[0], delta); + db[0] += __shfl_down_sync(0xffffffff, db[0], delta); + } +} + +template +inline __device__ void GammaAndBetaBlockReduce(const int& col, const int& row_dim, T* dg, T* db, T* 
dg_addr, + T* db_addr) { + if (threadIdx.x >= row_dim) { + return; + } + + // load data to share memory + // thread(0, 32, 64, 96, ...) keep the data + extern __shared__ T share_mem[]; + if (threadIdx.x % WARP_SIZE == 0) { + int offset = threadIdx.x / WARP_SIZE * 2; + share_mem[offset] = dg[0]; + share_mem[offset + 1] = db[0]; + } + __syncthreads(); + + for (int stride = blockDim.x / WARP_SIZE / 2; stride > 0; stride >>= 1) { + if (threadIdx.x < stride) { + int offset = (threadIdx.x + stride) * 2; + share_mem[threadIdx.x * 2] += share_mem[offset]; + share_mem[threadIdx.x * 2 + 1] += share_mem[offset + 1]; + } + } + __syncthreads(); + + if (threadIdx.x == 0) { + dg_addr[col] = share_mem[0]; + db_addr[col] = share_mem[1]; + } +} + +template +__global__ void GammaAndBetaPropKernel(const int row_dim, const int col_dim, const T epsilon, const T* dy, const T* x, + const T* mean_addr, const T* var_addr, T* dg_addr, T* db_addr) { + // row: [0:param_axis] + // col: [param_axis:] + // dg[i][j] = dy[i][j] * (var[i] + epsilon, -0.5) * (x[i][j] - mean[i]) + // dg[j] = \Sigma_{j}dg[i][j] + for (int col = blockIdx.x; col < col_dim; col += gridDim.x) { + T dg = 0; + T db = 0; + GammaAndBetaThreadReduce(col, row_dim, col_dim, epsilon, dy, x, mean_addr, var_addr, &dg, &db); + GammaAndBetaWarpReduce(&dg, &db); + GammaAndBetaBlockReduce(col, row_dim, &dg, &db, dg_addr, db_addr); + } +} + +template +inline __device__ void InputThreadReduce(const int& row, const int& col_dim, const int& param_dim, const T& epsilon, + T* sum1, T* sum2, T* sum3, const T* dy, const T* x, const T* mean, + const T* var, const T* gamma) { + int loop_num = (col_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE; + for (int i = threadIdx.x; i < loop_num; i += blockDim.x) { + for (int j = 0; j < NUM_PER_THREAD_REDUCE; j++) { + int col = NUM_PER_THREAD_REDUCE * i + j; + if (col >= col_dim) { + return; + } + + int pos = row * col_dim + col; + int gamma_offset = pos % param_dim; + T v1 = dy[pos] * 
gamma[gamma_offset]; + T v2 = x[pos] - mean[row]; + + sum1[0] += -0.5 * v1 * v2 * pow(var[row] + epsilon, -1.5); + sum2[0] += v1; + sum3[0] += -2.0 * v2; + } + } +} + +template +inline __device__ void InputWarpReduce(T* sum1, T* sum2, T* sum3) { + for (int delta = (WARP_SIZE >> 1); delta > 0; delta >>= 1) { + sum1[0] += __shfl_down_sync(0xffffffff, sum1[0], delta); + sum2[0] += __shfl_down_sync(0xffffffff, sum2[0], delta); + sum3[0] += __shfl_down_sync(0xffffffff, sum3[0], delta); + } +} + +template +inline __device__ void InputBlockReduce(const int& col_dim, T* sum1, T* sum2, T* sum3, T* share_mem) { + if (threadIdx.x >= col_dim) { + return; + } + + // load data to share memory + // thread(0, 32, 64, 96, ...) keep the data + if (threadIdx.x % WARP_SIZE == 0) { + int offset = threadIdx.x / WARP_SIZE * 3; + share_mem[offset] = sum1[0]; + share_mem[offset + 1] = sum2[0]; + share_mem[offset + 2] = sum3[0]; + } + __syncthreads(); + + for (int stride = blockDim.x / WARP_SIZE / 2; stride > 0; stride >>= 1) { + if (threadIdx.x < stride) { + int offset = (threadIdx.x + stride) * 3; + share_mem[threadIdx.x * 3] += share_mem[offset]; + share_mem[threadIdx.x * 3 + 1] += share_mem[offset + 1]; + share_mem[threadIdx.x * 3 + 2] += share_mem[offset + 2]; + } + } + __syncthreads(); +} + +template +inline __device__ void InputProp(const int& row, const int& col_dim, const int& param_dim, const T& epsilon, + const T* dy, const T* x, const T* mean, const T* var, const T* gamma, T* dx, + const T* share_mem) { + for (int col = threadIdx.x; col < col_dim; col += blockDim.x) { + int pos = (row * col_dim + col); + int gamma_offset = pos % param_dim; + T v1 = dy[pos] * gamma[gamma_offset]; + T v2 = x[pos] - mean[row]; + T v3 = pow(var[row] + epsilon, -0.5); + dx[pos] = v1 * v3 + share_mem[0] * (2.0 / col_dim) * v2 + + (-1.0 * v3 * share_mem[1] + (1.0 / col_dim) * share_mem[0] * share_mem[2]) * (1.0 / col_dim); + } +} + +template +__global__ void InputPropKernel(const int row_dim, const int 
col_dim, const int param_dim, const T epsilon, const T* dy, + const T* x, const T* mean, const T* var, const T* gamma, T* dx) { + for (int row = blockIdx.x; row < row_dim; row += gridDim.x) { + T sum1 = 0; + T sum2 = 0; + T sum3 = 0; + extern __shared__ T share_mem[]; + InputThreadReduce(row, col_dim, param_dim, epsilon, &sum1, &sum2, &sum3, dy, x, mean, var, gamma); + InputWarpReduce(&sum1, &sum2, &sum3); + InputBlockReduce(col_dim, &sum1, &sum2, &sum3, share_mem); + InputProp(row, col_dim, param_dim, epsilon, dy, x, mean, var, gamma, dx, share_mem); + } +} + +template +void LayerNormGrad(const int& row_dim, const int& col_dim, const int& param_dim, const T& epsilon, const T* dy, + const T* x, const T* mean, const T* var, const T* gamma, T* dx, T* dg, T* db, cudaStream_t stream) { + int share_mem = + ((col_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE + WARP_SIZE - 1) / WARP_SIZE * 3 * sizeof(T); + InputPropKernel<<>>(row_dim, col_dim, param_dim, epsilon, dy, x, mean, var, gamma, + dx); + + share_mem = + ((row_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE + WARP_SIZE - 1) / WARP_SIZE * 2 * sizeof(T); + GammaAndBetaPropKernel<<>>(row_dim, col_dim, epsilon, dy, x, mean, var, dg, db); +} + +template void LayerNormGrad(const int& row_dim, const int& col_dim, const int& param_dim, const float& epsilon, + const float* dy, const float* x, const float* mean, const float* var, const float* gamma, + float* dx, float* dg, float* db, cudaStream_t stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh new file mode 100644 index 0000000000..9f7d57cdb9 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh @@ -0,0 +1,26 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_GRAD_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_GRAD_H_ + +#include "device/gpu/cuda_common.h" + +template +void LayerNormGrad(const int& row_dim, const int& col_dim, const int& param_dim, const T& epsilon, const T* dy, + const T* x, const T* mean, const T* var, const T* gamma, T* dx, T* dg, T* db, cudaStream_t stream); + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_GRAD_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu new file mode 100644 index 0000000000..db33673744 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu @@ -0,0 +1,148 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "kernel/gpu/cuda_impl/layer_norm_impl.cuh" + +constexpr int NUM_PER_THREAD_REDUCE = 4; +constexpr int WARP_SIZE = 32; + +template +inline __device__ void MeanAndVarAccumulation(T* mean, T* var, T* num, const T& val) { + // Welford Algorithm: + // \mu_k = \mu_{k-1} + (x_k - \mu_{k-1})/k + // \sigma_k^2 = \sigma_{k-1}^2 + (x_k - \mu_{k-1}) * (x_k - \mu_k) + num[0]++; + T mean_new = mean[0] + (val - mean[0]) / num[0]; + var[0] = var[0] + (val - mean[0]) * (val - mean_new); + mean[0] = mean_new; +} + +template +inline __device__ void MeanAndVarMerge(T* m1, T* v1, T* n1, const T& m2, const T& v2, const T& n2) { + if (n2 == 0) { + return; + } + + T count = n1[0] + n2; + v1[0] = v1[0] + v2 + (m1[0] - m2) * (m1[0] - m2) * n1[0] * n2 / count; + m1[0] = (n1[0] * m1[0] + n2 * m2) / count; + n1[0] = count; +} + +template +inline __device__ void ThreadReduce(const int& col_dim, const T* block_addr, T* mean, T* var, T* num) { + int loop_num = (col_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE; + for (int i = threadIdx.x; i < loop_num; i += blockDim.x) { + for (int j = 0; j < NUM_PER_THREAD_REDUCE; j++) { + int pos = NUM_PER_THREAD_REDUCE * i + j; + if (pos >= col_dim) { + return; + } + MeanAndVarAccumulation(mean, var, num, block_addr[pos]); + } + } +} + +template +inline __device__ void WarpReduce(T* mean, T* var, T* num) { + for (int delta = (WARP_SIZE >> 1); delta > 0; delta >>= 1) { + T mean_other = __shfl_down_sync(0xffffffff, mean[0], delta); + T var_other = __shfl_down_sync(0xffffffff, var[0], delta); + T num_other = __shfl_down_sync(0xffffffff, num[0], delta); + MeanAndVarMerge(mean, var, num, mean_other, var_other, num_other); + } +} + +template +inline __device__ void BlockReduce(const int& col_dim, T* mean, T* var, T* num, T* mean_addr, T* var_addr, + T* share_mem) { + if (threadIdx.x >= col_dim) { + return; + } + + // load data to share memory + // thread(0, 32, 64, 96, ...) 
keep the data + if (threadIdx.x % WARP_SIZE == 0) { + int offset = threadIdx.x / WARP_SIZE * 3; + share_mem[offset] = mean[0]; + share_mem[offset + 1] = var[0]; + share_mem[offset + 2] = num[0]; + } + __syncthreads(); + + for (int stride = blockDim.x / WARP_SIZE / 2; stride > 0; stride >>= 1) { + if (threadIdx.x < stride) { + int offset = (threadIdx.x + stride) * 3; + MeanAndVarMerge(&share_mem[threadIdx.x * 3], &share_mem[threadIdx.x * 3 + 1], &share_mem[threadIdx.x * 3 + 2], + share_mem[offset], share_mem[offset + 1], share_mem[offset + 2]); + } + } + __syncthreads(); + + if (threadIdx.x == 0) { + mean_addr[blockIdx.x] = share_mem[0]; // todo: blockDim.x < row + share_mem[1] /= col_dim; + var_addr[blockIdx.x] = share_mem[1]; + } +} + +template +inline __device__ void LayerNorm(const int& row, const int& col_dim, const int& param_dim, const T* x, + const T* share_mem, const T* gamma, const T* beta, const T epsilon, T* y) { + for (int col = threadIdx.x; col < col_dim; col += blockDim.x) { + int pos = row * col_dim + col; + int i = pos % param_dim; + y[pos] = (x[pos] - share_mem[0]) / sqrt(share_mem[1] + epsilon) * gamma[i] + beta[i]; + } +} + +template +__global__ void LayerNormKernel(const int row_dim, const int col_dim, const int param_dim, const T epsilon, const T* x, + const T* gamma, const T* beta, T* y, T* mean_addr, T* var_addr) { + for (auto row = blockIdx.x; row < row_dim; row += gridDim.x) { + T mean = 0; + T var = 0; + T num = 0; + const T* block_addr = x + row * col_dim; + extern __shared__ T share_mem[]; + + ThreadReduce(col_dim, block_addr, &mean, &var, &num); + WarpReduce(&mean, &var, &num); + BlockReduce(col_dim, &mean, &var, &num, mean_addr, var_addr, share_mem); + + __syncthreads(); + LayerNorm(row, col_dim, param_dim, x, share_mem, gamma, beta, epsilon, y); + } +} + +template +void LayerNorm(const int& row_dim, const int& col_dim, const int& param_dim, const T& epsilon, const T* x, + const T* gamma, const T* beta, T* y, T* mean, T* var, 
cudaStream_t stream) { + const dim3 block(row_dim); + const dim3 thread(256); + // keep the mean/var/num after warp reduce + int share_mem = + ((col_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE + WARP_SIZE - 1) / WARP_SIZE * 3 * sizeof(T); + LayerNormKernel<<>>(row_dim, col_dim, param_dim, epsilon, x, gamma, beta, y, mean, + var); +} + +template void LayerNorm(const int& row_dim, const int& col_dim, const int& param_dim, const float& epsilon, + const float* x, const float* gamma, const float* beta, float* y, float* mean, float* var, + cudaStream_t stream); diff --git a/tests/ut/cpp/mindrecord/ut_shard_writer_test.h b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh similarity index 54% rename from tests/ut/cpp/mindrecord/ut_shard_writer_test.h rename to mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh index f665297b17..4832b08746 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_writer_test.h +++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#ifndef TESTS_MINDRECORD_UT_SHARDWRITER_H -#define TESTS_MINDRECORD_UT_SHARDWRITER_H +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_H_ -namespace mindspore { -namespace mindrecord { -void TestShardWriterImageNet(); -} // namespace mindrecord -} // namespace mindspore +#include "device/gpu/cuda_common.h" -#endif // TESTS_MINDRECORD_UT_SHARDWRITER_H +template +void LayerNorm(const int& outer, const int& inner, const int& param_dim, const T& epsilon, const T* x, const T* gamma, + const T* beta, T* y, T* mean, T* var, cudaStream_t stream); + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_H_ diff --git a/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc b/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc index e38cc02e23..b00b5c263d 100644 --- a/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc +++ b/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc @@ -68,13 +68,18 @@ std::string GpuKernelFactory::SupportedTypeList(const std::string &kernel_name) return type_lists; } for (size_t attr_index = 0; attr_index < (iter->second).size(); ++attr_index) { - std::string type_list = "["; + std::string type_list = "in["; auto attr = (iter->second)[attr_index].first; for (size_t input_index = 0; input_index < attr.GetInputSize(); ++input_index) { type_list = type_list + TypeId2String(attr.GetInputAttr(input_index).first) + ((input_index == (attr.GetInputSize() - 1)) ? "" : " "); } - type_lists = type_lists + type_list + "] "; + type_list = type_list + "], out["; + for (size_t input_index = 0; input_index < attr.GetOutputSize(); ++input_index) { + type_list = type_list + TypeId2String(attr.GetOutputAttr(input_index).first) + + ((input_index == (attr.GetOutputSize() - 1)) ? 
"" : " "); + } + type_lists = type_lists + type_list + "]; "; } return type_lists; } diff --git a/mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h index b929bbee50..3bf141fc0b 100644 --- a/mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h @@ -218,7 +218,7 @@ class BinaryOpGpuKernel : public GpuKernel { } } CHECK_CUDNN_RET_WITH_EXCEPT( - cudnnSetOpTensorDescriptor(opTensor_descriptor_, tensor_op_, cudnn_data_type_, CUDNN_NOT_PROPAGATE_NAN), + cudnnSetOpTensorDescriptor(opTensor_descriptor_, tensor_op_, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN), "cudnnSetOpTensorDescriptor failed"); return; } diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h index 75b2a97cf8..e58aeacefb 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h @@ -114,23 +114,7 @@ class Conv2dGpuFwdKernel : public GpuKernel { pad_height_ = GetAttr(kernel_node, "pad"); pad_width_ = pad_height_; pad_mode_ = GetAttr(kernel_node, "pad_mode"); - auto stride_ori = AnfAlgo::GetNodeAttr>(kernel_node, "stride"); - auto dilation_ori = AnfAlgo::GetNodeAttr>(kernel_node, "dilation"); - if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) { - MS_LOG(EXCEPTION) << "conv2d only support equal stride, and stride must be 4d!"; - } - if (stride_ori[0] != 1 || stride_ori[1] != 1) { - MS_LOG(EXCEPTION) << "conv2d stride only support 1 in N axis and C axis!"; - } - if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) { - MS_LOG(EXCEPTION) << "conv2d only support equal dilation, and dilation must be 4d!"; - } - if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { - MS_LOG(EXCEPTION) << "conv2d dilation only support 1 in N axis and C axis!"; - } - stride_ = stride_ori[2]; - dilation_ = dilation_ori[2]; - + SetStrideAndDilation(kernel_node); 
cudnnTensorDescriptor_t input_descriptor_real = nullptr; if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) { SetPad(in_shape, kernel_node); @@ -142,10 +126,14 @@ class Conv2dGpuFwdKernel : public GpuKernel { } CHECK_CUDNN_RET_WITH_EXCEPT( cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_, - CUDNN_CROSS_CORRELATION, cudnn_data_type_), + CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), "cudnnSetConvolution2dDescriptor failed"); input_descriptor_real = input_desc_; } + if (cudnn_data_type_ == CUDNN_DATA_HALF) { + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH), + "cudnnSetConvolutionMathType failed.") + } SelectAlgorithm(input_descriptor_real); InitSizeLists(); return true; @@ -240,7 +228,7 @@ class Conv2dGpuFwdKernel : public GpuKernel { "cudnnSetTensor4dDescriptor failed"); CHECK_CUDNN_RET_WITH_EXCEPT( cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 
0 : pad_left_, stride_, stride_, - dilation_, dilation_, CUDNN_CROSS_CORRELATION, cudnn_data_type_), + dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), "cudnnSetConvolution2dDescriptor failed"); } @@ -276,6 +264,27 @@ class Conv2dGpuFwdKernel : public GpuKernel { "cudnnGetConvolutionForwardAlgorithm_v7 failed"); conv_algorithm_ = perf_results.algo; } + if (cudnn_data_type_ == CUDNN_DATA_HALF) { + conv_algorithm_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; + } + } + void SetStrideAndDilation(const CNodePtr &kernel_node) { + auto stride_ori = AnfAlgo::GetNodeAttr>(kernel_node, "stride"); + auto dilation_ori = AnfAlgo::GetNodeAttr>(kernel_node, "dilation"); + if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) { + MS_LOG(EXCEPTION) << "conv2d only support equal stride, and stride must be 4d!"; + } + if (stride_ori[0] != 1 || stride_ori[1] != 1) { + MS_LOG(EXCEPTION) << "conv2d stride only support 1 in N axis and C axis!"; + } + if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) { + MS_LOG(EXCEPTION) << "conv2d only support equal dilation, and dilation must be 4d!"; + } + if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { + MS_LOG(EXCEPTION) << "conv2d dilation only support 1 in N axis and C axis!"; + } + stride_ = stride_ori[2]; + dilation_ = dilation_ori[2]; } cudnnHandle_t cudnn_handle_; cudnnTensorDescriptor_t input_desc_; diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h index e481fd448e..b9d74b036e 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h @@ -117,19 +117,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { pad_height_ = GetAttr(kernel_node, "pad"); pad_width_ = pad_height_; pad_mode_ = GetAttr(kernel_node, "pad_mode"); - auto stride_ori = AnfAlgo::GetNodeAttr>(kernel_node, "stride"); - auto dilation_ori = 
AnfAlgo::GetNodeAttr>(kernel_node, "dilation"); - if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { - MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal stride, and stride must be 2d!"; - } - if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) { - MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal dilation, and dilation must be 4d!"; - } - if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { - MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel dilation only support 1 in N axis and C axis!"; - } - stride_ = stride_ori[0]; - dilation_ = dilation_ori[2]; + SetStrideAndDilation(kernel_node); cudnnTensorDescriptor_t x_desc_real = nullptr; if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) { SetPad(in_shape, kernel_node); @@ -141,10 +129,14 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { } CHECK_CUDNN_RET_WITH_EXCEPT( cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_, - CUDNN_CROSS_CORRELATION, cudnn_data_type_), + CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), "GetConvolution2dDescriptor failed"); x_desc_real = x_desc_; } + if (cudnn_data_type_ == CUDNN_DATA_HALF) { + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH), + "cudnnSetConvolutionMathType failed.") + } SelectAlgorithm(x_desc_real); InitSizeLists(); return true; @@ -239,7 +231,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { "cudnnSetTensor4dDescriptor failed"); CHECK_CUDNN_RET_WITH_EXCEPT( cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 
0 : pad_left_, stride_, stride_, - dilation_, dilation_, CUDNN_CROSS_CORRELATION, cudnn_data_type_), + dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), "cudnnSetConvolution2dDescriptor failed"); } void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) { @@ -258,6 +250,9 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { "GetConvolutionBackwardFilterAlgorithm failed"); algo_ = perf_results.algo; } + if (cudnn_data_type_ == CUDNN_DATA_HALF) { + algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1; + } } void GetFilterShape(const CNodePtr &kernel_node, std::vector *filter_shape) { auto shp_tuple_x = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("filter_sizes")->cast()->value(); @@ -281,7 +276,21 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { SizeToInt(in_shape[1]), SizeToInt(in_shape[2]), SizeToInt(in_shape[3])), "SetTensor4dDescriptor failed"); } - + void SetStrideAndDilation(const CNodePtr &kernel_node) { + auto stride_ori = AnfAlgo::GetNodeAttr>(kernel_node, "stride"); + auto dilation_ori = AnfAlgo::GetNodeAttr>(kernel_node, "dilation"); + if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { + MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal stride, and stride must be 2d!"; + } + if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) { + MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal dilation, and dilation must be 4d!"; + } + if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { + MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel dilation only support 1 in N axis and C axis!"; + } + stride_ = stride_ori[0]; + dilation_ = dilation_ori[2]; + } cudnnHandle_t cudnn_handle_; cudnnFilterDescriptor_t dw_desc_; cudnnConvolutionDescriptor_t conv_desc_; diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h index 008abcc658..a1fb7f324f 100644 --- 
a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h @@ -118,19 +118,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { pad_height_ = GetAttr(kernel_node, "pad"); pad_width_ = pad_height_; pad_mode_ = GetAttr(kernel_node, "pad_mode"); - auto stride_ori = AnfAlgo::GetNodeAttr>(kernel_node, "stride"); - auto dilation_ori = AnfAlgo::GetNodeAttr>(kernel_node, "dilation"); - if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { - MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal stride, and stride must be 2d!"; - } - if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) { - MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal dilation, and dilation must be 4d!"; - } - if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { - MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel dilation only support 1 in N axis and C axis!"; - } - stride_ = stride_ori[0]; - dilation_ = dilation_ori[2]; + SetStrideAndDilation(kernel_node); cudnnTensorDescriptor_t dx_desc_real = nullptr; if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) { SetPad(input_shape, kernel_node); @@ -142,10 +130,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { } CHECK_CUDNN_RET_WITH_EXCEPT( cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_, - CUDNN_CROSS_CORRELATION, cudnn_data_type_), + CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), "cudnnSetConvolution2dDescriptor failed"); dx_desc_real = dx_desc_; } + if (cudnn_data_type_ == CUDNN_DATA_HALF) { + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH), + "cudnnSetConvolutionMathType failed.") + } SelectAlgorithm(dx_desc_real); InitSizeLists(); return true; @@ -239,7 +231,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { "cudnnSetTensor4dDescriptor failed"); CHECK_CUDNN_RET_WITH_EXCEPT( 
cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_, - dilation_, dilation_, CUDNN_CROSS_CORRELATION, cudnn_data_type_), + dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), "cudnnSetConvolution2dDescriptor failed"); } void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) { @@ -258,6 +250,9 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { "cudnnGetConvolutionBackwardDataAlgorithm_v7 failed"); algo_ = perf_results.algo; } + if (cudnn_data_type_ == CUDNN_DATA_HALF) { + algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1; + } } void GetInputShape(const CNodePtr &kernel_node, std::vector *input_shape) { auto shp_tuple_x = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("input_sizes")->cast()->value(); @@ -279,6 +274,21 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { input_shape[2], input_shape[3]), "SetTensor4dDescriptor failed"); } + void SetStrideAndDilation(const CNodePtr &kernel_node) { + auto stride_ori = AnfAlgo::GetNodeAttr>(kernel_node, "stride"); + auto dilation_ori = AnfAlgo::GetNodeAttr>(kernel_node, "dilation"); + if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { + MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal stride, and stride must be 2d!"; + } + if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) { + MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal dilation, and dilation must be 4d!"; + } + if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { + MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel dilation only support 1 in N axis and C axis!"; + } + stride_ = stride_ori[0]; + dilation_ = dilation_ori[2]; + } cudnnHandle_t cudnn_handle_; cudnnFilterDescriptor_t w_desc_; cudnnConvolutionDescriptor_t conv_desc_; diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h index 5ca85f8e63..3cdf480540 100644 --- 
a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h @@ -82,6 +82,7 @@ class FusedBatchNormGpuKernel : public GpuKernel { } bool Init(const CNodePtr &kernel_node) override { InitResource(); + cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))]; size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 5) { MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", FusedBatchNormGpuKernel should be 5"; @@ -112,11 +113,11 @@ class FusedBatchNormGpuKernel : public GpuKernel { } CHECK_CUDNN_RET_WITH_EXCEPT( - cudnnSetTensor4dDescriptor(x_desc_, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch_, channel_, height_, width_), + cudnnSetTensor4dDescriptor(x_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, batch_, channel_, height_, width_), "Set x desc failed"); CHECK_CUDNN_RET_WITH_EXCEPT( - cudnnSetTensor4dDescriptor(y_desc_, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch_, channel_, height_, width_), + cudnnSetTensor4dDescriptor(y_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, batch_, channel_, height_, width_), "Set y desc failed"); CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h index 08eac28af7..07372ad22d 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h @@ -110,7 +110,7 @@ class FusedBatchNormGradGpuKernel : public GpuKernel { cudnnSetTensor4dDescriptor(dx_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, batch_, channel_, height_, width_), "Set dx desc failed"); CHECK_CUDNN_RET_WITH_EXCEPT( - cudnnSetTensor4dDescriptor(scale_bias_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, 1, channel_, 1, 1), + cudnnSetTensor4dDescriptor(scale_bias_desc_, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, channel_, 1, 1), "Set para desc failed"); 
InitSizeLists(); diff --git a/mindspore/ccsrc/dataset/util/semaphore.h b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc similarity index 50% rename from mindspore/ccsrc/dataset/util/semaphore.h rename to mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc index 74c344f7d3..e67b745ab3 100644 --- a/mindspore/ccsrc/dataset/util/semaphore.h +++ b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,37 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef DATASET_UTIL_SEMAPHORE_H_ -#define DATASET_UTIL_SEMAPHORE_H_ -#include "dataset/util/cond_var.h" +#include "kernel/gpu/nn/layer_norm_gpu_kernel.h" namespace mindspore { -namespace dataset { -class TaskGroup; - -class Semaphore { - public: - explicit Semaphore(int init) : value_(init) {} - - virtual ~Semaphore() {} - - Status P(); - - void V(); - - void Register(TaskGroup *vg); - - Status Deregister(); - - void ResetIntrpState(); - - private: - int value_; - - std::mutex mutex_; - CondVar wait_cond_; -}; -} // namespace dataset +namespace kernel { +MS_REG_GPU_KERNEL_ONE(LayerNorm, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LayerNormGpuKernel, float) +} // namespace kernel } // namespace mindspore -#endif // DATASET_UTIL_SEMAPHORE_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h new file mode 100644 index 0000000000..e80cd091e5 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h @@ -0,0 +1,103 @@ 
+/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GPU_KERNEL_H_ + +#include +#include "kernel/gpu/gpu_kernel.h" +#include "kernel/gpu/gpu_kernel_factory.h" +#include "kernel/gpu/cuda_impl/layer_norm_impl.cuh" + +namespace mindspore { +namespace kernel { +template +class LayerNormGpuKernel : public GpuKernel { + public: + LayerNormGpuKernel() : input_row_(1), input_col_(1), param_dim_(1) {} + ~LayerNormGpuKernel() override = default; + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, uintptr_t stream_ptr) override { + auto x = GetDeviceAddress(inputs, 0); + auto gamma = GetDeviceAddress(inputs, 1); + auto beta = GetDeviceAddress(inputs, 2); + auto y = GetDeviceAddress(outputs, 0); + auto mean = GetDeviceAddress(outputs, 1); + auto variance = GetDeviceAddress(outputs, 2); + + T epsilon = 10e-12; + LayerNorm(input_row_, input_col_, param_dim_, epsilon, x, gamma, beta, y, mean, variance, + reinterpret_cast(stream_ptr)); + return true; + } + bool Init(const CNodePtr &kernel_node) override { + 
int begin_norm_axis = GetAttr(kernel_node, "begin_norm_axis"); + int begin_params_axis = GetAttr(kernel_node, "begin_params_axis"); + + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (begin_norm_axis < 0) { + begin_norm_axis += input_shape.size(); + } + + if (begin_params_axis < 0) { + begin_params_axis += input_shape.size(); + } + + for (size_t i = 0; i < IntToSize(begin_norm_axis); i++) { + input_row_ *= input_shape[i]; + } + + for (size_t i = begin_norm_axis; i < input_shape.size(); i++) { + input_col_ *= input_shape[i]; + } + + for (size_t i = begin_params_axis; i < input_shape.size(); i++) { + param_dim_ *= input_shape[i]; + } + + InitSizeLists(); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(input_row_ * input_col_ * sizeof(T)); + input_size_list_.push_back(param_dim_ * sizeof(T)); + input_size_list_.push_back(param_dim_ * sizeof(T)); + + output_size_list_.push_back(input_row_ * input_col_ * sizeof(T)); + output_size_list_.push_back(input_row_ * sizeof(T)); + output_size_list_.push_back(input_row_ * sizeof(T)); + return; + } + + private: + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + + int input_row_; + int input_col_; + int param_dim_; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc new file mode 100644 index 0000000000..e268161349 --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc @@ -0,0 +1,33 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/gpu/nn/layer_norm_grad_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(LayerNormGrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LayerNormGradGpuKernel, float) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h new file mode 100644 index 0000000000..84049206db --- /dev/null +++ b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h @@ -0,0 +1,107 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GRAD_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GRAD_GPU_KERNEL_H_ + +#include +#include "kernel/gpu/gpu_kernel.h" +#include "kernel/gpu/gpu_kernel_factory.h" +#include "kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh" + +namespace mindspore { +namespace kernel { +template +class LayerNormGradGpuKernel : public GpuKernel { + public: + LayerNormGradGpuKernel() : input_row_(1), input_col_(1), param_dim_(1) {} + ~LayerNormGradGpuKernel() override = default; + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, uintptr_t stream_ptr) override { + auto dy = GetDeviceAddress(inputs, 0); + auto x = GetDeviceAddress(inputs, 1); + auto var = GetDeviceAddress(inputs, 2); + auto mean = GetDeviceAddress(inputs, 3); + auto gamma = GetDeviceAddress(inputs, 4); + auto dx = GetDeviceAddress(outputs, 0); + auto dg = GetDeviceAddress(outputs, 1); + auto db = GetDeviceAddress(outputs, 2); + + T epsilon = 10e-12; + LayerNormGrad(input_row_, input_col_, param_dim_, epsilon, dy, x, mean, var, gamma, dx, dg, db, + reinterpret_cast(stream_ptr)); + return true; + } + bool Init(const CNodePtr &kernel_node) override { + int begin_norm_axis = GetAttr(kernel_node, "begin_norm_axis"); + int begin_params_axis = GetAttr(kernel_node, "begin_params_axis"); + + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (begin_norm_axis < 0) { + begin_norm_axis += input_shape.size(); + } + + if (begin_params_axis < 0) { + begin_params_axis += input_shape.size(); + } + + for (size_t i = 0; i < IntToSize(begin_norm_axis); i++) { + input_row_ *= input_shape[i]; + } + + for (size_t i = begin_norm_axis; i < 
input_shape.size(); i++) { + input_col_ *= input_shape[i]; + } + + for (size_t i = begin_params_axis; i < input_shape.size(); i++) { + param_dim_ *= input_shape[i]; + } + + InitSizeLists(); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(input_row_ * input_col_ * sizeof(T)); + input_size_list_.push_back(input_row_ * input_col_ * sizeof(T)); + input_size_list_.push_back(input_row_ * sizeof(T)); + input_size_list_.push_back(input_row_ * sizeof(T)); + input_size_list_.push_back(param_dim_ * sizeof(T)); + + output_size_list_.push_back(input_row_ * input_col_ * sizeof(T)); + output_size_list_.push_back(param_dim_ * sizeof(T)); + output_size_list_.push_back(param_dim_ * sizeof(T)); + return; + } + + private: + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + + int input_row_; + int input_col_; + int param_dim_; +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GRAD_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/hccl/hcom_util.cc b/mindspore/ccsrc/kernel/hccl/hcom_util.cc index d1c0a30113..5665475c84 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_util.cc +++ b/mindspore/ccsrc/kernel/hccl/hcom_util.cc @@ -136,7 +136,7 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vector #include "utils/log_adapter.h" +#include "debug/anf_ir_dump.h" namespace mindspore { namespace kernel { std::string KernelBuildInfo::GetInputFormat(size_t input_index) const { if (input_index >= inputs_format_.size()) { - MS_LOG(EXCEPTION) << "The index [" << input_index << "] is exceed the number of input node"; + MS_LOG(ERROR) << "The index [" << input_index << "] is exceed the number of input node"; + return kInvalidFormat; } return inputs_format_[input_index]; } std::string KernelBuildInfo::GetOutputFormat(size_t output_index) const { if (output_index >= outputs_format_.size()) { - MS_LOG(EXCEPTION) << "The index [" << 
output_index << "] is exceed the number of input node"; + MS_LOG(ERROR) << "The index [" << output_index << "] is exceed the number of input node"; + return kInvalidFormat; } return outputs_format_[output_index]; } TypeId KernelBuildInfo::GetInputDeviceType(size_t input_index) const { if (input_index >= inputs_device_type_.size()) { - MS_LOG(EXCEPTION) << "The index [" << input_index << "] is exceed the number of input node"; + MS_LOG(ERROR) << "The index [" << input_index << "] is exceed the number of input"; + return TypeId::kNumberTypeEnd; } return inputs_device_type_[input_index]; } TypeId KernelBuildInfo::GetOutputDeviceType(size_t output_index) const { if (output_index >= outputs_device_type_.size()) { - MS_LOG(EXCEPTION) << "The index [" << output_index << "] is exceed the number of input node"; + MS_LOG(ERROR) << "The index [" << output_index << "] is exceed the number of output"; + return TypeId::kNumberTypeEnd; } return outputs_device_type_[output_index]; } @@ -82,14 +87,14 @@ std::string KernelBuildInfo::ToString() const { if (index != 0) { output_buffer << ", "; } - output_buffer << "<" << static_cast(GetInputDeviceType(index)) << "x" << GetInputFormat(index) << ">"; + output_buffer << "<" << ToShortString(GetInputDeviceType(index)) << "x" << GetInputFormat(index) << ">"; } output_buffer << ") -> ("; for (size_t index = 0; index < GetOutputNum(); ++index) { if (index != 0) { output_buffer << ", "; } - output_buffer << "<" << static_cast(GetOutputDeviceType(index)) << "x" << GetOutputFormat(index) << ">"; + output_buffer << "<" << ToShortString(GetOutputDeviceType(index)) << "x" << GetOutputFormat(index) << ">"; } output_buffer << ")"; return output_buffer.str(); diff --git a/mindspore/ccsrc/kernel/kernel_build_info.h b/mindspore/ccsrc/kernel/kernel_build_info.h index 76ebc7a572..779be057f6 100644 --- a/mindspore/ccsrc/kernel/kernel_build_info.h +++ b/mindspore/ccsrc/kernel/kernel_build_info.h @@ -82,6 +82,9 @@ class KernelBuildInfo { bool 
operator==(const KernelBuildInfo &other) const; + public: + static auto constexpr kInvalidFormat = "InvalidFormat"; + private: KernelType kernel_type_; std::vector inputs_format_; diff --git a/mindspore/ccsrc/kernel/kernel_fusion.cc b/mindspore/ccsrc/kernel/kernel_fusion.cc index cd8936f218..4e1ad97e23 100644 --- a/mindspore/ccsrc/kernel/kernel_fusion.cc +++ b/mindspore/ccsrc/kernel/kernel_fusion.cc @@ -108,7 +108,8 @@ std::map KernelFusion(const std::vector } if ((task_result != nullptr) && (strcmp(task_result, "Success") != 0)) { - MS_LOG(DEBUG) << "fuison op build failed, err log: " << task_result << " change to single op build."; + MS_LOG(INFO) << "Fusion warning: Fuison op build failed, err log: " << task_result + << " change to single op build."; build_failed_num++; } auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, false); diff --git a/mindspore/ccsrc/kernel/kernel_query.cc b/mindspore/ccsrc/kernel/kernel_query.cc index 3d3282e7b5..e4a1af7f50 100755 --- a/mindspore/ccsrc/kernel/kernel_query.cc +++ b/mindspore/ccsrc/kernel/kernel_query.cc @@ -26,7 +26,7 @@ namespace mindspore { namespace kernel { namespace { -void FilterInvaildKernelInfo(const CNodePtr &kernel_node, +void FilterInvalidKernelInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { MS_EXCEPTION_IF_NULL(kernel_info_list); std::vector> filtered_list; @@ -63,9 +63,9 @@ void KernelQuery(const CNodePtr &kernel_node, std::vectorempty()) { - MS_LOG(EXCEPTION) << "op" << kernel_node->DebugString() << "kernel query fail!"; + MS_LOG(EXCEPTION) << "Op " << kernel_node->DebugString() << "kernel query fail!"; } - FilterInvaildKernelInfo(kernel_node, kernel_info_list); + FilterInvalidKernelInfo(kernel_node, kernel_info_list); } } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/mng/rt_kernel_info.cc b/mindspore/ccsrc/kernel/mng/rt_kernel_info.cc index a87bb4d514..cb230bc706 100755 --- a/mindspore/ccsrc/kernel/mng/rt_kernel_info.cc +++ 
b/mindspore/ccsrc/kernel/mng/rt_kernel_info.cc @@ -46,24 +46,40 @@ RtKerDescFactory &RtKerDescFactory::Get() { void GetRtKelInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { - MS_LOG(INFO) << "Mng kernel Info."; MS_EXCEPTION_IF_NULL(kernel_info_list); MS_EXCEPTION_IF_NULL(kernel_node); std::string opNameLower = AnfAlgo::GetCNodeName(kernel_node); (void)std::transform(opNameLower.begin(), opNameLower.end(), opNameLower.begin(), ::tolower); auto ker_desc_ptr = RtKerDescFactory::Create(opNameLower); - if (ker_desc_ptr == nullptr) { - MS_LOG(DEBUG) << "Mng can't find op [" << opNameLower << "]."; + if (ker_desc_ptr != nullptr && !ker_desc_ptr->GetKernelInfo().empty()) { + *kernel_info_list = ker_desc_ptr->GetKernelInfo(); return; } - MS_EXCEPTION_IF_NULL(ker_desc_ptr); - auto kernel_info = ker_desc_ptr->GetKernelInfo(); - if (kernel_info.empty()) { - MS_LOG(DEBUG) << "Rt dose not have op [" << opNameLower << "]."; + // if can't find kernel info in kernel info database, use the default kernel info + auto node_name = AnfAlgo::GetCNodeName(kernel_node); + if (node_name == "StreamSwitch" || node_name == "StreamActive") { + auto kernel_build_info_builder = std::make_shared(); + // set input infos + auto input_num = AnfAlgo::GetInputTensorNum(kernel_node); + kernel_build_info_builder->SetInputsFormat(std::vector(input_num, kOpFormat_DEFAULT)); + std::vector input_types = {}; + for (size_t i = 0; i < input_num; i++) { + input_types.push_back(AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, i)); + } + kernel_build_info_builder->SetInputsDeviceType(input_types); + // set output info + auto output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + kernel_build_info_builder->SetOutputsFormat(std::vector(output_num, kOpFormat_DEFAULT)); + kernel_build_info_builder->SetOutputsDeviceType(std::vector(output_num, TypeId::kTypeUnknown)); + // set ohter info + kernel_build_info_builder->SetFusionType(kernel::FusionType::OPAQUE); + 
kernel_build_info_builder->SetProcessor(kernel::Processor::AICORE); + kernel_build_info_builder->SetKernelType(KernelType::RT_KERNEL); + kernel_info_list->push_back(kernel_build_info_builder->Build()); return; } - *kernel_info_list = kernel_info; + MS_LOG(DEBUG) << "Rt dose not have op [" << opNameLower << "]."; } } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc index 44750fab4f..8ce5504b8e 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc @@ -30,6 +30,9 @@ namespace mindspore { namespace kernel { namespace tbe { static std::map tbe_func_adapter_map = { + {"softmax", "softmax_v2"}, + {"log_softmax", "log_softmax_v2"}, + {"apply_momentum", "apply_momentum_d"}, {"re_lu6", "relu6"}, {"re_lu6_grad", "relu6_grad"}, {"re_lu", "relu"}, @@ -38,6 +41,7 @@ static std::map tbe_func_adapter_map = { {"reduce_mean", "reduce_mean_d"}, {"reduce_max", "reduce_max_d"}, {"reduce_min", "reduce_min_d"}, + {"avg_pool_grad", "avg_pool_grad_d"}, {"conv2d_backprop_filter", "conv2d_backprop_filter_d"}, {"conv2d_backprop_input", "conv2d_backprop_input_d"}, {"depthwise_conv2d_native", "depthwise_conv2d"}, @@ -153,6 +157,52 @@ void TbeAdapter::InputOrderPass(const std::string &op_name, std::vector &inputs_list, + std::vector *inputs_json) { + MS_EXCEPTION_IF_NULL(inputs_json); + if (input_order_adjusted_ops.find(op_name) == input_order_adjusted_ops.end()) { + (void)std::copy(inputs_list.begin(), inputs_list.end(), std::back_inserter((*inputs_json))); + } else { + if (op_name == "MinimumGrad" || op_name == "MaximumGrad") { + inputs_json->emplace_back(inputs_list[2]); + inputs_json->emplace_back(inputs_list[0]); + inputs_json->emplace_back(inputs_list[1]); + for (size_t i = 3; i < inputs_list.size(); ++i) { + inputs_json->emplace_back(inputs_list[i]); + } + } else { + inputs_json->emplace_back(inputs_list[1]); + 
inputs_json->emplace_back(inputs_list[0]); + for (size_t i = 2; i < inputs_list.size(); ++i) { + inputs_json->emplace_back(inputs_list[i]); + } + } + } +} + +void TbeAdapter::FusionDataOrderPass(const std::string &op_name, const std::vector &data_layer, + std::vector *reorder_data_layer) { + MS_EXCEPTION_IF_NULL(reorder_data_layer); + if (input_order_adjusted_ops.find(op_name) == input_order_adjusted_ops.end()) { + (void)std::copy(data_layer.begin(), data_layer.end(), std::back_inserter((*reorder_data_layer))); + } else { + if (op_name == "MinimumGrad" || op_name == "MaximumGrad") { + reorder_data_layer->emplace_back(data_layer[2]); + reorder_data_layer->emplace_back(data_layer[0]); + reorder_data_layer->emplace_back(data_layer[1]); + for (size_t i = 3; i < data_layer.size(); ++i) { + reorder_data_layer->emplace_back(data_layer[i]); + } + } else { + reorder_data_layer->emplace_back(data_layer[1]); + reorder_data_layer->emplace_back(data_layer[0]); + for (size_t i = 2; i < data_layer.size(); ++i) { + reorder_data_layer->emplace_back(data_layer[i]); + } + } + } +} + std::map TbeAdapter::build_json_attr_pass_map_ = { {"MaximumGrad", TbeAdapter::MaximumGradAttrJsonPass}, {"MinimumGrad", TbeAdapter::MinimumGradAttrJsonPass}, diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.h b/mindspore/ccsrc/kernel/tbe/tbe_adapter.h index 27f6d315f6..0208d6c6a6 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.h +++ b/mindspore/ccsrc/kernel/tbe/tbe_adapter.h @@ -44,15 +44,12 @@ class TbeAdapter { static void GenTopKV2IndicesTensorInfo(const std::shared_ptr &anf_node, size_t real_input_index, std::vector *input_list, kCreaterType creater_type); + static void FusionInputOrderPass(const std::string &op_name, const std::vector &inputs_list, + std::vector *inputs_json); + static void FusionDataOrderPass(const std::string &op_name, const std::vector &data_layer, + std::vector *reorder_data_layer); + private: - static void Conv2DAttrJsonPass(const AnfNodePtr &anf_node, const std::vector> 
&op_info_attrs, - nlohmann::json *attrs_json); - static void Conv2DBackpropFilterAttrJsonPass(const AnfNodePtr &anf_node, - const std::vector> &op_info_attrs, - nlohmann::json *attrs_json); - static void Conv2DBackpropInputAttrJsonPass(const AnfNodePtr &anf_node, - const std::vector> &op_info_attrs, - nlohmann::json *attrs_json); static void MaximumGradAttrJsonPass(const AnfNodePtr &anf_node, const std::vector> &op_info_attrs, nlohmann::json *attrs_json); diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc index 5255cc6450..24823b9275 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc @@ -375,19 +375,26 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr &anf_no MS_EXCEPTION_IF_NULL(primitive); for (const auto &attr_ptr : attrs_ptr) { std::string attr_name = attr_ptr->name(); + nlohmann::json attr_obj; + attr_obj["name"] = attr_name; if (primitive->GetAttr(attr_name) != nullptr) { - nlohmann::json attr_obj; auto value = primitive->GetAttr(attr_name); std::string type = attr_ptr->type(); ParseAttrValue(type, value, &attr_obj); - attr_obj["name"] = attr_name; attr_obj["valid"] = true; - (*attrs_json).push_back(attr_obj); } else { - if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD && op_info->impl_path() != "") { - MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name << "is required, but not set."; + if (op_info->impl_path().empty()) { + attr_obj["valid"] = false; + } else { + if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD) { + MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name + << " is required, but not set."; + } else { + attr_obj["valid"] = false; + } } } + (*attrs_json).push_back(attr_obj); } return true; } @@ -483,7 +490,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector &inp MS_EXCEPTION_IF_NULL(fusion_kernel); // 
get input layer info std::vector> input_layers; - if (!GetInputLayers(input_nodes, compute_nodes, &input_layers)) { + std::map spec_data_input; + if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) { return false; } // gen fusion scopre_op jsom @@ -504,8 +512,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector &inp for (const auto &layer : input_layers) { for (const auto &data_input : layer) { nlohmann::json data_str; - if (!GenFusionDataInputJson(data_input, &data_str, &index)) { - MS_LOG(DEBUG) << "GenFusionDataInputJson faild."; + if (!GenFusionDataInputJson(data_input, spec_data_input, &data_str, &index)) { + MS_LOG(INFO) << "Fusion error: gen fusion datainput json faild."; return false; } data_list.push_back(data_str); @@ -518,7 +526,7 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector &inp } void TbeKernelBuild::GenDescJson(const std::shared_ptr &anf_node, size_t node_out_idx, - size_t desc_output_idx, nlohmann::json *output_desc) { + size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) { std::string output_desc_name = anf_node->fullname_with_scope(); if (node_out_idx > 0) { output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx); @@ -538,58 +546,109 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr &anf_ (*output_desc)["shape"] = shape; auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx); if (format == kOpFormat_DEFAULT) { - if (ori_shape.size() == 4) { - format = kOpFormat_NCHW; - } else { - format = kOpFormat_ND; - } + format = ori_shape.size() == 4 ? 
kOpFormat_NCHW : kOpFormat_ND; } (*output_desc)["format"] = format; (*output_desc)["ori_format"] = kOpFormat_NCHW; (*output_desc)["output_index"] = desc_output_idx; + if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) { + std::vector spec_shape = {}; + spec_shape.emplace_back(shape[0]); + spec_shape.emplace_back(shape[1]); + spec_shape.emplace_back(shape[2] * shape[3]); + spec_shape.emplace_back(shape[4]); + (*output_desc)["shape"] = spec_shape; + } else if (fusion_data_type == kFusionReLUGradV2 && (*output_desc)["data_type"] == "uint8") { + std::vector spec_shape = {}; + spec_shape.emplace_back(shape[0]); + spec_shape.emplace_back(shape[1]); + spec_shape.emplace_back(shape[2] * shape[3]); + spec_shape.emplace_back(16); + (*output_desc)["shape"] = spec_shape; + (*output_desc)["data_type"] = "bool"; + } } void TbeKernelBuild::GenReusedOutputDesc(const shared_ptr &anf_node, size_t index, size_t output_index, nlohmann::json *output_desc) { std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index); (*output_desc)["name"] = NormalizeFullScopeName(output_desc_name); - (*output_desc)["data_type"] = tbe::TypeIdToString(kNumberTypeFloat32); (*output_desc)["output_index"] = output_index; std::vector shape; (*output_desc)["shape"] = shape; } -bool TbeKernelBuild::GetInputLayers(const vector &input_nodes, - const vector &compute_nodes, - std::vector> *input_layers) { +bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name, + const std::vector &reorder_layer, + std::map *spec_data_input) { + if ((op_name == kReluGradV2OpName || op_name == kAddNOpName) && reorder_layer.empty()) { + MS_LOG(INFO) << "Fusion error: node(" << op_name << " )'s input is null. 
"; + return false; + } + MS_LOG(INFO) << "Fusion info: op_name: " << op_name << "input layer size: " << reorder_layer.size(); + if (op_name == kReluGradV2OpName) { + (*spec_data_input)[reorder_layer[0]] = kFusionReLUGradV2; + } else if (op_name == kAddNOpName) { + for (const auto &it : reorder_layer) { + (*spec_data_input)[it] = kFusionAddN; + } + } + return true; +} + +bool TbeKernelBuild::GetInputLayers(const std::vector &input_nodes, + const std::vector &compute_nodes, + std::vector> *input_layers, + std::map *spec_data_input) { + auto result = std::find_if(compute_nodes.begin(), compute_nodes.end(), [](const auto &it) { + auto op_name = AnfAlgo::GetCNodeName(it); + return op_name == kConv2DBackpropInputOpName; + }); + bool need_spec = (result != compute_nodes.end()); size_t input_size = 0; for (const auto &compute_node : compute_nodes) { - std::vector layer; + std::vector layer = {}; + std::vector reorder_layer = {}; MS_EXCEPTION_IF_NULL(compute_node); + auto op_name = AnfAlgo::GetCNodeName(compute_node); auto ccompute_node = compute_node->cast(); if (ccompute_node == nullptr) { - MS_LOG(DEBUG) << "fusion compute node must be cnode"; + MS_LOG(INFO) << "Fusion error: fusion compute node must be cnode"; return false; } + MS_LOG(INFO) << "Fusion info: compute name: " << compute_node->fullname_with_scope(); for (size_t i = 1; i < ccompute_node->inputs().size(); ++i) { auto input = ccompute_node->input(i); auto find_iter = std::find(input_nodes.begin(), input_nodes.end(), input); if (find_iter != input_nodes.end()) { + MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope(); layer.emplace_back((*find_iter)); + } else { + MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope() + << ") node's output."; + } + } + TbeAdapter::FusionDataOrderPass(op_name, layer, &reorder_layer); + if (need_spec) { + MS_LOG(INFO) << "Fusion info: match conv2d backprop input + ... 
patten."; + if (!GetSpecInputLayers(op_name, reorder_layer, spec_data_input)) { + return false; } } - input_size += layer.size(); - input_layers->emplace_back(layer); + input_size += reorder_layer.size(); + input_layers->emplace_back(reorder_layer); } if (input_nodes.size() != input_size) { - MS_LOG(DEBUG) << "fusion scope error, layer input:" << input_size << ", input_node:" << input_nodes.size(); + MS_LOG(INFO) << "Fusion error: fusion scope error, layer input:" << input_size + << ", input_node:" << input_nodes.size(); return false; } return true; } -bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr &data_input, nlohmann::json *data_str, - size_t *index) { +bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr &data_input, + const std::map &spec_data_input, + nlohmann::json *data_str, size_t *index) { MS_EXCEPTION_IF_NULL(data_str); MS_EXCEPTION_IF_NULL(index); std::vector output_desc_list; @@ -603,13 +662,17 @@ bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr output_desc_list.push_back(output_desc); (*index)++; } else { + FusionDataType fusion_data_type = kFusionNormal; + if (spec_data_input.find(data_input) != spec_data_input.end()) { + fusion_data_type = spec_data_input.at(data_input); + } auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0); auto real_node = kernel_idx.first; size_t real_idx = kernel_idx.second; MS_LOG(INFO) << "real name " << real_node->fullname_with_scope() << " index:" << real_idx; // "output_desc" nlohmann::json output_desc; - GenDescJson(real_node, real_idx, real_idx, &output_desc); + GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type); output_desc_list.push_back(output_desc); (*data_str)["name"] = NormalizeFullScopeName(real_node->fullname_with_scope()); } @@ -631,11 +694,12 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) { auto real_input_size = cnode->inputs().size() - 1; auto dyn_input_size = dyn_input_sizes.size(); if (dyn_input_size != 1) { - 
MS_LOG(DEBUG) << "fusion build not support dyn_input_sizes > 1"; + MS_LOG(INFO) << "Fusion error: fusion build not support dyn_input_sizes > 1"; return ret; } if (IntToSize(dyn_input_sizes[0]) != real_input_size) { - MS_LOG(DEBUG) << " dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size" << real_input_size; + MS_LOG(INFO) << "Fusion error: dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size" + << real_input_size; return ret; } ret = true; @@ -662,6 +726,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, std::vector *input_desc_list, size_t *index) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(input_desc_list); + std::vector input_desc_list_tmp = {}; bool is_dynamic_input = IsDynamicInput(cnode); for (size_t i = 1; i < cnode->inputs().size(); ++i) { auto input = cnode->input(i); @@ -675,7 +740,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, MS_LOG(INFO) << "node has dynamic input."; input_desc["dyn_index"] = (i - 1); } - (*input_desc_list).emplace_back(input_desc); + input_desc_list_tmp.emplace_back(input_desc); } size_t optional_num = GetOptionalInput(cnode, is_dynamic_input); if (optional_num > 0) { @@ -685,35 +750,24 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, optional_input_desc["name"] = std::string(kOptional) + std::to_string(*index); (*index)++; (*layer_iter)->emplace_back(nullptr); - (*input_desc_list).emplace_back(optional_input_desc); + input_desc_list_tmp.emplace_back(optional_input_desc); } } + auto op_name = AnfAlgo::GetCNodeName(cnode); + TbeAdapter::FusionInputOrderPass(op_name, input_desc_list_tmp, input_desc_list); return true; } std::vector TbeKernelBuild::GetDescOutputIndex(const std::vector &output_used_nums) { std::vector desc_output_index = {}; - bool find_reused = false; - size_t reused_num = 0; for (size_t idx = 0; idx < output_used_nums.size(); ++idx) { auto output_use_num_item = 
output_used_nums[idx]; MS_LOG(INFO) << "output used num[" << idx << "] = " << output_use_num_item; - if (output_use_num_item == 1 || output_use_num_item == 0) { + desc_output_index.emplace_back(idx); + if (output_use_num_item > 1) { desc_output_index.emplace_back(idx); - } else { - if (!find_reused) { - desc_output_index.emplace_back(idx); - } else { - desc_output_index.emplace_back(desc_output_index[idx - 1]); - } - reused_num += (output_use_num_item - 1); - find_reused = true; } } - auto pad_value = output_used_nums.size() == 1 ? 0 : desc_output_index[desc_output_index.size() - 1] + 1; - for (size_t i = 0; i < reused_num; ++i) { - desc_output_index.emplace_back(pad_value); - } return desc_output_index; } @@ -721,8 +775,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode std::vector *output_desc_list) { auto output_size = AnfAlgo::GetOutputTensorNum(cnode); if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) { - // wait anther pr: auto output_used_nums = AnfAlgo::GetNodeAttr>(cnode, kAttrOutputUsedNum); - auto output_used_nums = {SizeToInt(AnfAlgo::GetNodeAttr(cnode, kAttrOutputUsedNum))}; + auto output_used_nums = AnfAlgo::GetNodeAttr>(cnode, kAttrOutputUsedNum); MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope(); if (output_used_nums.size() != output_size) { MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")" @@ -811,6 +864,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto } auto ret = GetIOSizeImpl(data_output); input_size_list->push_back(ret); + MS_LOG(INFO) << "Fusion info: scope input name: " << op["name"] << ", size: " << ret; } } } @@ -819,26 +873,31 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0); auto real_node = kernel_idx.first; size_t real_idx = kernel_idx.second; + auto normal_name = 
NormalizeFullScopeName(real_node->fullname_with_scope()); + MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx; for (const auto &op : fusion_op_list) { - auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope()); if (op["name"] == normal_name) { auto op_output_desces = op["output_desc"]; if (output_node != real_node) { // tuple_get item - MS_LOG(DEBUG) << "output is a tuple getitem node"; + MS_LOG(INFO) << "output is a tuple getitem node"; auto output_desc = op_output_desces[real_idx]; if (output_desc["shape"].empty()) { - continue; + MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx; + return false; } auto ret = GetIOSizeImpl(output_desc); output_size_list->push_back(ret); + MS_LOG(INFO) << "Fusion info: scope output index: " << real_idx << ", size: " << ret; } else { for (const auto &output_desc : op_output_desces) { if (output_desc["shape"].empty()) { + MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output"; continue; } auto ret = GetIOSizeImpl(output_desc); output_size_list->push_back(ret); + MS_LOG(INFO) << "Fusion info: scope output size: " << ret; } } } diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h index 1a3eee7fd9..f6e28327d4 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h @@ -35,6 +35,8 @@ namespace kernel { // kernel operate type used for generate json class TbeKernelBuild { + enum FusionDataType { kFusionNormal = 0, kFusionAddN, kFusionReLUGradV2 }; + public: static bool GetIOSize(const nlohmann::json &kernel_json, std::vector *input_size_list, std::vector *output_size_list); @@ -48,8 +50,9 @@ class TbeKernelBuild { private: TbeKernelBuild() = default; ~TbeKernelBuild() = default; - static bool GenFusionDataInputJson(const std::shared_ptr &data_input, nlohmann::json *data_str, - size_t *index); + 
static bool GenFusionDataInputJson(const std::shared_ptr &data_input, + const std::map &spec_data_input, + nlohmann::json *data_str, size_t *index); static bool GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node, std::vector>::iterator *layer_iter, nlohmann::json *compute_op_str, std::string *fusion_kernel_name, size_t *index); @@ -60,13 +63,17 @@ class TbeKernelBuild { static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode, std::vector *output_desc_list); static void GenDescJson(const std::shared_ptr &anf_node, size_t node_out_idx, - size_t desc_output_idx, nlohmann::json *output_desc); + size_t desc_output_idx, nlohmann::json *output_desc, + FusionDataType fusion_data_type = kFusionNormal); static void GenReusedOutputDesc(const std::shared_ptr &anf_node, size_t index, size_t output_index, nlohmann::json *output_desc); static size_t GetIOSizeImpl(const nlohmann::json &desc); + static bool GetSpecInputLayers(const std::string &op_name, const std::vector &reorder_layer, + std::map *spec_data_input); static bool GetInputLayers(const std::vector &input_nodes, const std::vector &compute_nodes, - std::vector> *input_layers); + std::vector> *input_layers, + std::map *spec_data_input); static bool IsDynamicInput(const CNodePtr &cnode); static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input); }; diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc index 127451851e..63e0fb888d 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc @@ -543,11 +543,6 @@ bool IsValidKernelInfo(const std::shared_ptr &kernel_node, const kernel:: if (!IsShapeMatchFormat(shape, format)) { return false; } - for (auto shape_value : shape) { - if (shape_value == 0) { - MS_LOG(EXCEPTION) << "Dimension size of the tensor shape should be a positive integer, but got " << shape_value; - } - } return true; }; for (size_t index = 0; index < 
kernel_build_info.GetOutputNum(); ++index) { @@ -593,10 +588,12 @@ void TbeMetadataInfo(const CNodePtr &kernel_node, std::vectorexecution_mode() == kPynativeMode) { kernel_info_list->push_back(parse_info); } else { - if (IsValidKernelInfo(kernel_node, *(parse_info)) && CheckSupported(kernel_node, parse_info)) { - kernel_info_list->push_back(parse_info); - } else { - MS_LOG(INFO) << "CheckSupported Failed for TBE op" << op_name << " kernel info."; + if (IsValidKernelInfo(kernel_node, *(parse_info))) { + if (CheckSupported(kernel_node, parse_info)) { + kernel_info_list->push_back(parse_info); + } else { + MS_LOG(INFO) << "CheckSupported Failed for TBE op" << op_name << " kernel info."; + } } } } diff --git a/mindspore/ccsrc/mindrecord/CMakeLists.txt b/mindspore/ccsrc/mindrecord/CMakeLists.txt index fdd648a50f..f523eae569 100644 --- a/mindspore/ccsrc/mindrecord/CMakeLists.txt +++ b/mindspore/ccsrc/mindrecord/CMakeLists.txt @@ -34,4 +34,10 @@ endif() if (USE_GLOG) target_link_libraries(_c_mindrecord PRIVATE mindspore::glog) +else() + if (CMAKE_SYSTEM_NAME MATCHES "Linux") + target_link_options(_c_mindrecord PRIVATE -Wl,-init,mindspore_log_init) + elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") + set_target_properties(_c_mindrecord PROPERTIES MACOSX_RPATH ON) + endif () endif() diff --git a/mindspore/ccsrc/mindrecord/common/shard_utils.cc b/mindspore/ccsrc/mindrecord/common/shard_utils.cc index 51de0c5f64..edeabb3cde 100644 --- a/mindspore/ccsrc/mindrecord/common/shard_utils.cc +++ b/mindspore/ccsrc/mindrecord/common/shard_utils.cc @@ -39,7 +39,7 @@ std::vector StringSplit(const std::string &field, char separator) { } s_pos = e_pos + 1; } - return std::move(res); + return res; } bool ValidateFieldName(const std::string &str) { diff --git a/mindspore/ccsrc/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/mindrecord/io/shard_reader.cc index 9cd02d9120..804613e40a 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_reader.cc +++ b/mindspore/ccsrc/mindrecord/io/shard_reader.cc @@ 
-316,6 +316,10 @@ MSRStatus ShardReader::ReadAllRowsInShard(int shard_id, const std::string &sql, } MSRStatus ShardReader::GetAllClasses(const std::string &category_field, std::set &categories) { + if (column_schema_id_.find(category_field) == column_schema_id_.end()) { + MS_LOG(ERROR) << "Field " << category_field << " does not exist."; + return FAILED; + } auto ret = ShardIndexGenerator::GenerateFieldName(std::make_pair(column_schema_id_[category_field], category_field)); if (SUCCESS != ret.first) { return FAILED; @@ -346,7 +350,8 @@ void ShardReader::GetClassesInShard(sqlite3 *db, int shard_id, const std::string MS_LOG(ERROR) << "Error in select sql statement, sql:" << common::SafeCStr(sql) << ", error: " << errmsg; return; } - MS_LOG(INFO) << "Get" << static_cast(columns.size()) << " records from shard " << shard_id << " index."; + MS_LOG(INFO) << "Get " << static_cast(columns.size()) << " records from shard " << shard_id << " index."; + std::lock_guard lck(shard_locker_); for (int i = 0; i < static_cast(columns.size()); ++i) { categories.emplace(columns[i][0]); } @@ -718,6 +723,11 @@ int64_t ShardReader::GetNumClasses(const std::string &file_path, const std::stri for (auto &field : index_fields) { map_schema_id_fields[field.second] = field.first; } + + if (map_schema_id_fields.find(category_field) == map_schema_id_fields.end()) { + MS_LOG(ERROR) << "Field " << category_field << " does not exist."; + return -1; + } auto ret = ShardIndexGenerator::GenerateFieldName(std::make_pair(map_schema_id_fields[category_field], category_field)); if (SUCCESS != ret.first) { @@ -904,7 +914,7 @@ vector ShardReader::GetAllColumns() { } else { columns = selected_columns_; } - return std::move(columns); + return columns; } MSRStatus ShardReader::CreateTasksByBlock(const std::vector> &row_group_summary, diff --git a/mindspore/ccsrc/mindrecord/meta/shard_category.cc b/mindspore/ccsrc/mindrecord/meta/shard_category.cc index 80816e7a79..2a9c2c0966 100644 --- 
a/mindspore/ccsrc/mindrecord/meta/shard_category.cc +++ b/mindspore/ccsrc/mindrecord/meta/shard_category.cc @@ -38,7 +38,7 @@ MSRStatus ShardCategory::execute(ShardTask &tasks) { return SUCCESS; } int64_t ShardCategory::GetNumSamples(int64_t dataset_size, int64_t num_classes) { if (dataset_size == 0) return dataset_size; - if (dataset_size > 0 && num_categories_ > 0 && num_elements_ > 0) { + if (dataset_size > 0 && num_classes > 0 && num_categories_ > 0 && num_elements_ > 0) { return std::min(num_categories_, num_classes) * num_elements_; } return -1; diff --git a/mindspore/ccsrc/onnx/CMakeLists.txt b/mindspore/ccsrc/onnx/CMakeLists.txt index 2a25e67634..78884b5d05 100644 --- a/mindspore/ccsrc/onnx/CMakeLists.txt +++ b/mindspore/ccsrc/onnx/CMakeLists.txt @@ -1,5 +1,2 @@ -file(GLOB_RECURSE _ONNX_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "*.cc" - ) - -add_library(_mindspore_onnx_obj OBJECT ${_ONNX_ALL_SRC_FILES}) +file(GLOB_RECURSE _ONNX_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +add_library(_mindspore_onnx_obj OBJECT ${_ONNX_SRC_FILES}) diff --git a/mindspore/ccsrc/onnx/onnx_exporter.cc b/mindspore/ccsrc/onnx/onnx_exporter.cc index 772986d714..1c5a7b93c3 100644 --- a/mindspore/ccsrc/onnx/onnx_exporter.cc +++ b/mindspore/ccsrc/onnx/onnx_exporter.cc @@ -24,16 +24,17 @@ #include #include "debug/anf_ir_utils.h" -#include "./onnx.pb.h" +#include "proto/onnx.pb.h" #include "operator/ops.h" namespace mindspore { enum OpMergeMode { - OP_MERGE_UNDEFINED = 0, // undefined behavior - OP_MERGE_IGNORE = 1, // indicate an input op merged into other op in compute node list - OP_MERGE_CONV = 2, // indicate `MindSpore Conv + BiasAdd` --> `ONNX Conv` - OP_MERGE_GEMM = 3, // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm` - OP_MERGE_BATCH_NORM = 4, // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX BatchNormalization` + OP_MERGE_UNDEFINED = 0, // undefined behavior + OP_MERGE_IGNORE = 1, // indicate an input op merged into other op in compute node list + 
OP_MERGE_CONV = 2, // indicate `MindSpore Conv + BiasAdd` --> `ONNX Conv` + OP_MERGE_GEMM = 3, // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm` + OP_MERGE_BATCH_NORM = 4, // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX BatchNormalization` + OP_MERGE_MAXPOOL_WITH_ARGMAX = 5, // indicate `MindSpore MaxPoolWithArgmax(x)[0]` --> `ONNX MaxPool` }; struct OpMergedInfo { @@ -233,6 +234,13 @@ OPERATOR_ONNX_CONVERT_DEFINE( .Attr("padding", "auto_pad", onnx::AttributeProto_AttributeType_STRING, SetPoolingPadMode) .Attr("strides", "strides", onnx::AttributeProto_AttributeType_INTS, SetAttrTupleValueToProto<2>)) +OPERATOR_ONNX_CONVERT_DEFINE( + MaxPoolWithArgmax, MaxPool, + OpNameInfo() + .Attr("ksize", "kernel_shape", onnx::AttributeProto_AttributeType_INTS, SetAttrTupleValueToProto<2>) + .Attr("padding", "auto_pad", onnx::AttributeProto_AttributeType_STRING, SetPoolingPadMode) + .Attr("strides", "strides", onnx::AttributeProto_AttributeType_INTS, SetAttrTupleValueToProto<2>)) + OPERATOR_ONNX_CONVERT_DEFINE( AvgPool, AveragePool, OpNameInfo() @@ -254,6 +262,7 @@ void RegisterOpConverters(const std::function &fn) { fn(OP_CONVERT_FUNCTION_NAME(Flatten)()); fn(OP_CONVERT_FUNCTION_NAME(MaxPool)()); + fn(OP_CONVERT_FUNCTION_NAME(MaxPoolWithArgmax)()); fn(OP_CONVERT_FUNCTION_NAME(AvgPool)()); fn(OP_CONVERT_FUNCTION_NAME(Squeeze)()); @@ -328,6 +337,8 @@ class OnnxExporter { onnx::GraphProto *graph_proto); void ExportMergeBatchNorm(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *graph_proto); + void ExportMergeMaxPoolWithArgmax(const FuncGraphPtr &func_graph, const CNodePtr &node, + std::map *node_map_ptr, onnx::GraphProto *graph_proto); void ExportOutput(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *graph_proto); @@ -516,6 +527,12 @@ void OnnxExporter::MatchAndMark(const FuncGraphPtr &func_graph, const std::vecto op_merged_infos[cnode].mode = OP_MERGE_BATCH_NORM; 
op_merged_infos[cnode->input(1)].mode = OP_MERGE_IGNORE; op_merged_infos[cnode->input(1)].referred_count -= 1; + } else if (cnode->IsApply(prim::kPrimTupleGetItem) && + IsPrimitiveCNode(cnode->input(1), std::make_shared("MaxPoolWithArgmax")) && + GetInt32Value(cnode->input(2)) == 0) { + op_merged_infos[cnode].mode = OP_MERGE_MAXPOOL_WITH_ARGMAX; + op_merged_infos[cnode->input(1)].mode = OP_MERGE_IGNORE; + op_merged_infos[cnode->input(1)].referred_count -= 1; } } } @@ -563,6 +580,9 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::map *node_map_ptr, + onnx::GraphProto *const graph_proto) { + auto maxpool_with_argmax_node = dyn_cast(node->input(1)); + + PrimitivePtr prim_maxpool_with_argmax = + dyn_cast((dyn_cast(maxpool_with_argmax_node->input(0)))->value()); + std::vector inputs; + for (size_t i = 1; i < maxpool_with_argmax_node->inputs().size(); i++) { + inputs.push_back(maxpool_with_argmax_node->input(i)); + } + (*node_map_ptr)[node] = ExportPrimitive(func_graph, node_map_ptr, prim_maxpool_with_argmax, inputs, graph_proto); +} + void OnnxExporter::ExportOutput(const FuncGraphPtr & /*func_graph*/, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { if (node->inputs().size() != 2) { diff --git a/mindspore/ccsrc/operator/CMakeLists.txt b/mindspore/ccsrc/operator/CMakeLists.txt index 328b4cf787..9a54ec047c 100644 --- a/mindspore/ccsrc/operator/CMakeLists.txt +++ b/mindspore/ccsrc/operator/CMakeLists.txt @@ -1,5 +1,2 @@ -file(GLOB_RECURSE _OPERATOR_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "*.cc" - ) - -add_library(_mindspore_operator_obj OBJECT ${_OPERATOR_ALL_SRC_FILES}) +file(GLOB_RECURSE _OPERATOR_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +add_library(_mindspore_operator_obj OBJECT ${_OPERATOR_SRC_FILES}) diff --git a/mindspore/ccsrc/operator/composite/composite.cc b/mindspore/ccsrc/operator/composite/composite.cc index 11ab31a292..da4700b053 100644 --- 
a/mindspore/ccsrc/operator/composite/composite.cc +++ b/mindspore/ccsrc/operator/composite/composite.cc @@ -676,7 +676,7 @@ void MultitypeFuncGraph::Register(const std::vector &types_name, co for (auto &type_name : types_name) { auto type_ptr = StringToType(type_name); if (type_ptr == nullptr) { - MS_LOG(EXCEPTION) << "" << type_name << " convert from string error "; + MS_LOG(EXCEPTION) << type_name << " convert from string error "; } types.push_back(type_ptr); } @@ -955,8 +955,7 @@ int CheckSliceMember(const AbstractBasePtr &member, int default_value, const std return default_value; } - MS_LOG(EXCEPTION) << "" << member_name << " should be a AbstractScalar or AbstractNone, but got " - << member->ToString(); + MS_LOG(EXCEPTION) << member_name << " should be a AbstractScalar or AbstractNone, but got " << member->ToString(); } void GenerateTupleSliceParameter(const AbstractTuplePtr &tuple, const AbstractSlicePtr &slice, int *start_index, @@ -1084,6 +1083,7 @@ int GenerateStridedSliceParametersFromTuple(const AbstractTuplePtr &slice_tuple, std::vector shrink; auto slice_tuple_eles = slice_tuple->elements(); size_t ellipsis_num = 0; + for (size_t index = 0; index < slice_tuple_size; index++) { if (slice_tuple_eles[index]->isa()) { AbstractSlicePtr slice = dyn_cast(slice_tuple_eles[index]); @@ -1118,12 +1118,13 @@ int GenerateStridedSliceParametersFromTuple(const AbstractTuplePtr &slice_tuple, << slice_tuple_eles[index]->ToString(); } - for (size_t index = slice_tuple_size; index < shape_size; index++) { - begin->push_back(0); - end->push_back(shape[index]); - strides->push_back(1); + if (ellipsis_num == 0) { + for (size_t index = slice_tuple_size; index < shape_size; index++) { + begin->push_back(0); + end->push_back(shape[index]); + strides->push_back(1); + } } - return ConvertBinaryToDecimal(shrink); } @@ -1199,6 +1200,7 @@ FuncGraphPtr TensorSlice::GenerateFuncGraph(const AbstractBasePtrList &args_spec if (scalar_ptr->BuildValue()->cast()->value()) { return 
ExpandADim(ret_graph, tensor_node); } + MS_LOG(EXCEPTION) << "TensorSlice not support the index is False."; } shrink_axis_mask = GenerateStridedSliceParametersFromNumber(scalar_ptr, shape, &begin, &end, &strides); } else if (args_spec_list[1]->isa()) { diff --git a/mindspore/ccsrc/operator/composite/composite.h b/mindspore/ccsrc/operator/composite/composite.h index 429cf5341a..6c4bede82b 100644 --- a/mindspore/ccsrc/operator/composite/composite.h +++ b/mindspore/ccsrc/operator/composite/composite.h @@ -210,7 +210,6 @@ class TensorSlice : public MetaFuncGraph { FuncGraphPtr ExpandADim(const FuncGraphPtr &ret_graph, const AnfNodePtr &tensor_node) const; }; using TensorSlicePtr = std::shared_ptr; - } // namespace prim } // namespace mindspore diff --git a/mindspore/ccsrc/operator/composite/do_signature.cc b/mindspore/ccsrc/operator/composite/do_signature.cc index c3fe45a48a..1098ed1520 100644 --- a/mindspore/ccsrc/operator/composite/do_signature.cc +++ b/mindspore/ccsrc/operator/composite/do_signature.cc @@ -195,6 +195,8 @@ AnfNodePtr BuildNewCNode(const FuncGraphPtr &func_graph, const std::string &func param = func_graph->NewCNode({NewValueNode(prim::kPrimGetRefKey), param}); } // If sig is SignatureEnumRW::kRWRef, not do anything. 
+ } else if (sig == SignatureEnumRW::kRWWrite && type->type_id() != kObjectTypeRefKey) { + MS_EXCEPTION(TypeError) << "Function " << func_name << "'s input " << i << " should be a Parameter."; } // add cast op here if (assign_source != nullptr && sig != SignatureEnumRW::kRWWrite) { diff --git a/mindspore/ccsrc/operator/composite/unpack_call.cc b/mindspore/ccsrc/operator/composite/unpack_call.cc index 122f276657..6363d495c5 100644 --- a/mindspore/ccsrc/operator/composite/unpack_call.cc +++ b/mindspore/ccsrc/operator/composite/unpack_call.cc @@ -89,6 +89,5 @@ REGISTER_PYBIND_DEFINE(UnpackCall_, ([](const py::module *m) { (void)py::class_>(*m, "UnpackCall_") .def(py::init()); })); - } // namespace prim } // namespace mindspore diff --git a/mindspore/ccsrc/operator/composite/unpack_call.h b/mindspore/ccsrc/operator/composite/unpack_call.h index 2f39615c1a..8c055a9386 100644 --- a/mindspore/ccsrc/operator/composite/unpack_call.h +++ b/mindspore/ccsrc/operator/composite/unpack_call.h @@ -35,7 +35,6 @@ namespace mindspore { // namespace to support composite operators definition namespace prim { - // Expand the tuple and dict parameters generated when parsing the function call, // and generate positional parameters and key-value pairs for function. 
class UnpackCall : public MetaFuncGraph { @@ -47,7 +46,6 @@ class UnpackCall : public MetaFuncGraph { friend bool operator==(const UnpackCall &lhs, const UnpackCall &rhs) { return lhs.name_ == rhs.name_; } }; using UnpackCallPtr = std::shared_ptr; - } // namespace prim } // namespace mindspore diff --git a/mindspore/ccsrc/operator/ops.cc b/mindspore/ccsrc/operator/ops.cc index 91a54e1fdb..0a6fb0b3f6 100755 --- a/mindspore/ccsrc/operator/ops.cc +++ b/mindspore/ccsrc/operator/ops.cc @@ -170,9 +170,12 @@ const PrimitivePtr kPrimPooling = std::make_shared("Pooling"); const PrimitivePtr kPrimPoolingGrad = std::make_shared("PoolingGrad"); const PrimitivePtr kPrimMaxPool = std::make_shared("MaxPool"); const PrimitivePtr kPrimMaxPoolGrad = std::make_shared("MaxPoolGrad"); +const PrimitivePtr kPrimAvgPoolGrad = std::make_shared("AvgPoolGrad"); const PrimitivePtr kPrimFusedBatchNorm = std::make_shared("FusedBatchNorm"); const PrimitivePtr kPrimConv2D = std::make_shared("Conv2D"); const PrimitivePtr kPrimFusedBatchNormGrad = std::make_shared("FusedBatchNormGrad"); +const PrimitivePtr kPrimBatchNorm = std::make_shared("BatchNorm"); +const PrimitivePtr kPrimBatchNormGrad = std::make_shared("BatchNormGrad"); const PrimitivePtr kPrimReluGrad = std::make_shared("ReluGrad"); const PrimitivePtr kPrimConv2DBackpropInput = std::make_shared("Conv2DBackpropInput"); const PrimitivePtr kPrimConv2DBackpropFilter = std::make_shared("Conv2DBackpropFilter"); @@ -213,6 +216,7 @@ const PrimitivePtr kPrimGetRefOrigin = std::make_shared("get_ref_orig const PrimitivePtr kPrimInsertGradientOf = std::make_shared("InsertGradientOf"); const PrimitivePtr kPrimPrintShapeType = std::make_shared("PrintShapeType"); const PrimitivePtr kPrimSameTypeShape = std::make_shared("SameTypeShape"); +const PrimitivePtr kPrimCheckBprop = std::make_shared("CheckBprop"); const PrimitivePtr kPrimPrint = std::make_shared("Print"); const PrimitivePtr kPrimMakeRef = std::make_shared("make_ref"); diff --git 
a/mindspore/ccsrc/operator/ops.h b/mindspore/ccsrc/operator/ops.h index d84b2e4738..8c63660c3e 100755 --- a/mindspore/ccsrc/operator/ops.h +++ b/mindspore/ccsrc/operator/ops.h @@ -175,9 +175,12 @@ extern const PrimitivePtr kPrimTanhGrad; extern const PrimitivePtr kPrimPooling; extern const PrimitivePtr kPrimPoolingGrad; extern const PrimitivePtr kPrimFusedBatchNorm; +extern const PrimitivePtr kPrimBatchNorm; +extern const PrimitivePtr kPrimBatchNormGrad; extern const PrimitivePtr kPrimConv2D; extern const PrimitivePtr kPrimMaxPool; extern const PrimitivePtr kPrimMaxPoolGrad; +extern const PrimitivePtr kPrimAvgPoolGrad; extern const PrimitivePtr kPrimFusedBatchNormGrad; extern const PrimitivePtr kPrimReluGrad; extern const PrimitivePtr kPrimConv2DBackpropInput; @@ -220,6 +223,7 @@ extern const PrimitivePtr kPrimInsertGradientOf; extern const PrimitivePtr kPrimPrintShapeType; extern const PrimitivePtr kPrimPrint; extern const PrimitivePtr kPrimSameTypeShape; +extern const PrimitivePtr kPrimCheckBprop; extern const PrimitivePtr kPrimDepend; extern const PrimitivePtr kPrimStateSetItem; extern const PrimitivePtr kPrimScalarSummary; diff --git a/mindspore/ccsrc/operator/prim_debug.cc b/mindspore/ccsrc/operator/prim_debug.cc index c8db775320..d73c34bf85 100644 --- a/mindspore/ccsrc/operator/prim_debug.cc +++ b/mindspore/ccsrc/operator/prim_debug.cc @@ -51,7 +51,7 @@ AbstractBasePtr InferImplScalarSummary(const AnalysisEnginePtr &, const Primitiv // Reomve the force check to support batch set summary use 'for' loop auto item_v = descriptions->BuildValue(); if (!item_v->isa()) { - MS_LOG(ERROR) << "First parameter shoule be string"; + MS_EXCEPTION(TypeError) << "Summary first parameter should be string"; } return std::make_shared(kAnyValue, kBool); @@ -75,7 +75,7 @@ AbstractBasePtr InferImplTensorSummary(const AnalysisEnginePtr &, const Primitiv // Reomve the force check to support batch set summary use 'for' loop auto item_v = descriptions->BuildValue(); if 
(!item_v->isa()) { - MS_LOG(WARNING) << "Summary first parameter must be string"; + MS_EXCEPTION(TypeError) << "Summary first parameter should be string"; } return std::make_shared(kAnyValue, std::make_shared()); diff --git a/mindspore/ccsrc/operator/prim_nn.cc b/mindspore/ccsrc/operator/prim_nn.cc index 3591168187..1f9f650ac9 100644 --- a/mindspore/ccsrc/operator/prim_nn.cc +++ b/mindspore/ccsrc/operator/prim_nn.cc @@ -246,7 +246,7 @@ AbstractBasePtr InferImplBiasAddGrad(const AnalysisEnginePtr &, const PrimitiveP // Inputs: at least one tensor(y_backprop) // Outputs: dbias if (args_spec_list.empty()) { - MS_LOG(EXCEPTION) << "" << primitive->name() << " evaluator at least has 1 parameters, while the input size is " + MS_LOG(EXCEPTION) << primitive->name() << " evaluator at least has 1 parameters, while the input size is " << args_spec_list.size() << "."; } @@ -255,8 +255,7 @@ AbstractBasePtr InferImplBiasAddGrad(const AnalysisEnginePtr &, const PrimitiveP MS_EXCEPTION_IF_NULL(shape_y); std::vector y_dims = shape_y->shape(); if (y_dims.size() < 2) { - MS_LOG(EXCEPTION) << "" << primitive->name() << " input y backprop, dim should >= 2, while " << y_dims.size() - << "."; + MS_LOG(EXCEPTION) << primitive->name() << " input y backprop, dim should >= 2, while " << y_dims.size() << "."; } std::vector bias_dims = {y_dims[1]}; ShapePtr ret_shape = std::make_shared(bias_dims); diff --git a/mindspore/ccsrc/operator/prim_statement.cc b/mindspore/ccsrc/operator/prim_statement.cc index 239aed5bde..0b9d491ce6 100644 --- a/mindspore/ccsrc/operator/prim_statement.cc +++ b/mindspore/ccsrc/operator/prim_statement.cc @@ -80,8 +80,7 @@ AbstractBasePtr InferImplDot(const AnalysisEnginePtr &, const PrimitivePtr &prim auto y_shp_value = y_shp->shape(); // Should be matrix which shape size is 2. 
if (x_shp_value.size() != 2 || y_shp_value.size() != 2) { - MS_LOG(EXCEPTION) << "" << op_name - << " evaluator requires input two 2D tensors, while the dimensions of two tensors are " + MS_LOG(EXCEPTION) << op_name << " evaluator requires input two 2D tensors, while the dimensions of two tensors are " << x_shp_value.size() << ", " << y_shp_value.size() << " "; } if (x_shp_value[1] != y_shp_value[0] && x_shp_value[1] != Shape::SHP_ANY && y_shp_value[0] != Shape::SHP_ANY) { diff --git a/mindspore/ccsrc/optimizer/CMakeLists.txt b/mindspore/ccsrc/optimizer/CMakeLists.txt index 48cbeb41dd..197ece8505 100644 --- a/mindspore/ccsrc/optimizer/CMakeLists.txt +++ b/mindspore/ccsrc/optimizer/CMakeLists.txt @@ -1,9 +1,2 @@ -file(GLOB_RECURSE _OPTIMIZER_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "*.cc" - ) - -add_library(_mindspore_optimizer_obj OBJECT ${_OPTIMIZER_ALL_SRC_FILES}) -if(ENABLE_DUMP_PROTO) - file(GLOB_RECURSE _PROTO_SRC_LIST "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") - target_sources(_mindspore_optimizer_obj PRIVATE ${_PROTO_SRC_LIST}) -endif() \ No newline at end of file +file(GLOB_RECURSE _OPTIMIZER_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +add_library(_mindspore_optimizer_obj OBJECT ${_OPTIMIZER_SRC_FILES}) diff --git a/mindspore/ccsrc/optimizer/ad/dfunctor.cc b/mindspore/ccsrc/optimizer/ad/dfunctor.cc index 3e1aa6e555..de368dbdd2 100644 --- a/mindspore/ccsrc/optimizer/ad/dfunctor.cc +++ b/mindspore/ccsrc/optimizer/ad/dfunctor.cc @@ -175,7 +175,7 @@ AdjointPtr DFunctor::MapMorphism(const AnfNodePtr &morph) { UpdateAdjoint(node_adjoint); anfnode_to_adjoin_[morph] = node_adjoint; if (cnode_morph->stop_gradient()) { - MS_LOG(WARNING) << "MapMorphism node " << morph->ToString() << " is stopped."; + MS_LOG(DEBUG) << "MapMorphism node " << morph->ToString() << " is stopped."; return node_adjoint; } @@ -309,14 +309,6 @@ FuncGraphPtr DFunctor::KUserDefined(const FuncGraphPtr &primal) { auto bprop = 
primal->transforms().find("bprop"); if (bprop != primal->transforms().end()) { FuncGraphPtr bprop_graph = bprop->second.func_graph(); - const size_t param_diff = 1; - if (bprop_graph->output()->isa() && - bprop_graph->output()->cast()->size() + param_diff != bprop_graph->parameters().size()) { - // It does not matter with the final tangents, just a tip for debugging - MS_LOG(DEBUG) << "User defined Cell bprop " << primal->ToString() << " in scope " - << primal->output()->scope()->name() - << " output must be a tuple and output number should be the same with inputs."; - } resources_->manager()->AddFuncGraph(bprop_graph); if (bprop_graph->free_variables_nodes().size() != 0 || primal->free_variables_nodes().size() != 0) { diff --git a/mindspore/ccsrc/optimizer/ad/dfunctor.h b/mindspore/ccsrc/optimizer/ad/dfunctor.h index 3059736171..1358cc8f28 100644 --- a/mindspore/ccsrc/optimizer/ad/dfunctor.h +++ b/mindspore/ccsrc/optimizer/ad/dfunctor.h @@ -127,7 +127,7 @@ class KPrim { AnfNodePtr BuildOutput(const FuncGraphPtr &bprop_fg); void TransformArgs(const FuncGraphManagerPtr &mng, const FuncGraphPtr &bprop_fg, const FuncGraphPtr &outer, std::vector *const transf_args); - void AddCheckTypeShapeOp(const FuncGraphPtr &bprop_fg); + void CheckBprop(const FuncGraphPtr &bprop_fg, const string &prim_to_check); Registry bprop_registry_; std::unordered_map bprop_registry_meta_; @@ -137,10 +137,7 @@ template FuncGraphPtr KPrim::BpropToK(const T &primal, const FuncGraphPtr &bprop_fg) { MS_EXCEPTION_IF_NULL(primal); MS_EXCEPTION_IF_NULL(bprop_fg); - - if (IsPrimitiveCNode(bprop_fg->output(), prim::kPrimMakeTuple)) { - AddCheckTypeShapeOp(bprop_fg); - } + CheckBprop(bprop_fg, primal->ToString()); auto debug_info = std::make_shared(); debug_info->set_name(primal->ToString()); diff --git a/mindspore/ccsrc/optimizer/ad/kprim.cc b/mindspore/ccsrc/optimizer/ad/kprim.cc index 2c8ddbfa82..c74670e55d 100644 --- a/mindspore/ccsrc/optimizer/ad/kprim.cc +++ b/mindspore/ccsrc/optimizer/ad/kprim.cc 
@@ -50,9 +50,13 @@ FuncGraphPtr KPrim::GetBprop(const PrimitivePtr &prim) { grad_op_child_scope_prefix + prim->name()); ScopeGuard scope_guard(scope); py::function fn = prim->GetBpropFunction(); + if (fn == nullptr || py::isinstance(fn)) { + MS_LOG(DEBUG) << "Fail to find bprop function for " << prim->name() << "."; + return nullptr; + } FuncGraphPtr func_graph = parse::ParsePythonCode(fn); if (func_graph == nullptr) { - MS_LOG(WARNING) << "Fail to find bprop function for " << prim->name() << "."; + MS_LOG(ERROR) << "Fail to parse bprop function for " << prim->name() << "."; return nullptr; } return func_graph; @@ -153,31 +157,23 @@ void KPrim::TransformArgs(const FuncGraphManagerPtr &mng, const FuncGraphPtr &bp } } -void KPrim::AddCheckTypeShapeOp(const FuncGraphPtr &bprop_fg) { +void KPrim::CheckBprop(const FuncGraphPtr &bprop_fg, const string &prim_to_check) { // bprop_fg has been checked in caller - auto same_type_shape = prim::GetPythonOps("same_type_shape", "mindspore.ops.functional")->cast(); - MS_EXCEPTION_IF_NULL(same_type_shape); - - std::vector bout_input; - bout_input.push_back(NewValueNode(prim::kPrimMakeTuple)); - - auto fg_out = bprop_fg->output(); - MS_EXCEPTION_IF_NULL(fg_out); - auto cnode = fg_out->cast(); - MS_EXCEPTION_IF_NULL(cnode); - - auto &inputs = cnode->inputs(); - auto params = bprop_fg->parameters(); - std::vector sub_input; - for (size_t i = 1; i < inputs.size(); ++i) { - sub_input.clear(); - sub_input.push_back(NewValueNode(same_type_shape)); - sub_input.push_back(inputs[i]); - sub_input.push_back(params[i - 1]); - bout_input.push_back(bprop_fg->NewCNode(sub_input)); - } - AnfNodePtr cbout = bprop_fg->NewCNode(bout_input); - bprop_fg->set_output(cbout); + auto check_bprop = prim::GetPythonOps("check_bprop", "mindspore.ops.functional")->cast(); + MS_EXCEPTION_IF_NULL(check_bprop); + check_bprop->set_attr("prim_to_check", std::make_shared(prim_to_check)); + + std::vector inputs; + 
inputs.emplace_back(NewValueNode(prim::kPrimMakeTuple)); + inputs.insert(inputs.begin() + 1, bprop_fg->parameters().begin(), bprop_fg->parameters().end() - 2); + AnfNodePtr params = bprop_fg->NewCNode(inputs); + + inputs.clear(); + inputs.push_back(NewValueNode(check_bprop)); + inputs.push_back(bprop_fg->output()); + inputs.push_back(params); + AnfNodePtr bprop_out = bprop_fg->NewCNode(inputs); + bprop_fg->set_output(bprop_out); } FuncGraphPtr KPrim::KUserDefinedCellBprop(const FuncGraphPtr bprop_fg) { diff --git a/mindspore/ccsrc/optimizer/cse.cc b/mindspore/ccsrc/optimizer/cse.cc index 82050f6108..42ebf5a658 100644 --- a/mindspore/ccsrc/optimizer/cse.cc +++ b/mindspore/ccsrc/optimizer/cse.cc @@ -40,14 +40,14 @@ BasePtr AbsOf(const AnfNodePtr &node) { return node_abs; } -namespace { -void BuildOrderGroup(const FuncGraphManagerPtr manager, std::vector *const order_group, - std::unordered_map> *groups) { - MS_EXCEPTION_IF_NULL(order_group); - - std::unordered_map hashes; +bool CSE::BuildOrderGroupAndDoReplace(const FuncGraphManagerPtr manager) const { + bool changed = false; for (FuncGraphPtr fg : manager->func_graphs()) { MS_EXCEPTION_IF_NULL(fg); + std::vector order_group; + std::unordered_map> groups; + std::unordered_map hashes; + std::vector toposet = TopoSort(fg->get_return()); for (auto node : toposet) { MS_EXCEPTION_IF_NULL(node); @@ -75,17 +75,20 @@ void BuildOrderGroup(const FuncGraphManagerPtr manager, std::vector } hashes[node] = h; - if (groups->find(h) == groups->end()) { + if (groups.find(h) == groups.end()) { std::vector innervec({node}); - (*groups)[h] = innervec; - order_group->emplace_back(h); + groups[h] = innervec; + order_group.emplace_back(h); } else { - (*groups)[h].push_back(node); + groups[h].push_back(node); } } + + changed = DoReplace(manager, order_group, &groups) || changed; } + + return changed; } -} // namespace bool CSE::CheckReplace(const AnfNodePtr &main, const AnfNodePtr &node) const { MS_EXCEPTION_IF_NULL(main); @@ -177,10 +180,7 
@@ bool CSE::Cse(const FuncGraphPtr root, const FuncGraphManagerPtr manager) const MS_EXCEPTION_IF_NULL(manager); manager->AddFuncGraph(root); - std::unordered_map> groups; - std::vector order_group; - BuildOrderGroup(manager, &order_group, &groups); - return DoReplace(manager, order_group, &groups); + return BuildOrderGroupAndDoReplace(manager); } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/cse.h b/mindspore/ccsrc/optimizer/cse.h index 823b24edb7..544e6cb6a3 100644 --- a/mindspore/ccsrc/optimizer/cse.h +++ b/mindspore/ccsrc/optimizer/cse.h @@ -46,6 +46,7 @@ class CSE { bool Cse(const FuncGraphPtr root, const FuncGraphManagerPtr manager) const; private: + bool BuildOrderGroupAndDoReplace(const FuncGraphManagerPtr manager) const; bool DoReplace(const FuncGraphManagerPtr manager, const std::vector &order_group, std::unordered_map> *groups) const; bool report_changes_; diff --git a/mindspore/ccsrc/optimizer/irpass.cc b/mindspore/ccsrc/optimizer/irpass.cc index be9c8f787a..2bd013cb08 100644 --- a/mindspore/ccsrc/optimizer/irpass.cc +++ b/mindspore/ccsrc/optimizer/irpass.cc @@ -67,6 +67,7 @@ OptimizeIRPassLib::OptimizeIRPassLib() { {prim::kPrimReduceMean, prim::kPrimReduceAll, prim::kPrimReduceSum, prim::kPrimReduceMax, prim::kPrimReduceMin}); partial_eliminate_ = MakeSubstitution(PartialEliminater(), "partial_eliminate", IsCNodeDup); same_eliminate_ = MakeSubstitution(SameEliminater(), "same_eliminate", prim::kPrimSameTypeShape); + check_bprop_eliminate_ = MakeSubstitution(CheckBpropEliminater(), "check_bprop_eliminate", prim::kPrimCheckBprop); reset_defer_inline_ = MakeSubstitution(ResetDeferInline(), "reset_defer_inline", IsValueNode); // Env Item Eliminate @@ -133,7 +134,6 @@ ResolveIRPassLib::ResolveIRPassLib() { InferenceOptPrepareLib::InferenceOptPrepareLib() { grad_var_prepare_ = MakeSubstitution(GradVarPrepare(), "grad_var_prepare", IsCNode); } - } // namespace irpass } // namespace opt } // namespace mindspore diff --git 
a/mindspore/ccsrc/optimizer/irpass.h b/mindspore/ccsrc/optimizer/irpass.h index 00274bdcc8..02bfee65d6 100644 --- a/mindspore/ccsrc/optimizer/irpass.h +++ b/mindspore/ccsrc/optimizer/irpass.h @@ -45,6 +45,7 @@ class OptimizeIRPassLib { SubstitutionPtr reduce_eliminate_; SubstitutionPtr partial_eliminate_; SubstitutionPtr same_eliminate_; + SubstitutionPtr check_bprop_eliminate_; SubstitutionPtr reset_defer_inline_; // Env Item Eliminate @@ -159,7 +160,6 @@ inline bool IsCNodeDup(const AnfNodePtr &node) { } return false; } - } // namespace irpass } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc b/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc index 32a42bc16b..317d67e792 100644 --- a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc +++ b/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc @@ -31,7 +31,6 @@ namespace mindspore { namespace opt { namespace irpass { - static AnfNodePtr GenerateUnpackGraphNode(std::vector inputs_y, FuncGraphPtr func_graph, AnfNodePtr func_node, bool is_unpack, bool sens_param) { MS_EXCEPTION_IF_NULL(func_graph); diff --git a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h b/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h index 599d1dca17..9713017d12 100644 --- a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h +++ b/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h @@ -33,7 +33,6 @@ namespace mindspore { namespace opt { namespace irpass { - // {{GradOperation, g, w}, Ys} // {UnPackCall, {GradOperation, g, w}, Ys} class GradVarPrepare : public AnfVisitor { diff --git a/mindspore/ccsrc/optimizer/irpass/inline.h b/mindspore/ccsrc/optimizer/irpass/inline.h index a7b6b975bb..8ebd0f6eb7 100644 --- a/mindspore/ccsrc/optimizer/irpass/inline.h +++ b/mindspore/ccsrc/optimizer/irpass/inline.h @@ -81,10 +81,10 @@ bool IsTrivial(const FuncGraphPtr &fg, AnfNodePtr) { } bool IsUniqueUse(const FuncGraphPtr &fg, AnfNodePtr) { - auto &users = fg->func_graph_users(); + 
auto &cnodes = fg->func_graph_cnodes_index(); int n_use = - std::accumulate(users.begin(), users.end(), 0, - [](int sum, const std::pair &item) { return sum + item.second; }); + std::accumulate(cnodes.begin(), cnodes.end(), 0, + [](int sum, const std::pair &item) { return sum + item.second; }); return n_use == 1; } diff --git a/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h b/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h index 2dd27a89c3..e06ccd862b 100644 --- a/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h +++ b/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h @@ -109,6 +109,25 @@ class SameEliminater : public AnfVisitor { AnfNodePtr x_{nullptr}; }; +// {prim::kPrimCheckBprop, X, Y} -> X +class CheckBpropEliminater : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + x_ = nullptr; + AnfVisitor::Match(prim::kPrimCheckBprop, {IsNode, IsNode})(node); + return x_; + } + + void Visit(const AnfNodePtr &node) override { + if (x_ == nullptr) { + x_ = node; + } + } + + private: + AnfNodePtr x_{nullptr}; +}; + // Reset defer_inline flag class ResetDeferInline : public AnfVisitor { public: diff --git a/mindspore/ccsrc/optimizer/optimizer.h b/mindspore/ccsrc/optimizer/optimizer.h index f67466efba..1a0ddbc65f 100644 --- a/mindspore/ccsrc/optimizer/optimizer.h +++ b/mindspore/ccsrc/optimizer/optimizer.h @@ -17,7 +17,9 @@ #ifndef MINDSPORE_CCSRC_OPTIMIZER_OPTIMIZER_H_ #define MINDSPORE_CCSRC_OPTIMIZER_OPTIMIZER_H_ +#include #include +#include #include #include #include @@ -129,29 +131,38 @@ class Optimizer : public std::enable_shared_from_this { return optimizer; } - FuncGraphPtr step(FuncGraphPtr func_graph, const abstract::AbstractBasePtrList &args_spec, bool use_profile = true) { + FuncGraphPtr step(FuncGraphPtr func_graph, bool use_profile = true) { // Optimizer step counter; int counter = 1; bool changes = true; while (changes) { changes = false; - auto run_runc = [&counter, 
&func_graph, &args_spec, &changes, use_profile, this]() { + auto run_runc = [&counter, &func_graph, &changes, use_profile, this]() { for (size_t i = 0; i < passes_.size(); ++i) { const OptPass &opt = passes_[i]; - auto opt_func = [&func_graph, &args_spec, &changes, &opt, this]() { + auto opt_func = [&func_graph, &changes, &opt, this]() { if (opt.is_renormalize()) { auto resource_ptr = std::dynamic_pointer_cast(resource_); if (resource_ptr != nullptr) { + // StepParallel may replace the AbstractValue of the parameters of func_graph, + // So generate the args_spec from parameters. + abstract::AbstractBasePtrList maybe_new_args_spec; if (is_watch_renormalize_) { if (untyped_nodes_.size() > 0) { - func_graph = pipeline::Renormalize(resource_ptr, func_graph, args_spec); + std::transform(func_graph->parameters().begin(), func_graph->parameters().end(), + std::back_inserter(maybe_new_args_spec), + [](AnfNodePtr param) -> AbstractBasePtr { return param->abstract(); }); + func_graph = pipeline::Renormalize(resource_ptr, func_graph, maybe_new_args_spec); clear_untyped_nodes(); } else { MS_LOG(INFO) << "Optimizer::step: Skipping Renormalize because untyped_nodes_ is empty."; } } else { - func_graph = pipeline::Renormalize(resource_ptr, func_graph, args_spec); + std::transform(func_graph->parameters().begin(), func_graph->parameters().end(), + std::back_inserter(maybe_new_args_spec), + [](AnfNodePtr param) -> AbstractBasePtr { return param->abstract(); }); + func_graph = pipeline::Renormalize(resource_ptr, func_graph, maybe_new_args_spec); } } } else if (opt(func_graph, shared_from_this())) { @@ -160,7 +171,7 @@ class Optimizer : public std::enable_shared_from_this { }; use_profile ? 
(WITH(MsProfile::GetProfile()->Step(pass_names_[i])) opt_func) : opt_func(); #ifdef DEBUG - MS_LOG(DEBUG) << "" << name_ << " round " << counter << " OptPass " << pass_names_[i] << " end."; + MS_LOG(DEBUG) << name_ << " round " << counter << " OptPass " << pass_names_[i] << " end."; auto fg_name = name_ + "_r" + std::to_string(counter) + "_" + std::to_string(i) + "_" + pass_names_[i]; func_graph->DumpFuncGraph(fg_name); DumpIR(fg_name + ".ir", func_graph); @@ -174,13 +185,6 @@ class Optimizer : public std::enable_shared_from_this { break; } } - - auto keep_root = [&func_graph, this]() { - std::vector func_graphs; - func_graphs.push_back(func_graph); - resource_->manager()->KeepRoots(func_graphs); - }; - use_profile ? WITH(MsProfile::GetProfile()->Step("keep_roots")) keep_root : keep_root(); return func_graph; } diff --git a/mindspore/ccsrc/parallel/CMakeLists.txt b/mindspore/ccsrc/parallel/CMakeLists.txt new file mode 100644 index 0000000000..0280266e40 --- /dev/null +++ b/mindspore/ccsrc/parallel/CMakeLists.txt @@ -0,0 +1,6 @@ +file(GLOB_RECURSE _PARALLEL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +if (ENABLE_DUMP_PROTO) + list(REMOVE_ITEM _PARALLEL_SRC_FILES "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") +endif () + +add_library(_mindspore_parallel_obj OBJECT ${_PARALLEL_SRC_FILES}) diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc b/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc index 8ab0895216..23ec9da87b 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc +++ b/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc @@ -31,10 +31,11 @@ bool StepAllreduceFusion(const FuncGraphPtr &root, const opt::OptimizerPtr &opti MS_EXCEPTION_IF_NULL(optimizer); MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); std::string parallel_mode = ParallelContext::GetInstance()->parallel_mode(); + bool enable_all_reduce_fusion = 
ParallelContext::GetInstance()->enable_all_reduce_fusion(); // assume no change to graph bool changes = false; // control whether use model_parallel mode - if (((parallel_mode != AUTO_PARALLEL) && (parallel_mode != SEMI_AUTO_PARALLEL)) || + if (((parallel_mode != AUTO_PARALLEL) && (parallel_mode != SEMI_AUTO_PARALLEL)) || (!enable_all_reduce_fusion) || (root->has_flag(ALLREDUCE_FUSION_RUN_ONCE_ONLY))) { return changes; } diff --git a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h index 530f67ba45..31de9f4456 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h +++ b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h @@ -44,6 +44,7 @@ namespace parallel { #define DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE 16 #define DEFAULT_FULLY_USE_DEVICES true #define DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW false +#define DEFAULT_IS_MULTI_SUBGRAPHS false class CostGraph; using CostGraphPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc index 4591753efe..e5ba59425c 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc @@ -296,10 +296,10 @@ double CostConvolution::GetMinCostIn(const Graph::NodeType &node) { static_cast(op.arguments[1].tensor_shape.shape_n * op.arguments[1].tensor_str.str_n) * static_cast(op.arguments[1].tensor_shape.shape_w * op.arguments[1].tensor_str.str_w) * static_cast(op.arguments[1].tensor_shape.shape_c * op.arguments[1].tensor_str.str_c); - int tensor_out = static_cast(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_shape.shape_w) * - static_cast(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_shape.shape_c) * - static_cast(node.tensor_parm.tensor_str.str_h * node.tensor_parm.tensor_str.str_w) * - static_cast(node.tensor_parm.tensor_str.str_n * 
node.tensor_parm.tensor_str.str_c); + int tensor_out = static_cast(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h) * + static_cast(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n) * + static_cast(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w) * + static_cast(node.tensor_parm.tensor_shape.shape_c * node.tensor_parm.tensor_str.str_c); std::vector cost_in; cost_in.push_back(StrDimB(tensor_filter)); @@ -446,51 +446,8 @@ StrategyRec CostPooling::ChoseStr(const std::vector &cost_op, StrategyRe return str; } -// Get optimal strategy for Add -StrategyRec CostAdd::GetOptimalStr(const Graph::NodeType &node, - const std::vector> &node_name_to_strategy, - const Graph &graph) { - int tensor_n = static_cast(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n); - int tensor_c = static_cast(node.tensor_parm.tensor_shape.shape_c * node.tensor_parm.tensor_str.str_c); - int tensor_h = static_cast(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h); - int tensor_w = static_cast(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w); - - std::vector cost_op; - std::vector> mode; - - if (tensor_n < 2) { - cost_op.push_back(DOUBLE_MAX); - } else { - cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, - mode = {{0.5, 1, 1, 1}, {0.5, 1, 1, 1}, {0.5, 1, 1, 1}}, graph)); - } - - if (tensor_c < 2) { - cost_op.push_back(DOUBLE_MAX); - } else { - cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, - mode = {{1, 0.5, 1, 1}, {1, 0.5, 1, 1}, {1, 0.5, 1, 1}}, graph)); - } - - if (tensor_h < 2) { - cost_op.push_back(DOUBLE_MAX); - } else { - cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, - mode = {{1, 1, 0.5, 1}, {1, 1, 0.5, 1}, {1, 1, 0.5, 1}}, graph)); - } - - if (tensor_w < 2) { - cost_op.push_back(DOUBLE_MAX); - } else { - cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, - mode = 
{{1, 1, 1, 0.5}, {1, 1, 1, 0.5}, {1, 1, 1, 0.5}}, graph)); - } - - return ChoseStr(cost_op, node.apply.str); -} - // Chose strategy for Add -StrategyRec CostAdd::ChoseStr(const std::vector &cost_op, StrategyRec str) { +StrategyRec CostTensorAdd::ChoseStr(const std::vector &cost_op, StrategyRec str) { uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin(); if (cost_op[min_position] > (DOUBLE_MAX - 0.1)) { return str; @@ -540,49 +497,6 @@ StrategyRec CostReshape::GetOptimalStr(const Graph::NodeType &node) const { retu StrategyRec CostReshape::ChoseStr(StrategyRec str) const { return str; } -// Get optimal strategy for Biasadd -StrategyRec CostBiasAdd::GetOptimalStr(const Graph::NodeType &node, - const std::vector> &node_name_to_strategy, - const Graph &graph) { - int tensor_n = static_cast(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n); - int tensor_c = static_cast(node.tensor_parm.tensor_shape.shape_c * node.tensor_parm.tensor_str.str_c); - int tensor_h = static_cast(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h); - int tensor_w = static_cast(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w); - - std::vector cost_op; - std::vector> mode; - - if (tensor_n < 2) { - cost_op.push_back(DOUBLE_MAX); - } else { - cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, - mode = {{0.5, 1, 1, 1}, {0.5, 1, 1, 1}, {0.5, 1, 1, 1}}, graph)); - } - - if (tensor_c < 2) { - cost_op.push_back(DOUBLE_MAX); - } else { - cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, - mode = {{1, 0.5, 1, 1}, {1, 0.5, 1, 1}, {1, 0.5, 1, 1}}, graph)); - } - - if (tensor_h < 2) { - cost_op.push_back(DOUBLE_MAX); - } else { - cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, - mode = {{1, 1, 0.5, 1}, {1, 1, 0.5, 1}, {1, 1, 0.5, 1}}, graph)); - } - - if (tensor_w < 2) { - cost_op.push_back(DOUBLE_MAX); - } else { - cost_op.push_back(cost_in_ + 
CostRedis(node, node_name_to_strategy, - mode = {{1, 1, 1, 0.5}, {1, 1, 1, 0.5}, {1, 1, 1, 0.5}}, graph)); - } - - return ChoseStr(cost_op, node.apply.str); -} - // Chose strategy for BiasAdd StrategyRec CostBiasAdd::ChoseStr(const std::vector &cost_op, StrategyRec str) { uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin(); @@ -629,7 +543,7 @@ StrategyRec CostBiasAdd::ChoseStr(const std::vector &cost_op, StrategyRe return str; } -// Get optimal strategy for Common OPs: ReLU and Softmax +// Get optimal strategy for Common OPs StrategyRec CostCommon::GetOptimalStr(const Graph::NodeType &node, const std::vector> &node_name_to_strategy, const Graph &graph) { @@ -714,6 +628,22 @@ StrategyRec CostCommon::ChoseStr(const std::vector &cost_op, StrategyRec return str; } +// Get weight for BN +double CostBatchNorm::GetMinCostIn(const OperatorRec &op) { + int tensor = static_cast(op.arguments[0].tensor_shape.shape_h * op.arguments[0].tensor_str.str_h) * + static_cast(op.arguments[0].tensor_shape.shape_n * op.arguments[0].tensor_str.str_n) * + static_cast(op.arguments[0].tensor_shape.shape_w * op.arguments[0].tensor_str.str_w) * + static_cast(op.arguments[0].tensor_shape.shape_c * op.arguments[0].tensor_str.str_c); + + std::vector cost_in; + cost_in.push_back(StrDimB(tensor) * 1.2); + cost_in.push_back(DOUBLE_MAX); + cost_in.push_back(StrDimH(tensor) * 1.2); + cost_in.push_back(StrDimW(tensor) * 1.2); + + return *min_element(cost_in.begin(), cost_in.end()); +} + // Get optimal strategy for BN StrategyRec CostBatchNorm::GetOptimalStr(const Graph::NodeType &node, const std::vector> &node_name_to_strategy, diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h index af37b9178e..315c081d67 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h @@ -157,21 +157,6 @@ class CostPooling { double 
cost_in_ = 0; }; // class CostPooling is used to compute the cost of Pooling operator. -// class CostAdd is used to compute the cost of Add operator. -class CostAdd { - public: - StrategyRec GetOptimalStr(const Graph::NodeType &node, - const std::vector> &node_name_to_strategy, - const Graph &graph); - - double GetMinCostIn() const { return cost_in_; } - - private: - StrategyRec ChoseStr(const std::vector &cost_op, StrategyRec str); - - double cost_in_ = 0; -}; // class CostAdd is used to compute the cost of Add operator. - // class CostReshape is used to compute the cost of Reshape operator. class CostReshape { public: @@ -185,35 +170,41 @@ class CostReshape { double cost_in_ = 0; }; // class CostReshape is used to compute the cost of Reshape operator. -// class CostBiasAdd is used to compute the cost of BiasAdd operator. -class CostBiasAdd { +// class CostCommon is used to compute the cost of an element-wise operator +class CostCommon { public: - StrategyRec GetOptimalStr(const Graph::NodeType &node, - const std::vector> &node_name_to_strategy, - const Graph &graph); + virtual StrategyRec GetOptimalStr(const Graph::NodeType &node, + const std::vector> &node_name_to_strategy, + const Graph &graph); - double GetMinCostIn() const { return cost_in_; } + virtual double GetMinCostIn() const { return cost_in_; } - private: - StrategyRec ChoseStr(const std::vector &cost_op, StrategyRec str); + protected: + virtual StrategyRec ChoseStr(const std::vector &cost_op, StrategyRec str); double cost_in_ = 0; -}; // class CostBiasAdd is used to compute the cost of BiasAdd operator. - -// class CostCommon is used to compute the cost of the element independent operator. 
-class CostCommon { - public: - StrategyRec GetOptimalStr(const Graph::NodeType &node, - const std::vector> &node_name_to_strategy, - const Graph &graph); +}; // class CostCommon is used to compute the cost of an element-wise operator - double GetMinCostIn() const { return cost_in_; } - - private: +// class CostBiasAdd is used to compute the cost of the addition between a tensor and a bias +class CostBiasAdd : public CostCommon { StrategyRec ChoseStr(const std::vector &cost_op, StrategyRec str); - - double cost_in_ = 0; -}; // class CostCommon is used to compute the cost of Softmax & || Activation operator. +}; +// class CostAdd is used to compute the cost of Add operator. +class CostTensorAdd : public CostCommon { + StrategyRec ChoseStr(const std::vector &cost_op, StrategyRec str); +}; + +// all the following operation are element-wise and have the same cost +class CostOneHot : public CostCommon {}; +class CostReLU : public CostCommon {}; +class CostLog : public CostCommon {}; +class CostExp : public CostCommon {}; +class CostAdd : public CostCommon {}; +class CostSub : public CostCommon {}; +class CostMul : public CostCommon {}; +class CostDiv : public CostCommon {}; +class CostSqueeze : public CostCommon {}; +class CostCast : public CostCommon {}; // class BatchNorm is used to compute the cost of BatchNorm operator. 
class CostBatchNorm { @@ -222,7 +213,7 @@ class CostBatchNorm { const std::vector> &node_name_to_strategy, const Graph &graph); - double GetMinCostIn() const { return 0.0; } + double GetMinCostIn(const OperatorRec &op); private: double StrDimB(int32_t Tensor) { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc index b2c34127a1..42b3bfc72e 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc @@ -38,6 +38,12 @@ void GenerateStrategy(std::shared_ptr graph, bool mask_special_ops, for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) { stra.push_back(PrepareStrategy(graph, ops, iter_ops, iter_op_inputs)); } + // OneHot's scalar parameters were removed by entire_costgraph, we had to complete them. + if (ops[iter_ops]->type() == ONEHOT) { + std::vector s_Onehot = {}; + stra.push_back(s_Onehot); + stra.push_back(s_Onehot); + } StrategyPtr sp = std::make_shared(0, stra); ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost()); } @@ -126,8 +132,9 @@ std::vector MakeOriginalStrategy(const std::vector= ops.size()) { MS_LOG(EXCEPTION) << "Failure: Operators' elements out of range."; } - if (iter_op_inputs >= ops[iter_ops]->strategy()->GetInputDim().size()) + if (iter_op_inputs >= ops[iter_ops]->strategy()->GetInputDim().size()) { MS_LOG(EXCEPTION) << "Failure: Strategy's InputDim out of range."; + } size_t input_size = ops[iter_ops]->strategy()->GetInputDim()[iter_op_inputs].size(); for (size_t dim = 0; dim < input_size; dim++) { s.push_back(1); @@ -155,8 +162,9 @@ std::vector MakeDataParallelStrategy(const std::vectorstrategy(); - if (iter_op_inputs >= origin_strategy->GetInputDim().size()) + if (iter_op_inputs >= origin_strategy->GetInputDim().size()) { MS_LOG(EXCEPTION) << 
"Failure: Strategy's InputDim out of range."; + } size_t input_size = origin_strategy->GetInputDim()[iter_op_inputs].size(); for (size_t dim = 0; dim < input_size; dim++) { if (dim == 0 && input_size == 4) { @@ -192,21 +200,22 @@ std::vector PrepareStrategy(const std::shared_ptr &graph, return MakeOriginalStrategy(ops, iter_ops, iter_op_inputs); } else if (type == RELU) { return MakeRecSearchStrategy(graph, iter_ops, iter_op_inputs); - } else if (type == BATCH_NORM || (type == FUSE_BATCH_NORM)) { + } else if ((type == BATCH_NORM) || (type == FUSE_BATCH_NORM)) { return PrepareBN(graph, iter_ops, iter_op_inputs); - } else if (type == SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) { + } else if (type == SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) { return PrepareSparse(iter_op_inputs); } else { return MakeDataParallelStrategy(ops, iter_ops, iter_op_inputs); } } +// use to respect strategy checks of auto parallel void MaskSpecialOps(std::shared_ptr graph) { size_t iter_nodes = graph->nodes.size(); for (size_t i = 0; i < iter_nodes; i++) { Graph::NodeType &node = graph->nodes[i]; - if (node.apply.op_type == 1) { // For Convolution + if (node.apply.op_type == kRecConvolution) { // For convolution // cover input tensor strategy node.apply.arguments[0].tensor_str.str_n = 1.0 / static_cast(g_device_manager->DeviceNum()); node.apply.arguments[0].tensor_str.str_c = 1; @@ -217,19 +226,6 @@ void MaskSpecialOps(std::shared_ptr graph) { node.apply.arguments[1].tensor_str.str_c = 1; node.apply.arguments[1].tensor_str.str_h = 1; node.apply.arguments[1].tensor_str.str_w = 1; - } else if (node.apply.op_type == 8) { // For BN - node.apply.arguments[0].tensor_str.str_n = 1.0 / static_cast(g_device_manager->DeviceNum()); - node.apply.arguments[0].tensor_str.str_c = 1; - node.apply.arguments[0].tensor_str.str_h = 1; - node.apply.arguments[0].tensor_str.str_w = 1; - // cover 1-d argument blobs - node.apply.arguments[1].tensor_str.str_n = 1; - node.apply.arguments[2].tensor_str.str_c = 1; - 
node.apply.arguments[3].tensor_str.str_h = 1; - node.apply.arguments[4].tensor_str.str_w = 1; - } else if (node.apply.op_type == 4 || node.apply.op_type == 9) { // For SparseSoftmaxCrossEntropyWithLogits - node.tensor_parm.tensor_str.str_h = 1.0 / static_cast(g_device_manager->DeviceNum()); - node.tensor_parm.tensor_str.str_w = 1; } } } diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h index 6ab2782cb2..ae5ccabaf5 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h @@ -27,17 +27,26 @@ namespace mindspore { namespace parallel { enum OperatorType { + kRecUnkownType, kRecMatMul, kRecConvolution, kRecPooling, - kRecAdd, - kRecSoftmax, - kRecReshape, - kRecBiasAdd, + kRecTensorAdd, kRecReLU, kRecBatchNorm, + kRecReshape, + kRecBiasAdd, + kRecSoftmax, kRecSparseSoftmaxCrossEntropyWithLogits, - kRecUnkownType + kRecOneHot, + kRecLog, + kRecExp, + kRecAdd, + kRecSub, + kRecMul, + kRecDiv, + kRecSqueeze, + kRecCast }; enum InfoType { kApplication, kConstant }; diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc index 6b438cb670..ada22fef9a 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc @@ -1,187 +1,165 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "parallel/auto_parallel/rec_core/rec_parse_graph.h" - -#include -#include -#include -#include - -#include "ir/value.h" -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/auto_parallel/rec_core/rec_tensor.h" -#include "parallel/ops_info/operator_info.h" - -namespace mindspore { -namespace parallel { -const TensorParam MakeTensor(int n, int c, int h, int w) { - TensorParam new_tensor; - new_tensor.tensor_type = kFloat32; - new_tensor.tensor_shape.shape_n = n; - new_tensor.tensor_shape.shape_c = c; - new_tensor.tensor_shape.shape_h = h; - new_tensor.tensor_shape.shape_w = w; - const TensorParam &tensor = new_tensor; - return tensor; -} - -Graph::NodeType MakeNewOperator(std::vector> ops, size_t iter_ops) { - Graph::NodeType NewOp; - NewOp.name = ops[iter_ops]->name(); - NewOp.info = InfoType::kApplication; - - auto op_type = ops[iter_ops]->type(); - auto idx = DictOpType.find(op_type); - if (idx == DictOpType.end()) { - NewOp.apply.op_type = OperatorType::kRecUnkownType; - MS_LOG(INFO) << "Unknown operator type."; - } else { - NewOp.apply.op_type = DictOpType.at(op_type); - } - - if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 4) { - NewOp.tensor_parm = MakeTensor( - ops[iter_ops]->outputs_tensor_info()[0].shape()[0], ops[iter_ops]->outputs_tensor_info()[0].shape()[1], - ops[iter_ops]->outputs_tensor_info()[0].shape()[2], ops[iter_ops]->outputs_tensor_info()[0].shape()[3]); - } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 2) { - NewOp.tensor_parm = Fill2DTensor(ops, 
iter_ops, NewOp); - } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 1) { - NewOp.tensor_parm = MakeTensor(1, 1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0]); - } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 0) { - NewOp.tensor_parm = MakeTensor(1, 1, 1, 1); - } else { - MS_LOG(ERROR) << "Tensor's shape is unknown."; - } - - NewOp.apply = CompleteOperatorInputs(ops, iter_ops, NewOp); - return NewOp; -} - -TensorParam Fill2DTensor(const std::vector> &ops, const size_t iter_ops, - Graph::NodeType NewTensor) { - if (NewTensor.apply.op_type == OperatorType::kRecMatMul) { - auto attrs = ops[iter_ops]->attrs(); - bool transpose_a = attrs[TRANSPOSE_A]->cast()->value(); - bool transpose_b = attrs[TRANSPOSE_B]->cast()->value(); - if (transpose_a) { - NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[0].shape()[1], - ops[iter_ops]->inputs_tensor_info()[0].shape()[0]); - } else if (transpose_b) { - NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[0].shape()[1], - ops[iter_ops]->inputs_tensor_info()[0].shape()[0]); - } else { - NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[0].shape()[0], - ops[iter_ops]->inputs_tensor_info()[0].shape()[1]); - } - } else { - NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[0].shape()[0], - ops[iter_ops]->inputs_tensor_info()[0].shape()[1]); - } - return NewTensor.tensor_parm; -} - -OperatorRec CompleteOperatorInputs(const std::vector> &ops, const size_t iter_ops, - Graph::NodeType NewTensor) { - for (size_t iter_input_tensors = 0; iter_input_tensors < ops[iter_ops]->inputs_tensor_info().size(); - iter_input_tensors++) { - if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 4) { - NewTensor.apply.arguments[iter_input_tensors] = - MakeTensor(ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0], - 
ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1], - ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[2], - ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[3]); - } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 2) { - NewTensor.apply.arguments[iter_input_tensors] = Complete2DInputs(ops, iter_ops, iter_input_tensors, NewTensor); - } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 1) { - NewTensor.apply.arguments[iter_input_tensors] = - MakeTensor(1, 1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]); - } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 0) { - NewTensor.apply.arguments[iter_input_tensors] = MakeTensor(1, 1, 1, 1); - } else { - MS_LOG(ERROR) << "Tensor's shape is unknown."; - } - } - return NewTensor.apply; -} - -TensorParam Complete2DInputs(const std::vector> &ops, const size_t iter_ops, - const size_t iter_input_tensors, Graph::NodeType NewTensor) { - if (NewTensor.apply.op_type == OperatorType::kRecMatMul) { - auto attrs = ops[iter_ops]->attrs(); - bool transpose_a = attrs[TRANSPOSE_A]->cast()->value(); - bool transpose_b = attrs[TRANSPOSE_B]->cast()->value(); - if (transpose_a && (iter_input_tensors == 0)) { - NewTensor.apply.arguments[iter_input_tensors] = - MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1], - ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]); - } else if (transpose_b && (iter_input_tensors == 1)) { - NewTensor.apply.arguments[iter_input_tensors] = - MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1], - ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]); - } else { - NewTensor.apply.arguments[iter_input_tensors] = - MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0], - 
ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1]); - } - } else { - NewTensor.apply.arguments[iter_input_tensors] = - MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0], - ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1]); - } - return NewTensor.apply.arguments[iter_input_tensors]; -} - -std::shared_ptr ParseGraph(const std::vector> &ops, - const std::vector> &input_tensor_names) { - std::shared_ptr graph(new Graph); - if (ops.size() > SIZE_MAX / 2) { - MS_LOG(EXCEPTION) << "Total number of operators is bigger than " << SIZE_MAX / 2; - } - - for (size_t iter_ops = 0; iter_ops < ops.size(); iter_ops++) { - Graph::NodeType NewOp = MakeNewOperator(ops, iter_ops); - graph->nodes.push_back(NewOp); - } - MakeEdge(input_tensor_names, graph); - - return graph; -} - -void MakeEdge(const std::vector> &input_tensor_names, std::shared_ptr graph) { - for (size_t iter_i = 0; iter_i < input_tensor_names.size(); iter_i++) { - for (size_t iter_j = 1; iter_j < input_tensor_names[iter_i].size(); iter_j++) { - size_t head_node_index = GetIndexInInputTensorNames(input_tensor_names, input_tensor_names[iter_i][iter_j]); - if (head_node_index < SIZE_MAX / 2 && head_node_index != iter_i) { - graph->nodes[iter_i].node_in.push_back(head_node_index); - graph->nodes[head_node_index].node_out.push_back(iter_i); - } - } - } -} - -size_t GetIndexInInputTensorNames(const std::vector> &input_tensor_name, - const std::string &input_name) { - for (size_t index = 0; index < input_tensor_name.size(); index++) { - if (input_tensor_name[index][0] == input_name) { - return index; - } - } - MS_LOG(INFO) << "Get index failed, using SIZE_MAX insted"; - return SIZE_MAX; -} -} // namespace parallel -} // namespace mindspore +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "parallel/auto_parallel/rec_core/rec_parse_graph.h" + +#include +#include +#include +#include + +#include "ir/value.h" +#include "parallel/auto_parallel/rec_core/rec_graph.h" +#include "parallel/auto_parallel/rec_core/rec_tensor.h" +#include "parallel/ops_info/operator_info.h" + +namespace mindspore { +namespace parallel { +const TensorParam MakeTensor(int n, int c, int h, int w) { + TensorParam new_tensor; + new_tensor.tensor_type = kFloat32; + new_tensor.tensor_shape.shape_n = n; + new_tensor.tensor_shape.shape_c = c; + new_tensor.tensor_shape.shape_h = h; + new_tensor.tensor_shape.shape_w = w; + const TensorParam &tensor = new_tensor; + return tensor; +} + +Graph::NodeType MakeNewOperator(std::vector> ops, size_t iter_ops) { + Graph::NodeType NewOp; + NewOp.name = ops[iter_ops]->name(); + NewOp.info = InfoType::kApplication; + + auto op_type = ops[iter_ops]->type(); + auto idx = DictOpType.find(op_type); + if (idx == DictOpType.end()) { + NewOp.apply.op_type = OperatorType::kRecUnkownType; + MS_LOG(INFO) << "Unknown operator type."; + } else { + NewOp.apply.op_type = DictOpType.at(op_type); + } + + if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 4) { + NewOp.tensor_parm = MakeTensor( + ops[iter_ops]->outputs_tensor_info()[0].shape()[0], ops[iter_ops]->outputs_tensor_info()[0].shape()[1], + ops[iter_ops]->outputs_tensor_info()[0].shape()[2], ops[iter_ops]->outputs_tensor_info()[0].shape()[3]); + } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 2) { + NewOp.tensor_parm = MakeTensor(1, 1, 
ops[iter_ops]->outputs_tensor_info()[0].shape()[0], + ops[iter_ops]->outputs_tensor_info()[0].shape()[1]); + } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 1) { + NewOp.tensor_parm = MakeTensor(1, 1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0]); + } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 0) { + NewOp.tensor_parm = MakeTensor(1, 1, 1, 1); + } else { + MS_LOG(ERROR) << "Tensor's shape is unknown."; + } + + NewOp.apply = CompleteOperatorInputs(ops, iter_ops, NewOp); + return NewOp; +} + +OperatorRec CompleteOperatorInputs(const std::vector> &ops, const size_t iter_ops, + Graph::NodeType NewTensor) { + for (size_t iter_input_tensors = 0; iter_input_tensors < ops[iter_ops]->inputs_tensor_info().size(); + iter_input_tensors++) { + if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 4) { + NewTensor.apply.arguments[iter_input_tensors] = + MakeTensor(ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0], + ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1], + ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[2], + ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[3]); + } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 2) { + NewTensor.apply.arguments[iter_input_tensors] = Complete2DInputs(ops, iter_ops, iter_input_tensors, NewTensor); + } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 1) { + NewTensor.apply.arguments[iter_input_tensors] = + MakeTensor(1, 1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]); + } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 0) { + NewTensor.apply.arguments[iter_input_tensors] = MakeTensor(1, 1, 1, 1); + } else { + MS_LOG(ERROR) << "Tensor's shape is unknown."; + } + } + return NewTensor.apply; +} + +TensorParam Complete2DInputs(const std::vector> &ops, const size_t 
iter_ops, + const size_t iter_input_tensors, Graph::NodeType NewTensor) { + if (NewTensor.apply.op_type == OperatorType::kRecMatMul) { + auto attrs = ops[iter_ops]->attrs(); + bool transpose_a = attrs[TRANSPOSE_A]->cast()->value(); + bool transpose_b = attrs[TRANSPOSE_B]->cast()->value(); + if (transpose_a && (iter_input_tensors == 0)) { + NewTensor.apply.arguments[iter_input_tensors] = + MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1], + ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]); + } else if (transpose_b && (iter_input_tensors == 1)) { + NewTensor.apply.arguments[iter_input_tensors] = + MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1], + ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]); + } else { + NewTensor.apply.arguments[iter_input_tensors] = + MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0], + ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1]); + } + } else { + NewTensor.apply.arguments[iter_input_tensors] = + MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0], + ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1]); + } + return NewTensor.apply.arguments[iter_input_tensors]; +} + +std::shared_ptr ParseGraph(const std::vector> &ops, + const std::vector> &input_tensor_names) { + std::shared_ptr graph(new Graph); + if (ops.size() > SIZE_MAX / 2) { + MS_LOG(EXCEPTION) << "Total number of operators is bigger than " << SIZE_MAX / 2; + } + + for (size_t iter_ops = 0; iter_ops < ops.size(); iter_ops++) { + Graph::NodeType NewOp = MakeNewOperator(ops, iter_ops); + graph->nodes.push_back(NewOp); + } + MakeEdge(input_tensor_names, graph); + + return graph; +} + +void MakeEdge(const std::vector> &input_tensor_names, std::shared_ptr graph) { + for (size_t iter_i = 0; iter_i < input_tensor_names.size(); iter_i++) { + for (size_t iter_j = 1; iter_j < 
input_tensor_names[iter_i].size(); iter_j++) { + size_t head_node_index = GetIndexInInputTensorNames(input_tensor_names, input_tensor_names[iter_i][iter_j]); + if (head_node_index < SIZE_MAX / 2 && head_node_index != iter_i) { + graph->nodes[iter_i].node_in.push_back(head_node_index); + graph->nodes[head_node_index].node_out.push_back(iter_i); + } + } + } +} + +size_t GetIndexInInputTensorNames(const std::vector> &input_tensor_name, + const std::string &input_name) { + for (size_t index = 0; index < input_tensor_name.size(); index++) { + if (input_tensor_name[index][0] == input_name) { + return index; + } + } + MS_LOG(INFO) << "Get index failed, using SIZE_MAX insted"; + return SIZE_MAX; +} +} // namespace parallel +} // namespace mindspore diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h index ae50ced418..2b1d0c55ed 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h @@ -31,23 +31,28 @@ namespace parallel { const std::map DictOpType{ {MATMUL, OperatorType::kRecMatMul}, {CONV2D, OperatorType::kRecConvolution}, + {MAXPOOL, OperatorType::kRecPooling}, {MAXPOOLV2, OperatorType::kRecPooling}, {SIMPLE_MEAN, OperatorType::kRecPooling}, - {TENSOR_ADD, OperatorType::kRecAdd}, + {TENSOR_ADD, OperatorType::kRecTensorAdd}, {RESHAPE, OperatorType::kRecReshape}, {BIAS_ADD, OperatorType::kRecBiasAdd}, {RELU, OperatorType::kRecReLU}, {BATCH_NORM, OperatorType::kRecBatchNorm}, {SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits}, -}; + {ONEHOT, OperatorType::kRecOneHot}, + {LOG, OperatorType::kRecLog}, + {EXP, OperatorType::kRecExp}, + {SUB, OperatorType::kRecSub}, + {MUL, OperatorType::kRecMul}, + {DIV, OperatorType::kRecDiv}, + {SQUEEZE, OperatorType::kRecSqueeze}, + {CAST, OperatorType::kRecCast}}; const TensorParam MakeTensor(int n, int c, 
int h, int w); Graph::NodeType MakeNewOperator(std::vector> ops, size_t iter_ops); -TensorParam Fill2DTensor(const std::vector> &ops, const size_t iter_ops, - Graph::NodeType NewTensor); - OperatorRec CompleteOperatorInputs(const std::vector> &ops, const size_t iter_ops, Graph::NodeType NewTensor); diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc index 24ad8ac203..3527c18079 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc @@ -29,52 +29,63 @@ namespace mindspore { namespace parallel { -#define DEVICE_MEMORY 1024.0 * 1024.0 * 1024.0 // 1GB - // Get the target node's weight for sorting. double GetWeights(const Graph::NodeType &node) { const OperatorRec &op = node.apply; - if (op.op_type == 0) { + if (op.op_type == OperatorType::kRecMatMul) { // For MatMul auto cost_ptr = std::make_shared(); return cost_ptr->GetMinCostIn(op); - } else if (op.op_type == 1) { + } else if (op.op_type == OperatorType::kRecConvolution) { // For Convolution auto cost_ptr = std::make_shared(); return cost_ptr->GetMinCostIn(node); - } else if (op.op_type == 2) { + } else if (op.op_type == OperatorType::kRecPooling) { // For Pooling auto cost_ptr = std::make_shared(); return cost_ptr->GetMinCostIn(); - } else if (op.op_type == 3) { - // For Add - auto cost_ptr = std::make_shared(); + } else if (op.op_type == OperatorType::kRecTensorAdd) { + // For TensorAdd + auto cost_ptr = std::make_shared(); return cost_ptr->GetMinCostIn(); - } else if (op.op_type == 4 || op.op_type == 7 || op.op_type == 9) { - // For Softmax & || Activation + } else if (op.op_type == OperatorType::kRecReLU || op.op_type == OperatorType::kRecSoftmax || + op.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) { + // For Activation and Softmax auto cost_ptr = std::make_shared(); return cost_ptr->GetMinCostIn(); - } else 
if (op.op_type == 5) { + } else if (op.op_type == OperatorType::kRecReshape) { // For Reshape auto cost_ptr = std::make_shared(); return cost_ptr->GetMinCostIn(); - } else if (op.op_type == 6) { + } else if (op.op_type == OperatorType::kRecBiasAdd) { // For BiasAdd auto cost_ptr = std::make_shared(); return cost_ptr->GetMinCostIn(); - } else if (op.op_type == 8) { + } else if (op.op_type == OperatorType::kRecBatchNorm) { // For BatchNorm auto cost_ptr = std::make_shared(); + return cost_ptr->GetMinCostIn(op); + } else if (op.op_type == OperatorType::kRecOneHot || op.op_type == OperatorType::kRecLog || + op.op_type == OperatorType::kRecExp || op.op_type == OperatorType::kRecAdd || + op.op_type == OperatorType::kRecSub || op.op_type == OperatorType::kRecMul || + op.op_type == OperatorType::kRecDiv || op.op_type == OperatorType::kRecSqueeze || + op.op_type == OperatorType::kRecCast) { + // For element-wise op + auto cost_ptr = std::make_shared(); + return cost_ptr->GetMinCostIn(); + } else if (op.op_type == OperatorType::kRecUnkownType) { + // For unknown type + return 0.0; } else { MS_LOG(EXCEPTION) << "Failure: GetOperatorWeight failed."; } @@ -97,8 +108,8 @@ std::vector SortByWeight(const std::shared_ptr graph) { } } - // Do sorting. - sort(weight_to_node_index.begin(), weight_to_node_index.end()); + // Ordering ops aka nodes of the graph + std::sort(weight_to_node_index.begin(), weight_to_node_index.end()); // Store the result in node_index_by_weights. 
uint64_t size = weight_to_node_index.size(); @@ -115,53 +126,67 @@ StrategyRec PartitionNode(const Graph::NodeType &node, std::shared_ptr graph) { MS_EXCEPTION_IF_NULL(graph); - if (node.apply.op_type == 0) { + if (node.apply.op_type == OperatorType::kRecMatMul) { // For MatMul auto cost_ptr = std::make_shared(); return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); - } else if (node.apply.op_type == 1) { + } else if (node.apply.op_type == OperatorType::kRecConvolution) { // For Convolution auto cost_ptr = std::make_shared(); return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); - } else if (node.apply.op_type == 2) { + } else if (node.apply.op_type == OperatorType::kRecPooling) { // For Pooling auto cost_ptr = std::make_shared(); return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); - } else if (node.apply.op_type == 3) { - // For Add - auto cost_ptr = std::make_shared(); + } else if (node.apply.op_type == OperatorType::kRecTensorAdd) { + // For TensorAdd + auto cost_ptr = std::make_shared(); return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); - } else if (node.apply.op_type == 4 || node.apply.op_type == 7 || node.apply.op_type == 9) { + } else if (node.apply.op_type == OperatorType::kRecReLU || node.apply.op_type == OperatorType::kRecSoftmax || + node.apply.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) { // For Softmax & Activation auto cost_ptr = std::make_shared(); return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); - } else if (node.apply.op_type == 5) { + } else if (node.apply.op_type == OperatorType::kRecReshape) { // For Reshape auto cost_ptr = std::make_shared(); return cost_ptr->GetOptimalStr(node); - } else if (node.apply.op_type == 6) { + } else if (node.apply.op_type == OperatorType::kRecBiasAdd) { // For BiasAdd auto cost_ptr = std::make_shared(); return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); - } else if (node.apply.op_type == 8) { 
+ } else if (node.apply.op_type == OperatorType::kRecBatchNorm) { // For BatchNorm auto cost_ptr = std::make_shared(); return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); + } else if (node.apply.op_type == OperatorType::kRecOneHot || node.apply.op_type == OperatorType::kRecLog || + node.apply.op_type == OperatorType::kRecExp || node.apply.op_type == OperatorType::kRecAdd || + node.apply.op_type == OperatorType::kRecSub || node.apply.op_type == OperatorType::kRecMul || + node.apply.op_type == OperatorType::kRecDiv || node.apply.op_type == OperatorType::kRecSqueeze || + node.apply.op_type == OperatorType::kRecCast) { + // For element-wise op + auto cost_ptr = std::make_shared(); + + return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); + } else if (node.apply.op_type == OperatorType::kRecUnkownType) { + // For unknown type + StrategyRec default_strategy; + return default_strategy; } else { MS_LOG(EXCEPTION) << "Failure: Partition Operator failed."; } } // Parttion graph into all devices. -Status PartitionForAllDevices(const size_t num_device, std::shared_ptr graph) { +Status PartitionForAllDevices(const size_t num_device, const double device_memory, std::shared_ptr graph) { if (num_device < 1) { MS_LOG(EXCEPTION) << "ERROR: Number of devices can't be " << num_device << "."; } @@ -206,8 +231,7 @@ Status PartitionForAllDevices(const size_t num_device, std::shared_ptr gr } } - InferUndecideStrategy(graph); - if (DevicesMemoryControl(graph) != SUCCESS) { + if (DevicesMemoryControl(device_memory, graph) != SUCCESS) { return FAILED; } else { return SUCCESS; @@ -232,89 +256,15 @@ Graph::NodeType ApplyStrToTensor(Graph::NodeType Node) { return Node; } -// Check Strategy for the same tensor between op. 
-void InferUndecideStrategy(std::shared_ptr graph) { - MS_EXCEPTION_IF_NULL(graph); - - uint64_t iter_nodes = graph->nodes.size(); - - // For all the nodes in the graph - for (uint64_t i_node = 0; i_node < iter_nodes; i_node++) { - // If this target node is an operator, find it's adjecent op's strategy; - if (graph->nodes[i_node].info == 0) { - // Try to apply last op's strategy. - ApplyLastStrategy(i_node, graph); - // Try to apply next op's strategy. - ApplyNextStrategy(i_node, graph); - } - } -} - -void ApplyLastStrategy(const uint64_t node_index, std::shared_ptr graph) { - Graph::NodeType &target_node = graph->nodes[node_index]; - - // Number of node-in - size_t num_node_in = target_node.node_in.size(); - - // Find forward op and copy strategy if meets the limits. - for (size_t index = 0; index < num_node_in; index++) { - if (graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_n <= - target_node.apply.arguments[0].tensor_str.str_n && - graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_c <= - target_node.apply.arguments[0].tensor_str.str_c && - graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_h <= - target_node.apply.arguments[0].tensor_str.str_h && - graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_w <= - target_node.apply.arguments[0].tensor_str.str_w) { - target_node.apply.arguments[0].tensor_str.str_n = - graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_n; - target_node.apply.arguments[0].tensor_str.str_c = - graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_c; - target_node.apply.arguments[0].tensor_str.str_h = - graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_h; - target_node.apply.arguments[0].tensor_str.str_w = - graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_w; - } - } -} - -void ApplyNextStrategy(const uint64_t node_index, std::shared_ptr graph) { - Graph::NodeType &target_node = graph->nodes[node_index]; 
- - // Number of node-out - size_t num_node_out = target_node.node_out.size(); - - // Find backward op and copy strategy if meets the limits. - for (size_t index = 0; index < num_node_out; index++) { - if (graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_n <= - target_node.tensor_parm.tensor_str.str_n && - graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_c <= - target_node.tensor_parm.tensor_str.str_c && - graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_h <= - target_node.tensor_parm.tensor_str.str_h && - graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_w <= - target_node.tensor_parm.tensor_str.str_w) { - target_node.tensor_parm.tensor_str.str_n = - graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_n; - target_node.tensor_parm.tensor_str.str_c = - graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_c; - target_node.tensor_parm.tensor_str.str_h = - graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_h; - target_node.tensor_parm.tensor_str.str_w = - graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_w; - } - } -} - -Status DevicesMemoryControl(std::shared_ptr graph) { +Status DevicesMemoryControl(const double device_memory, std::shared_ptr graph) { MS_EXCEPTION_IF_NULL(graph); uint64_t iter_nodes = graph->nodes.size(); + double used_memory = 0.0; for (uint64_t i_node = 0; i_node < iter_nodes; i_node++) { if (graph->nodes[i_node].info == 0) { Graph::NodeType &Node = graph->nodes[i_node]; - double used_memory = 0.0; for (int index = 0; index < 2; index++) { used_memory += Node.apply.arguments[index].tensor_str.str_n * Node.apply.arguments[index].tensor_shape.shape_n * @@ -329,12 +279,12 @@ Status DevicesMemoryControl(std::shared_ptr graph) { Node.tensor_parm.tensor_str.str_h * Node.tensor_parm.tensor_shape.shape_h * Node.tensor_parm.tensor_str.str_w * 
Node.tensor_parm.tensor_shape.shape_w * GetDataTypeSize(Node.tensor_parm.tensor_type); - if (DEVICE_MEMORY < used_memory) { - MS_LOG(EXCEPTION) << "Failure: Out of memory!"; - return FAILED; - } } } + if (device_memory < used_memory) { + MS_LOG(EXCEPTION) << "Failure: Out of memory!"; + return FAILED; + } return SUCCESS; } diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h index 4f831f4f9a..fc504b3cb2 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h +++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h @@ -40,17 +40,11 @@ StrategyRec PartitionNode(const Graph::NodeType &node, const std::vector> &node_name_to_strategy, std::shared_ptr graph); -Status PartitionForAllDevices(const size_t num_device, std::shared_ptr graph); +Status PartitionForAllDevices(const size_t num_device, const double device_memory, std::shared_ptr graph); Graph::NodeType ApplyStrToTensor(Graph::NodeType Node); -void InferUndecideStrategy(std::shared_ptr graph); - -void ApplyLastStrategy(const uint64_t node_index, std::shared_ptr graph); - -void ApplyNextStrategy(const uint64_t node_index, std::shared_ptr graph); - -Status DevicesMemoryControl(std::shared_ptr graph); +Status DevicesMemoryControl(const double device_memory, std::shared_ptr graph); size_t GetDataTypeSize(const TensorType &type); } // namespace parallel diff --git a/mindspore/ccsrc/parallel/context.cc b/mindspore/ccsrc/parallel/context.cc index bc4aca896b..9ba7efd60f 100644 --- a/mindspore/ccsrc/parallel/context.cc +++ b/mindspore/ccsrc/parallel/context.cc @@ -55,6 +55,9 @@ void ParallelContext::Reset() { parallel_mode_ = STAND_ALONE; parameter_broadcast_ = false; parameter_broadcast_is_set_ = false; + enable_all_reduce_fusion_ = false; + strategy_ckpt_load_file_ = ""; + strategy_ckpt_save_file_ = ""; } void ParallelContext::set_device_num(int32_t device_num) { @@ -102,6 +105,14 @@ void 
ParallelContext::set_parameter_broadcast(bool parameter_broadcast) { parameter_broadcast_is_set_ = true; } +void ParallelContext::set_strategy_ckpt_load_file(const std::string &strategy_ckpt_load_file) { + strategy_ckpt_load_file_ = strategy_ckpt_load_file; +} + +void ParallelContext::set_strategy_ckpt_save_file(const std::string &strategy_ckpt_save_file) { + strategy_ckpt_save_file_ = strategy_ckpt_save_file; +} + void ParallelContext::set_all_reduce_fusion_split_indices(const std::vector indices) { all_reduce_fusion_split_indices_ = indices; } diff --git a/mindspore/ccsrc/parallel/context.h b/mindspore/ccsrc/parallel/context.h index 64261cb964..0e007c92c6 100644 --- a/mindspore/ccsrc/parallel/context.h +++ b/mindspore/ccsrc/parallel/context.h @@ -80,6 +80,15 @@ class ParallelContext { const std::vector all_reduce_fusion_split_indices() const; void set_all_reduce_fusion_split_sizes(const std::vector sizes); const std::vector all_reduce_fusion_split_sizes() const; + void set_enable_all_reduce_fusion(bool enable_all_reduce_fusion) { + enable_all_reduce_fusion_ = enable_all_reduce_fusion; + } + bool enable_all_reduce_fusion() const { return enable_all_reduce_fusion_; } + + void set_strategy_ckpt_load_file(const std::string &strategy_ckpt_load_file); + std::string strategy_ckpt_load_file() const { return strategy_ckpt_load_file_; } + void set_strategy_ckpt_save_file(const std::string &strategy_ckpt_save_file); + std::string strategy_ckpt_save_file() const { return strategy_ckpt_save_file_; } void Reset(); @@ -98,8 +107,11 @@ class ParallelContext { bool device_num_is_set_; bool global_rank_is_set_; bool parameter_broadcast_is_set_; + bool enable_all_reduce_fusion_; std::vector all_reduce_fusion_split_indices_; std::vector all_reduce_fusion_split_sizes_; + std::string strategy_ckpt_load_file_; + std::string strategy_ckpt_save_file_; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/costmodel_context.cc 
b/mindspore/ccsrc/parallel/costmodel_context.cc index 82b260f967..591fa737aa 100644 --- a/mindspore/ccsrc/parallel/costmodel_context.cc +++ b/mindspore/ccsrc/parallel/costmodel_context.cc @@ -46,6 +46,7 @@ void CostModelContext::ResetCostModel() { costmodel_communi_threshold_ = DEFAULT_COST_MODEL_COMMUNI_THRESHOLD; costmodel_communi_const_ = DEFAULT_COST_MODEL_COMMUNI_CONST; costmodel_communi_bias_ = DEFAULT_COST_MODEL_COMMUNI_BIAS; + is_multi_subgraphs_ = DEFAULT_IS_MULTI_SUBGRAPHS; costmodel_allreduce_fusion_algorithm_ = DEFAULT_COST_MODEL_ALLREDUCE_FUSION_ALGORITHM; costmodel_allreduce_fusion_times_ = DEFAULT_COST_MODEL_ALLREDUCE_FUSION_TIMES; costmodel_allreduce_fusion_tail_percent_ = DEFAULT_COST_MODEL_ALLREDUCE_FUSION_TAIL_PERCENT; @@ -84,6 +85,7 @@ void CostModelContext::set_costmodel_communi_const(double cm_communi_const) { void CostModelContext::set_costmodel_communi_bias(double cm_communi_bias) { costmodel_communi_bias_ = cm_communi_bias; } +void CostModelContext::set_multi_subgraphs(bool multi_graphs) { is_multi_subgraphs_ = multi_graphs; } void CostModelContext::set_costmodel_allreduce_fusion_algorithm(int32_t algorithm) { costmodel_allreduce_fusion_algorithm_ = algorithm; } diff --git a/mindspore/ccsrc/parallel/costmodel_context.h b/mindspore/ccsrc/parallel/costmodel_context.h index 9937483051..ebb0d00008 100644 --- a/mindspore/ccsrc/parallel/costmodel_context.h +++ b/mindspore/ccsrc/parallel/costmodel_context.h @@ -67,6 +67,9 @@ class CostModelContext { void set_costmodel_communi_bias(double); double costmodel_communi_bias() const { return costmodel_communi_bias_; } + void set_multi_subgraphs(bool); + bool is_multi_subgraphs() const { return is_multi_subgraphs_; } + void set_costmodel_allreduce_fusion_algorithm(int32_t); int32_t costmodel_allreduce_fusion_algorithm() const { return costmodel_allreduce_fusion_algorithm_; } @@ -138,6 +141,8 @@ class CostModelContext { // COST_MODEL_COMMUNI_BIAS double costmodel_communi_bias_; + bool is_multi_subgraphs_; 
+ int32_t costmodel_allreduce_fusion_algorithm_; int32_t costmodel_allreduce_fusion_times_; diff --git a/mindspore/ccsrc/parallel/device_matrix.h b/mindspore/ccsrc/parallel/device_matrix.h index 236a7fad08..295bf33836 100644 --- a/mindspore/ccsrc/parallel/device_matrix.h +++ b/mindspore/ccsrc/parallel/device_matrix.h @@ -26,7 +26,6 @@ namespace mindspore { namespace parallel { - using RankList = std::vector; using Shape = std::vector; diff --git a/mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc b/mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc index 3abfc3d2ed..5bdd24090f 100644 --- a/mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc @@ -69,7 +69,7 @@ Status LayerNormInfo::CheckStrategy(const StrategyPtr &strategy) { } // check input strategy for (size_t i = begin_norm_axis_; i < input_strategy.size(); ++i) { - if (input_strategy[begin_norm_axis_] != NO_SPLIT_STRATEGY) { + if (input_strategy[i] != NO_SPLIT_STRATEGY) { MS_LOG(ERROR) << name_ << ": Invalid input strategy " << ShapeToString(input_strategy); return FAILED; } diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.h b/mindspore/ccsrc/parallel/ops_info/operator_info.h index 347da7e573..de95bd84ad 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.h +++ b/mindspore/ccsrc/parallel/ops_info/operator_info.h @@ -150,14 +150,11 @@ class OperatorInfo { // needed by rec_parser void set_type(const std::string &type) { type_ = type; } const std::string &type() const { return type_; } - void set_cnode_name(const std::string &cnode_name) { cnode_name_ = cnode_name; } - const std::string &cnode_name() const { return cnode_name_; } const std::unordered_map &attrs() const { return attrs_; } protected: // needed by rec_parser std::string type_; - std::string cnode_name_; virtual Status CheckStrategy(const StrategyPtr &strategy) = 0; virtual Status InferTensorMap() = 0; virtual Status InferForwardCommunication() = 0; diff --git 
a/mindspore/ccsrc/parallel/ops_info/ops_utils.h b/mindspore/ccsrc/parallel/ops_info/ops_utils.h index bdae87858d..e0b62eb233 100644 --- a/mindspore/ccsrc/parallel/ops_info/ops_utils.h +++ b/mindspore/ccsrc/parallel/ops_info/ops_utils.h @@ -61,6 +61,8 @@ constexpr char CROSS_BATCH[] = "cross_batch"; constexpr char STEP_PARALLEL_BEGIN[] = "step_parallel_begin"; constexpr char STEP_PARALLEL_END[] = "step_parallel_end"; constexpr char STEP_AUTO_PARALLEL_BEGIN[] = "step_auto_parallel_begin.dot"; +constexpr char REQUIRES_GRAD[] = "requires_grad"; +constexpr char PARAM_NAME[] = "name"; constexpr char RELU_TYPE[] = "relu"; constexpr char RELU6_TYPE[] = "relu6"; diff --git a/mindspore/ccsrc/parallel/ops_info/prelu_info.cc b/mindspore/ccsrc/parallel/ops_info/prelu_info.cc index fed361616b..14483e97a1 100644 --- a/mindspore/ccsrc/parallel/ops_info/prelu_info.cc +++ b/mindspore/ccsrc/parallel/ops_info/prelu_info.cc @@ -32,7 +32,7 @@ namespace parallel { * prelu has 2 input * A: A float tensor of shape [NCHW] representing the output of the preview layer. * w: Float Tensor, w > 0: there is only two shapes are legitimate: 1, or the number of channels at input. 
- * the strategy of w should equal to the channel dimension of strategy of A + * the strategy of w should equal to the channel dimension of strategy of A, or equal to 1 */ Status PReLUInfo::CheckStrategy(const StrategyPtr &strategy) { if (CheckStrategyValue(strategy, inputs_shape_, is_auto_parallel_) != SUCCESS) { @@ -52,7 +52,7 @@ Status PReLUInfo::CheckStrategy(const StrategyPtr &strategy) { } return FAILED; } - if (stra[0][PRELU_CHANNEL_INDEX] != stra[1][0]) { + if (stra[0][PRELU_CHANNEL_INDEX] != stra[1][0] && inputs_shape_[1][0] != 1) { if (is_auto_parallel_) { MS_LOG(DEBUG) << name_ << ": Invalid channel strategy."; } else { @@ -107,7 +107,11 @@ Status PReLUInfo::InferTensorMap() { } TensorMap param_tensor_map; - param_tensor_map.push_back(input_tensor_map.at(1)); + if (inputs_shape_[1][0] == 1) { + param_tensor_map.push_back(-1); + } else { + param_tensor_map.push_back(input_tensor_map.at(1)); + } inputs_tensor_map_.push_back(input_tensor_map); inputs_tensor_map_.push_back(param_tensor_map); outputs_tensor_map_.push_back(input_tensor_map); diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc index 8a95232aa4..b16108a279 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc @@ -40,6 +40,7 @@ #include "parallel/context.h" #include "parallel/ops_info/tmp_identity_info.h" #include "parallel/step_parallel.h" +#include "parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" #include "pipeline/parse/python_adapter.h" #include "pipeline/pipeline.h" @@ -339,7 +340,7 @@ bool IsAutoParallelCareNode(const CNodePtr &cnode) { return IsParallelCareNode(cnode) && IsSplittableOperator(prim->name()); } -OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &cnode) { +OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &cnode, StrategyMap *stra_map) { MS_EXCEPTION_IF_NULL(prim); MS_EXCEPTION_IF_NULL(cnode); 
auto attrs = prim->attrs(); @@ -385,9 +386,14 @@ OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr & operator_info->set_input_value(input_value); operator_info->set_outputs_dtype(cnode->Type()); operator_info->set_cnode(cnode); + // key of strategy map + std::string strategy_key_name = NodeParameterName(cnode); + bool load_strategy_from_ckpt = + StrategyCheckpoint::GetInstance().LoadCheckPointOn() && stra_map->find(strategy_key_name) != stra_map->end(); // If no strategy has been configured for this operator, then candidate strategies are generated for - // auto-strategy searching; if this primitive is CAST, we ignore the user-specified strategy - if (!StrategyFound(attrs) || prim->name() == CAST) { + // auto-strategy searching; if this primitive is CAST, we ignore the user-specified strategy. + // if strategy is set to load from checkpoint, it is prefer to load strategy from checkpoint . + if ((!StrategyFound(attrs) || prim->name() == CAST) && !load_strategy_from_ckpt) { // Compute split_flag_list_, indicating which input has batch dimension. 
This is ONLY used for preparation for // BatchParallelInfo operator operator_info->ComputeBatchSplitFlagList(); @@ -397,7 +403,12 @@ OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr & } } else { // In this case, the configured strategy should be extracted to help setting cost - StrategyPtr strategyPtr = parallel::ExtractStrategy(attrs); + StrategyPtr strategyPtr; + if (load_strategy_from_ckpt) { + strategyPtr = (*stra_map)[strategy_key_name]; + } else { + strategyPtr = parallel::ExtractStrategy(attrs); + } if (strategyPtr != nullptr) { if (prim->name() == RESHAPE) { MS_LOG(EXCEPTION) << "Setting strategy for Reshape goes for nothing!"; @@ -426,14 +437,20 @@ OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr & return operator_info; } -Status ConstructCostGraphNodes(const std::vector &all_nodes, const FuncGraphPtr &) { +// Using CNode's UniqueIds to construct nodes +Status ConstructCostGraphNodesByUniqueId(const std::vector &all_nodes, const FuncGraphPtr &) { MS_LOG(INFO) << "Constructing nodes for cost graph begins."; entire_costgraph = std::make_shared(); entire_costgraph->SetDeviceMemoryAndCostParameter(); - bool new_operator = true, first_operator = true; - std::string first_operator_cnode; - size_t current_op_index = 0; - + // The map from CNode's UniqueId to its operatorInfo + std::map from_cnode_to_info; + // extract strategy from checkpoint for multi-train + StrategyMap stra_map; + if (StrategyCheckpoint::GetInstance().LoadCheckPointOn()) { + if (StrategyCheckpoint::GetInstance().Load(&stra_map) != SUCCESS) { + MS_LOG(EXCEPTION) << "Load strategy checkpoint failed"; + } + } // Step 1 for (auto &node : all_nodes) { // NOTE: we only care about splittable Primitive operators @@ -449,13 +466,9 @@ Status ConstructCostGraphNodes(const std::vector &all_nodes, const F PrimitivePtr prim = GetValueNode(prim_anf_node); MS_EXCEPTION_IF_NULL(prim); - // When visiting the second subgraph, use the corresponding 
operatorInfo which already created - bool modify_new_operator = (new_operator) && (!first_operator) && (cnode->UniqueId() == first_operator_cnode); - if (modify_new_operator) { - new_operator = false; - } - if (new_operator) { - auto operator_info = CreateTheOperatorInfo(prim, cnode); + auto search_cnode = from_cnode_to_info.find(cnode->UniqueId()); + if (search_cnode == from_cnode_to_info.end()) { + auto operator_info = CreateTheOperatorInfo(prim, cnode, &stra_map); if (operator_info == nullptr) { return FAILED; } @@ -465,14 +478,73 @@ Status ConstructCostGraphNodes(const std::vector &all_nodes, const F entire_costgraph->AddOperator(operator_info); (void)cnode->set_operator_info(operator_info); - if (first_operator) { - first_operator_cnode = cnode->UniqueId(); - first_operator = false; + MS_LOG(INFO) << "The CNode with UniqueId: " << cnode->UniqueId() + << " and UniqueIdThroughCopy: " << cnode->UniqueIdThroughCopy() + << " is set OperatorInfo: " << operator_info->name() << ", Primitive: " << prim->name(); + (void)from_cnode_to_info.emplace(std::make_pair(cnode->UniqueIdThroughCopy(), operator_info)); + // Needed by rec_parser + entire_costgraph->add_inputs_tensor_name(inputs_tensor_name); + } else { + // Two CNODEs' UniqueIds should not be equal + MS_LOG(EXCEPTION) << "The CNode with UniqueId: " << cnode->UniqueId() + << " and UniqueIdThroughCopy: " << cnode->UniqueIdThroughCopy() + << " is set OperatorInfo: " << search_cnode->second->name() << ", Primitive: " << prim->name(); + } + } + + MS_LOG(INFO) << "Constructing nodes for cost graph ends."; + return SUCCESS; +} + +// Using CNode's UniqueIdThroughCopys to construct nodes +Status ConstructCostGraphNodesByUniqueIdTC(const std::vector &all_nodes, const FuncGraphPtr &) { + MS_LOG(INFO) << "Constructing nodes for cost graph begins."; + entire_costgraph = std::make_shared(); + entire_costgraph->SetDeviceMemoryAndCostParameter(); + // The map from CNode's UniqueIdThroughCopy to its operatorInfo + std::map 
from_cnode_to_info; + // extract strategy from checkpoint for multi-train + StrategyMap stra_map; + if (StrategyCheckpoint::GetInstance().LoadCheckPointOn()) { + if (StrategyCheckpoint::GetInstance().Load(&stra_map) != SUCCESS) { + MS_LOG(EXCEPTION) << "Load strategy checkpoint failed"; + } + } + for (auto &node : all_nodes) { + // NOTE: we only care about splittable Primitive operators + auto cnode = node->cast(); + bool bool_result = (cnode == nullptr) || (!IsValueNode(cnode->input(0))); + if (bool_result) { + continue; + } + ValueNodePtr prim_anf_node = cnode->input(0)->cast(); + if (!IsAutoParallelCareNode(cnode)) { + continue; + } + PrimitivePtr prim = GetValueNode(prim_anf_node); + + // Find the operatorInfo if it exists + auto search_cnode = from_cnode_to_info.find(cnode->UniqueIdThroughCopy()); + if (search_cnode == from_cnode_to_info.end()) { + // In this case, the corresponding OperatorInfo is not created, create the new one. + auto operator_info = CreateTheOperatorInfo(prim, cnode, &stra_map); + if (operator_info == nullptr) { + return FAILED; } // Needed by rec_parser + operator_info->set_type(prim->name()); + std::vector inputs_tensor_name = ExtractInputsTensorName(cnode); + + entire_costgraph->AddOperator(operator_info); + (void)cnode->set_operator_info(operator_info); + MS_LOG(INFO) << "The CNode with UniqueId: " << cnode->UniqueId() + << " and UniqueIdThroughCopy: " << cnode->UniqueIdThroughCopy() + << " is set OperatorInfo: " << operator_info->name() << ", Primitive: " << prim->name(); + (void)from_cnode_to_info.emplace(std::make_pair(cnode->UniqueIdThroughCopy(), operator_info)); + // Needed by rec_parser entire_costgraph->add_inputs_tensor_name(inputs_tensor_name); } else { - auto current_op_ptr = entire_costgraph->FindOperatorByIndex(current_op_index); + auto current_op_ptr = search_cnode->second; if (current_op_ptr == nullptr) { MS_LOG(EXCEPTION) << "Find " << prim->name() << " from CostGraph failed."; } else { @@ -484,14 +556,12 @@ Status 
ConstructCostGraphNodes(const std::vector &all_nodes, const F << " does not match the Prim: " << prim->name(); } (void)cnode->set_operator_info(current_op_ptr); - current_op_index++; + MS_LOG(INFO) << "The CNode with UniqueId: " << cnode->UniqueId() + << " and UniqueIdThroughCopy: " << cnode->UniqueIdThroughCopy() + << " is set OperatorInfo: " << current_op_ptr->name() << ", Primitive: " << prim->name(); } } } - if ((!new_operator) && (current_op_index != entire_costgraph->GetOperators().size())) { - MS_LOG(EXCEPTION) << "The second subgraph's operator number: " << current_op_index - << " does not match the first ones: " << entire_costgraph->GetOperators().size(); - } MS_LOG(INFO) << "Constructing nodes for cost graph ends."; return SUCCESS; @@ -844,11 +914,20 @@ Status ParallelStrategySearch(const std::vector &all_nodes, const Fu // OUTPUT: the determined strategy for each operator. // Step 1 - if (ConstructCostGraphNodes(all_nodes, root) == SUCCESS) { - MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are " << entire_costgraph->GetOperators().size() - << " operators."; + if (CostModelContext::GetInstance()->is_multi_subgraphs()) { + if (ConstructCostGraphNodesByUniqueIdTC(all_nodes, root) == SUCCESS) { + MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are " + << entire_costgraph->GetOperators().size() << " operators."; + } else { + MS_LOG(EXCEPTION) << "Constructing nodes for cost graph failed."; + } } else { - MS_LOG(EXCEPTION) << "Constructing nodes for cost graph failed."; + if (ConstructCostGraphNodesByUniqueId(all_nodes, root) == SUCCESS) { + MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. 
There are " + << entire_costgraph->GetOperators().size() << " operators."; + } else { + MS_LOG(EXCEPTION) << "Constructing nodes for cost graph failed."; + } } // Step 2 @@ -916,7 +995,7 @@ std::vector> RecInputTensorNames(const std::map &all_nodes, const FuncGraphPtr &root) { - if (ConstructCostGraphNodes(all_nodes, root) == SUCCESS) { + if (ConstructCostGraphNodesByUniqueId(all_nodes, root) == SUCCESS) { MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are " << entire_costgraph->GetOperators().size() << " operators."; } else { @@ -935,7 +1014,8 @@ Status ParallelStrategyRecSearch(const std::vector &all_nodes, const std::shared_ptr graph = ParseGraph(ops, input_tensor_names); size_t num_device = g_device_manager->DeviceNum(); - if (PartitionForAllDevices(num_device, graph) == SUCCESS) { + double device_memory = entire_costgraph->GetDeviceMemory(); + if (PartitionForAllDevices(num_device, device_memory, graph) == SUCCESS) { MS_LOG(INFO) << "Partition Success With " << num_device << " devices."; } else { MS_LOG(ERROR) << "PartitionForAllDevices failed."; diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.h b/mindspore/ccsrc/parallel/step_auto_parallel.h index f120edcc61..fff9dfa4c3 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.h +++ b/mindspore/ccsrc/parallel/step_auto_parallel.h @@ -43,7 +43,9 @@ std::vector ExtractInputTypeLengthByNode(const CNodePtr &node); std::vector ExtractOutputTypeByNode(const CNodePtr &node); -Status ConstructCostGraphNodes(const std::vector &all_nodes, const FuncGraphPtr &root); +Status ConstructCostGraphNodesByUniqueId(const std::vector &all_nodes, const FuncGraphPtr &root); + +Status ConstructCostGraphNodesByUniqueIdTC(const std::vector &all_nodes, const FuncGraphPtr &root); void ConstructCostGraphEdges(const std::vector &all_nodes); diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc index c24c14abf6..21a515ff85 100644 --- 
a/mindspore/ccsrc/parallel/step_parallel.cc +++ b/mindspore/ccsrc/parallel/step_parallel.cc @@ -345,7 +345,6 @@ bool FindCommunicationOp(const std::vector &all_nodes) { continue; } auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); if (!IsValueNode(cnode->input(0))) { continue; } @@ -903,9 +902,15 @@ void InsertMirrorOps(const MirrorOps &mirror_ops, const CNodePtr &node) { } } -void BackwardCommunication(const OperatorInfoPtr &distribute_operator, const CNodePtr &node, bool is_loss_node) { +void BackwardCommunication(const OperatorInfoPtr &distribute_operator, const CNodePtr &node, + const std::vector> &sens_loss_pairs) { MS_EXCEPTION_IF_NULL(distribute_operator); MS_EXCEPTION_IF_NULL(node); + + bool is_loss_cnode = + std::any_of(sens_loss_pairs.begin(), sens_loss_pairs.end(), + [node](const std::pair &element) { return element.second == node; }); + MirrorOps mirror_ops = distribute_operator->mirror_ops(); VirtualDivOp virtual_div_op = distribute_operator->virtual_div_op(); // insert mirror op @@ -914,7 +919,7 @@ void BackwardCommunication(const OperatorInfoPtr &distribute_operator, const CNo InsertMirrorOps(mirror_ops, node); } // insert virtual div op - if (!virtual_div_op.empty() && is_loss_node) { + if (!virtual_div_op.empty() && is_loss_cnode) { MS_LOG(INFO) << "insert virtual div op for " << distribute_operator->name(); InsertVirtualDivOp(virtual_div_op, node); } @@ -986,10 +991,6 @@ StrategyPtr ExtractStrategy(std::unordered_map attrs) { Dimensions dim; if (elements[index]->isa()) { ValueTuplePtr value_tuple = elements[index]->cast(); - if (value_tuple == nullptr) { - MS_LOG(EXCEPTION) << "Failure:value_tuple is nullptr"; - } - std::vector value_vector = value_tuple->value(); (void)std::transform(value_vector.begin(), value_vector.end(), std::back_inserter(dim), [](const ValuePtr &value) { return static_cast(GetValue(value)); }); @@ -1013,7 +1014,6 @@ Shapes GetNodeShape(const AnfNodePtr &node) { BaseShapePtr base_shape_ptr = node->Shape(); if 
(node->isa()) { auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); if (IsValueNode(cnode->input(0))) { PrimitivePtr prim = GetValueNode(cnode->input(0)); MS_EXCEPTION_IF_NULL(prim); @@ -1190,7 +1190,7 @@ std::pair FindSubGraph(const FuncGraphPtr &graph, const AnfNode continue; } CNodePtr graph_cnode_inp0 = graph_cnode->input(0)->cast(); - if ((graph_cnode_inp0 == nullptr) || !IsValueNode(graph_cnode_inp0->input(1))) { + if (!IsValueNode(graph_cnode_inp0->input(1))) { continue; } FuncGraphPtr graph_sub = GetValueNode(graph_cnode_inp0->input(1)); @@ -1230,7 +1230,11 @@ void SetParallelShape(const AnfNodePtr ¶meter, const std::pairToString(); std::shared_ptr parallel_shape = std::make_shared(slice_shape); MS_EXCEPTION_IF_NULL(parallel_shape); - abstract->set_shape(parallel_shape); + // Don't modify it in-place as the pointer of this AbstractValue may used as cache key in StaticAnalysis. + auto cloned_abstract = abstract->Clone(); + MS_EXCEPTION_IF_NULL(cloned_abstract); + cloned_abstract->set_shape(parallel_shape); + parameter->set_abstract(cloned_abstract); TensorLayout tensor_layout = tensorinfo_in.tensor_layout(); ParameterPtr parameter_ptr = parameter->cast(); MS_EXCEPTION_IF_NULL(parameter_ptr); @@ -1330,7 +1334,10 @@ void SetClonedTensorShapeForOptimizer(const FuncGraphPtr &root) { cloned_parameter->set_tensor_layout(cloned_from_parameter->tensor_layout()); MS_EXCEPTION_IF_NULL(cloned_parameter_node->abstract()); MS_EXCEPTION_IF_NULL(cloned_from_node->abstract()); - cloned_parameter_node->abstract()->set_shape(cloned_from_node->abstract()->GetShapeTrack()); + auto cloned_abstract = cloned_parameter_node->abstract()->Clone(); + MS_EXCEPTION_IF_NULL(cloned_abstract); + cloned_abstract->set_shape(cloned_from_node->abstract()->GetShapeTrack()); + cloned_parameter_node->set_abstract(cloned_abstract); MS_LOG(INFO) << "The parameter: " << cloned_parameter->name() << " is cloned, the be cloned parameter is: " << cloned_from_parameter->name() << ", clone index is: 
" << cloned_index; @@ -1371,6 +1378,13 @@ void SetVirtualDatasetStrategy(const CNodePtr &node) { } void ExtractInformation(const std::vector &all_nodes) { + // load strategy map from checkpoint + StrategyMap stra_map; + if (StrategyCheckpoint::GetInstance().LoadCheckPointOn()) { + if (StrategyCheckpoint::GetInstance().Load(&stra_map) != SUCCESS) { + MS_LOG(EXCEPTION) << "Load strategy checkpoint failed"; + } + } for (auto &node : all_nodes) { auto cnode = node->cast(); if ((cnode == nullptr) || !IsValueNode(cnode->input(0))) { @@ -1407,7 +1421,12 @@ void ExtractInformation(const std::vector &all_nodes) { (void)cnode->set_operator_info(operator_); continue; } - if (!StrategyFound(attrs)) { + // load strategy checkpoint + // key of strategy map + std::string strategy_key_name = NodeParameterName(cnode); + bool load_strategy_from_ckpt = + StrategyCheckpoint::GetInstance().LoadCheckPointOn() && stra_map.find(strategy_key_name) != stra_map.end(); + if (!StrategyFound(attrs) && !load_strategy_from_ckpt) { MS_LOG(INFO) << "ExtractInformation: the strategy of node " << node->ToString() << " prim " << prim->name() << " is empty, using batch parallel"; std::shared_ptr> strategy_v_ptr = operator_->GenerateBatchStrategies(); @@ -1425,6 +1444,8 @@ void ExtractInformation(const std::vector &all_nodes) { MS_LOG(INFO) << "node " << node->ToString() << " prim " << prim->name() << " batch parallel strategy is " << attrs[GEN_STRATEGY]->ToString(); strategyPtr = NewStrategy(0, *strategy_v_ptr); + } else if (load_strategy_from_ckpt) { + strategyPtr = stra_map[strategy_key_name]; } else { strategyPtr = ExtractStrategy(attrs); } @@ -1523,9 +1544,32 @@ std::shared_ptr FindPrevParallelCareNodeLayout(const AnfNodePtr &n return nullptr; } +std::shared_ptr CreateParameterLayout(const AnfNodePtr &node) { + // Create DataParallel tensor layout for parameter(support WideDeep). 
+ CheckGlobalDeviceManager(); + int32_t dev_num = SizeToInt(g_device_manager->GetDeviceListByStageId(0).size()); + TensorLayout input_tensor_layout; + // create input_shape + Shapes inputs_shape = GetNodeShape(node); + Shape input_shape_array = inputs_shape[0]; + if (input_shape_array.empty()) { + MS_LOG(EXCEPTION) << "Don't support reshape a scalar parameter."; + } + // create tensor_map + size_t shape_size = input_shape_array.size(); + TensorMap input_tensor_map_array(SizeToInt(shape_size) - 1, -1); + input_tensor_map_array.insert(input_tensor_map_array.begin(), 0); + // create dev_matrix + Shape dev_matrix_array = {dev_num}; + if (input_tensor_layout.InitFromVector(dev_matrix_array, input_tensor_map_array, input_shape_array) != SUCCESS) { + MS_LOG(EXCEPTION) << "Create tensor layout for parameter failed."; + } + return std::make_shared(input_tensor_layout); +} + std::shared_ptr FindPrevLayout(const AnfNodePtr &node) { if (node->isa()) { - MS_LOG(EXCEPTION) << "Failure: parameter before reshape is not supported temporary"; + return CreateParameterLayout(node); } if (!node->isa()) { return nullptr; @@ -1620,7 +1664,6 @@ CNodePtr FindLossCNode(const FuncGraphPtr &func_graph) { auto pre_cnode = pre_node->cast(); MS_EXCEPTION_IF_NULL(pre_cnode); auto current_prim = GetValueNode(pre_cnode->input(0)); - // return -> cast if (current_prim->name() == CAST && pre_cnode->operator_info() == nullptr) { pre_cnode = pre_cnode->input(1)->cast(); @@ -1663,14 +1706,8 @@ CNodePtr FindLossCNode(const FuncGraphPtr &func_graph) { return pre_cnode; } -TensorLayouts GetLossNodeGradOutputLayout(const CNodePtr &cnode) { - MS_EXCEPTION_IF_NULL(cnode); +TensorLayouts GetLossNodeGradOutputLayout(const CNodePtr &loss_cnode) { TensorLayouts ret; - if (!IsValueNode(cnode->input(1))) { - MS_LOG(EXCEPTION) << "Sens can't find the corresponding graph."; - } - auto func_graph = GetValueNode(cnode->input(1)); - auto loss_cnode = FindLossCNode(func_graph); MS_EXCEPTION_IF_NULL(loss_cnode); 
AnfNodePtr node = loss_cnode->cast(); MS_EXCEPTION_IF_NULL(node); @@ -1706,16 +1743,16 @@ TensorLayouts GetLossNodeGradOutputLayout(const CNodePtr &cnode) { return ret; } -void SplitSens(const AnfNodePtr &grad_sens_node, const TensorLayout &loss_grad_layout) { +void SplitSens(const CNodePtr &grad_sens_node, const TensorLayout &loss_grad_layout) { MS_EXCEPTION_IF_NULL(grad_sens_node); - - auto cnode = grad_sens_node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - AnfNodePtr sens_tensor_node = cnode->input(1); + if (grad_sens_node->size() <= 1) { + MS_LOG(EXCEPTION) << "The size of grad sens node is smaller than 2"; + } + AnfNodePtr sens_tensor_node = grad_sens_node->input(1); MS_EXCEPTION_IF_NULL(sens_tensor_node); Shapes sens_shapes = GetNodeShape(sens_tensor_node); if (sens_shapes.size() != 1) { - MS_LOG(EXCEPTION) << "SplitSens: GetNodeShape for sens_tensor_node, output size is not 1"; + MS_LOG(EXCEPTION) << "GetNodeShape for sens_tensor_node, output size is not 1"; } // If the shape of sens tensor is [] or [1], no need to split it. 
Shape sens_shape = sens_shapes[0]; @@ -1743,19 +1780,22 @@ void SplitSens(const AnfNodePtr &grad_sens_node, const TensorLayout &loss_grad_l auto slice_shape = loss_grad_layout.slice_shape().array(); std::shared_ptr parallel_shape = std::make_shared(slice_shape); MS_EXCEPTION_IF_NULL(parallel_shape); - abstract->set_shape(parallel_shape); + auto cloned_abstract = abstract->Clone(); + MS_EXCEPTION_IF_NULL(cloned_abstract); + cloned_abstract->set_shape(parallel_shape); + sens_tensor_node->set_abstract(cloned_abstract); auto sens_tensor_param = sens_tensor_node->cast(); sens_tensor_param->set_tensor_layout(std::make_shared(loss_grad_layout)); return; } - MS_LOG(EXCEPTION) << "SplitSens: the type of sens node is not Tensor or Parameter, it is unsupported now."; + MS_LOG(EXCEPTION) << "The type of sens node is not Tensor or Parameter, it is unsupported now."; } // Use _GetTensorSlice operator to split the sens tensor - FuncGraphPtr func_graph = cnode->func_graph(); // only cnode can get the graph + FuncGraphPtr func_graph = grad_sens_node->func_graph(); // only cnode can get the graph MS_EXCEPTION_IF_NULL(func_graph); Operator op = CreateGetTensorSliceOp(loss_grad_layout); - InsertGetTensorSliceOp(op, cnode, func_graph, 1, SPLIT_SENS); + InsertGetTensorSliceOp(op, grad_sens_node, func_graph, 1, SPLIT_SENS); } void InsertForwardOps(const OperatorInfoPtr &distribute_operator, const CNodePtr &cnode) { @@ -1821,7 +1861,6 @@ std::set FindForwardGraphByRootNodes(const AnfNodeSet &root_all_no } auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); if ((cnode->size() < 2) || !IsValueNode(cnode->input(0))) { continue; } @@ -1838,55 +1877,12 @@ std::set FindForwardGraphByRootNodes(const AnfNodeSet &root_all_no return graph_set; } -// Sens node satisfies the following conditions: cnode(sens)-->cnode(tuple_getitem)-->cnode-->cnode(J) -void StepSplitSens(const AnfNodePtr &node) { - if (!node->isa()) { - return; - } - - // cnode(sens)-->cnode(tuple_getitem) - auto cnode = 
node->cast(); - AnfNodePtr expect_tuple_getitem = cnode->input(0); - MS_EXCEPTION_IF_NULL(expect_tuple_getitem); - if (!expect_tuple_getitem->isa()) { - return; - } - auto expect_tuple_getitem_cnode = expect_tuple_getitem->cast(); - MS_EXCEPTION_IF_NULL(expect_tuple_getitem_cnode); - if (!IsValueNode(expect_tuple_getitem_cnode->input(0))) { - return; - } - auto expect_tuple_getitem_prim = GetValueNode(expect_tuple_getitem_cnode->input(0)); - if (expect_tuple_getitem_prim->name() != TUPLE_GETITEM) { - return; - } - - // cnode(sens)-->cnode(tuple_getitem)-->cnode - AnfNodePtr expect_anonymous = expect_tuple_getitem_cnode->input(1); - MS_EXCEPTION_IF_NULL(expect_anonymous); - if (!expect_anonymous->isa()) { - return; - } - - // cnode(sens)-->cnode(tuple_getitem)-->cnode-->cnode(J) - auto expect_anonymous_cnode = expect_anonymous->cast(); - MS_EXCEPTION_IF_NULL(expect_anonymous_cnode); - AnfNodePtr expect_j = expect_anonymous_cnode->input(0); - MS_EXCEPTION_IF_NULL(expect_j); - if (!expect_j->isa()) { - return; - } - auto expect_j_cnode = expect_j->cast(); - MS_EXCEPTION_IF_NULL(expect_j_cnode); - if (!IsValueNode(expect_j_cnode->input(0))) { - return; - } - auto expect_j_prim = GetValueNode(expect_j_cnode->input(0)); - if (expect_j_prim->name() == J) { - auto loss_grad_layout = GetLossNodeGradOutputLayout(expect_j_cnode); - if (!loss_grad_layout.empty()) { - SplitSens(node, loss_grad_layout[0]); - } +void StepSplitSens(const std::pair &sens_loss_pair) { + CNodePtr sens_node = sens_loss_pair.first; + CNodePtr loss_node = sens_loss_pair.second; + auto loss_grad_layout = GetLossNodeGradOutputLayout(loss_node); + if (!loss_grad_layout.empty()) { + SplitSens(sens_node, loss_grad_layout[0]); } } @@ -1905,26 +1901,77 @@ std::vector FindLossCNodeFromRoot(const FuncGraphPtr &root) { return loss_node; } +// Sens node satisfies the following conditions: cnode(sens)-->cnode(tuple_getitem)-->cnode-->cnode(J) +std::vector> GetSensLossPairs(const FuncGraphPtr &root) { + 
MS_EXCEPTION_IF_NULL(root); + std::vector> sens_loss_pairs; + for (auto &node : root->nodes()) { + if (!node->isa()) { + continue; + } + + // cnode(sens)-->cnode(tuple_getitem) + auto sens_cnode = node->cast(); + AnfNodePtr expect_tuple_getitem = sens_cnode->input(0); + MS_EXCEPTION_IF_NULL(expect_tuple_getitem); + if (!expect_tuple_getitem->isa()) { + continue; + } + + auto expect_tuple_getitem_cnode = expect_tuple_getitem->cast(); + if (!IsSomePrimitive(expect_tuple_getitem_cnode, TUPLE_GETITEM)) { + continue; + } + + // cnode(sens)-->cnode(tuple_getitem)-->cnode + AnfNodePtr expect_anonymous = expect_tuple_getitem_cnode->input(1); + MS_EXCEPTION_IF_NULL(expect_anonymous); + if (!expect_anonymous->isa()) { + continue; + } + + // cnode(sens)-->cnode(tuple_getitem)-->cnode-->cnode(J) + auto expect_anonymous_cnode = expect_anonymous->cast(); + AnfNodePtr expect_j = expect_anonymous_cnode->input(0); + MS_EXCEPTION_IF_NULL(expect_j); + if (!expect_j->isa()) { + continue; + } + auto expect_j_cnode = expect_j->cast(); + if (!IsSomePrimitive(expect_j_cnode, J)) { + continue; + } + + if (!IsValueNode(expect_j_cnode->input(1))) { + MS_LOG(EXCEPTION) << "Sens can't find the corresponding graph."; + } + auto func_graph = GetValueNode(expect_j_cnode->input(1)); + auto loss_cnode = FindLossCNode(func_graph); + std::pair sens_loss_pair = std::make_pair(sens_cnode, loss_cnode); + sens_loss_pairs.push_back(sens_loss_pair); + } + return sens_loss_pairs; +} + void ParallelCommunication(const FuncGraphPtr &root, const std::vector &all_nodes, const FuncGraphManagerPtr &manager) { MS_EXCEPTION_IF_NULL(root); MS_EXCEPTION_IF_NULL(manager); TensorRedistribution tensor_redistribution; - AnfNodePtr grad_sens_node = nullptr; - std::vector loss_cnode = FindLossCNodeFromRoot(root); + std::vector> sens_loss_pairs = GetSensLossPairs(root); + bool has_backward = !sens_loss_pairs.empty(); // split sens must before inserting the operators. 
- for (auto &node : all_nodes) { + for (auto &pair : sens_loss_pairs) { // If the shape of grad-sens tensor is not [] or [1], use get tensor slice to handel it. // If the type of sens node is not Tensor, it is unsupported now, do nothing default. - StepSplitSens(node); + StepSplitSens(pair); } for (auto &node : all_nodes) { MS_EXCEPTION_IF_NULL(node); if (node->isa()) { auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); if (!IsValueNode(cnode->input(0))) { continue; } @@ -1933,11 +1980,6 @@ void ParallelCommunication(const FuncGraphPtr &root, const std::vectorget_return(); - auto all_nodes = DeepScopedGraphSearch(ret); - for (auto &node : all_nodes) { - MS_EXCEPTION_IF_NULL(node); - auto cnode = node->cast(); - if ((cnode == nullptr) || !IsValueNode(cnode->input(0))) { - continue; - } - PrimitivePtr prim = GetValueNode(cnode->input(0)); - MS_EXCEPTION_IF_NULL(prim); - OperatorInfoPtr operator_info = cnode->operator_info(); - if (operator_info) { - if (prim->instance_name().empty()) { - continue; +std::string NodeParameterName(const CNodePtr &node) { + std::vector node_inputs{node->inputs()}; + for (auto input : node_inputs) { + if (input->isa()) { + auto input_parameter = input->cast(); + if (input_parameter->has_default()) { + if (py::cast(parse::python_adapter::GetPyObjAttr(input_parameter->default_param(), REQUIRES_GRAD))) { + return py::cast( + parse::python_adapter::GetPyObjAttr(input_parameter->default_param(), PARAM_NAME)); + } } - std::string instance_name = prim->instance_name(); - StrategyPtr strategyPtr = operator_info->strategy(); - MS_EXCEPTION_IF_NULL(node->scope()); - std::string node_name = node->scope()->name() + std::string(CONNSYMBOL) + instance_name; - straMap[node_name] = strategyPtr; } } - if (StrategyCheckpoint::GetInstance().Save(straMap) != SUCCESS) { - MS_LOG(EXCEPTION) << "Save strategy checkpoint failed"; - } + return ""; } -void RestoreStrategy(const FuncGraphPtr &func_graph) { +void CheckpointStrategy(const FuncGraphPtr 
&func_graph) { MS_EXCEPTION_IF_NULL(func_graph); - MS_LOG(INFO) << "Extract strategy from checkpoint begin"; - StrategyMap straMap; - if (StrategyCheckpoint::GetInstance().Load(&straMap) != SUCCESS) { - MS_LOG(EXCEPTION) << "Load strategy checkpoint failed"; - } - if (StrategyCheckpoint::GetInstance().RemoveCheckPoint() != SUCCESS) { - MS_LOG(EXCEPTION) << "Remove strategy checkpoint failed"; - } + MS_LOG(DEBUG) << "Save strategy to checkpoint begin"; + StrategyMap stra_map; auto ret = func_graph->get_return(); auto all_nodes = DeepScopedGraphSearch(ret); for (auto &node : all_nodes) { @@ -2041,23 +2064,22 @@ void RestoreStrategy(const FuncGraphPtr &func_graph) { if ((cnode == nullptr) || !IsValueNode(cnode->input(0))) { continue; } + std::string param_name = NodeParameterName(cnode); + if (param_name.empty()) { + continue; + } PrimitivePtr prim = GetValueNode(cnode->input(0)); MS_EXCEPTION_IF_NULL(prim); OperatorInfoPtr operator_info = cnode->operator_info(); if (operator_info) { - if (prim->instance_name().empty()) { - continue; - } - std::string instance_name = prim->instance_name(); + StrategyPtr strategyPtr = operator_info->strategy(); MS_EXCEPTION_IF_NULL(node->scope()); - std::string node_name = node->scope()->name() + std::string(CONNSYMBOL) + instance_name; - MS_LOG(INFO) << "Node name is " << node_name; - if (straMap.find(node_name) != straMap.end()) { - StrategyPtr strategyPtr = straMap[node_name]; - operator_info->set_strategy(strategyPtr); - } + stra_map[param_name] = strategyPtr; } } + if (StrategyCheckpoint::GetInstance().Save(stra_map) != SUCCESS) { + MS_LOG(EXCEPTION) << "Save strategy checkpoint failed"; + } } void SetForwardFlag(const std::vector &all_nodes) { @@ -2067,7 +2089,6 @@ void SetForwardFlag(const std::vector &all_nodes) { continue; } auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); if (!IsValueNode(cnode->input(0))) { continue; } @@ -2085,7 +2106,6 @@ void SetForwardFlag(const AnfNodeSet &all_nodes) { continue; } auto cnode = 
node->cast(); - MS_EXCEPTION_IF_NULL(cnode); if (!IsValueNode(cnode->input(0))) { continue; } @@ -2114,7 +2134,6 @@ std::vector FindRootForwardCNode(const FuncGraphPtr &graph, const An continue; } auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); auto root_node_id = node->UniqueIdThroughCopy(); if (loss_cnode_id == root_node_id) { root_forward_nodes = DeepLinkedGraphSearch(cnode); @@ -2237,14 +2256,9 @@ bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) // extract shape and strategy, set operator_info ExtractInformation(all_nodes); ReshapeInit(all_nodes); - // extract strategy from checkpoint for multi-train - if (StrategyCheckpoint::GetInstance().CheckPointOn() && StrategyCheckpoint::GetInstance().CheckPointExit()) { - RestoreStrategy(root); - } } // save strategy as checkpoint for multi-train - if (StrategyCheckpoint::GetInstance().CheckPointOn() && - StrategyCheckpoint::GetInstance().GetCurrentTrainTime() < StrategyCheckpoint::GetInstance().GetTrainTimes()) { + if (StrategyCheckpoint::GetInstance().SaveCheckPointOn()) { CheckpointStrategy(root); } @@ -2278,13 +2292,10 @@ std::vector ExtractInputsTensorName(const CNodePtr &node) { std::vector all_inputs = node->inputs(); std::vector node_inputs{all_inputs.begin() + 1, all_inputs.end()}; + std::string node_id = node->UniqueId(); + name_inputs.push_back(node_id); for (auto &input : node_inputs) { - std::string name; - if (IsValueNode(input) || input->isa() || input->isa()) { - name = input->ToString(); - } else { - continue; - } + std::string name = input->UniqueId(); name_inputs.push_back(name); } diff --git a/mindspore/ccsrc/parallel/step_parallel.h b/mindspore/ccsrc/parallel/step_parallel.h index b0d128f515..93c3ed798c 100644 --- a/mindspore/ccsrc/parallel/step_parallel.h +++ b/mindspore/ccsrc/parallel/step_parallel.h @@ -82,7 +82,8 @@ std::pair FindCNode(const AnfNodePtr &anode, const std::string & void InsertMirrorOps(const MirrorOps &mirror_ops, const CNodePtr &node); 
-void BackwardCommunication(const OperatorInfoPtr &distribute_operator, const CNodePtr &node, bool is_loss_node); +void BackwardCommunication(const OperatorInfoPtr &distribute_operator, const CNodePtr &node, + const std::vector> &sens_loss_pairs); // Generate and init parallel operator OperatorInfoPtr OperatorInstance(const PrimitivePtr &prim, const PrimitiveAttrs &attrs, @@ -134,7 +135,7 @@ void ReshapeInit(const std::vector &all_nodes); void ParallelCommunication(const FuncGraphPtr &root, const std::vector &all_nodes, const FuncGraphManagerPtr &manager); -void RestoreStrategy(const FuncGraphPtr &func_graph); +std::string NodeParameterName(const CNodePtr &node); void CheckpointStrategy(const FuncGraphPtr &func_graph); diff --git a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc index dd518dc76c..de10f4beb4 100644 --- a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc +++ b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc @@ -23,36 +23,38 @@ #include "common/utils.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" -#include "utils/node_strategy.pb.h" +#include "proto/node_strategy.pb.h" namespace mindspore { namespace parallel { StrategyCheckpoint &StrategyCheckpoint::GetInstance() { static StrategyCheckpoint instance = StrategyCheckpoint(); + if (ParallelContext::GetInstance() != nullptr) { + instance.load_file_ = ParallelContext::GetInstance()->strategy_ckpt_load_file(); + instance.load_checkpoint_on_ = !ParallelContext::GetInstance()->strategy_ckpt_load_file().empty(); + instance.save_file_ = ParallelContext::GetInstance()->strategy_ckpt_save_file(); + instance.save_checkpoint_on_ = !ParallelContext::GetInstance()->strategy_ckpt_save_file().empty(); + } return instance; } -bool StrategyCheckpoint::CheckPointExit() const { - std::ifstream fin(path_); +bool 
StrategyCheckpoint::CheckPointExit(const std::string path) const { + std::ifstream fin(path); if (fin) { return true; } return false; } -Status StrategyCheckpoint::RemoveCheckPoint() const { - if (std::remove(common::SafeCStr(path_)) == 0) { - return SUCCESS; - } - return FAILED; -} - Status StrategyCheckpoint::Load(StrategyMap *strategy_map) { if (strategy_map == nullptr) { MS_LOG(EXCEPTION) << "Failure:strategy_map is nullptr"; } + if (!CheckPointExit(load_file_)) { + MS_LOG(EXCEPTION) << "CheckPoint file is not found"; + } straspb::ParallelStrategyMap parallel_strategy_map; - std::fstream input(path_, std::ios::in | std::ios::binary); + std::fstream input(load_file_, std::ios::in | std::ios::binary); if (!parallel_strategy_map.ParseFromIstream(&input)) { MS_LOG(ERROR) << "Load strategy file failed"; return FAILED; @@ -77,14 +79,14 @@ Status StrategyCheckpoint::Load(StrategyMap *strategy_map) { StrategyPtr strategy = NewStrategy(stage, strategy_inputs); (*strategy_map)[node_name] = strategy; - current_train_time_ = (int32_t)parallel_strategy_map.train_time(); + current_stage_ = (int32_t)parallel_strategy_map.current_stage(); } return SUCCESS; } Status StrategyCheckpoint::Save(const StrategyMap &strategy_map) { straspb::ParallelStrategyMap parallel_strategy_map; - parallel_strategy_map.set_train_time(IntToUint(++current_train_time_)); + parallel_strategy_map.set_current_stage(IntToUint(++current_stage_)); for (auto &node_stra : strategy_map) { straspb::ParallelStrategyItem *parallel_strategy_item = parallel_strategy_map.add_parallel_strategy_item(); MS_EXCEPTION_IF_NULL(parallel_strategy_item); @@ -100,7 +102,7 @@ Status StrategyCheckpoint::Save(const StrategyMap &strategy_map) { } } } - std::fstream output(path_, std::ios::out | std::ios::trunc | std::ios::binary); + std::fstream output(save_file_, std::ios::out | std::ios::trunc | std::ios::binary); if (!parallel_strategy_map.SerializeToOstream(&output)) { MS_LOG(ERROR) << "Save strategy file failed"; return 
FAILED; diff --git a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h index c871ea6eef..a758a9e7bb 100644 --- a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h +++ b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h @@ -21,43 +21,36 @@ #include #include "parallel/ops_info/ops_utils.h" #include "parallel/strategy.h" +#include "parallel/context.h" namespace mindspore { namespace parallel { -constexpr char DEFAULT_CHECKPOINT_PATH[] = "./strategys.ckpt"; - using StrategyMap = std::unordered_map; class StrategyCheckpoint { public: - StrategyCheckpoint() : path_(DEFAULT_CHECKPOINT_PATH), current_train_time_(1) { - train_times_ = 1; - checkpoint_on_ = false; - const char *train_times_str = std::getenv("PARALLEL_TRAIN_TIMES"); - if (train_times_str != nullptr && std::stoi(train_times_str) > 0) { - train_times_ = std::stoi(train_times_str); - } - const char *checkpoint_on_str = std::getenv("PARALLEL_CHECKPOINT_ON"); - if (checkpoint_on_str != nullptr) { - checkpoint_on_ = (std::string(checkpoint_on_str) == "on"); - } + StrategyCheckpoint() { + current_stage_ = 0; + load_file_ = ""; + load_checkpoint_on_ = false; + save_file_ = ""; + save_checkpoint_on_ = false; } ~StrategyCheckpoint() = default; - bool CheckPointExit() const; - Status RemoveCheckPoint() const; + Status Load(StrategyMap *strategy_map); Status Save(const StrategyMap &strategy_map); static StrategyCheckpoint &GetInstance(); - int32_t GetTrainTimes() const { return train_times_; } - int32_t GetCurrentTrainTime() const { return current_train_time_; } - bool CheckPointOn() const { return checkpoint_on_; } + bool LoadCheckPointOn() const { return load_checkpoint_on_; } + bool SaveCheckPointOn() const { return save_checkpoint_on_; } private: - std::string path_; - bool checkpoint_on_; - // total train times for a train, get from Environmental 
variable:TRAIN_TIME, please export it - int32_t train_times_; - int32_t current_train_time_; + std::string load_file_; + std::string save_file_; + bool load_checkpoint_on_; + bool save_checkpoint_on_; + bool CheckPointExit(const std::string path) const; + int32_t current_stage_; }; } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h b/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h index a96097a1d3..37a8ac3d9e 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h +++ b/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h @@ -39,7 +39,8 @@ using OperatorList = std::vector; class RedistributionOperatorInfer { public: const int NONE = -1; - explicit RedistributionOperatorInfer(bool construct_op_flag = true) : construct_op_flag_(construct_op_flag) {} + explicit RedistributionOperatorInfer(bool construct_op_flag = true) + : construct_op_flag_(construct_op_flag), is_cost_model_(false) {} Status Init(const TensorLayout &tensor_layout, const Map &out_tensor_map, RankList dev_list, bool is_cost_model = false); ~RedistributionOperatorInfer() = default; diff --git a/mindspore/ccsrc/pipeline/CMakeLists.txt b/mindspore/ccsrc/pipeline/CMakeLists.txt index 4aadbcce58..630eb510fe 100644 --- a/mindspore/ccsrc/pipeline/CMakeLists.txt +++ b/mindspore/ccsrc/pipeline/CMakeLists.txt @@ -1,12 +1,17 @@ -file(GLOB_RECURSE _PIPELINE_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "pipeline.cc" - "resource.cc" - "pass.cc" - "action.cc" - "validator.cc" - "remove_value_node_dup.cc" - "parse/*.cc" - "static_analysis/*.cc" - ) +file(GLOB_RECURSE _PIPELINE_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "pipeline.cc" + "resource.cc" + "pass.cc" + "action.cc" + "validator.cc" + "remove_value_node_dup.cc" + "parse/*.cc" + "static_analysis/*.cc" +) -add_library(_mindspore_pipeline_obj OBJECT ${_PIPELINE_ALL_SRC_FILES}) \ No newline at end of 
file +if (ENABLE_GE OR ENABLE_D) + file(GLOB_RECURSE _PIPELINE_GE_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "pipeline_ge.cc") + list(APPEND _PIPELINE_SRC_FILES ${_PIPELINE_GE_SRC_FILES}) +endif () + +add_library(_mindspore_pipeline_obj OBJECT ${_PIPELINE_SRC_FILES}) diff --git a/mindspore/ccsrc/pipeline/action.cc b/mindspore/ccsrc/pipeline/action.cc index e8723e66a4..f15723d64d 100644 --- a/mindspore/ccsrc/pipeline/action.cc +++ b/mindspore/ccsrc/pipeline/action.cc @@ -24,6 +24,7 @@ #include #include "ir/func_graph_cloner.h" +#include "parallel/costmodel_context.h" #include "pipeline/pass.h" #include "pipeline/parse/parse_base.h" #include "pipeline/parse/data_converter.h" @@ -129,7 +130,7 @@ bool ParseAction(const ResourcePtr &res) { // This step do this optimize: graph1(x){xx(fv1),xxx(fv2)}, graph2(x){xxx(fv3),xxx(fv4)}-> // graph1(x){base_graph(x, fv1, fv2)}, graph1(x){base_graph(x, fv3, fv4)}, base_graph(x, fv...){xxx,xxx} // all obj_map's graph shared base_graph -bool CombineLikeGraphs(const ResourcePtr &) { +bool CombineLikeGraphs(const ResourcePtr &res) { auto &obj_map = parse::data_converter::GetObjGraphs(); for (auto it : obj_map) { @@ -146,13 +147,15 @@ bool CombineLikeGraphs(const ResourcePtr &) { if (fg->paramter_obj_nodes().size() == 0 || graphs.size() <= 1) { continue; } - auto mng = Manage(base_graph, false); for (auto &fv : fg->paramter_obj_nodes()) { TraceManager::DebugTrace(std::make_shared(fv->debug_info())); auto param = base_graph->add_parameter(); TraceManager::EndTrace(); - auto repl_node = (*cloner->cloned_node())[fv]; - (void)mng->Replace(repl_node, param); + auto &node_users = res->manager()->node_users()[fv]; + for (auto &n : node_users) { + auto repl_n = (*cloner->cloned_node())[n.first]->cast(); + repl_n->set_input(n.second, param); + } } MS_LOG(DEBUG) << "Fg0 paramter_obj_nodes size :" << fg->paramter_obj_nodes().size(); @@ -341,7 +344,10 @@ static std::vector CommonPipeline() { // Resolve the python func 
actions.emplace_back(std::make_pair("symbol_resolve", SymbolResolveAction)); - actions.emplace_back(std::make_pair("combine_like_graphs", CombineLikeGraphs)); + auto multi_graphs = parallel::CostModelContext::GetInstance()->is_multi_subgraphs(); + if (!multi_graphs) { + actions.emplace_back(std::make_pair("combine_like_graphs", CombineLikeGraphs)); + } actions.emplace_back(std::make_pair("inference_opt_prepare", InferenceOptPrepareAction)); // Evaluate type and shape, and specialize actions.emplace_back(std::make_pair("abstract_specialize", AbstractSpecializeAction)); diff --git a/mindspore/ccsrc/pipeline/base.h b/mindspore/ccsrc/pipeline/base.h index 8ca153f45b..57edea03a2 100644 --- a/mindspore/ccsrc/pipeline/base.h +++ b/mindspore/ccsrc/pipeline/base.h @@ -28,13 +28,11 @@ namespace mindspore { namespace pipeline { - struct ExecutorInfo { FuncGraphPtr func_graph; ResourcePtr resource; std::size_t arg_list_size; }; - using ExecutorInfoPtr = std::shared_ptr; inline std::string GetPhasePrefix(const std::string &phase) { diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/init.cc index 86e6d436b7..f1feedb64f 100644 --- a/mindspore/ccsrc/pipeline/init.cc +++ b/mindspore/ccsrc/pipeline/init.cc @@ -97,7 +97,7 @@ PYBIND11_MODULE(_c_expression, m) { py::arg("batch_size"), py::arg("types"), py::arg("shapes"), py::arg("input_indexs"), py::arg("phase") = py::str("dataset"), "Init and exec dataset."); (void)m.def("_set_dataset_mode_config", &mindspore::ConfigManager::SetDatasetModeConfig, "API for set dataset mode."); - (void)m.def("init_ge", &mindspore::pipeline::InitGe, "Init GE"); + (void)m.def("init_backend", &mindspore::pipeline::InitBackend, "Init Backend."); (void)m.def("export_graph", &mindspore::pipeline::ExportGraph, "Export Graph."); @@ -115,8 +115,6 @@ PYBIND11_MODULE(_c_expression, m) { .def("set_device_id", &mindspore::MsContext::set_device_id, "Set device id.") .def("open_tsd", &mindspore::MsContext::OpenTsd, "Open tdt dataset client.") 
.def("close_tsd", &mindspore::MsContext::CloseTsd, "Close tdt dataset client.") - .def("set_hccl_flag", &mindspore::MsContext::set_enable_hccl, "Set enable hccl.") - .def("get_hccl_flag", &mindspore::MsContext::enable_hccl, "Get whether to enable hccl.") .def("set_task_sink_flag", &mindspore::MsContext::set_enable_task_sink, "Set enable task sink.") .def("get_task_sink_flag", &mindspore::MsContext::enable_task_sink, "Get whether to enable task sink.") .def("get_save_graphs_flag", &mindspore::MsContext::save_graphs_flag, "Get whether to save graphs.") @@ -183,10 +181,20 @@ PYBIND11_MODULE(_c_expression, m) { "Set all reduce fusion split sizes.") .def("get_all_reduce_fusion_split_sizes", &ParallelContext::all_reduce_fusion_split_sizes, "Get all reduce fusion split sizes.") + .def("set_enable_all_reduce_fusion", &ParallelContext::set_enable_all_reduce_fusion, + "Set enable/disable all reduce fusion.") + .def("get_enable_all_reduce_fusion", &ParallelContext::enable_all_reduce_fusion, + "Get enable/disable all reduce fusion.") .def("get_parameter_broadcast", &ParallelContext::parameter_broadcast, "Get parameter broadcast.") .def("get_parameter_broadcast_is_set", &ParallelContext::parameter_broadcast_is_set, "Get parameter broadcast is set.") .def("set_parameter_broadcast", &ParallelContext::set_parameter_broadcast, "Set parameter broadcast.") + .def("set_strategy_ckpt_load_file", &ParallelContext::set_strategy_ckpt_load_file, + "Set strategy checkpoint load file.") + .def("set_strategy_ckpt_save_file", &ParallelContext::set_strategy_ckpt_save_file, + "Set strategy checkpoint save file.") + .def("get_strategy_ckpt_load_file", &ParallelContext::strategy_ckpt_load_file, "Get strategy checkpoint load file.") + .def("get_strategy_ckpt_save_file", &ParallelContext::strategy_ckpt_save_file, "Get strategy checkpoint save file.") .def("reset", &ParallelContext::Reset, "Reset auto parallel context."); (void)py::class_>(m, "CostModelContext") @@ -206,10 +214,6 @@ 
PYBIND11_MODULE(_c_expression, m) { "Set the parameter cost_model_gamma of the DP algorithm") .def("get_costmodel_gamma", &CostModelContext::costmodel_gamma, "Get the parameter cost_model_gamma of the DP algorithm.") - .def("set_simplify_cal", &CostModelContext::set_costmodel_simplify_cal, - "Set the parameter cost_model_simplify_cal of the DP algorithm.") - .def("get_simplify_cal", &CostModelContext::costmodel_simplify_cal, - "Get the parameter cost_model_simplify_cal of the DP algorithm.") .def("set_costmodel_communi_threshold", &CostModelContext::set_costmodel_communi_threshold, "Set the parameter cost_model_communi_threshold of the DP algorithm.") .def("get_costmodel_communi_threshold", &CostModelContext::costmodel_communi_threshold, @@ -222,6 +226,8 @@ PYBIND11_MODULE(_c_expression, m) { "Set the parameter cost_model_communi_bias of the DP algorithm.") .def("get_costmodel_communi_bias", &CostModelContext::costmodel_communi_bias, "Get the parameter cost_model_communi_bias of the DP algorithm.") + .def("set_multi_subgraphs", &CostModelContext::set_multi_subgraphs, "Set the parameter is_multi_subgraphs.") + .def("get_multi_subgraphs", &CostModelContext::is_multi_subgraphs, "Get the parameter is_multi_subgraphs.") .def("set_costmodel_allreduce_fusion_algorithm", &CostModelContext::set_costmodel_allreduce_fusion_algorithm, "Set the parameter gradient AllReduce fusion algorithm.") .def("get_costmodel_allreduce_fusion_algorithm", &CostModelContext::costmodel_allreduce_fusion_algorithm, diff --git a/mindspore/ccsrc/pipeline/parse/function_block.cc b/mindspore/ccsrc/pipeline/parse/function_block.cc index 156f727b9e..16b0dfe30e 100644 --- a/mindspore/ccsrc/pipeline/parse/function_block.cc +++ b/mindspore/ccsrc/pipeline/parse/function_block.cc @@ -37,7 +37,7 @@ void FunctionBlock::AddPrevBlock(const FunctionBlockPtr &block) { prev_blocks_.p // write variable records the variable name to corresponding node void FunctionBlock::WriteVariable(const std::string &var_name, 
const AnfNodePtr &node) { - MS_LOG(DEBUG) << "" << func_graph_->ToString() << " write var " << var_name << " with node " << node->DebugString(); + MS_LOG(DEBUG) << func_graph_->ToString() << " write var " << var_name << " with node " << node->DebugString(); vars_[var_name] = node; } @@ -71,7 +71,7 @@ AnfNodePtr FunctionBlock::ReadVariable(const std::string &var) { TraceManager::DebugTrace(std::make_shared(debug_info)); ParameterPtr phi_param = std::make_shared(func_graph()); TraceManager::EndTrace(); - MS_LOG(DEBUG) << "" << func_graph_->ToString() << " generate phi node " << phi_param->ToString() << " for " << var; + MS_LOG(DEBUG) << func_graph_->ToString() << " generate phi node " << phi_param->ToString() << " for " << var; func_graph()->add_parameter(phi_param); phi_nodes_[phi_param] = var; WriteVariable(var, phi_param); diff --git a/mindspore/ccsrc/pipeline/parse/parse.cc b/mindspore/ccsrc/pipeline/parse/parse.cc index 22d6fc9049..af1d67a6fd 100644 --- a/mindspore/ccsrc/pipeline/parse/parse.cc +++ b/mindspore/ccsrc/pipeline/parse/parse.cc @@ -600,8 +600,9 @@ AnfNodePtr Parser::ParseAttribute(const FunctionBlockPtr &block, const py::objec std::string var_name = "self."; std::string attr_name = node.attr("attr").cast(); (void)var_name.append(attr_name); + auto obj = ast()->obj().attr(attr_name.c_str()); if (py::hasattr(ast()->obj(), attr_name.c_str()) && - py::hasattr(ast()->obj().attr(attr_name.c_str()), PYTHON_PRIMITIVE_FLAG)) { + (data_converter::IsCellInstance(obj) || py::hasattr(obj, PYTHON_PRIMITIVE_FLAG))) { return block->MakeResolveSymbol(var_name); } else { return block->ReadVariable(var_name); diff --git a/mindspore/ccsrc/pipeline/parse/parse_base.h b/mindspore/ccsrc/pipeline/parse/parse_base.h index aad8be0d6e..c7ce4e1196 100644 --- a/mindspore/ccsrc/pipeline/parse/parse_base.h +++ b/mindspore/ccsrc/pipeline/parse/parse_base.h @@ -79,6 +79,9 @@ const char PYTHON_PARSE_EXPAND_EXPR_STATEMENT[] = "expand_expr_statement"; const char 
PYTHON_PARSE_GENERATE_SCOPE[] = "generate_scope"; const char PYTHON_PARSE_GET_SCOPE_NAME[] = "get_scope_name"; +const char PYTHON_PARSE_CLASS_SLICE[] = "create_slice_obj"; +const char PYTHON_PARSE_CLASS_ELLIPSIS[] = "create_ellipsis_obj"; + // define the common name const char NAMED_PRIMITIVE_ITER[] = "iter"; const char NAMED_PRIMITIVE_NEXT[] = "next"; diff --git a/mindspore/ccsrc/pipeline/parse/resolve.cc b/mindspore/ccsrc/pipeline/parse/resolve.cc index 284512c943..18f186dbb1 100644 --- a/mindspore/ccsrc/pipeline/parse/resolve.cc +++ b/mindspore/ccsrc/pipeline/parse/resolve.cc @@ -276,9 +276,8 @@ bool ResolveFuncGraph(const FuncGraphPtr &func_graph, const pipeline::ResourceBa (void)parse::python_adapter::set_python_scoped(); - abstract::AbstractBasePtrList args_spec; MS_EXCEPTION_IF_NULL(opt_resolve); - (void)opt_resolve->step(func_graph, args_spec, use_profile); + (void)opt_resolve->step(func_graph, use_profile); return true; } diff --git a/mindspore/ccsrc/pipeline/pass.cc b/mindspore/ccsrc/pipeline/pass.cc index 6cdf641443..d9f805fdc9 100644 --- a/mindspore/ccsrc/pipeline/pass.cc +++ b/mindspore/ccsrc/pipeline/pass.cc @@ -108,6 +108,7 @@ OptPassGroupMap GetOptPassesA(const opt::irpass::OptimizeIRPassLib &irpass) { }); opt::OptPassConfig a_3 = opt::OptPassConfig({ irpass.same_eliminate_, + irpass.check_bprop_eliminate_, irpass.replace_applicator_, }); opt::OptPassConfig virtual_dataset = opt::OptPassConfig({irpass.virtual_dataset_eliminate_}); @@ -205,14 +206,15 @@ bool OptPassGroup(const ResourcePtr &res, const std::string &name) { return false; } - abstract::AbstractBasePtrList args = res->args_spec(); FuncGraphPtr func_graph = res->func_graph(); MS_LOG(DEBUG) << "Start " << name << " func graph:" << func_graph->ToString() << ", " << func_graph->get_return()->DebugString(true); InitOpt(res); if (g_pass_opts.find(name) != g_pass_opts.end()) { - res->set_func_graph(g_pass_opts[name]->step(func_graph, args)); + 
res->set_func_graph(g_pass_opts[name]->step(func_graph)); } + // Note: StepParallel may modify the AbstractValue of the parameters of func_graph, but they are not updated to + // res->args_spec_ yet. So if any later pass or action want to use that variable, it should be set here. return true; } @@ -255,10 +257,9 @@ bool ValidatePass(const ResourcePtr &res) { bool InferenceOptPreparePass(const ResourcePtr &res) { FuncGraphPtr func_graph = res->func_graph(); MS_EXCEPTION_IF_NULL(func_graph); - abstract::AbstractBasePtrList args_spec = res->args_spec(); auto prepare_map = GetInferenceOptPreparePhases(); auto infer_opt_prepare = opt::Optimizer::MakeOptimizer("inference_prepare", res, prepare_map); - (void)infer_opt_prepare->step(func_graph, args_spec, false); + (void)infer_opt_prepare->step(func_graph, false); return true; } diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc index fca105d13c..d04f9617f1 100644 --- a/mindspore/ccsrc/pipeline/pipeline.cc +++ b/mindspore/ccsrc/pipeline/pipeline.cc @@ -38,6 +38,7 @@ #include "parallel/graph_util/get_parallel_info.h" #include "device/kernel_runtime_manager.h" #include "debug/trace.h" +#include "pynative/pynative_execute.h" #if (ENABLE_GE || ENABLE_D) #include "pipeline/pipeline_ge.h" @@ -101,7 +102,7 @@ py::tuple GenerateKey(const std::string &name, const std::unordered_mapresource->func_graph(); + MS_EXCEPTION_IF_NULL(func_graph); + if (phase_s.empty()) { + MS_LOG(ERROR) << "`phase` is empty '" << phase_s << "'!"; + return; + } + std::string name_prefix = phase_s.substr(0, phase_s.find(".")); + std::string pb_filename = std::string("ms_output_") + name_prefix + ".pb"; + std::string filename = GetFilePathName(pb_filename); + + MS_LOG(INFO) << "Begin saving graph to file <<'" << filename << "' in protobuf formart."; + ChangeFileMode(filename, S_IRWXU); + std::ofstream ofs(filename); + if (!ofs.is_open()) { + MS_LOG(ERROR) << "Open file '" << filename << "' failed!"; + return; + } + ofs 
<< GetFuncGraphProtoString(func_graph); + ofs.close(); + // set file mode to read only by user + ChangeFileMode(filename, S_IRUSR); + MS_LOG(INFO) << "End saving graph to file in protobuf format"; +#endif +} + bool ExecutorPy::ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const { std::string phase_prefix = GetPhasePrefix(phase_s); @@ -309,7 +338,7 @@ void ExecutorPy::GetGeBackendPolicy() const { MS_EXCEPTION_IF_NULL(ms_context); std::string backend = ms_context->backend_policy(); if (backend != "ge") { - MS_LOG(EXCEPTION) << "" << backend << " backend policy is not supported under ge backend!"; + MS_LOG(EXCEPTION) << backend << " backend policy is not supported under ge backend!"; } } @@ -365,6 +394,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons info_[phase_s] = executor_info; pip->Run(); + // save compile graph to file in protobuf format + SaveCompiledGraphToPb(phase_s); // save the run graph func to MsPipeLine SaveCompiledGraph(phase_s); @@ -465,10 +496,10 @@ void RunPipelineAction(const ActionItem &action, pipeline::ResourcePtr resource, // load MindSpore IR from file if (action.first == "symbol_resolve") { - MS_LOG(DEBUG) << "" << action.first << " read ir file: " << ir_file; + MS_LOG(DEBUG) << action.first << " read ir file: " << ir_file; std::vector graphs = ImportIR(ir_file); if (graphs.size() == 0) { - MS_LOG(EXCEPTION) << "" << action.first << " read ir file " << ir_file << " failed as no graph found"; + MS_LOG(EXCEPTION) << action.first << " read ir file " << ir_file << " failed as no graph found"; } auto manager = resource->manager(); MS_EXCEPTION_IF_NULL(manager); @@ -557,20 +588,6 @@ void Pipeline::Run() { std::string user_graph_file = GetFilePathName("ModelDigraph.dot"); MS_LOG(DEBUG) << "Save user graph to: " << user_graph_file; draw::DrawUserFuncGraph(user_graph_file, user_graph); - -#ifdef ENABLE_DUMP_IR - std::string filename = GetFilePathName("ms_output.pb"); - ChangeFileMode(filename, 
S_IRWXU); - std::ofstream ofs(filename); - if (!ofs.is_open()) { - MS_LOG(ERROR) << "Open file '" << filename << "' failed!"; - return; - } - ofs << GetFuncGraphProtoString(user_graph); - ofs.close(); - // set file mode to read only by user - ChangeFileMode(filename, S_IRUSR); -#endif } MS_LOG(INFO) << "End"; } @@ -668,6 +685,13 @@ bool InitExecDataset(const std::string &queue_name, int64_t iter_num, int64_t ba const std::vector &types, const std::vector> &shapes, const std::vector &input_indexes, const std::string &phase) { std::string name = MsContext::GetInstance()->backend_policy(); +#ifndef NO_DLIB + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + if (!ms_context->IsTsdOpened() || !ms_context->IsGeInited()) { + (void)InitBackend(); + } +#endif if (name == kMsConvert || name == kMsVm) { return InitExecDatasetVm(queue_name, iter_num, batch_size, types, shapes, input_indexes); } @@ -746,7 +770,7 @@ void ResetOpId() { mindspore::id_generator::reset_id(); } void InitHccl() { #ifdef ENABLE_GE - (void)InitGe(); + (void)InitBackend(); #else mindspore::parse::python_adapter::set_python_env_flag(true); auto ms_context = MsContext::GetInstance(); @@ -754,7 +778,7 @@ void InitHccl() { (void)ms_context->OpenTsd(); uint32_t device_id = ms_context->device_id(); std::string device_name = ms_context->device_target(); - + ms_context->set_enable_hccl(true); if (ms_context->backend_policy() == "ms" && ms_context->device_target() == kAscendDevice) { auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(device_name, device_id); MS_EXCEPTION_IF_NULL(runtime_instance); @@ -768,7 +792,7 @@ void InitHccl() { void FinalizeHccl() { #ifdef ENABLE_GE - (void)FinalizeGe(); + (void)FinalizeBackend(); #else device::KernelRuntimeManager::Instance().ClearRuntimeResource(); #endif @@ -789,7 +813,7 @@ void ReleaseGeTsd() { } } -void InitGe() { +void InitBackend() { // set python env flag 
mindspore::parse::python_adapter::set_python_env_flag(true); // open tsd before ge initialize @@ -801,7 +825,7 @@ void InitGe() { (void)ms_context->InitGe(); } -void FinalizeGe() { +void FinalizeBackend() { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); (void)context_ptr->FinalizeGe(); @@ -810,6 +834,7 @@ void FinalizeGe() { void ClearResAtexit() { MS_LOG(DEBUG) << "Pipeline clear all resource"; + pynative::ClearPyNativeSession(); device::KernelRuntimeManager::Instance().ClearRuntimeResource(); ad::g_k_prims.clear(); diff --git a/mindspore/ccsrc/pipeline/pipeline.h b/mindspore/ccsrc/pipeline/pipeline.h index 865c961ac1..6a99d4dbcd 100644 --- a/mindspore/ccsrc/pipeline/pipeline.h +++ b/mindspore/ccsrc/pipeline/pipeline.h @@ -70,6 +70,7 @@ class ExecutorPy : public std::enable_shared_from_this { ~ExecutorPy(); void SaveCompiledGraph(const std::string &phase_s); + void SaveCompiledGraphToPb(const std::string &phase_s); bool CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm); bool Compile(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm); @@ -115,8 +116,8 @@ bool InitDistribute(const std::map &options); void ResetOpId(); void InitHccl(); void FinalizeHccl(); -void InitGe(); -void FinalizeGe(); +void InitBackend(); +void FinalizeBackend(); void ClearResAtexit(); void ReleaseGeTsd(); diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc index 1da85b5699..c442fba931 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.cc +++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc @@ -52,11 +52,11 @@ void DoExecNonInputGraph(const std::string &phase) { transform::RunOptions run_options; run_options.name = phase; auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); - if (graph_runner == nullptr) { MS_LOG(ERROR) << "Can not found GraphRunner"; return; } + { // Release GIL before calling into (potentially long-running) C++ 
code py::gil_scoped_release release; @@ -181,7 +181,6 @@ bool AddDFGraph(const std::map &info, const py::di size_t pos = phase.find('.'); std::string net_id = ((pos == std::string::npos || pos == phase.size() - 1) ? phase : phase.substr(pos + 1)); std::string phase_prefix = phase.substr(0, pos); - if (phase_prefix == "export") { MS_LOG(INFO) << "Set DfGraphConvertor training : false"; convertor.set_training(false); @@ -319,19 +318,24 @@ void RunGEInitGraph(const py::dict &init_params, const std::string &phase) { py::object ExtractGeneralCnodeRet(const AbstractBasePtr &cnode_data, const py::tuple &data, size_t *count) { MS_EXCEPTION_IF_NULL(cnode_data); - if (*count >= data.size()) { - MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() - << " less than the number of elements required. "; - } if (cnode_data->isa()) { + if (*count >= data.size()) { + MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() + << " less than the number of elements required. 
"; + } + BaseShapePtr shape = cnode_data->BuildShape(); - auto shape_act = shape->cast()->shape(); - Tensor tensor_exp = py::cast(data[*count]); - if (shape_act != tensor_exp.shape()) { - MS_LOG(EXCEPTION) << "The shape of the tensor returned from GE is not the same as " - "the shape of the tensor derived from ME."; + if (!shape->isa()) { + MS_LOG(EXCEPTION) << "The shape of the tensor derived is not Shape, is " << shape->ToString(); } + auto shape_me = shape->cast()->shape(); + auto shape_ge = py::cast(data[*count]).shape(); + if (shape_ge != shape_me) { + MS_LOG(EXCEPTION) << "The shape of the " << *count << "th tensor returned: " << shape_ge + << " is not the same as the shape of the tensor derived: " << shape_me; + } + return data[(*count)++]; } @@ -343,7 +347,7 @@ py::object ExtractGeneralCnodeRet(const AbstractBasePtr &cnode_data, const py::t auto data_tp = cnode_data->cast(); auto elements = data_tp->elements(); size_t size = data_tp->size(); - py::tuple tp = py::tuple(size); + auto tp = py::tuple(size); for (size_t i = 0; i < size; i++) { tp[i] = ExtractGeneralCnodeRet(elements[i], data, count); } @@ -357,11 +361,11 @@ py::object StructureOutput(const AnfNodePtr &output_node, const py::tuple &data, return ValuePtrToPyData(GetValueNode(output_node)); } - if (*count >= data.size()) { - MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() - << " less than the number of elements required. "; - } if (output_node->isa()) { + if (*count >= data.size()) { + MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() + << " less than the number of elements required. 
"; + } return data[(*count)++]; } @@ -374,7 +378,7 @@ py::object StructureOutput(const AnfNodePtr &output_node, const py::tuple &data, if (output_c->IsApply(prim::kPrimMakeTuple)) { auto input_list = output_c->inputs(); size_t size = input_list.size(); - py::tuple tp = py::tuple(size - 1); + auto tp = py::tuple(size - 1); for (size_t i = 1; i < size; i++) { tp[i - 1] = StructureOutput(input_list[i], data, count); } @@ -396,11 +400,8 @@ std::shared_ptr DoExecGraph(const FuncGraphPtr &graph, const std::ve std::vector ge_outputs; transform::RunOptions run_options; - run_options.name = phase; - auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); - if (graph_runner == nullptr) { MS_LOG(EXCEPTION) << "Can not found GraphRunner."; } @@ -464,7 +465,7 @@ void ProcessGeArg(const std::map &info, const py:: if (converted->isa()) { inputs->push_back(converted->cast()); } else { - MS_LOG(EXCEPTION) << "Args " << converted->ToString() << " is not tensor"; + MS_EXCEPTION(TypeError) << "Args " << converted->ToString() << " is not tensor"; } } } @@ -473,7 +474,6 @@ void ProcessGeArg(const std::map &info, const py:: py::object ExecDFGraph(const std::map &info, const py::tuple &args, const std::string &phase) { std::string phase_prefix = GetPhasePrefix(phase); - if (phase_prefix == "save") { DoExecNonInputGraph(phase); ConfigManager::GetInstance().ResetConfig(); @@ -483,7 +483,6 @@ py::object ExecDFGraph(const std::map &info, const if (info.count(phase) == 0) { MS_LOG(EXCEPTION) << "There is no phase:" << phase; } - FuncGraphPtr anf_graph = info.at(phase)->func_graph; #ifdef ENABLE_INFER diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.h b/mindspore/ccsrc/pipeline/pipeline_ge.h index 9dc1524682..f3a363dbe8 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.h +++ b/mindspore/ccsrc/pipeline/pipeline_ge.h @@ -31,7 +31,6 @@ namespace mindspore { namespace pipeline { - namespace py = pybind11; void SetGeOption(const std::map &options); @@ -50,7 +49,6 @@ bool 
InitExecDatasetGe(const std::string &queue_name, int64_t size, int64_t batc const std::vector &input_indexes, const std::string &phase); void ExportDFGraph(const std::string &file_name, const std::string &phase); - } // namespace pipeline } // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_function.h b/mindspore/ccsrc/pipeline/static_analysis/abstract_function.h index 133d5e99a9..513b290a9d 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/abstract_function.h +++ b/mindspore/ccsrc/pipeline/static_analysis/abstract_function.h @@ -41,7 +41,7 @@ class AbstractFuncAtom : public AbstractFunction { AbstractFunctionPtr Join(const AbstractFunctionPtr &other) final; void Visit(std::function) const final; - bool operator==(const AbstractFunction &other) const; + bool operator==(const AbstractFunction &other) const override; std::size_t hash() const override { return tid(); } }; @@ -270,7 +270,7 @@ class TypedPrimitiveAbstractClosure : public AbstractFuncAtom { class DummyAbstractClosure : public AbstractFuncAtom { public: DummyAbstractClosure() = default; - ~DummyAbstractClosure() = default; + ~DummyAbstractClosure() override = default; MS_DECLARE_PARENT(DummyAbstractClosure, AbstractFuncAtom) EvaluatorPtr GetEvaluator(AnalysisEnginePtr) override { MS_LOG(EXCEPTION) << "A dummy function cannot eval."; } diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc b/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc index 210257ea53..f46532ed43 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc @@ -997,6 +997,9 @@ bool AbstractBasePtrListDeepEqual(const AbstractBasePtrList &lhs, const Abstract for (std::size_t i = 0; i < size; i++) { MS_EXCEPTION_IF_NULL(lhs[i]); MS_EXCEPTION_IF_NULL(rhs[i]); + if (lhs[i] == rhs[i]) { + continue; + } if (!(*lhs[i] == *rhs[i])) { return false; } diff --git 
a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc b/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc index aeaa6b17f8..39a1da5e0f 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc @@ -23,6 +23,24 @@ namespace mindspore { namespace abstract { +AnalysisContextPtr AnalysisContext::NewContext(AnalysisContextPtr parent, FuncGraphPtr fg, + const AbstractBasePtrList &args_spec_list) { + auto children_context_map_iter = parent->children_cache_.find(fg); + if (children_context_map_iter != parent->children_cache_.end()) { + auto children_context_map = children_context_map_iter->second; + auto children_context_iter = children_context_map.find(args_spec_list); + if (children_context_iter != children_context_map.end()) { + return children_context_iter->second.lock(); + } + } + AnalysisContextPtr context_new = std::make_shared(parent, fg, args_spec_list); + // Reference to myself, so use weak_ptr to break reference cycle. 
+ auto weak_context = std::weak_ptr(context_new); + context_new->parent_cache_[fg] = weak_context; + parent->children_cache_[fg][args_spec_list] = weak_context; + return context_new; +} + AnalysisContextPtr AnalysisContext::NewFuncGraphContext(const FuncGraphPtr &func_graph, const AbstractBasePtrList &args_spec_list) { FuncGraphPtr graph_parent = func_graph->parent(); @@ -78,7 +96,7 @@ AnalysisContextPtr AnalysisContext::Filter(const FuncGraphPtr &func_graph) { oss << ", context: " << iter.second.lock()->ToString() << "]"; } oss << "}"; - MS_LOG(EXCEPTION) << "" << oss.str() << " NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info()); + MS_LOG(EXCEPTION) << oss.str() << " NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info()); } return parent_context; } @@ -89,6 +107,13 @@ AnalysisContextPtr AnalysisContext::DummyContext() { return dummy_context; } +bool AnalysisContext::IsDummyContext() { + if (parent_ == nullptr && func_graph_ == nullptr && args_spec_list_.empty()) { + return true; + } + return false; +} + const AnalysisContextPtr kDummyAnalysisContext = std::make_shared(nullptr, nullptr, AbstractBasePtrList()); diff --git a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.h b/mindspore/ccsrc/pipeline/static_analysis/analysis_context.h index 0fb043674c..c0b3403702 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.h +++ b/mindspore/ccsrc/pipeline/static_analysis/analysis_context.h @@ -28,6 +28,11 @@ namespace mindspore { namespace abstract { +class AnalysisContext; +using AnalysisContextWeakPtr = std::weak_ptr; +using ArgsSpecToAnalysisContextMap = + std::unordered_map; + // AnalysisContext will be stored in Config in AnalysisCache. class AnalysisContext { public: @@ -41,12 +46,7 @@ class AnalysisContext { ~AnalysisContext() = default; // Helper function to wrapper constructor to save shared_ptr in parent_cache. 
- AnalysisContextPtr NewContext(AnalysisContextPtr parent, FuncGraphPtr fg, const AbstractBasePtrList &args_spec_list) { - AnalysisContextPtr context_new = std::make_shared(parent, fg, args_spec_list); - // Reference to myself, so use weak_ptr to break reference cycle. - context_new->parent_cache_[fg] = std::weak_ptr(context_new); - return context_new; - } + AnalysisContextPtr NewContext(AnalysisContextPtr parent, FuncGraphPtr fg, const AbstractBasePtrList &args_spec_list); // Extend this context with values for another graph. AnalysisContextPtr NewFuncGraphContext(const FuncGraphPtr &func_graph, const AbstractBasePtrList &args_spec_list); @@ -56,6 +56,7 @@ class AnalysisContext { bool operator==(const AnalysisContext &other) const; std::size_t hash(); static AnalysisContextPtr DummyContext(); + bool IsDummyContext(); FuncGraphPtr func_graph() const { return func_graph_; } AnalysisContextPtr parent() const { return parent_; } std::string ToString() const; @@ -66,7 +67,8 @@ class AnalysisContext { AnalysisContextPtr parent_; FuncGraphPtr func_graph_; AbstractBasePtrList args_spec_list_; - std::unordered_map> parent_cache_; + std::unordered_map parent_cache_; + std::unordered_map children_cache_; }; struct ContextHasher { diff --git a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc index 5bad1634d5..06d61292d7 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc @@ -33,8 +33,7 @@ void InferEntryLogging(const EvaluatorPtr &evaluator, const AbstractBasePtrList MS_LOG(DEBUG) << "Evaluator " << evaluator->ToString() << " run for " << out_conf->node()->scope()->name(); } for (size_t i = 0; i < arg_spec_list.size(); i++) { - MS_LOG(DEBUG) << "" << evaluator->ToString() << " input[" << i - << "] abstract value: " << arg_spec_list[i]->ToString(); + MS_LOG(DEBUG) << evaluator->ToString() << " input[" << i << "] abstract value: " << 
arg_spec_list[i]->ToString(); } } @@ -94,11 +93,9 @@ AbstractBasePtr BaseFuncGraphEvaluator::Infer(AnalysisEnginePtr engine, const Ab MS_EXCEPTION_IF_NULL(fg); std::size_t nargs = fg->parameters().size(); if (args_spec_list.size() != nargs) { - MS_LOG(EXCEPTION) << "Function " << fg->ToString() << ", The number of parameters of this function is " - << fg->parameters().size() - << "," - " but the number of provided arguments is " - << args_spec_list.size() << ". NodeInfo: " << trace::GetDebugInfo(fg->debug_info()); + MS_EXCEPTION(TypeError) << "Function " << fg->ToString() << ", The number of parameters of this function is " + << fg->parameters().size() << ", but the number of provided arguments is " + << args_spec_list.size() << ". NodeInfo: " << trace::GetDebugInfo(fg->debug_info()); } MS_EXCEPTION_IF_NULL(parent_context_); MS_EXCEPTION_IF_NULL(engine); @@ -139,7 +136,7 @@ AbstractBasePtrList FuncGraphEvaluator::NormalizeArgs(const AbstractBasePtrList MS_EXCEPTION_IF_NULL(arg); return arg->Broaden(); }); - MS_LOG(DEBUG) << "" << func_graph_->ToString() << " original: " << mindspore::ToString(args_spec_list) + MS_LOG(DEBUG) << func_graph_->ToString() << " original: " << mindspore::ToString(args_spec_list) << ", broaded: " << mindspore::ToString(broaded_list); return broaded_list; } @@ -232,20 +229,20 @@ AbstractBasePtr Evaluator::Run(AnalysisEnginePtr engine, const ConfigPtrList &ar MS_EXCEPTION_IF_NULL(cache_); auto iter = cache_->find(args_spec_list); if (iter == cache_->end()) { - MS_LOG(DEBUG) << "" << evaluator_name << " cache miss, call Infer()."; + MS_LOG(DEBUG) << evaluator_name << " cache miss, call Infer()."; AbstractBasePtr ret = Infer(engine, args_spec_list); if (ret == nullptr) { InferFailLogging(shared_from_base(), args_spec_list, out_conf); MS_LOG(EXCEPTION) << "Evaluator " << evaluator_name << " result is nullptr."; } MS_EXCEPTION_IF_NULL(ret); - MS_LOG(DEBUG) << "" << evaluator_name << " set cache. 
return: " << ret->ToString() << "."; + MS_LOG(DEBUG) << evaluator_name << " set cache. return: " << ret->ToString() << "."; (*cache_)[args_spec_list] = ret; trace::TraceGraphInferLeave(shared_from_base()); return ret; } else { MS_EXCEPTION_IF_NULL(iter->second); - MS_LOG(DEBUG) << "" << evaluator_name << " cache hit. return: " << iter->second->ToString() << "."; + MS_LOG(DEBUG) << evaluator_name << " cache hit. return: " << iter->second->ToString() << "."; trace::TraceGraphInferLeave(shared_from_base()); return iter->second; } @@ -260,7 +257,6 @@ AbstractBasePtr TrivialPrimEvaluator::Run(AnalysisEnginePtr engine, const Config return conf->GetEvaluatedValue(); }); AbstractBasePtr ret = EvalPrim(engine, args_spec_list); - (*cache_)[args_spec_list] = ret; return ret; } diff --git a/mindspore/ccsrc/pipeline/static_analysis/param_validator.cc b/mindspore/ccsrc/pipeline/static_analysis/param_validator.cc index 69f6af0dc0..2cbd33c162 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/param_validator.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/param_validator.cc @@ -103,7 +103,7 @@ ShapePtr CheckShapeSame(const std::string &op, const AbstractTensorPtr &tensor_b ShapePtr shape_base = tensor_base->shape(); ShapePtr shape = tensor->shape(); if (*shape != *shape_base) { - MS_LOG(EXCEPTION) << "" << op << " evaluator first arg shape " << tensor->shape()->ToString() + MS_LOG(EXCEPTION) << op << " evaluator first arg shape " << tensor->shape()->ToString() << " are not consistent with second arg shape " << tensor_base->shape()->ToString(); } return shape_base; @@ -113,7 +113,7 @@ TypePtr CheckDtypeSame(const std::string &op, const AbstractTensorPtr &tensor_ba TypePtr type_base = tensor_base->element()->BuildType(); TypePtr type = tensor->element()->BuildType(); if (*type != *type_base) { - MS_LOG(EXCEPTION) << "" << op << " evaluator first arg dtype " << type_base->ToString() + MS_LOG(EXCEPTION) << op << " evaluator first arg dtype " << type_base->ToString() << " are not 
consistent with second arg dtype " << type->ToString(); } return type_base; @@ -121,14 +121,14 @@ TypePtr CheckDtypeSame(const std::string &op, const AbstractTensorPtr &tensor_ba int CheckAxis(const std::string &op, const ValuePtr &axis, int minimum, int max) { if (axis == nullptr) { - MS_LOG(EXCEPTION) << "" << op << " evaluator axis is null"; + MS_LOG(EXCEPTION) << op << " evaluator axis is null"; } if (!axis->isa()) { - MS_LOG(EXCEPTION) << "" << op << " evaluator axis should be int, but got " << axis->type_name(); + MS_LOG(EXCEPTION) << op << " evaluator axis should be int, but got " << axis->type_name(); } int axis_value = GetValue(axis); if (axis_value > max || axis_value < minimum) { - MS_LOG(EXCEPTION) << "" << op << " evaluator axis value should be in the range [" << minimum << ", " << max + MS_LOG(EXCEPTION) << op << " evaluator axis value should be in the range [" << minimum << ", " << max << "], but get " << axis_value; } return axis_value; @@ -136,8 +136,7 @@ int CheckAxis(const std::string &op, const ValuePtr &axis, int minimum, int max) void CheckArgsSize(const std::string &op, const mindspore::abstract::AbstractBasePtrList &args_spec_list, size_t size_expect) { if (args_spec_list.size() != size_expect) { - MS_LOG(EXCEPTION) << "" << op << " input args size should be " << size_expect << ", but got " - << args_spec_list.size(); + MS_LOG(EXCEPTION) << op << " input args size should be " << size_expect << ", but got " << args_spec_list.size(); } for (size_t i = 0; i < size_expect; i++) { diff --git a/mindspore/ccsrc/pipeline/static_analysis/param_validator.h b/mindspore/ccsrc/pipeline/static_analysis/param_validator.h index 5904c7e67a..ecb9529a58 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/param_validator.h +++ b/mindspore/ccsrc/pipeline/static_analysis/param_validator.h @@ -70,7 +70,7 @@ ABSTRACT_REPORT_NAME_TRAITS(Class) template std::shared_ptr CheckArg(const std::string &op, const AbstractBasePtrList &args_spec_list, size_t index) { if 
(index >= args_spec_list.size()) { - MS_EXCEPTION(ValueError) << "" << op << " evaluator args list index out of bound, size " << args_spec_list.size() + MS_EXCEPTION(ValueError) << op << " evaluator args list index out of bound, size " << args_spec_list.size() << ", index " << index; } auto arg = dyn_cast(args_spec_list[index]); diff --git a/mindspore/ccsrc/pipeline/static_analysis/prim.cc b/mindspore/ccsrc/pipeline/static_analysis/prim.cc index 233d5df305..274f63844c 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/prim.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/prim.cc @@ -289,6 +289,21 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) { dic["shape"] = shape; dic["dtype"] = abs_base->BuildType(); dic["value"] = BuildValue(abs_base->BuildValue()); + } else if (abs_base->isa()) { + auto arg_slice = dyn_cast(abs_base); + std::vector shape; + dic["shape"] = shape; + dic["dtype"] = arg_slice->BuildType(); + dic["value"] = BuildValue(arg_slice->BuildValue()); + } else if (abs_base->isa()) { + auto value = abs_base->cast()->ref(); + dic = ConvertAbstractToPython(value); + } else if (abs_base->isa()) { + auto arg_slice = dyn_cast(abs_base); + std::vector shape; + dic["shape"] = shape; + dic["dtype"] = arg_slice->BuildType(); + dic["value"] = BuildValue(arg_slice->BuildValue()); } else if (abs_base->isa()) { auto arg_tuple = dyn_cast(abs_base); size_t len = arg_tuple->size(); @@ -321,6 +336,10 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) { dic["shape"] = py::none(); dic["dtype"] = py::none(); dic["value"] = py::none(); + } else if (abs_base->isa()) { + dic["shape"] = py::none(); + dic["dtype"] = abs_base->BuildType(); + dic["value"] = py::none(); } else { auto value = abs_base->BuildValue(); if ((*value == *kAnyValue)) { @@ -398,6 +417,10 @@ AbstractBasePtr PyInferRes2Abstract(const PrimitivePyPtr &prim_py, const py::dic AbstractBasePtr PythonPrimEvaluator::EvalPrim(const AnalysisEnginePtr &, const AbstractBasePtrList 
&args) { MS_LOG(DEBUG) << "Eval for:" << prim_py_->ToString(); + const auto &iter = cache_->find(args); + if (iter != cache_->end()) { + return iter->second; + } auto py_args = PreparePyInputs(prim_py_, args); auto pyobj = prim_py_->GetPyObj(); @@ -411,6 +434,7 @@ AbstractBasePtr PythonPrimEvaluator::EvalPrim(const AnalysisEnginePtr &, const A auto res_spec = PyInferRes2Abstract(prim_py_, output); MS_LOG(DEBUG) << "Python InferTensor result spec: " << res_spec->ToString() << "."; + (*cache_)[args] = res_spec; return res_spec; } diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc index 6230df44a5..c5ee7447f1 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc @@ -87,7 +87,10 @@ AbstractBasePtr AnalysisCache::GetValue(const AnfNodeConfigPtr &conf) { std::size_t AnfNodeConfigHasher::operator()(const AnfNodeConfigPtr conf) const { MS_EXCEPTION_IF_NULL(conf); MS_EXCEPTION_IF_NULL(conf->node()); - std::size_t hash_value = hash_combine(conf->node()->hash(), conf->context()->hash()); + std::size_t hash_value = conf->node()->hash(); + if (!conf->context()->IsDummyContext()) { + hash_value = hash_combine(hash_value, std::hash{}(conf->context().get())); + } if (conf->context() != nullptr && conf->context()->func_graph() != nullptr) { MS_LOG(DEBUG) << "NodeConfigHasher Node: " << conf->node()->DebugString() << ", Graph: " << conf->context()->func_graph()->ToString() << " ### , hash value: " << hash_value; @@ -122,7 +125,7 @@ AnalysisResult AnalysisEngine::Run(const FuncGraphPtr &func_graph, const Abstrac MS_EXCEPTION_IF_NULL(root_context->func_graph()); AnfNodeConfigPtr output_conf = MakeConfig(root_context->func_graph()->get_return(), root_context); MS_EXCEPTION_IF_NULL(func_graph); - MS_LOG(INFO) << "" << func_graph->ToString() << ": Run finished."; + MS_LOG(INFO) << func_graph->ToString() << ": Run 
finished."; AnalysisResult result; MS_EXCEPTION_IF_NULL(output_conf); @@ -167,7 +170,7 @@ AbstractBasePtr AnalysisEngine::Eval(const AnfNodeConfigPtr &conf) { for (auto iter : compute_conf_stack_) { buffer << " -> " << iter->DebugString(); } - MS_LOG(DEBUG) << "" << buffer.str(); + MS_LOG(DEBUG) << buffer.str(); #endif MS_LOG(DEBUG) << "Begin Eval NodeConfig " << conf->ToString(); MS_EXCEPTION_IF_NULL(node); @@ -271,6 +274,18 @@ void AnalysisEngine::ClearEvaluatorCache() { MS_EXCEPTION_IF_NULL(evaluator->cache()); evaluator->cache()->clear(); } + for (auto &element : prim_constructors_) { + EvaluatorPtr evaluator = element.second; + MS_EXCEPTION_IF_NULL(evaluator); + MS_EXCEPTION_IF_NULL(evaluator->cache()); + evaluator->cache()->clear(); + } + for (auto &element : prim_py_evaluators_) { + EvaluatorPtr evaluator = element.second; + MS_EXCEPTION_IF_NULL(evaluator); + MS_EXCEPTION_IF_NULL(evaluator->cache()); + evaluator->cache()->clear(); + } } void AnalysisEngine::Clear() { @@ -296,7 +311,17 @@ EvaluatorPtr GetPrimEvaluator(const PrimitivePtr &prim, const AnalysisEnginePtr if (prim->HasPyEvaluator()) { auto prim_py = dyn_cast(prim); if (prim_py != nullptr) { - return std::make_shared(prim_py); + if (engine == nullptr) { + return std::make_shared(prim_py); + } + + const auto &iter = engine->prim_py_evaluators_.find(prim_py); + if (iter != engine->prim_py_evaluators_.end()) { + return iter->second; + } + evaluator = std::make_shared(prim_py); + engine->prim_py_evaluators_[prim_py] = evaluator; + return evaluator; } MS_LOG(EXCEPTION) << "The primitive with python evaluator should be a python primitive."; } diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h index ef4f78e619..beffb9ee70 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h +++ b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h @@ -83,9 +83,12 @@ class AnfNodeConfig : public Config { // used by 
unordered_map; bool operator==(const AnfNodeConfig &other) const { - // compare node with pointer, context with content; + // compare node with pointer, context with pointer except DummyContext as it's created by make_shared; // context should not be nullptr; - return (node_ == other.node_) && (*context_ == *other.context_); + if (context_->IsDummyContext() && other.context_->IsDummyContext()) { + return true; + } + return (node_ == other.node_) && (context_ == other.context_); } std::string ToString() const override { @@ -194,6 +197,7 @@ class AnalysisEngine : public std::enable_shared_from_this { const PrimEvaluatorMap &PrimConstructors() const { return prim_constructors_; } AnalysisCache cache_; + std::unordered_map prim_py_evaluators_; private: const PrimEvaluatorMap &prim_constructors_; diff --git a/mindspore/ccsrc/pipeline/static_analysis/utils.cc b/mindspore/ccsrc/pipeline/static_analysis/utils.cc index 997a089301..4c399f6ffc 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/utils.cc +++ b/mindspore/ccsrc/pipeline/static_analysis/utils.cc @@ -175,7 +175,7 @@ std::vector RealBroadcast(const std::string &op, std::vector x_shape, output_i = x_i; } else { MS_LOG(EXCEPTION) - << "" << op + << op << " evaluator the shape of first tensor and the shape of second tensor do not meet the broadcasting " "requirements"; } diff --git a/mindspore/ccsrc/pre_activate/CMakeLists.txt b/mindspore/ccsrc/pre_activate/CMakeLists.txt new file mode 100644 index 0000000000..611b5de4e2 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/CMakeLists.txt @@ -0,0 +1,13 @@ +file(GLOB_RECURSE _PREACTIVATE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "common/*.cc" + "mem_reuse/*.cc" + "pass/*.cc" + "gpu/*.cc" +) + +if (ENABLE_D) + file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "ascend/*.cc") + list(APPEND _PREACTIVATE_SRC_LIST ${_D_SRC_LIST}) +endif () + +add_library(_mindspore_pre_activate_obj OBJECT ${_PREACTIVATE_SRC_LIST}) \ No newline at end of file diff --git 
a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc index a2d82525e9..4294f48e47 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc @@ -19,9 +19,10 @@ #include "pre_activate/common/optimizer.h" #include "pre_activate/ascend/ir_fission/bn_split.h" #include "pre_activate/ascend/ir_fission/bn_grad_split.h" +#include "pre_activate/ascend/ir_fission/batch_norm_grad_split.h" #include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h" #include "pre_activate/ascend/ir_fission/layer_norm_grad_split.h" -#include "pre_activate/pass/allreduce_fusion.h" +#include "pre_activate/pass/communication_op_fusion.h" #include "pre_activate/ascend/ir_fusion/square_sum_fusion.h" #include "pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" #include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" @@ -38,14 +39,16 @@ #include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h" #include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" #include "pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h" +#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h" #include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h" -#include "pre_activate/ascend/ir_fusion/transdata_split.h" +#include "pre_activate/ascend/ir_fission/transdata_split.h" #include "pre_activate/ascend/ir_fission/topk_split.h" #include "pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h" #include "pre_activate/ascend/ir_fusion/mul_add_fusion.h" #include "pre_activate/ascend/ir_fusion/mul_addn_fusion.h" #include "pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h" #include "pre_activate/ascend/ir_fusion/remove_reshape_pair.h" +#include "pre_activate/ascend/ir_fusion/derelu_fusion.h" #include "pre_activate/ascend/format_type/insert_trans_op.h" 
#include "pre_activate/pass/getitem_tuple.h" #include "pre_activate/pass/optimize_dependence.h" @@ -57,7 +60,7 @@ #include "pre_activate/ascend/format_type/check_consistency.h" #include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" #include "pre_activate/ascend/format_type/deal_ref_trans_and_cast.h" -#include "pre_activate/ascend/ir_fission/add_memcpy_async.h" +#include "pre_activate/ascend/enhancer/add_memcpy_async.h" #include "pre_activate/ascend/format_type/insert_cast_for_runop.h" #include "pre_activate/ascend/format_type/insert_transdata_for_runop.h" #include "pre_activate/ascend/enhancer/getnext_memcpy_elimination.h" @@ -85,7 +88,6 @@ void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) { ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); @@ -94,8 +96,9 @@ void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) { ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); } } // namespace @@ -183,14 +186,15 @@ void AscendBackendIRFusionOptimization(const std::shared_ptrgraph_id()) + ".ir"; DumpIR(file_path, kernel_graph); DumpIRProto(kernel_graph, "before_hwopt"); } auto optimizer = std::make_shared(); auto ir_fusion_pm = std::make_shared("ir_fusion_pm"); - ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); if (context_ptr->ir_fusion_flag()) { 
AddAscendBackendOptionalIRFusion(ir_fusion_pm.get()); @@ -205,7 +209,8 @@ void AscendBackendIRFusionOptimization(const std::shared_ptrOptimize(kernel_graph); kernel_graph->SetExecOrderByDefault(); if (save_graphs) { - std::string file_path = save_graphs_path + "/" + "hwopt_d_ir_fusion_after.ir"; + std::string file_path = save_graphs_path + "/" + "hwopt_d_ir_fusion_after" + "_graph_" + + std::to_string(kernel_graph->graph_id()) + ".ir "; DumpIR(file_path, kernel_graph); } } @@ -249,7 +254,8 @@ void AscendBackendOptimization(const std::shared_ptr &kern save_graphs_path = "."; } if (save_graphs) { - std::string file_path = save_graphs_path + "/" + "hwopt_d_before.ir"; + std::string file_path = + save_graphs_path + "/" + "hwopt_d_before" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; DumpIR(file_path, kernel_graph); } // data layout optimization @@ -261,7 +267,9 @@ void AscendBackendOptimization(const std::shared_ptr &kern auto optimizer = std::make_shared(); auto other_pm = std::make_shared("other_pm"); other_pm->AddPass(std::make_shared()); + other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); + other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); @@ -273,7 +281,8 @@ void AscendBackendOptimization(const std::shared_ptr &kern (void)optimizer->Optimize(kernel_graph); kernel_graph->SetExecOrderByDefault(); if (save_graphs) { - std::string file_path = save_graphs_path + "/" + "hwopt_d_end.ir"; + std::string file_path = + save_graphs_path + "/" + "hwopt_d_end" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; DumpIR(file_path, kernel_graph, true); DumpIRProto(kernel_graph, "after_hwopt"); } diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h index fcd9c15c58..65e70def85 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h @@ -26,7 +26,6 @@ void AscendDataLayout(const std::shared_ptr &kernel_graph) void AscendMixPrecision(const std::shared_ptr &kernel_graph); void AscendBackendOptimization(const std::shared_ptr &kernel_graph); void AscendBackendIRFusionOptimization(const std::shared_ptr &kernel_graph); -void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr &kernel_graph); } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h index a8fd7dc514..1840966358 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h @@ -21,6 +21,7 @@ #include #include "device/ascend/kernel_select_ascend.h" #include "kernel/kernel_query.h" +#include "kernel/tbe/tbe_kernel_select.h" namespace mindspore { namespace opt { @@ -36,6 +37,16 @@ class KernelSelect { }; using KernelSelectPtr = std::shared_ptr; +class SupportedChecker { + public: + SupportedChecker() = default; + virtual ~SupportedChecker() = default; + virtual bool CheckSupported(const AnfNodePtr &anf_node, const kernel::KernelBuildInfoPtr &select_kernel_build_info) { + return kernel::CheckSupported(anf_node, select_kernel_build_info); + } +}; +using SupportedCheckerPtr = std::shared_ptr; + class KernelQuery { public: KernelQuery() = default; diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc index 58b8a93516..a2313a50d0 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc @@ -17,12 +17,14 @@ #include #include +#include #include #include #include #include #include #include +#include #include "kernel/kernel_fusion.h" #include 
"debug/anf_ir_dump.h" @@ -260,33 +262,32 @@ CNodePtr CreateFusionOp(const std::vector &inputs_list, const std::v return buffer_fusion_kernel; } -kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector &inputs_list_in, - const std::vector &inputs_list, +kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector &inputs_list, const std::vector &outputs_list) { MS_LOG(DEBUG) << "Start Create Kernel Info"; kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; // inputs format and data type std::vector inputs_format; std::vector inputs_data_type; - for (auto node : inputs_list_in) { - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - auto &inputs = cnode->inputs(); - for (size_t input_index = 1; input_index < inputs.size(); ++input_index) { - if (std::find(inputs_list.begin(), inputs_list.end(), inputs[input_index]) != inputs_list.end()) { - inputs_format.push_back(AnfAlgo::GetInputFormat(node, input_index - 1)); - inputs_data_type.push_back(AnfAlgo::GetInputDeviceDataType(node, input_index - 1)); - } - } + for (const auto &input : inputs_list) { + auto real_input = AnfAlgo::VisitKernel(input, 0); + inputs_format.push_back(AnfAlgo::GetOutputFormat(real_input.first, real_input.second)); + inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(real_input.first, real_input.second)); } // outputs format and data type std::vector outputs_format; std::vector outputs_data_type; - for (size_t index = 0; index < outputs_list.size(); ++index) { - for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(outputs_list[index]); ++idx) { - auto kernel_with_index = AnfAlgo::VisitKernel(outputs_list[index], idx); - outputs_format.push_back(AnfAlgo::GetOutputFormat(kernel_with_index.first, kernel_with_index.second)); - outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(kernel_with_index.first, kernel_with_index.second)); + for (const auto &output : outputs_list) { + if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { 
+ auto tuple_getitem = output->cast(); + MS_EXCEPTION_IF_NULL(tuple_getitem); + outputs_format.push_back(AnfAlgo::GetOutputFormat( + tuple_getitem->input(1), IntToSize(GetValue(GetValueNode(tuple_getitem->input(2)))))); + outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( + tuple_getitem->input(1), IntToSize(GetValue(GetValueNode(tuple_getitem->input(2)))))); + } else { + outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0)); + outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0)); } } builder.SetInputsFormat(inputs_format); @@ -320,140 +321,234 @@ AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::K return tuple_item; } -void ReplaceOldNode(const std::vector &outputs_list, const AnfNodePtr &buffer_fusion_kernel, - session::KernelGraph *kernel_graph) { +void ReplaceInputNodeInOtherFusionScope(std::unordered_map *buffer_fusion_infos, + int32_t fusion_id, const AnfNodePtr &output_item, + const AnfNodePtr &replace_item) { + for (int32_t id = fusion_id + 1; id <= SizeToInt(buffer_fusion_infos->size()); ++id) { + auto itr = std::find((*buffer_fusion_infos)[id].inputs_list.begin(), (*buffer_fusion_infos)[id].inputs_list.end(), + output_item); + if (itr != (*buffer_fusion_infos)[id].inputs_list.end()) { + MS_LOG(DEBUG) << "replace input of other pattern, id = " << id; + *itr = replace_item; + } + } +} + +void ReplaceOldNode(std::unordered_map *buffer_fusion_infos, int32_t fusion_id, + const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_graph); auto manager = kernel_graph->manager(); MS_EXCEPTION_IF_NULL(manager); - if (outputs_list.size() == 1) { // single output - (void)manager->Replace(outputs_list[0], buffer_fusion_kernel); + auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; + if (buffer_fusion_info.outputs_list.size() == 1) { // single output + (void)manager->Replace(buffer_fusion_info.outputs_list[0], buffer_fusion_kernel); + 
ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[0], + buffer_fusion_kernel); } else { // multiple output - size_t real_idx = 0; - for (size_t index = 0; index < outputs_list.size(); ++index) { - if (AnfAlgo::GetOutputTensorNum(outputs_list[index]) == 1) { - auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, real_idx++); - (void)manager->Replace(outputs_list[index], tuple_item); - } else { - std::vector make_tuple_inputs; - AbstractBasePtrList abstract_list; - make_tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple)); - for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(outputs_list[index]); ++idx) { - auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, real_idx++); - abstract_list.push_back(tuple_item->abstract()); - make_tuple_inputs.push_back(tuple_item); + for (size_t index = 0; index < buffer_fusion_info.outputs_list.size(); ++index) { + auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, index); + (void)manager->Replace(buffer_fusion_info.outputs_list[index], tuple_item); + ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[index], + tuple_item); + } + } +} + +void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, + std::unordered_map *buffer_fusion_infos) { + MS_EXCEPTION_IF_NULL(buffer_fusion_infos); + auto nodes = TopoSort(kernel_graph->get_return()); + for (auto &node : nodes) { + MS_EXCEPTION_IF_NULL(node); + if (AnfAlgo::IsRealCNodeKernel(node) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, node)) { + auto fusion_id = AnfAlgo::GetNodeAttr(node, kOpAttrFusionId); + (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(node); + } + } +} + +void GetFusionScopeInputNodeList(const session::KernelGraph &kernel_graph, + std::unordered_map *buffer_fusion_infos) { + MS_EXCEPTION_IF_NULL(buffer_fusion_infos); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + 
+ for (auto &buffer_fusion_info : *buffer_fusion_infos) { + auto fusion_id = buffer_fusion_info.first; + auto fusion_info = buffer_fusion_info.second; + for (const auto &node : fusion_info.anf_nodes) { + auto cnode = node->cast(); + for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) { + auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0); + if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) == + fusion_info.anf_nodes.end()) { + if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(), + (*buffer_fusion_infos)[fusion_id].inputs_list.end(), + cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) { + (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx)); + } } - AnfNodePtr make_tuple = kernel_graph->NewCNode(make_tuple_inputs); - make_tuple->set_abstract(std::make_shared(abstract_list)); - (void)manager->Replace(outputs_list[index], make_tuple); } } } } -void GetInputList(const CNodePtr &node, const int32_t cur_fusion_id, std::vector *inputs_list) { - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(inputs_list); - auto &inputs = node->inputs(); - for (size_t input_index = 1; input_index < inputs.size(); ++input_index) { - auto input = inputs[input_index]; - if (AnfAlgo::IsRealCNodeKernel(input)) { - if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, input)) { - auto fusion_id = AnfAlgo::GetNodeAttr(input, kOpAttrFusionId); - if (fusion_id != cur_fusion_id) { - inputs_list->push_back(input); +bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) { + MS_EXCEPTION_IF_NULL(node1); + MS_EXCEPTION_IF_NULL(node2); + auto getitem1 = node1->cast(); + auto getitem2 = node2->cast(); + MS_EXCEPTION_IF_NULL(getitem1); + MS_EXCEPTION_IF_NULL(getitem2); + auto output_idx1 = GetValue(GetValueNode(getitem1->input(2))); + auto output_idx2 = GetValue(GetValueNode(getitem2->input(2))); + return output_idx1 < output_idx2; +} + +void 
GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, + std::unordered_map *buffer_fusion_infos) { + MS_EXCEPTION_IF_NULL(kernel_graph); + MS_EXCEPTION_IF_NULL(buffer_fusion_infos); + auto manager = kernel_graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + + for (auto &buffer_fusion_info : *buffer_fusion_infos) { + auto fusion_id = buffer_fusion_info.first; + auto fusion_info = buffer_fusion_info.second; + for (const auto &node : fusion_info.anf_nodes) { + if (AnfAlgo::GetOutputTensorNum(node) == 1) { + for (auto use_node : manager->node_users()[node]) { + if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) == + fusion_info.anf_nodes.end()) { + (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node); + break; + } } } else { - inputs_list->push_back(input); - } - } else if (input->isa()) { - for (auto &input_in : input->cast()->inputs()) { - if (AnfAlgo::IsRealCNodeKernel(input_in)) { - if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, input_in)) { - auto fusion_id = AnfAlgo::GetNodeAttr(input_in, kOpAttrFusionId); - if (fusion_id != cur_fusion_id) { - inputs_list->push_back(input); + int prev_idx = 0; + std::vector tuple_getitem_nodes; + std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(), + std::back_inserter(tuple_getitem_nodes), + [](const std::pair &use_node) { return use_node.first; }); + std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare); + for (auto getitem : tuple_getitem_nodes) { + auto getitem_ptr = getitem->cast(); + auto input2 = getitem_ptr->input(2); + auto output_idx = GetValue(GetValueNode(input2)); + for (int stub_idx = prev_idx; stub_idx < output_idx; ++stub_idx) { + auto stub_node = CreateTupleGetItem(node, kernel_graph, IntToSize(stub_idx)); + (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node); + } + prev_idx = output_idx + 1; + for (auto item_use_node : manager->node_users()[getitem]) { + if 
(std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) == + fusion_info.anf_nodes.end()) { + (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem); + break; } - } else { - inputs_list->push_back(input); } } } - } else { - inputs_list->push_back(input); } } } -void CheckCurrentNodeIsInput(const CNodePtr &node, const int32_t &cur_fusion_id, - std::unordered_map *buffer_fusion_infos) { - MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - if ((*buffer_fusion_infos).find(cur_fusion_id) == (*buffer_fusion_infos).end()) { - BufferFusionInfo_t buffer_fusion_info; - (*buffer_fusion_infos)[cur_fusion_id] = buffer_fusion_info; - } - std::vector inputs_list; - GetInputList(node, cur_fusion_id, &inputs_list); - if (!inputs_list.empty()) { - if (!(*buffer_fusion_infos)[cur_fusion_id].inputs_list.empty()) { - (void)(*buffer_fusion_infos)[cur_fusion_id].inputs_list.insert( - (*buffer_fusion_infos)[cur_fusion_id].inputs_list.end(), inputs_list.begin(), inputs_list.end()); - (void)(*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.insert( - (*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.end(), node); +void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vector &outputs_list, + const AnfNodePtr &fusion_kernel) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto manager = kernel_graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + for (size_t idx = 0; idx < outputs_list.size(); ++idx) { + auto output = outputs_list[idx]; + if (output->isa() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { + auto real_output = AnfAlgo::VisitKernel(output, 0); + auto output_cnode = output->cast(); + MS_EXCEPTION_IF_NULL(output_cnode); + auto input2 = output_cnode->input(2); + auto output_idx = GetValue(GetValueNode(input2)); + session::AnfWithOutIndex out_pair(real_output.first, output_idx); + if (kernel_graph->IsInRefOutputMap(out_pair)) { + auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); + 
session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); + kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); + } } else { - (*buffer_fusion_infos)[cur_fusion_id].inputs_list = inputs_list; - (*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.push_back(node); + session::AnfWithOutIndex out_pair(output, 0); + if (kernel_graph->IsInRefOutputMap(out_pair)) { + auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); + session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); + kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); + } } } } -void InsertNode(const AnfNodePtr &node, std::vector *list) { - MS_EXCEPTION_IF_NULL(list); - if (std::find(list->begin(), list->end(), node) == list->end()) { - (void)list->insert(list->end(), node); +void MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + std::unordered_set *fused_set, FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(fused_set); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + auto conv = cnode->input(1); + if (conv->isa() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) { + std::vector output_used_num{SizeToInt(manager->node_users()[conv].size())}; + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), conv); + std::unordered_set record{cnode, conv}; + candidate_fusion->push_back(record); + fused_set->insert(record.begin(), record.end()); } } -void CheckCurrentNodeIsOutput(const CNodePtr &node, const int32_t &cur_fusion_id, - std::unordered_map *buffer_fusion_infos) { - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - for (auto &input : node->inputs()) { - MS_EXCEPTION_IF_NULL(input); - if (AnfAlgo::IsRealCNodeKernel(input) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, input)) { - auto fusion_id = AnfAlgo::GetNodeAttr(input, kOpAttrFusionId); - if 
(buffer_fusion_infos->find(fusion_id) == buffer_fusion_infos->end()) { - BufferFusionInfo_t buffer_fusion_info; - (*buffer_fusion_infos)[fusion_id] = buffer_fusion_info; - } - if (fusion_id != cur_fusion_id) { - InsertNode(input, &((*buffer_fusion_infos)[fusion_id].outputs_list)); - } - } else if (input->isa()) { - for (auto &input_in : input->cast()->inputs()) { - if (AnfAlgo::IsRealCNodeKernel(input_in) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, input_in)) { - auto fusion_id = AnfAlgo::GetNodeAttr(input_in, kOpAttrFusionId); - if (buffer_fusion_infos->find(fusion_id) == buffer_fusion_infos->end()) { - BufferFusionInfo_t buffer_fusion_info; - (*buffer_fusion_infos)[fusion_id] = buffer_fusion_info; - } - if (fusion_id != cur_fusion_id) { - InsertNode(input_in, &((*buffer_fusion_infos)[fusion_id].outputs_list)); - } - } - } +void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, + std::unordered_set *fused_set, FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(fused_set); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + auto getitem = relu_input->cast(); + auto bnupdate = getitem->input(1); + if (bnupdate->isa() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { + std::vector output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); + for (auto out_getitem : manager->node_users()[bnupdate]) { + auto out_getitem_ptr = out_getitem.first->cast(); + auto input2 = out_getitem_ptr->input(2); + auto output_idx = GetValue(GetValueNode(input2)); + output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); } + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); + std::unordered_set record{cnode, bnupdate}; + candidate_fusion->push_back(record); + fused_set->insert(record.begin(), record.end()); } } -void GetFusionScopeNodeList(const 
session::KernelGraph &kernel_graph, - std::unordered_map *buffer_fusion_infos) { - MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - auto nodes = TopoSort(kernel_graph.get_return()); - for (auto &node : nodes) { - MS_EXCEPTION_IF_NULL(node); - if (AnfAlgo::IsRealCNodeKernel(node) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, node)) { - auto fusion_id = AnfAlgo::GetNodeAttr(node, kOpAttrFusionId); - (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(node); +void MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, + std::unordered_set *fused_set, FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(fused_set); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + auto add = relu_input->cast(); + MS_EXCEPTION_IF_NULL(add); + auto tuple_getitem = add->input(1); + if (tuple_getitem->isa() && AnfAlgo::GetCNodeName(tuple_getitem) == prim::kPrimTupleGetItem->name()) { + auto getitem = tuple_getitem->cast(); + auto bnupdate = getitem->input(1); + if (bnupdate->isa() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { + std::vector output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); + for (auto out_getitem : manager->node_users()[bnupdate]) { + auto out_getitem_ptr = out_getitem.first->cast(); + auto input2 = out_getitem_ptr->input(2); + auto output_idx = GetValue(GetValueNode(input2)); + output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); + } + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); + std::unordered_set record{cnode, relu_input, bnupdate}; + candidate_fusion->push_back(record); + fused_set->insert(record.begin(), record.end()); } } } @@ -470,15 +565,14 @@ void MatchOpNamePattern(const session::KernelGraph &kernel_graph, std::unordered auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); if 
(AnfAlgo::GetCNodeName(cnode) == kBNTrainingReduceOpName) { - auto conv = cnode->input(1); - if (conv->isa() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) { - auto manager = kernel_graph.manager(); - MS_EXCEPTION_IF_NULL(manager); - auto &users = manager->node_users(); - AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(users[conv].size()), conv); - std::unordered_set record({cnode, conv}); - candidate_fusion->push_back(record); - fused_set->insert(record.begin(), record.end()); + MatchConvBnreduce(cnode, kernel_graph, fused_set, candidate_fusion); + } else if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || + AnfAlgo::GetCNodeName(cnode) == prim::kPrimRelu->name()) { + auto relu_input = cnode->input(1); + if (relu_input->isa() && AnfAlgo::GetCNodeName(relu_input) == prim::kPrimTensorAdd->name()) { + MatchBnupdateAddRelu(cnode, relu_input, kernel_graph, fused_set, candidate_fusion); + } else if (relu_input->isa() && AnfAlgo::GetCNodeName(relu_input) == prim::kPrimTupleGetItem->name()) { + MatchBnupdateRelu(cnode, relu_input, kernel_graph, fused_set, candidate_fusion); } } } @@ -536,31 +630,15 @@ void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, std::unord } } // namespace -void BufferFusion::GetBufferFusionInfo(const session::KernelGraph &kernel_graph, +void BufferFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, std::unordered_map *buffer_fusion_infos) const { MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - std::vector node_list = TopoSort(kernel_graph.get_return()); - for (auto &node : node_list) { - if (!AnfAlgo::IsRealCNodeKernel(node)) { - continue; - } - - int32_t cur_fusion_id = -1; - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) { - cur_fusion_id = AnfAlgo::GetNodeAttr(cnode, kOpAttrFusionId); - CheckCurrentNodeIsInput(cnode, cur_fusion_id, buffer_fusion_infos); - } - // Check if current node is output - CheckCurrentNodeIsOutput(cnode, 
cur_fusion_id, buffer_fusion_infos); - } - - GetFusionScopeNodeList(kernel_graph, buffer_fusion_infos); + GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); + GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos); + GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); for (auto &buffer_fusion_info : *buffer_fusion_infos) { buffer_fusion_info.second.kernel_build_info = - CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list_in, buffer_fusion_info.second.inputs_list, - buffer_fusion_info.second.outputs_list); + CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); } } @@ -569,7 +647,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c bool change = false; std::unordered_map buffer_fusion_infos; buffer_fusion_infos.clear(); - GetBufferFusionInfo(*kernel_graph, &buffer_fusion_infos); + GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos); std::vector fusion_scope_infos; for (auto &buffer_fusion_info : buffer_fusion_infos) { @@ -600,7 +678,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed"; continue; } - change = ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_mods[fusion_id], kernel_graph); + change = ReplaceFusionOp(&buffer_fusion_infos, fusion_id, kernel_mods[fusion_id], kernel_graph); } MS_LOG(DEBUG) << "End Buffer Fusion"; return change; @@ -630,8 +708,10 @@ bool BufferFusion::MatchBufferFusionPattern(const session::KernelGraph &kernel_g return true; } -bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, const kernel::KernelModPtr &kernel_ptr, +bool BufferFusion::ReplaceFusionOp(std::unordered_map *buffer_fusion_infos, + int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const { + auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; auto 
buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list, buffer_fusion_info.anf_nodes, kernel_graph); AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get()); @@ -650,8 +730,8 @@ bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, } AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get()); AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get()); - // replace node - ReplaceOldNode(buffer_fusion_info.outputs_list, buffer_fusion, kernel_graph); + SetFusionOpRefInfos(kernel_graph, buffer_fusion_info.outputs_list, buffer_fusion); + ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph); return true; } diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h index c54fd0cd97..f2fa63601b 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h @@ -30,7 +30,6 @@ namespace opt { struct BufferFusionInfo_t { std::vector anf_nodes; std::vector inputs_list; - std::vector inputs_list_in; std::vector outputs_list; kernel::KernelBuildInfoPtr kernel_build_info; }; @@ -44,10 +43,10 @@ class BufferFusion : public Pass { bool Run(const FuncGraphPtr &graph) override; private: - void GetBufferFusionInfo(const session::KernelGraph &kernel_graph, + void GetBufferFusionInfo(session::KernelGraph *kernel_graph, std::unordered_map *buffer_fusion_infos) const; - bool ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, const kernel::KernelModPtr &kernel_ptr, - session::KernelGraph *kernel_graph) const; + bool ReplaceFusionOp(std::unordered_map *buffer_fusion_infos, int32_t fusion_id, + const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const; bool MatchBufferFusionPattern(const session::KernelGraph &kernel_graph) const; bool 
FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const; }; diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.cc b/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.cc rename to mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.cc index bbea944750..bb708e02a2 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.cc +++ b/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/add_memcpy_async.h" +#include "pre_activate/ascend/enhancer/add_memcpy_async.h" #include #include "utils/utils.h" #include "session/anf_runtime_algorithm.h" diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.h b/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.h similarity index 82% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.h rename to mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.h index 227fc74fed..900b0fb46a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.h +++ b/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.h @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADD_MEMCPY_ASYNC_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADD_MEMCPY_ASYNC_H_ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_ADD_MEMCPY_ASYNC_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_ADD_MEMCPY_ASYNC_H_ #include #include "pre_activate/common/optimizer.h" @@ -28,4 +28,4 @@ class AddMemcpyAsync : public PatternProcessPass { }; } // namespace opt } // namespace mindspore -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADD_MEMCPY_ASYNC_H_ +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_ADD_MEMCPY_ASYNC_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc new file mode 100644 index 0000000000..cb8670dd00 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc @@ -0,0 +1,132 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "pre_activate/ascend/ir_fission/batch_norm_grad_split.h" + +#include +#include +#include + +#include "utils/utils.h" +#include "utils/context/ms_context.h" +#include "common/utils.h" +#include "pre_activate/common/helper.h" +#include "device/kernel_info.h" +#include "session/anf_runtime_algorithm.h" + +namespace mindspore { +namespace opt { +namespace { +void CreateOutputsOfUpdateGrad(const FuncGraphPtr &graph, const CNodePtr &bn_grad_node, + std::vector *bn_update_grad_outputs) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(bn_grad_node); + auto bn_grad_inputs = bn_grad_node->inputs(); + if (bn_grad_inputs.size() < kBNGradInputNum) { + MS_LOG(EXCEPTION) << "BNGrad has wrong inputs size"; + } + std::vector bn_update_grad_inputs = { + NewValueNode(std::make_shared(kBNTrainingUpdateGradOpName)), bn_grad_inputs[1], bn_grad_inputs[2], + bn_grad_inputs[4], bn_grad_inputs[5]}; + auto bn_update_grad = graph->NewCNode(bn_update_grad_inputs); + MS_EXCEPTION_IF_NULL(bn_update_grad); + bn_update_grad->set_kernel_info(std::make_shared()); + bn_update_grad->set_scope(bn_grad_node->scope()); + + auto types = {AnfAlgo::GetOutputInferDataType(bn_grad_node, 1), AnfAlgo::GetOutputInferDataType(bn_grad_node, 2)}; + auto shapes = {AnfAlgo::GetOutputInferShape(bn_grad_node, 1), AnfAlgo::GetOutputInferShape(bn_grad_node, 2)}; + AnfAlgo::SetOutputInferTypeAndShape(types, shapes, bn_update_grad.get()); + + AnfAlgo::CopyNodeAttr(kAttrEpsilon, bn_grad_node, bn_update_grad); + CreateMultipleOutputsOfAnfNode(graph, bn_update_grad, kBNTrainingUpdateGradOutputNum, bn_update_grad_outputs); +} + +void CreateOutputsOfReduceGrad(const FuncGraphPtr &graph, const CNodePtr &bn_grad_node, + const std::vector &bn_update_grad_outputs, + std::vector *bn_reduce_grad_outputs) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(bn_grad_node); + auto bn_grad_inputs = bn_grad_node->inputs(); + if (bn_grad_inputs.size() < kBNGradInputNum) { + MS_LOG(EXCEPTION) << "BNGrad has wrong 
inputs size"; + } + if (bn_update_grad_outputs.size() != kBNTrainingUpdateGradOutputNum) { + MS_LOG(EXCEPTION) << "BNTrainingReduceGrad_outputs has wrong size"; + } + std::vector bn_reduce_grad_inputs = { + NewValueNode(std::make_shared(kBNTrainingReduceGradOpName)), + bn_grad_inputs[1], + bn_grad_inputs[2], + bn_update_grad_outputs[0], + bn_update_grad_outputs[1], + bn_grad_inputs[3], + bn_grad_inputs[4], + bn_grad_inputs[5]}; + auto bn_reduce_grad = graph->NewCNode(bn_reduce_grad_inputs); + MS_EXCEPTION_IF_NULL(bn_reduce_grad); + bn_reduce_grad->set_kernel_info(std::make_shared()); + bn_reduce_grad->set_scope(bn_grad_node->scope()); + + auto types = {AnfAlgo::GetOutputInferDataType(bn_grad_node, 0)}; + auto shapes = {AnfAlgo::GetOutputInferShape(bn_grad_node, 0)}; + AnfAlgo::SetOutputInferTypeAndShape(types, shapes, bn_reduce_grad.get()); + + AnfAlgo::CopyNodeAttr(kAttrEpsilon, bn_grad_node, bn_reduce_grad); + (*bn_reduce_grad_outputs).push_back(bn_reduce_grad); +} + +} // namespace +const BaseRef BatchNormGradSplit::DefinePattern() const { + VarPtr Xs = std::make_shared(); + auto prim = std::make_shared(kBatchNormGradOpName); + return VectorRef({prim, Xs}); +} + +const AnfNodePtr BatchNormGradSplit::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &) const { + MS_EXCEPTION_IF_NULL(node); + MS_EXCEPTION_IF_NULL(func_graph); + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto primitive = AnfAlgo::GetCNodePrimitive(cnode); + MS_EXCEPTION_IF_NULL(primitive); + if (!primitive->HasAttr(kAttrIsTraining)) { + MS_LOG(INFO) << "Op BatchNormGrad must have attrs of is_training"; + return nullptr; + } + if (!AnfAlgo::GetNodeAttr(cnode, kAttrIsTraining)) { + MS_LOG(INFO) << "is_training must be true"; + return nullptr; + } + + std::vector bn_update_grad_outputs; + CreateOutputsOfUpdateGrad(func_graph, cnode, &bn_update_grad_outputs); + if (bn_update_grad_outputs.size() != kBNTrainingUpdateGradOutputNum) { + MS_LOG(EXCEPTION) << 
"bn_update_grad_outputs has wrong size"; + } + + std::vector bn_reduce_grad_outputs; + CreateOutputsOfReduceGrad(func_graph, cnode, bn_update_grad_outputs, &bn_reduce_grad_outputs); + if (bn_reduce_grad_outputs.size() != kSingleOutputNum) { + MS_LOG(EXCEPTION) << "bn_reduce_grad_outputs has wrong size"; + } + + std::vector make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), bn_reduce_grad_outputs[0], + bn_update_grad_outputs[0], bn_update_grad_outputs[1]}; + auto make_tuple = func_graph->NewCNode(make_tuple_inputs); + return make_tuple; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h b/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h new file mode 100644 index 0000000000..e539fdb27c --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h @@ -0,0 +1,33 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_GRAD_SPLIT_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_GRAD_SPLIT_H_ + +#include "pre_activate/common/optimizer.h" +#include "pre_activate/common/helper.h" + +namespace mindspore { +namespace opt { +class BatchNormGradSplit : public PatternProcessPass { + public: + explicit BatchNormGradSplit(bool multigraph = true) : PatternProcessPass("batch_norm_grad_split", multigraph) {} + ~BatchNormGradSplit() override = default; + const BaseRef DefinePattern() const override; + const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; +}; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_GRAD_SPLIT_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc index 5924f6cd1c..4bdd5f0382 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc @@ -16,6 +16,9 @@ #include "pre_activate/ascend/ir_fission/topk_split.h" #include #include +#include +#include "pre_activate/common/helper.h" +#include "kernel/kernel_build_info.h" #include "utils/utils.h" #include "session/kernel_graph.h" #include "session/anf_runtime_algorithm.h" @@ -25,6 +28,7 @@ namespace mindspore { namespace opt { constexpr size_t kFloat16Len = 2; // size of float16; +constexpr size_t kTopkIndexK = 1; namespace { tensor::TensorPtr CreateTensor(const AnfNodePtr &node) { // 1 create tensor @@ -70,37 +74,68 @@ ValueNodePtr CreateValueNode(const AnfNodePtr &node) { AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), indices_const.get()); return indices_const; } + +kernel::KernelBuildInfoPtr CreateKernelBuildInfo() { + kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT}); + 
builder.SetOutputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT}); + builder.SetInputsDeviceType({kNumberTypeFloat16, kNumberTypeFloat16}); + builder.SetOutputsDeviceType({kNumberTypeFloat16, kNumberTypeInt32}); + return builder.Build(); +} } // namespace const BaseRef TopKSplit::DefinePattern() const { - VarPtr X = std::make_shared(); - MS_EXCEPTION_IF_NULL(X); + VarPtr X1 = std::make_shared(); + VarPtr X2 = std::make_shared(); auto prim = std::make_shared(kTopKOpName); - MS_EXCEPTION_IF_NULL(prim); - return VectorRef({prim, X}); + return VectorRef({prim, X1, X2}); } const AnfNodePtr TopKSplit::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(node); auto kernel_graph = func_graph->cast(); - auto indices_const = CreateValueNode(node); // set value node as topk's input auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); - MS_LOG(INFO) << "already has input size: " << cnode->inputs().size(); - cnode->add_input(indices_const); + // Copy a new node to check supported. 
+ std::vector new_inputs{NewValueNode(std::make_shared(kTopKOpName))}; + new_inputs.insert(new_inputs.end(), cnode->inputs().begin() + 1, cnode->inputs().end()); + CNodePtr new_cnode = func_graph->NewCNode(new_inputs); + MS_EXCEPTION_IF_NULL(new_cnode); + new_cnode->set_abstract(cnode->abstract()); + new_cnode->set_scope(cnode->scope()); + AnfAlgo::CopyNodeAttrs(cnode, new_cnode); + CheckCNodeInputSize(new_cnode, kTopkInputNum); + // Convert the tensor input to scalar and convert it to attr + auto input_k = new_cnode->input(kTopkIndexK + 1); + MS_EXCEPTION_IF_NULL(input_k); + if (!IsValueNode(input_k)) { + return nullptr; + } + ValuePtr value = GetValueNode(input_k); + MS_EXCEPTION_IF_NULL(value); + auto tensor = value->cast(); + MS_EXCEPTION_IF_NULL(tensor); + int32_t *data = reinterpret_cast(tensor->data_c()); + MS_EXCEPTION_IF_NULL(data); + auto new_value_node = std::make_shared(MakeValue(*data)); + new_cnode->set_input(kTopkIndexK + 1, new_value_node); + + std::unordered_set attr_index{kTopkIndexK}; + ConstInputToAttr(new_cnode, attr_index); + auto indices_const = CreateValueNode(new_cnode); + new_cnode->add_input(indices_const); + MS_EXCEPTION_IF_NULL(supported_checker_); + if (!supported_checker_->CheckSupported(new_cnode, CreateKernelBuildInfo())) { + return nullptr; + } + if (kernel_graph != nullptr) { kernel_graph->AddValueNodeToGraph(indices_const); } - CNodePtr new_cnode = nullptr; - if (kernel_graph == nullptr) { - new_cnode = std::make_shared(*cnode); - } else { - new_cnode = kernel_graph->NewCNode(cnode); - } - MS_EXCEPTION_IF_NULL(new_cnode); return new_cnode; } } // namespace opt diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h b/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h index 8fcbbac475..e7293e1fa3 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h @@ -16,15 +16,22 @@ #ifndef 
MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_ +#include #include "pre_activate/common/optimizer.h" +#include "pre_activate/ascend/ascend_helper.h" + namespace mindspore { namespace opt { class TopKSplit : public PatternProcessPass { public: - explicit TopKSplit(bool multigraph = true) : PatternProcessPass("topk_split", multigraph) {} + explicit TopKSplit(bool multigraph = true) + : PatternProcessPass("topk_split", multigraph), supported_checker_(std::make_shared()) {} ~TopKSplit() override = default; const BaseRef DefinePattern() const override; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; + + private: + SupportedCheckerPtr supported_checker_; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transdata_split.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/transdata_split.cc rename to mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc index d3990fe898..2c77794b14 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transdata_split.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/transdata_split.h" +#include "pre_activate/ascend/ir_fission/transdata_split.h" #include #include "pre_activate/ascend/ascend_helper.h" #include "session/anf_runtime_algorithm.h" diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transdata_split.h b/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.h similarity index 83% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/transdata_split.h rename to mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.h index 0e84c23256..f450897db1 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transdata_split.h +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.h @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_TRANSDATA_SPLIT_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_TRANSDATA_SPLIT_H_ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TRANSDATA_SPLIT_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TRANSDATA_SPLIT_H_ #include #include #include @@ -42,4 +42,4 @@ class TransDataSplit : public Pass { }; } // namespace opt } // namespace mindspore -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_TRANSDATA_SPLIT_H_ +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TRANSDATA_SPLIT_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc index 6b7f732a6a..caea9599c1 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include "session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" @@ -71,6 +72,38 @@ AnfNodePtr GetMul0(const FuncGraphPtr &graph, 
const AnfNodePtr &input2, const An } return mul0; } + +bool QuitFusion(const FuncGraphPtr &graph, const AnfNodePtr &mul0_anf, const AnfNodePtr &reduce_sum) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(mul0_anf); + MS_EXCEPTION_IF_NULL(reduce_sum); + if (!mul0_anf->isa()) { + return true; + } + auto mul0 = mul0_anf->cast(); + MS_EXCEPTION_IF_NULL(mul0); + + // when network is _VirtualDatasetCell, quit fusion + if (mul0->fullname_with_scope().find("network-_VirtualDatasetCell") != std::string::npos) { + return true; + } + + auto manager = graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + if (manager->node_users().find(reduce_sum) == manager->node_users().end()) { + MS_LOG(EXCEPTION) << "node has no output in manager"; + } + const AnfNodeIndexSet &outputs_set = manager->node_users()[reduce_sum]; + auto it = std::find_if(outputs_set.begin(), outputs_set.end(), [&mul0](const std::pair &node_index) { + return node_index.first == mul0->input(1) || node_index.first == mul0; + }); + if (it != outputs_set.end()) { + MS_LOG(INFO) << "ReduceSum's output node is mul0's input or mul0! 
If do fusion, graph will exist a circle"; + return true; + } + + return false; +} } // namespace const BaseRef ConfusionMulGradFusion::DefinePattern() const { @@ -98,6 +131,9 @@ const AnfNodePtr ConfusionMulGradFusion::Process(const FuncGraphPtr &graph, cons MS_LOG(INFO) << "Mul0 do not exist, quit fusion"; return nullptr; } + if (QuitFusion(graph, mul0, node)) { + return nullptr; + } auto fusion_node = CreateFusionNode(graph, reduce_sum, mul0, input3); std::vector fusion_node_outputs; diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc index 8078247c2a..a524d694e6 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc @@ -47,7 +47,7 @@ void SetAttrsForFusionNode(const AnfNodePtr &sub_anf, const AnfNodePtr &fusion_n const BaseRef ConfusionSoftmaxGradRule::DefinePattern() const { return VectorRef( - {prim::kPrimSub, input0_, VectorRef({prim::kPrimReduceSum, VectorRef({prim::kPrimMul, input0_, input1_})})}); + {prim::kPrimSub, input0_, VectorRef({prim::kPrimReduceSum, VectorRef({prim::kPrimMul, input1_, input0_})})}); } const AnfNodePtr ConfusionSoftmaxGradRule::Process(const FuncGraphPtr &graph, const AnfNodePtr &node, diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc index d5ea315de1..74b63a5b52 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc @@ -50,9 +50,22 @@ CNodePtr CreateReluV2(const FuncGraphPtr &graph, const CNodePtr &relu) { MS_EXCEPTION_IF_NULL(new_node); new_node->set_scope(relu->scope()); - // ReluV2's 2rd output is mask whose data type is uint8 and value is 0 or 1, so shape is an empty vector + // ReluV2's 2rd output is mask whose data 
type is uint8 TypeId mask_dtype = kNumberTypeUInt8; - std::vector mask_shape; + std::vector mask_shape = AnfAlgo::GetOutputInferShape(relu, 0); + if (mask_shape.size() != 4) { + MS_LOG(WARNING) << "relu's infer shape size not equal 4"; + return nullptr; + } + auto input_dtype = AnfAlgo::GetPrevNodeOutputInferDataType(relu, 0); + if (input_dtype == kNumberTypeUInt8 || input_dtype == kNumberTypeInt8) { + mask_shape[1] = (mask_shape[1] + 31) / 32; + mask_shape.push_back(4); + } else { + mask_shape[1] = (mask_shape[1] + 15) / 16; + mask_shape.push_back(2); + } + auto types = {AnfAlgo::GetOutputInferDataType(relu, 0), mask_dtype}; auto shapes = {AnfAlgo::GetOutputInferShape(relu, 0), mask_shape}; AnfAlgo::SetOutputInferTypeAndShape(types, shapes, new_node.get()); @@ -91,6 +104,9 @@ const AnfNodePtr DereluFusion::Process(const FuncGraphPtr &graph, const AnfNodeP MS_EXCEPTION_IF_NULL(relu); auto relu_v2 = CreateReluV2(graph, relu); + if (relu_v2 == nullptr) { + return nullptr; + } std::vector relu_v2_node_outputs; CreateMultipleOutputsOfAnfNode(graph, relu_v2, kReluV2OutputNum, &relu_v2_node_outputs); diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc index 12f2684b3b..7641772d7a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc @@ -23,6 +23,8 @@ namespace mindspore { namespace opt { namespace { +constexpr size_t kReplaceOutputIndex0 = 3; +constexpr size_t kReplaceOutputIndex1 = 4; bool IsC(const BaseRef &n) { if (utils::isa(n)) { AnfNodePtr in = utils::cast(n); @@ -32,52 +34,6 @@ bool IsC(const BaseRef &n) { return false; } -AnfNodePtr GetBatchNormNode(const AnfNodePtr &node) { - MS_EXCEPTION_IF_NULL(node); - auto depend_cnode = node->cast(); - MS_EXCEPTION_IF_NULL(depend_cnode); - CheckCNodeInputSize(depend_cnode, kDependInputNum); - AnfNodePtr 
assign_sub = depend_cnode->input(2); - MS_EXCEPTION_IF_NULL(assign_sub); - auto assign_sub_cnode = assign_sub->cast(); - MS_EXCEPTION_IF_NULL(assign_sub_cnode); - CheckCNodeInputSize(assign_sub_cnode, kAssignSubInputNum); - AnfNodePtr mul = assign_sub_cnode->input(2); - MS_EXCEPTION_IF_NULL(mul); - auto mul_cnode = mul->cast(); - MS_EXCEPTION_IF_NULL(mul_cnode); - CheckCNodeInputSize(mul_cnode, kMulInputNum); - AnfNodePtr sub = mul_cnode->input(1); - MS_EXCEPTION_IF_NULL(sub); - auto sub_cnode = sub->cast(); - MS_EXCEPTION_IF_NULL(sub_cnode); - CheckCNodeInputSize(sub_cnode, kSubInputNum); - AnfNodePtr tuple_getitem = sub_cnode->input(2); - MS_EXCEPTION_IF_NULL(tuple_getitem); - auto tuple_getitem_cnode = tuple_getitem->cast(); - MS_EXCEPTION_IF_NULL(tuple_getitem_cnode); - CheckCNodeInputSize(tuple_getitem_cnode, kTupleGetitemInputNum); - return tuple_getitem_cnode->input(1); -} - -bool CompareTupleGetitem(const AnfNodePtr &n1, const AnfNodePtr &n2) { - MS_EXCEPTION_IF_NULL(n1); - MS_EXCEPTION_IF_NULL(n2); - auto n1_cnode = n1->cast(); - auto n2_cnode = n2->cast(); - MS_EXCEPTION_IF_NULL(n1_cnode); - MS_EXCEPTION_IF_NULL(n2_cnode); - auto index_input1 = n1_cnode->input(kInputNodeOutputIndexInTupleGetItem); - MS_EXCEPTION_IF_NULL(index_input1); - auto value_node1 = index_input1->cast(); - MS_EXCEPTION_IF_NULL(value_node1); - auto index_input2 = n2_cnode->input(kInputNodeOutputIndexInTupleGetItem); - MS_EXCEPTION_IF_NULL(index_input2); - auto value_node2 = index_input2->cast(); - MS_EXCEPTION_IF_NULL(value_node2); - return GetValue(value_node1->value()) < GetValue(value_node2->value()); -} - void GetBNOutput(const FuncGraphPtr &func_graph, const AnfNodePtr &bn, std::vector *bn_outputs) { MS_EXCEPTION_IF_NULL(func_graph); MS_EXCEPTION_IF_NULL(bn); @@ -92,54 +48,35 @@ void GetBNOutput(const FuncGraphPtr &func_graph, const AnfNodePtr &bn, std::vect MS_EXCEPTION_IF_NULL(output); bn_outputs->push_back(output); } - sort(bn_outputs->begin(), bn_outputs->end(), 
CompareTupleGetitem); } } // namespace const BaseRef FusedBatchNormFusion::DefinePattern() const { - const auto prim_batch_norm = std::make_shared(kBatchNormOpName); std::shared_ptr Xs = std::make_shared(); VarPtr index0 = std::make_shared(IsC); VarPtr index1 = std::make_shared(IsC); VarPtr index2 = std::make_shared(IsC); - VectorRef batch_norm = VectorRef({prim_batch_norm, data_input_var0_, data_input_var1_, data_input_var2_, Xs}); + VectorRef batch_norm = VectorRef({batch_norm_var_, data_input0_var_, data_input1_var_, data_input2_var_, Xs}); VectorRef tuple_getitem0 = VectorRef({prim::kPrimTupleGetItem, batch_norm, index0}); VectorRef tuple_getitem1 = VectorRef({prim::kPrimTupleGetItem, batch_norm, index1}); VectorRef tuple_getitem2 = VectorRef({prim::kPrimTupleGetItem, batch_norm, index2}); - VectorRef sub0 = VectorRef({prim::kPrimSub, variable_input_var0_, tuple_getitem1}); - VectorRef sub1 = VectorRef({prim::kPrimSub, variable_input_var1_, tuple_getitem2}); - VectorRef mul0 = VectorRef({prim::kPrimMul, sub0, constant_input_var0_}); - VectorRef mul1 = VectorRef({prim::kPrimMul, sub1, constant_input_var1_}); - VectorRef assign_sub0 = VectorRef({prim::kPrimAssignSub, variable_input_var0_, mul0}); - VectorRef assign_sub1 = VectorRef({prim::kPrimAssignSub, variable_input_var1_, mul1}); + VectorRef sub0 = VectorRef({prim::kPrimSub, variable_input0_var_, tuple_getitem1}); + VectorRef sub1 = VectorRef({prim::kPrimSub, variable_input1_var_, tuple_getitem2}); + VectorRef mul0 = VectorRef({prim::kPrimMul, sub0, constant_input0_var_}); + VectorRef mul1 = VectorRef({prim::kPrimMul, sub1, constant_input1_var_}); + VectorRef assign_sub0 = VectorRef({prim::kPrimAssignSub, variable_input0_var_, mul0}); + VectorRef assign_sub1 = VectorRef({prim::kPrimAssignSub, variable_input1_var_, mul1}); VectorRef depend0 = VectorRef({prim::kPrimDepend, tuple_getitem0, assign_sub0}); return VectorRef({prim::kPrimDepend, depend0, assign_sub1}); } -abstract::AbstractTuplePtr 
FusedBatchNormFusion::CreateAbstractOfFusedBatchNorm(const EquivPtr &equiv, - const AnfNodePtr &bn) const { - MS_EXCEPTION_IF_NULL(equiv); - MS_EXCEPTION_IF_NULL(bn); - auto variable_input0 = utils::cast((*equiv)[variable_input_var0_]); - MS_EXCEPTION_IF_NULL(variable_input0); - auto variable_input1 = utils::cast((*equiv)[variable_input_var1_]); - MS_EXCEPTION_IF_NULL(variable_input1); - auto bn_abstract_tuple = dyn_cast(bn->abstract()); - MS_EXCEPTION_IF_NULL(bn_abstract_tuple); - if (bn_abstract_tuple->elements().size() != kBnOutputNum) { - MS_LOG(EXCEPTION) << "The abstract size of node bn must be " << kBnOutputNum << ", but it is " - << bn_abstract_tuple->elements().size(); - } - AbstractBasePtrList fused_bn_abstract_list{bn_abstract_tuple->elements()[0], variable_input0->abstract(), - variable_input1->abstract(), bn_abstract_tuple->elements()[3], - bn_abstract_tuple->elements()[4]}; - auto abstract_tuple = std::make_shared(fused_bn_abstract_list); - return abstract_tuple; -} - ValuePtr FusedBatchNormFusion::GetFactor(const EquivPtr &equiv) const { MS_EXCEPTION_IF_NULL(equiv); - auto constant_input = utils::cast((*equiv)[constant_input_var0_]); + auto iter_constant_input0 = (*equiv).find(constant_input0_var_); + if (iter_constant_input0 == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the constant_input0 var after matched."; + } + auto constant_input = utils::cast(iter_constant_input0->second); MS_EXCEPTION_IF_NULL(constant_input); if (!constant_input->isa()) { return nullptr; @@ -158,53 +95,187 @@ ValuePtr FusedBatchNormFusion::GetFactor(const EquivPtr &equiv) const { return MakeValue(tensor_data[0]); } -const AnfNodePtr FusedBatchNormFusion::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, - const EquivPtr &equiv) const { +AnfNodePtr FusedBatchNormFusion::CreateBNTrainingReduce(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &equiv) const { MS_EXCEPTION_IF_NULL(func_graph); + 
MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(equiv); - // Set inputs - auto data_input0 = utils::cast((*equiv)[data_input_var0_]); - MS_EXCEPTION_IF_NULL(data_input0); - auto data_input1 = utils::cast((*equiv)[data_input_var1_]); + // Set input to create node + auto iter_data_input0 = (*equiv).find(data_input0_var_); + if (iter_data_input0 == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the data_input0 var after matched."; + } + std::vector bn_training_reduce_inputs = { + NewValueNode(std::make_shared(kBNTrainingReduceOpName)), + utils::cast(iter_data_input0->second)}; + auto bn_training_reduce = func_graph->NewCNode(bn_training_reduce_inputs); + MS_EXCEPTION_IF_NULL(bn_training_reduce); + bn_training_reduce->set_scope(node->scope()); + // Set abstract + auto iter_data_input1 = (*equiv).find(data_input1_var_); + if (iter_data_input1 == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the data_input1 var after matched."; + } + auto data_input1 = utils::cast(iter_data_input1->second); MS_EXCEPTION_IF_NULL(data_input1); - auto data_input2 = utils::cast((*equiv)[data_input_var2_]); + auto iter_data_input2 = (*equiv).find(data_input2_var_); + if (iter_data_input2 == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the data_input2 var after matched."; + } + auto data_input2 = utils::cast(iter_data_input2->second); MS_EXCEPTION_IF_NULL(data_input2); - auto variable_input0 = utils::cast((*equiv)[variable_input_var0_]); + AbstractBasePtrList abstract_list{data_input1->abstract(), data_input2->abstract()}; + auto abstract_tuple = std::make_shared(abstract_list); + bn_training_reduce->set_abstract(abstract_tuple); + return bn_training_reduce; +} + +void FusedBatchNormFusion::GetBNTrainingUpdateInputs(const EquivPtr &equiv, + const std::vector &bn_training_reduce_outputs, + std::vector *bn_training_update_inputs) const { + MS_EXCEPTION_IF_NULL(equiv); + 
MS_EXCEPTION_IF_NULL(bn_training_update_inputs); + auto iter_data_input0 = (*equiv).find(data_input0_var_); + if (iter_data_input0 == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the data_input0 var after matched."; + } + auto iter_data_input1 = (*equiv).find(data_input1_var_); + if (iter_data_input1 == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the data_input1 var after matched."; + } + auto iter_data_input2 = (*equiv).find(data_input2_var_); + if (iter_data_input2 == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the data_input2 var after matched."; + } + auto iter_variable_input0 = (*equiv).find(variable_input0_var_); + if (iter_variable_input0 == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the variable_input0 var after matched."; + } + auto iter_variable_input1 = (*equiv).find(variable_input1_var_); + if (iter_variable_input1 == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the variable_input1 var after matched."; + } + if (bn_training_reduce_outputs.size() != kBNTrainingReduceOutputNum) { + MS_LOG(EXCEPTION) << "The output size of node bn_training_reduce must be " << kBNTrainingReduceOutputNum + << ", but it is " << bn_training_reduce_outputs.size(); + } + *bn_training_update_inputs = { + NewValueNode(std::make_shared(kBNTrainingUpdateOpName)), + utils::cast(iter_data_input0->second), + bn_training_reduce_outputs[0], + bn_training_reduce_outputs[1], + utils::cast(iter_data_input1->second), + utils::cast(iter_data_input2->second), + utils::cast(iter_variable_input0->second), + utils::cast(iter_variable_input1->second), + }; +} + +void FusedBatchNormFusion::GetBNTrainingUpdateAbstractList(const EquivPtr &equiv, const AnfNodePtr &bn, + std::vector *abstract_list) const { + MS_EXCEPTION_IF_NULL(equiv); + MS_EXCEPTION_IF_NULL(bn); + MS_EXCEPTION_IF_NULL(abstract_list); + auto bn_abstract_tuple 
= dyn_cast(bn->abstract()); + MS_EXCEPTION_IF_NULL(bn_abstract_tuple); + if (bn_abstract_tuple->elements().size() < kBnOutputNum) { + MS_LOG(EXCEPTION) << "The abstract size of node bn must not be less than " << kBnOutputNum << ", but it is " + << bn_abstract_tuple->elements().size(); + } + auto iter_variable_input0 = (*equiv).find(variable_input0_var_); + if (iter_variable_input0 == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the variable_input0 var after matched."; + } + auto variable_input0 = utils::cast(iter_variable_input0->second); MS_EXCEPTION_IF_NULL(variable_input0); - auto variable_input1 = utils::cast((*equiv)[variable_input_var1_]); + auto iter_variable_input1 = (*equiv).find(variable_input1_var_); + if (iter_variable_input1 == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the variable_input1 var after matched."; + } + auto variable_input1 = utils::cast(iter_variable_input1->second); MS_EXCEPTION_IF_NULL(variable_input1); - std::vector fused_bn_inputs = { - NewValueNode(prim::kPrimFusedBatchNorm), data_input0, data_input1, data_input2, variable_input0, variable_input1}; - auto fused_bn = func_graph->NewCNode(fused_bn_inputs); - fused_bn->set_scope(node->scope()); - MS_EXCEPTION_IF_NULL(fused_bn); + *abstract_list = {bn_abstract_tuple->elements()[0], variable_input0->abstract(), variable_input1->abstract(), + bn_abstract_tuple->elements()[1], bn_abstract_tuple->elements()[2]}; +} + +AnfNodePtr FusedBatchNormFusion::CreateBNTrainingUpdate( + const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &equiv, + const std::vector &bn_training_reduce_outputs) const { + MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(node); + MS_EXCEPTION_IF_NULL(equiv); + // Set input + std::vector bn_training_update_inputs; + GetBNTrainingUpdateInputs(equiv, bn_training_reduce_outputs, &bn_training_update_inputs); + auto bn_training_update = 
func_graph->NewCNode(bn_training_update_inputs); + MS_EXCEPTION_IF_NULL(bn_training_update); // Set abstract - AnfNodePtr bn = GetBatchNormNode(node); - fused_bn->set_abstract(CreateAbstractOfFusedBatchNorm(equiv, bn)); - // Set attr - AnfAlgo::CopyNodeAttr(kAttrEpsilon, bn, fused_bn); + auto iter_batch_norm = (*equiv).find(batch_norm_var_); + if (iter_batch_norm == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the batch_norm var after matched."; + } + AnfNodePtr bn = utils::cast(iter_batch_norm->second); + MS_EXCEPTION_IF_NULL(bn); + AbstractBasePtrList abstract_list; + GetBNTrainingUpdateAbstractList(equiv, bn, &abstract_list); + auto abstract_tuple = std::make_shared(abstract_list); + bn_training_update->set_abstract(abstract_tuple); + AnfAlgo::CopyNodeAttr(kAttrEpsilon, bn, bn_training_update); ValuePtr factor = GetFactor(equiv); if (factor == nullptr) { return nullptr; } - AnfAlgo::SetNodeAttr(kAttrMomentum, factor, fused_bn); - // Replace old nodes with outputs of fused_bn - std::vector fused_bn_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, fused_bn, kBnOutputNum, &fused_bn_outputs); - if (fused_bn_outputs.size() != kBnOutputNum) { - MS_LOG(EXCEPTION) << "The output size of node bn must be " << kBnOutputNum << ", but it is " - << fused_bn_outputs.size(); + AnfAlgo::SetNodeAttr(kAttrFactor, factor, bn_training_update); + AnfAlgo::SetNodeAttr(kAttrIsRef, MakeValue(true), bn_training_update); + bn_training_update->set_scope(node->scope()); + return bn_training_update; +} + +const AnfNodePtr FusedBatchNormFusion::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &equiv) const { + MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(equiv); + MS_EXCEPTION_IF_NULL(node); + AnfNodePtr bn_training_reduce = CreateBNTrainingReduce(func_graph, node, equiv); + std::vector bn_training_reduce_outputs; + CreateMultipleOutputsOfAnfNode(func_graph, bn_training_reduce, kBNTrainingReduceOutputNum, + 
&bn_training_reduce_outputs); + AnfNodePtr bn_training_update = CreateBNTrainingUpdate(func_graph, node, equiv, bn_training_reduce_outputs); + if (bn_training_update == nullptr) { + MS_LOG(DEBUG) << "Create BNTrainingUpdate failed for bn node " << node->DebugString(); + return nullptr; + } + std::vector bn_training_update_outputs; + CreateMultipleOutputsOfAnfNode(func_graph, bn_training_update, kBNTrainingUpdateOutputNum, + &bn_training_update_outputs); + if (bn_training_update_outputs.size() < kBNTrainingUpdateOutputNum) { + MS_LOG(EXCEPTION) << "The output size of node bn must be " << kBNTrainingUpdateOutputNum << ", but it is " + << bn_training_update_outputs.size(); + } + // Replace old bn outputs with new outputs + auto iter_batch_norm = (*equiv).find(batch_norm_var_); + if (iter_batch_norm == (*equiv).end()) { + MS_LOG(EXCEPTION) << "The equiv map is expected to contains the batch_norm var after matched."; } + AnfNodePtr bn = utils::cast(iter_batch_norm->second); std::vector bn_outputs; GetBNOutput(func_graph, bn, &bn_outputs); - if (bn_outputs.size() != kBnOutputNum) { - MS_LOG(EXCEPTION) << "The output size of node bn must be " << kBnOutputNum << ", but it is " << bn_outputs.size(); - } auto manager = func_graph->manager(); MS_EXCEPTION_IF_NULL(manager); - (void)manager->Replace(bn_outputs[3], fused_bn_outputs[3]); - (void)manager->Replace(bn_outputs[4], fused_bn_outputs[4]); - return fused_bn_outputs[0]; + for (const auto &output : bn_outputs) { + MS_EXCEPTION_IF_NULL(output); + auto tuple_getitem_cnode = output->cast(); + MS_EXCEPTION_IF_NULL(tuple_getitem_cnode); + AnfNodePtr index_node = tuple_getitem_cnode->input(kInputNodeOutputIndexInTupleGetItem); + MS_EXCEPTION_IF_NULL(index_node); + auto value_node = index_node->cast(); + MS_EXCEPTION_IF_NULL(value_node); + int index = GetValue(value_node->value()); + if (index == kReplaceOutputIndex0 || index == kReplaceOutputIndex1) { + (void)manager->Replace(output, bn_training_update_outputs[index]); + } + } + 
return bn_training_update_outputs[0]; } } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h index db25e4f9f5..e6bf1dda55 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h @@ -19,6 +19,7 @@ #include #include #include "pre_activate/common/optimizer.h" +#include "utils/utils.h" namespace mindspore { namespace opt { @@ -26,29 +27,37 @@ class FusedBatchNormFusion : public PatternProcessPass { public: explicit FusedBatchNormFusion(bool multigraph = true) : PatternProcessPass("fused_batch_norm_fusion", multigraph), - data_input_var0_(std::make_shared()), - data_input_var1_(std::make_shared()), - data_input_var2_(std::make_shared()), - variable_input_var0_(std::make_shared()), - variable_input_var1_(std::make_shared()), - constant_input_var0_(std::make_shared()), - constant_input_var1_(std::make_shared()) {} + data_input0_var_(std::make_shared()), + data_input1_var_(std::make_shared()), + data_input2_var_(std::make_shared()), + variable_input0_var_(std::make_shared()), + variable_input1_var_(std::make_shared()), + constant_input0_var_(std::make_shared()), + constant_input1_var_(std::make_shared()), + batch_norm_var_(std::make_shared(std::make_shared(prim::kPrimBatchNorm->name()))) {} ~FusedBatchNormFusion() override = default; const BaseRef DefinePattern() const override; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; private: - abstract::AbstractTuplePtr CreateAbstractOfFusedBatchNorm(const EquivPtr &equiv, const AnfNodePtr &bn) const; - + AnfNodePtr CreateBNTrainingReduce(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &equiv) const; + void GetBNTrainingUpdateInputs(const EquivPtr &equiv, const std::vector &bn_training_reduce_outputs, 
+ std::vector *bn_training_update_inputs) const; + void GetBNTrainingUpdateAbstractList(const EquivPtr &equiv, const AnfNodePtr &bn, + std::vector *abstract_list) const; + AnfNodePtr CreateBNTrainingUpdate(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &equiv, + const std::vector &bn_training_reduce_outputs) const; ValuePtr GetFactor(const EquivPtr &equiv) const; - VarPtr data_input_var0_; - VarPtr data_input_var1_; - VarPtr data_input_var2_; - VarPtr variable_input_var0_; - VarPtr variable_input_var1_; - VarPtr constant_input_var0_; - VarPtr constant_input_var1_; + VarPtr data_input0_var_; + VarPtr data_input1_var_; + VarPtr data_input2_var_; + VarPtr variable_input0_var_; + VarPtr variable_input1_var_; + VarPtr constant_input0_var_; + VarPtr constant_input1_var_; + VarPtr batch_norm_var_; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc new file mode 100644 index 0000000000..857670a384 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc @@ -0,0 +1,71 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h" +#include "session/anf_runtime_algorithm.h" +#include "utils/utils.h" +#include "operator/ops.h" +#include "device/kernel_info.h" +#include "pre_activate/common/helper.h" +#include "pre_activate/common/optimizer.h" +#include "pre_activate/ascend/ascend_helper.h" + +namespace mindspore { +namespace opt { +void DoRefresh(const CNodePtr &cnode) { + if (cnode == nullptr) { + MS_LOG(EXCEPTION) << "node is nullptr"; + } + for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); input_index++) { + auto input_kernel_node = AnfAlgo::GetInputNode(cnode, input_index); + if (input_kernel_node->isa()) { + std::shared_ptr builder = + std::make_shared(); + auto cnode_input_format = AnfAlgo::GetInputFormat(cnode, input_index); + auto kernel_node_format = AnfAlgo::GetOutputFormat(input_kernel_node, 0); + auto dtype = AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0); + if (kernel_node_format != cnode_input_format) { + builder->SetOutputsFormat({cnode_input_format}); + builder->SetOutputsDeviceType({dtype}); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get()); + } + } + } +} + +bool RefreshParameterFormat::Run(const FuncGraphPtr &func_graph) { + if (func_graph == nullptr) { + MS_LOG(ERROR) << "func_graph is nullptr."; + return false; + } + std::vector node_list = TopoSort(func_graph->get_return()); + for (auto node : node_list) { + if (node == nullptr || !node->isa()) { + continue; + } + auto cnode = node->cast(); + if (cnode == nullptr) { + continue; + } + auto node_name = AnfAlgo::GetCNodeName(cnode); + if (node_name == kBNTrainingUpdateOpName) { + DoRefresh(cnode); + } + } + return true; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h new file mode 100644 index 0000000000..0ba688b134 --- 
/dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h @@ -0,0 +1,40 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_ + +#include +#include +#include +#include "ir/anf.h" +#include "pre_activate/common/pass.h" + +namespace mindspore { +namespace opt { +class RefreshParameterFormat : public Pass { /* Graph pass registered under the name "refresh_parameter_format"; Run() is defined in refresh_parameter_format.cc. */ + public: + explicit RefreshParameterFormat(size_t groups = 1) : Pass("refresh_parameter_format"), groups_(groups) {} /* groups is only stored; neither this header nor the Run() definition added by this patch reads groups_ */ + ~RefreshParameterFormat() override = default; + bool Run(const FuncGraphPtr &graph) override; + + private: + size_t groups_ = 1; +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_ diff --git a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc b/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc index f622f2f06f..0383311122 100644 --- a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc +++ b/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc @@ -27,6 +27,7 @@ namespace mindspore { namespace opt { void BackendCommonOptimization(const std::shared_ptr &kernel_graph) { + MS_LOG(INFO) << "start common opt graph:" << kernel_graph->graph_id(); /* NOTE(review): kernel_graph is dereferenced here as the first statement of the function, before any null check visible in this hunk */ auto 
context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); bool save_graphs = context_ptr->save_graphs_flag(); diff --git a/mindspore/ccsrc/pre_activate/common/helper.cc b/mindspore/ccsrc/pre_activate/common/helper.cc index 15ca1ca838..6233488089 100644 --- a/mindspore/ccsrc/pre_activate/common/helper.cc +++ b/mindspore/ccsrc/pre_activate/common/helper.cc @@ -299,6 +299,10 @@ tensor::TensorPtr CreateTensorWithValueTuple(const ValueTuplePtr &value_tuple_pt tensor::TensorPtr CreateTupleTensor(const ValueTuplePtr &value_tuple) { MS_EXCEPTION_IF_NULL(value_tuple); tensor::TensorPtr tensor = nullptr; + if (value_tuple->value().empty()) { /* guard for the value().begin() dereference that follows; an empty tuple now returns nullptr with a WARNING */ + MS_LOG(WARNING) << "The value tuple is empty."; + return nullptr; + } ValuePtr v = *(value_tuple->value().begin()); MS_EXCEPTION_IF_NULL(v); // Currently we only deal with the scalar tuple @@ -440,5 +444,47 @@ AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePt AnfAlgo::SetOutputInferTypeAndShape({origin_type}, {origin_shape}, tuple_getitem.get()); return tuple_getitem; } + +void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set &input_attrs) { /* Moves selected inputs of cnode into attributes of its primitive. Data input i is inputs[i + 1] (inputs[0] is always kept). When i is listed in input_attrs and the input passes the isa() check, its value is stored on the primitive under input_names_vec[i] and the input is dropped; every other input and its name are kept. Returns early with a DEBUG log when the primitive has no kAttrInputNames attribute. The primitive and cnode are modified in place. */ + MS_EXCEPTION_IF_NULL(cnode); + std::vector new_inputs; + std::vector new_input_names; + auto primitive = AnfAlgo::GetCNodePrimitive(cnode); + MS_EXCEPTION_IF_NULL(primitive); + auto input_names = primitive->GetAttr(kAttrInputNames); + if (input_names == nullptr) { + MS_LOG(DEBUG) << "input_names are nullptr in cnode[" + cnode->DebugString() + "]"; + return; + } + auto input_names_vec = GetValue>(input_names); + auto inputs = cnode->inputs(); + new_inputs.push_back(inputs[0]); + bool need_update = false; + for (size_t i = 0; i < inputs.size() - 1; ++i) { /* i counts data inputs only. NOTE(review): inputs.size() - 1 is unsigned; the inputs[0] access above already requires a non-empty input list */ + auto input_node = inputs[i + 1]; + MS_EXCEPTION_IF_NULL(input_node); + if (input_attrs.find(i) != input_attrs.end() && input_node->isa()) { + auto value_node = input_node->cast(); + MS_EXCEPTION_IF_NULL(value_node); + MS_LOG(DEBUG) << "start erase input[" << i << "] of cnode[" + 
cnode->DebugString() + "]"; + if (i >= input_names_vec.size()) { + MS_LOG(EXCEPTION) << "index " << i << " is larger than input names size [" << input_names_vec.size() << "]"; + } + primitive->set_attr(input_names_vec[i], value_node->value()); + need_update = true; + } else { + new_inputs.push_back(input_node); + if (i < input_names_vec.size()) { + new_input_names.push_back(input_names_vec[i]); + } + } + } + if (need_update) { /* cnode and the input-names attribute are rewritten only when at least one input was converted */ + // Update cnode's inputs + cnode->set_inputs(new_inputs); + // Update cnode's input_names attr + primitive->set_attr(kAttrInputNames, MakeValue(new_input_names)); + } +} } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/common/helper.h b/mindspore/ccsrc/pre_activate/common/helper.h index 04a4dd6c81..9ef57d8e7c 100644 --- a/mindspore/ccsrc/pre_activate/common/helper.h +++ b/mindspore/ccsrc/pre_activate/common/helper.h @@ -19,6 +19,7 @@ #include #include #include +#include #include "ir/func_graph.h" #include "session/kernel_graph.h" #include "common/utils.h" @@ -86,6 +87,7 @@ constexpr size_t kAdamApplyOneOutputNum = 3; constexpr size_t kBackendTransDataInputNum = 2; constexpr size_t kApplyMomentumInputNum = 6; constexpr size_t kBiasAddInputNum = 3; +constexpr size_t kTopkInputNum = 3; enum FusedBatchNormInput { kX = 1, @@ -150,6 +152,8 @@ void RemoveNopNode(session::KernelGraph *const graph); AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, size_t output_idx); bool IsUsedByOthers(const FuncGraphPtr &graph, const AnfNodePtr &node); + +void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set &input_attrs); } // namespace opt } // namespace mindspore #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_HELPER_H_ diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc b/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc index b7280f52ae..a2dfce2241 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc +++ 
b/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc @@ -38,9 +38,12 @@ DeviceMemPtr DynamicMemPoolBestFit::AllocTensorMem(size_t size) { std::vector DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t total_size, std::vector size_list) { + std::vector device_addr_list; /* declared up front so the failure path below can return it empty */ // Pre-alloc the one whole piece memory. auto device_addr = AllocTensorMem(total_size); - MS_EXCEPTION_IF_NULL(device_addr); + if (!device_addr) { /* a failed allocation is now reported as an empty list instead of an exception */ + return device_addr_list; + } // Remove the pre-alloc memory. auto mem_block = FindMemBlock(device_addr); MS_EXCEPTION_IF_NULL(mem_block); @@ -54,7 +57,6 @@ std::vector DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t (void)mem_block->block_all_mem_buf_map_.erase(iter); // Split the pre-alloc memory into continuous memory by the size list. DynamicMemBufPtr continuous_mem_buf; - std::vector device_addr_list; auto buf_addr = device_addr; for (size_t i = 0; i < size_list.size(); i++) { continuous_mem_buf = std::make_shared(buf_addr, kMemBufUsed, size_list[i]); @@ -102,13 +104,16 @@ DeviceMemPtr DynamicMemPoolBestFit::FindIdleMemBuf(size_t size) { DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) { size_t alloc_mem_size = CalMemBlockAllocSize(size); - + if (alloc_mem_size == 0) { /* 0 is what CalMemBlockAllocSize returns when free memory is smaller than size */ + return nullptr; + } // Add new memory block DeviceMemPtr device_addr = nullptr; auto real_alloc_size = AllocDeviceMem(alloc_mem_size, &device_addr); if (real_alloc_size < size) { - MS_LOG(EXCEPTION) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size[" - << size << "]."; + MS_LOG(WARNING) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size[" << size + << "]."; + return nullptr; /* NOTE(review): device_addr is not released on this path; confirm AllocDeviceMem leaves nothing allocated when it returns less than requested */ } auto mem_block = std::make_shared(device_addr, real_alloc_size); MS_EXCEPTION_IF_NULL(mem_block); @@ -135,10 +140,10 @@ DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) { size_t DynamicMemPoolBestFit::CalMemBlockAllocSize(size_t size) { auto device_free_mem_size = 
free_mem_size(); if (device_free_mem_size < size) { - MS_LOG(EXCEPTION) << "Memory not enough: current free memory size[" << device_free_mem_size - << "] is smaller than required size[" << size << "]."; + MS_LOG(WARNING) << "Memory not enough: current free memory size[" << device_free_mem_size + << "] is smaller than required size[" << size << "]."; + return 0; /* 0 signals not enough memory; AddMemBlockAndMemBuf maps it to nullptr */ } - auto alloc_mem_size = mem_alloc_unit_size(); // Growing at twice of alloc size while (alloc_mem_size < size) { @@ -156,7 +161,6 @@ void DynamicMemPoolBestFit::DivideMemBuf(size_t size, const DynamicMemBufPtr &me MS_EXCEPTION_IF_NULL(mem_buf); auto mem_block = FindMemBlock(mem_buf->device_addr_); MS_EXCEPTION_IF_NULL(mem_block); - // Divide new memory buf size_t newbuf_size = mem_buf->size_ - size; mem_buf->size_ = size; diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc b/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc index 1cecd170d3..8a3647d980 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc +++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc @@ -251,9 +251,10 @@ void BestFitMemReuse::ReleaseNodeUnusedOutput(const KernelDef *kernel_def_ptr) { } size_t BestFitMemReuse::FindIndx(const std::vector &membuf_ptr_list, int fac_idx) const { - size_t membuf_index = 0; + size_t membuf_index = membuf_ptr_list.size(); /* initial value is one past the last valid index; it is overwritten only when the loop below finds a membuf whose index_ equals fac_idx */ for (size_t n = 0; n < membuf_ptr_list.size(); ++n) { auto membuf = membuf_ptr_list[n]; + MS_EXCEPTION_IF_NULL(membuf); if (membuf->index_ == fac_idx) { membuf_index = n; break; diff --git a/mindspore/ccsrc/pre_activate/pass/allreduce_fusion.h b/mindspore/ccsrc/pre_activate/pass/allreduce_fusion.h deleted file mode 100644 index e443767e43..0000000000 --- a/mindspore/ccsrc/pre_activate/pass/allreduce_fusion.h +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the 
License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ALLREDUCE_FUSION_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ALLREDUCE_FUSION_H_ -#include - -#include "pre_activate/common/pass.h" -#include "ir/func_graph.h" -#include "ir/anf.h" - -namespace mindspore { -namespace opt { -struct AllReduceInfo_t { - std::vector allreduce_node; - std::vector input_grad_size; - std::vector input_grad_time; -}; - -class AllReduceFusion : public Pass { - public: - explicit AllReduceFusion(size_t groups = 1) : Pass("all_reduce_fusion"), groups_(groups) {} - ~AllReduceFusion() override = default; - bool Run(const FuncGraphPtr &graph) override; - - private: - bool DoFusion(const FuncGraphPtr &func_graph, const AllReduceInfo_t &allreduce_node_info, size_t segment_num, - const std::vector &segment_index) const; - AnfNodePtr CreateFusedAllReduce(const FuncGraphPtr &func_graph, const AllReduceInfo_t &allreduce_node_info, - size_t start_index, size_t end_index) const; - bool GetSplitSegments(const AllReduceInfo_t &allreduce_node_info, size_t *segment_num, - std::vector *segment_index) const; - size_t groups_ = 1; -}; -} // namespace opt -} // namespace mindspore -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ALLREDUCE_FUSION_H_ diff --git a/mindspore/ccsrc/pre_activate/pass/allreduce_fusion.cc b/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc similarity index 62% rename from mindspore/ccsrc/pre_activate/pass/allreduce_fusion.cc rename to mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc index 70a8974eca..4bcd488f69 100644 --- 
a/mindspore/ccsrc/pre_activate/pass/allreduce_fusion.cc +++ b/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc @@ -13,14 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/pass/allreduce_fusion.h" +#include "pre_activate/pass/communication_op_fusion.h" #include -#include #include #include -#include "utils/utils.h" #include "utils/graph_utils.h" #include "operator/ops.h" #include "device/kernel_info.h" @@ -31,9 +29,12 @@ namespace mindspore { namespace opt { namespace { -kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const AllReduceInfo_t &allreduce_node_info, size_t start_index, +constexpr auto kAttrDefaultGroup = "default_group"; /* substituted by GetFusionGroupKey when a node has no kAttrGroup attribute */ +constexpr auto kAttrDefaultOp = "default_op"; /* substituted by GetFusionGroupKey when a node has no kAttrOp attribute */ + +kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const CommunicationOpInfo &communication_op_info, size_t start_index, size_t end_index) { - if (end_index >= allreduce_node_info.allreduce_node.size()) { + if (end_index >= communication_op_info.communication_op_nodes.size()) { MS_LOG(EXCEPTION) << "end index out of vector size"; } std::vector inputs_device_format; @@ -43,7 +44,7 @@ kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const AllReduceInfo_t &allred std::vector> outputs_shape; kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; for (size_t idx = start_index; idx <= end_index; ++idx) { - auto cnode = allreduce_node_info.allreduce_node[idx]; + auto cnode = communication_op_info.communication_op_nodes[idx]; MS_EXCEPTION_IF_NULL(cnode); for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); ++input_index) { inputs_device_format.push_back(AnfAlgo::GetInputFormat(cnode, input_index)); @@ -64,14 +65,38 @@ kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const AllReduceInfo_t &allred builder.SetOutputsDeviceType(outputs_device_type); return builder.Build(); } + +std::string GetFusionGroupKey(const AnfNodePtr &node) { /* Builds the key used to bucket communication nodes for fusion: group, op and fusion id concatenated, with the default strings used for a missing group or op attribute. Returns an empty string when the fusion attribute is absent or equals 0; Run() skips nodes with an empty key. */ + auto primitive = 
AnfAlgo::GetCNodePrimitive(node); + MS_EXCEPTION_IF_NULL(primitive); + ValuePtr attr_fusion = primitive->GetAttr(kAttrFusion); + if (attr_fusion == nullptr) { + return ""; + } + int fusion = GetValue(attr_fusion); + if (fusion == 0) { + return ""; + } + std::string group = kAttrDefaultGroup; + ValuePtr attr_group = primitive->GetAttr(kAttrGroup); + if (attr_group != nullptr) { + group = GetValue(attr_group); + } + std::string op = kAttrDefaultOp; + ValuePtr attr_op = primitive->GetAttr(kAttrOp); + if (attr_op != nullptr) { + op = GetValue(attr_op); + } + return group + op + std::to_string(fusion); /* NOTE(review): the parts are joined without a separator, so different (group, op, fusion) triples can in principle produce the same key */ +} } // namespace -bool AllReduceFusion::GetSplitSegments(const AllReduceInfo_t &allreduce_node_info, size_t *segment_num, - std::vector *segment_index) const { +bool CommunicationOpFusion::GetSplitSegments(const CommunicationOpInfo &communication_op_info, size_t *segment_num, + std::vector *segment_index) const { MS_EXCEPTION_IF_NULL(segment_num); MS_EXCEPTION_IF_NULL(segment_index); - size_t allreduce_node_size = allreduce_node_info.allreduce_node.size(); - MS_LOG(INFO) << "graph all reduce node size " << allreduce_node_size; + size_t communication_op_node_size = communication_op_info.communication_op_nodes.size(); + MS_LOG(INFO) << "graph " << op_name_ << " node size " << communication_op_node_size; auto parallel_context = parallel::ParallelContext::GetInstance(); MS_EXCEPTION_IF_NULL(parallel_context); @@ -82,30 +107,31 @@ bool AllReduceFusion::GetSplitSegments(const AllReduceInfo_t &allreduce_node_inf uint32_t last_index = 0; for (size_t i = 0; i < split_indices.size(); ++i) { uint32_t index = split_indices[i]; - if (index <= last_index || index >= allreduce_node_size) { - MS_LOG(EXCEPTION) << "invalid allreduce split index " << i << " " << index; + if (index <= last_index || index >= communication_op_node_size) { + MS_LOG(EXCEPTION) << "invalid " << op_name_ << " split index " << i << " " << index; } segment_index->push_back(index); last_index = index; segments++; } 
- if (last_index != allreduce_node_size - 1) { - segment_index->push_back(allreduce_node_size - 1); + if (last_index != communication_op_node_size - 1) { + segment_index->push_back(communication_op_node_size - 1); segments++; } } else { segments = groups_; for (size_t i = 0; i < segments - 1; ++i) { - segment_index->push_back((i + 1) * (allreduce_node_size / segments) - 1); + segment_index->push_back((i + 1) * (communication_op_node_size / segments) - 1); /* NOTE(review): segments equals groups_ in this branch; a groups_ of 0 would divide by zero here and wrap the segments - 1 loop bound, and nothing in this hunk rejects 0 */ } - segment_index->push_back(allreduce_node_size - 1); + segment_index->push_back(communication_op_node_size - 1); } - if (segments >= allreduce_node_size) { - MS_LOG(INFO) << "fusion not changed: segment_num=" << segments << ", allreduce_node_size=" << allreduce_node_size; + if (segments >= communication_op_node_size) { + MS_LOG(INFO) << "fusion not changed: segment_num=" << segments + << ", communication_op_node_size=" << communication_op_node_size; return false; } - if (segment_index->at(segments - 1) != allreduce_node_size - 1) { + if (segment_index->at(segments - 1) != communication_op_node_size - 1) { MS_LOG(EXCEPTION) << "the last segment index is invalid."; } for (size_t i = 0; i < segments - 1; ++i) { @@ -118,19 +144,19 @@ bool AllReduceFusion::GetSplitSegments(const AllReduceInfo_t &allreduce_node_inf return true; } -AnfNodePtr AllReduceFusion::CreateFusedAllReduce(const FuncGraphPtr &func_graph, - const AllReduceInfo_t &allreduce_node_info, size_t start_index, - size_t end_index) const { +AnfNodePtr CommunicationOpFusion::CreateFusedCommunicationOp(const FuncGraphPtr &func_graph, + const CommunicationOpInfo &communication_op_info, + size_t start_index, size_t end_index) const { MS_EXCEPTION_IF_NULL(func_graph); - auto prim = std::make_shared(kAllReduceOpName); + auto prim = std::make_shared(op_name_); /* the fused node's primitive takes the configured op name instead of the previously hard-coded kAllReduceOpName */ MS_EXCEPTION_IF_NULL(prim); std::vector fusion_inputs = {NewValueNode(prim)}; // get all inputs of current segment - if (end_index >= allreduce_node_info.allreduce_node.size()) { + if (end_index >= 
communication_op_info.communication_op_nodes.size()) { MS_LOG(EXCEPTION) << "end index out of vector size"; } for (size_t idx = start_index; idx <= end_index; ++idx) { - auto cnode = allreduce_node_info.allreduce_node[idx]; + auto cnode = communication_op_info.communication_op_nodes[idx]; MS_EXCEPTION_IF_NULL(cnode); fusion_inputs.insert(fusion_inputs.end(), cnode->inputs().begin() + 1, cnode->inputs().end()); } @@ -141,14 +167,14 @@ AnfNodePtr AllReduceFusion::CreateFusedAllReduce(const FuncGraphPtr &func_graph, fused_node->set_kernel_info(kernel_info); AbstractBasePtrList abstract_list; for (size_t idx = start_index; idx <= end_index; ++idx) { - auto cnode = allreduce_node_info.allreduce_node[idx]; + auto cnode = communication_op_info.communication_op_nodes[idx]; /* NOTE(review): the CopyNodeAttr calls that follow copy "op" and "group" unconditionally, while GetFusionGroupKey tolerates nodes without those attributes; confirm CopyNodeAttr copes with a missing attribute */ MS_EXCEPTION_IF_NULL(cnode); AnfAlgo::CopyNodeAttr("fusion", cnode, fused_node); AnfAlgo::CopyNodeAttr("op", cnode, fused_node); AnfAlgo::CopyNodeAttr("group", cnode, fused_node); abstract_list.push_back(cnode->abstract()); } - auto kernel_build_info = GenerateKernelBuildInfo(allreduce_node_info, start_index, end_index); + auto kernel_build_info = GenerateKernelBuildInfo(communication_op_info, start_index, end_index); AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info, fused_node.get()); auto abstract_tuple = std::make_shared(abstract_list); MS_EXCEPTION_IF_NULL(abstract_tuple); @@ -156,8 +182,8 @@ AnfNodePtr AllReduceFusion::CreateFusedAllReduce(const FuncGraphPtr &func_graph, return fused_node; } -bool AllReduceFusion::DoFusion(const FuncGraphPtr &func_graph, const AllReduceInfo_t &allreduce_node_info, - size_t segment_num, const std::vector &segment_index) const { +bool CommunicationOpFusion::DoFusion(const FuncGraphPtr &func_graph, const CommunicationOpInfo &communication_op_info, + size_t segment_num, const std::vector &segment_index) const { MS_EXCEPTION_IF_NULL(func_graph); auto manager = func_graph->manager(); MS_EXCEPTION_IF_NULL(manager); @@ -169,12 +195,13 @@ bool 
AllReduceFusion::DoFusion(const FuncGraphPtr &func_graph, const AllReduceIn start_index = end_index + 1; continue; } - AnfNodePtr new_allreduce = CreateFusedAllReduce(func_graph, allreduce_node_info, start_index, end_index); - // replace old allreduce with new allreduce + AnfNodePtr new_communication_op = + CreateFusedCommunicationOp(func_graph, communication_op_info, start_index, end_index); /* each original node idx of the segment is then replaced by a TupleGetItem on this fused node with index idx - start_index */ + // replace old communication op with new communication op for (auto idx = start_index; idx <= end_index; ++idx) { std::vector tuple_getitem_input; tuple_getitem_input.push_back(NewValueNode(prim::kPrimTupleGetItem)); - tuple_getitem_input.push_back(new_allreduce); + tuple_getitem_input.push_back(new_communication_op); auto index = NewValueNode(SizeToInt(idx - start_index)); MS_EXCEPTION_IF_NULL(index); auto imm = std::make_shared(idx - start_index); @@ -185,10 +212,10 @@ bool AllReduceFusion::DoFusion(const FuncGraphPtr &func_graph, const AllReduceIn tuple_getitem_input.push_back(index); AnfNodePtr tuple_getitem = func_graph->NewCNode(tuple_getitem_input); MS_EXCEPTION_IF_NULL(tuple_getitem); - auto allreduce_node_item = allreduce_node_info.allreduce_node.at(idx); - MS_EXCEPTION_IF_NULL(allreduce_node_item); - tuple_getitem->set_abstract(allreduce_node_item->abstract()); - if (!manager->Replace(allreduce_node_item, tuple_getitem)) { + auto communication_op_node_item = communication_op_info.communication_op_nodes.at(idx); + MS_EXCEPTION_IF_NULL(communication_op_node_item); + tuple_getitem->set_abstract(communication_op_node_item->abstract()); + if (!manager->Replace(communication_op_node_item, tuple_getitem)) { MS_LOG(EXCEPTION) << "manager replace node failed"; } } @@ -198,29 +225,24 @@ bool AllReduceFusion::DoFusion(const FuncGraphPtr &func_graph, const AllReduceIn return changed; } -bool AllReduceFusion::Run(const FuncGraphPtr &func_graph) { +bool CommunicationOpFusion::Run(const FuncGraphPtr &func_graph) { /* Collects the nodes whose CNode name equals op_name_, buckets them by GetFusionGroupKey, and skips buckets holding at most one node. */ MS_EXCEPTION_IF_NULL(func_graph); const float input_grad_size_num 
= 0.0; const float input_grad_time_num = 0.0; // divide candidate fusion groups with same (group,op,fusion) attrs, fusion==0 means not fusion - std::unordered_map candidate_groups; + std::unordered_map candidate_groups; std::vector node_list = TopoSort(func_graph->get_return()); for (auto &node : node_list) { - if (node != nullptr && node->isa() && AnfAlgo::GetCNodeName(node) == kAllReduceOpName) { - auto primitive = AnfAlgo::GetCNodePrimitive(node); - MS_EXCEPTION_IF_NULL(primitive); - int fusion = GetValue(primitive->GetAttr("fusion")); - if (fusion == 0) { + if (node != nullptr && node->isa() && AnfAlgo::GetCNodeName(node) == op_name_) { + std::string key = GetFusionGroupKey(node); /* an empty key means the fusion attribute is missing or 0, so the node is left untouched */ + if (key.empty()) { continue; } - std::string group = GetValue(primitive->GetAttr("group")); - std::string op = GetValue(primitive->GetAttr("op")); - std::string key = group + op + std::to_string(fusion); if (candidate_groups.find(key) == candidate_groups.end()) { - AllReduceInfo_t allreduce_node_info; - candidate_groups[key] = allreduce_node_info; + CommunicationOpInfo communication_op_info; + candidate_groups[key] = communication_op_info; } - candidate_groups[key].allreduce_node.push_back(node->cast()); + candidate_groups[key].communication_op_nodes.push_back(node->cast()); candidate_groups[key].input_grad_size.push_back(input_grad_size_num); candidate_groups[key].input_grad_time.push_back(input_grad_time_num); } @@ -228,7 +250,7 @@ bool AllReduceFusion::Run(const FuncGraphPtr &func_graph) { // split candidate group to segments according to _group class member bool changed = false; for (auto &it : candidate_groups) { - if (it.second.allreduce_node.size() <= 1) { + if (it.second.communication_op_nodes.size() <= 1) { continue; } size_t segment_num = 0; diff --git a/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h b/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h new file mode 100644 index 0000000000..af8b557d5f --- /dev/null +++ 
b/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h @@ -0,0 +1,67 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_COMMUNICATION_OP_FUSION_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_COMMUNICATION_OP_FUSION_H_ +#include +#include +#include + +#include "pre_activate/common/pass.h" +#include "ir/func_graph.h" +#include "ir/anf.h" +#include "utils/utils.h" + +namespace mindspore { +namespace opt { +struct CommunicationOpInfo { /* One fusion candidate group: the collected nodes plus two vectors that Run() extends by one entry per collected node. */ + std::vector communication_op_nodes; + std::vector input_grad_size; + std::vector input_grad_time; +}; + +class CommunicationOpFusion : public Pass { /* Generic fusion pass for one kind of communication op. op_name selects the nodes Run() collects (compared against AnfAlgo::GetCNodeName) and is also the primitive name of the fused node; name is forwarded to Pass. */ + public: + explicit CommunicationOpFusion(const std::string &name, std::string op_name, size_t groups = 1) + : Pass(name), op_name_(std::move(op_name)), groups_(groups) {} + ~CommunicationOpFusion() override = default; + bool Run(const FuncGraphPtr &graph) override; + + private: + bool DoFusion(const FuncGraphPtr &func_graph, const CommunicationOpInfo &communication_op_info, size_t segment_num, + const std::vector &segment_index) const; + AnfNodePtr CreateFusedCommunicationOp(const FuncGraphPtr &func_graph, + const CommunicationOpInfo &communication_op_info, size_t start_index, + size_t end_index) const; + bool GetSplitSegments(const CommunicationOpInfo &communication_op_info, size_t *segment_num, + std::vector *segment_index) const; + std::string op_name_; + size_t groups_ = 
1; +}; + +class AllReduceFusion : public CommunicationOpFusion { /* Binds the generic pass to kAllReduceOpName under the pass name "all_reduce_fusion". */ + public: + explicit AllReduceFusion(size_t groups = 1) : CommunicationOpFusion("all_reduce_fusion", kAllReduceOpName, groups) {} + ~AllReduceFusion() override = default; +}; + +class AllGatherFusion : public CommunicationOpFusion { /* Binds the generic pass to kAllGatherOpName under the pass name "all_gather_fusion". */ + public: + explicit AllGatherFusion(size_t groups = 1) : CommunicationOpFusion("all_gather_fusion", kAllGatherOpName, groups) {} + ~AllGatherFusion() override = default; +}; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_COMMUNICATION_OP_FUSION_H_ diff --git a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc b/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc index fb47c9fc2a..3153a3bef9 100644 --- a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc +++ b/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc @@ -25,6 +25,7 @@ namespace mindspore { namespace opt { ConstInputToAttrInfoRegistry::ConstInputToAttrInfoRegistry() { Register(prim::kPrimCast->name(), {1}); + Register(prim::kPrimAvgPoolGrad->name(), {0}); Register(prim::kPrimConv2DBackpropInput->name(), {2}); Register(prim::kPrimConv2DBackpropFilter->name(), {2}); Register(prim::kPrimDepthwiseConv2dNativeBackpropFilter->name(), {1}); @@ -52,7 +53,6 @@ ConstInputToAttrInfoRegistry::ConstInputToAttrInfoRegistry() { Register(kFlattenGradOpName, {1}); Register(kExpandDimsOpName, {1}); Register(kSplitOpName, {0}); - Register(kTopKOpName, {1}); Register(kErfOpName, {1}); Register(kSparseApplyAdagradOpName, {2}); Register(kResizeNearestNeighborGrad, {1}); diff --git a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc b/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc index 15d62a164f..1f9e2712a6 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc +++ b/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc @@ -18,10 +18,10 @@ #include #include #include 
-#include #include #include "pre_activate/pass/const_input_to_attr_registry.h" +#include "pre_activate/common/helper.h" #include "utils/utils.h" #include "utils/context/ms_context.h" #include "operator/ops.h" @@ -29,50 +29,6 @@ namespace mindspore { namespace opt { -namespace { -void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set &input_attrs) { - MS_EXCEPTION_IF_NULL(cnode); - std::vector new_inputs; - std::vector new_input_names; - auto primitive = AnfAlgo::GetCNodePrimitive(cnode); - MS_EXCEPTION_IF_NULL(primitive); - auto input_names = primitive->GetAttr(kAttrInputNames); - if (input_names == nullptr) { - MS_LOG(DEBUG) << "input_names are nullptr in cnode[" + cnode->DebugString() + "]"; - return; - } - auto input_names_vec = GetValue>(input_names); - auto inputs = cnode->inputs(); - new_inputs.push_back(inputs[0]); - bool need_update = false; - for (size_t i = 0; i < inputs.size() - 1; ++i) { - auto input_node = inputs[i + 1]; - MS_EXCEPTION_IF_NULL(input_node); - if (input_attrs.find(i) != input_attrs.end() && input_node->isa()) { - auto value_node = input_node->cast(); - MS_EXCEPTION_IF_NULL(value_node); - MS_LOG(DEBUG) << "start erase input[" << i << "] of cnode[" + cnode->DebugString() + "]"; - if (i >= input_names_vec.size()) { - MS_LOG(EXCEPTION) << "index " << i << " is larger than input names size [" << input_names_vec.size() << "]"; - } - primitive->set_attr(input_names_vec[i], value_node->value()); - need_update = true; - } else { - new_inputs.push_back(input_node); - if (i < input_names_vec.size()) { - new_input_names.push_back(input_names_vec[i]); - } - } - } - if (need_update) { - // Update cnode's inputs - cnode->set_inputs(new_inputs); - // Update cnode's input_names attr - primitive->set_attr(kAttrInputNames, MakeValue(new_input_names)); - } -} -} // namespace - const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const AnfNodePtr &node, const EquivPtr &) const { if (node == nullptr || 
!AnfAlgo::IsRealCNodeKernel(node)) { diff --git a/mindspore/ccsrc/predict/CMakeLists.txt b/mindspore/ccsrc/predict/CMakeLists.txt index d88cf5cd83..a8cca431e7 100644 --- a/mindspore/ccsrc/predict/CMakeLists.txt +++ b/mindspore/ccsrc/predict/CMakeLists.txt @@ -1,8 +1,14 @@ -file(GLOB_RECURSE _PRE_ACTIVE_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "ascend/*.cc" - "common/*.cc" - "pass/*.cc" - "gpu/*.cc" - ) +file(GLOB_RECURSE _PREDICT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "predict.cc" + "generator/utils/ir_model_util.cc" + "converter/*.cc" + "converter/attr_utils/*.cc" + "converter/lite_model/*.cc" + "converter/lite_model/operations/*.cc" +) #[[ Sources of the predict object library, relative to this directory. NOTE(review): GLOB_RECURSE is used without CONFIGURE_DEPENDS, so files added later are only picked up after a re-configure. ]] -add_library(_mindspore_pre_active_obj OBJECT ${_PRE_ACTIVE_ALL_SRC_FILES}) \ No newline at end of file +if (ENABLE_D) #[[ generator/ir sources join the library only when ENABLE_D is set ]] + file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "generator/ir/*.cc") + list(APPEND _PREDICT_SRC_LIST ${_D_SRC_LIST}) +endif () +add_library(_mindspore_predict_obj OBJECT ${_PREDICT_SRC_LIST}) \ No newline at end of file diff --git a/mindspore/ccsrc/predict/generator/ir/ir_task_info.h b/mindspore/ccsrc/predict/generator/ir/ir_task_info.h index 8e80cdddbe..4b3ac85ea6 100644 --- a/mindspore/ccsrc/predict/generator/ir/ir_task_info.h +++ b/mindspore/ccsrc/predict/generator/ir/ir_task_info.h @@ -21,7 +21,7 @@ #include #include #include -#include "predict/proto/ge_runtime_taskinfo.pb.h" +#include "proto/ge_runtime_taskinfo.pb.h" namespace mindspore { namespace generator { diff --git a/mindspore/ccsrc/pybind_api/CMakeLists.txt b/mindspore/ccsrc/pybind_api/CMakeLists.txt index adcb5ddda1..d04d173f60 100644 --- a/mindspore/ccsrc/pybind_api/CMakeLists.txt +++ b/mindspore/ccsrc/pybind_api/CMakeLists.txt @@ -1,5 +1,2 @@ -file(GLOB_RECURSE _PYNATIVE_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "*.cc" - ) - -add_library(_mindspore_pynative_obj OBJECT ${_PYNATIVE_ALL_SRC_FILES}) \ No newline at end of file +file(GLOB_RECURSE _PYBIND_API_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} 
"*.cc") +add_library(_mindspore_pybind_api_obj OBJECT ${_PYBIND_API_SRC_LIST}) \ No newline at end of file diff --git a/mindspore/ccsrc/pynative/CMakeLists.txt b/mindspore/ccsrc/pynative/CMakeLists.txt index adcb5ddda1..9536986147 100644 --- a/mindspore/ccsrc/pynative/CMakeLists.txt +++ b/mindspore/ccsrc/pynative/CMakeLists.txt @@ -1,5 +1,8 @@ -file(GLOB_RECURSE _PYNATIVE_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "*.cc" - ) - -add_library(_mindspore_pynative_obj OBJECT ${_PYNATIVE_ALL_SRC_FILES}) \ No newline at end of file +file(GLOB_RECURSE _PYNATIVE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "base.cc" "pynative_execute.cc") #[[ NOTE(review): the patterns are literal file names, yet GLOB_RECURSE also descends into subdirectories, so a same-named file deeper in the tree would be matched too; confirm that is intended ]] + +if (ENABLE_GE) #[[ pynative_execute_ge.cc is appended only when ENABLE_GE is set ]] + file(GLOB_RECURSE _GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "pynative_execute_ge.cc") + list(APPEND _PYNATIVE_SRC_LIST ${_GE_SRC_LIST}) +endif () + +add_library(_mindspore_pynative_obj OBJECT ${_PYNATIVE_SRC_LIST}) diff --git a/mindspore/ccsrc/pynative/base.h b/mindspore/ccsrc/pynative/base.h index 37ff000b04..fc143da3c1 100644 --- a/mindspore/ccsrc/pynative/base.h +++ b/mindspore/ccsrc/pynative/base.h @@ -31,7 +31,6 @@ namespace mindspore { namespace pynative { - namespace py = pybind11; enum PynativeStatusCode { @@ -61,7 +60,6 @@ using OpExecInfoPtr = std::shared_ptr; OpExecInfoPtr GenerateOpExecInfo(const py::args &args); const std::set ignore_infer_prim = {"partial", "make_ref"}; - } // namespace pynative } // namespace mindspore diff --git a/mindspore/ccsrc/pynative/pynative_execute.cc b/mindspore/ccsrc/pynative/pynative_execute.cc index 0d18dfb577..8d3fe4fbb7 100644 --- a/mindspore/ccsrc/pynative/pynative_execute.cc +++ b/mindspore/ccsrc/pynative/pynative_execute.cc @@ -30,7 +30,8 @@ #include "pipeline/parse/data_converter.h" #include "pipeline/static_analysis/prim.h" #include "session/session_factory.h" - +#include "pre_activate/pass/const_input_to_attr_registry.h" +#include "pre_activate/common/helper.h" #include "pynative/base.h" #ifdef ENABLE_GE @@ -39,10 +40,11 @@ const char 
SINGLE_OP_GRAPH[] = "single_op_graph"; // primitive unable to infer value for constant input in PyNative mode -const std::unordered_set vm_operators = {"partial", "depend", "make_ref"}; +const std::set vm_operators = {"partial", "depend", "make_ref", "zeros_like_tensor"}; namespace mindspore { namespace pynative { +static std::shared_ptr session = nullptr; inline ValuePtr PyAttrValue(const py::object &obj) { ValuePtr converted_ret = nullptr; bool converted = parse::ConvertData(obj, &converted_ret); @@ -188,6 +190,117 @@ py::object RunOpInVM(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *stat return std::move(result); } +bool RunOpConvertConstInputToAttr(const py::object &input_object, size_t input_index, const PrimitivePtr &op_prim, + const std::unordered_set &input_attrs) { + MS_EXCEPTION_IF_NULL(op_prim); + auto input_names_value = op_prim->GetAttr(kAttrInputNames); + if (input_names_value == nullptr) { + return false; + } + auto input_names_vec = GetValue>(input_names_value); + if (input_index >= input_names_vec.size()) { + MS_LOG(EXCEPTION) << "The input index: " << input_index << " is large than the input names vector size!"; + } + + if (input_attrs.find(input_index) != input_attrs.end()) { + ValuePtr value = parse::data_converter::PyDataToValue(input_object); + MS_EXCEPTION_IF_NULL(value); + auto input_name = input_names_vec[input_index]; + op_prim->set_attr(input_name, value); + return true; + } + return false; +} + +void PlantTensorTupleToVector(const py::tuple &tuple_inputs, const PrimitivePtr &op_prim, + std::vector *input_tensor) { + MS_EXCEPTION_IF_NULL(op_prim); + MS_EXCEPTION_IF_NULL(input_tensor); + for (const auto &input_object : tuple_inputs) { + if (!py::isinstance(input_object)) { + MS_LOG(EXCEPTION) << "The input object is not a tensor!"; + } + auto tensor = py::cast(input_object); + MS_EXCEPTION_IF_NULL(tensor); + input_tensor->push_back(tensor); + } + op_prim->set_attr(kAttrDynInputSizes, 
MakeValue(std::vector{SizeToInt(tuple_inputs.size())})); +} + +void ConvertValueTupleToTensor(const py::object &input_object, std::vector *input_tensor) { + MS_EXCEPTION_IF_NULL(input_tensor); + ValuePtr input_value = parse::data_converter::PyDataToValue(input_object); + MS_EXCEPTION_IF_NULL(input_value); + if (!input_value->isa()) { + MS_LOG(EXCEPTION) << "The input object is not a value tuple!"; + } + auto value_tuple = input_value->cast(); + MS_EXCEPTION_IF_NULL(value_tuple); + tensor::TensorPtr tensor_ptr = opt::CreateTupleTensor(value_tuple); + MS_EXCEPTION_IF_NULL(tensor_ptr); + input_tensor->push_back(tensor_ptr); +} + +void ConvertPyObjectToTensor(const py::object &input_object, const PrimitivePtr &op_prim, + std::vector *input_tensor) { + MS_EXCEPTION_IF_NULL(op_prim); + MS_EXCEPTION_IF_NULL(input_tensor); + tensor::TensorPtr tensor_ptr = nullptr; + if (py::isinstance(input_object)) { + tensor_ptr = py::cast(input_object); + } else if (py::isinstance(input_object)) { + tensor_ptr = std::make_shared(py::cast(input_object), kFloat32); + } else if (py::isinstance(input_object)) { + tensor_ptr = std::make_shared(py::cast(input_object), nullptr); + } else if (py::isinstance(input_object)) { + tensor_ptr = std::make_shared(py::cast(input_object), nullptr); + } else if (py::isinstance(input_object)) { + tensor_ptr = std::make_shared(py::cast(input_object), nullptr); + } else if (py::isinstance(input_object)) { + auto tuple_inputs = py::cast(input_object); + if (py::isinstance(tuple_inputs[0])) { + PlantTensorTupleToVector(tuple_inputs, op_prim, input_tensor); + } else { + ConvertValueTupleToTensor(input_object, input_tensor); + } + return; + } else { + MS_LOG(EXCEPTION) << "Run op inputs type is invalid!"; + } + MS_EXCEPTION_IF_NULL(tensor_ptr); + input_tensor->push_back(tensor_ptr); +} + +void ConstructInputTensor(const OpExecInfoPtr &op_run_info, std::vector *tensors_mask, + std::vector *input_tensors) { + MS_EXCEPTION_IF_NULL(tensors_mask); + 
MS_EXCEPTION_IF_NULL(input_tensors); + PrimitivePtr op_prim = op_run_info->py_primitive; + MS_EXCEPTION_IF_NULL(op_prim); + + if (op_run_info->op_inputs.size() != op_run_info->inputs_mask.size()) { + MS_LOG(EXCEPTION) << "Op input size " << op_run_info->op_inputs.size() << " should be equal to op input mask size " + << op_run_info->inputs_mask.size(); + } + opt::ConstInputToAttrInfoRegister reg; + bool reg_exist = opt::ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(op_run_info->op_name, ®); + size_t input_num = op_run_info->op_inputs.size(); + MS_LOG(INFO) << "py input size: " << input_num; + for (size_t index = 0; index < input_num; ++index) { + // convert const input to attr + if (reg_exist && + RunOpConvertConstInputToAttr(op_run_info->op_inputs[index], index, op_prim, reg.GetConstInputAttrInfo())) { + continue; + } + // convert const and tuple input to tensor + ConvertPyObjectToTensor(op_run_info->op_inputs[index], op_prim, input_tensors); + // make tensors, weight : 1, data : 0 + std::vector new_mask(input_tensors->size() - tensors_mask->size(), + py::cast(op_run_info->inputs_mask[index])); + tensors_mask->insert(tensors_mask->end(), new_mask.begin(), new_mask.end()); + } +} + py::object RunOpInMs(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *status) { MS_EXCEPTION_IF_NULL(op_exec_info); MS_LOG(INFO) << "Start run op[" << op_exec_info->op_name << "] with backend policy ms"; @@ -198,13 +311,19 @@ py::object RunOpInMs(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *stat if (device_target != kAscendDevice && device_target != kGPUDevice) { MS_EXCEPTION(ArgumentError) << "Device target [" << device_target << "] is not supported in Pynative mode"; } - std::shared_ptr session = session::SessionFactory::Get().Create(device_target); + + if (session == nullptr) { + session = session::SessionFactory::Get().Create(device_target); + } + MS_EXCEPTION_IF_NULL(session); session->Init(ms_context->device_id()); std::string graph_info = 
GetSingleOpGraphInfo(op_exec_info); std::vector input_tensors; - session->BuildOp(*op_exec_info, graph_info, &input_tensors); + std::vector tensors_mask; + ConstructInputTensor(op_exec_info, &tensors_mask, &input_tensors); + session->BuildOp(*op_exec_info, graph_info, input_tensors, tensors_mask); py::tuple result = session->RunOp(*op_exec_info, graph_info, input_tensors); ms_context->set_enable_pynative_infer(false); *status = PYNATIVE_SUCCESS; @@ -293,5 +412,7 @@ py::tuple RunOp(const py::args &args) { MS_LOG(INFO) << "RunOp end"; return result; } + +void ClearPyNativeSession() { session = nullptr; } } // namespace pynative } // namespace mindspore diff --git a/mindspore/ccsrc/pynative/pynative_execute.h b/mindspore/ccsrc/pynative/pynative_execute.h index c64c6b4b25..65be3b2ab2 100644 --- a/mindspore/ccsrc/pynative/pynative_execute.h +++ b/mindspore/ccsrc/pynative/pynative_execute.h @@ -36,6 +36,9 @@ namespace py = pybind11; py::object RunOpInVM(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *status); py::tuple RunOp(const py::args &args); + +void ClearPyNativeSession(); + } // namespace pynative } // namespace mindspore diff --git a/mindspore/ccsrc/pynative/pynative_execute_ge.cc b/mindspore/ccsrc/pynative/pynative_execute_ge.cc index 0bf2a391f9..7357bdd710 100644 --- a/mindspore/ccsrc/pynative/pynative_execute_ge.cc +++ b/mindspore/ccsrc/pynative/pynative_execute_ge.cc @@ -33,7 +33,6 @@ const char SINGLE_OP_GRAPH[] = "single_op_graph"; namespace mindspore { namespace pynative { - using MeTensor = mindspore::tensor::Tensor; using MeTensorPtr = mindspore::tensor::TensorPtr; using GeOperator = ge::Operator; @@ -307,5 +306,4 @@ py::object RunOpInGE(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *stat return std::move(result); } } // namespace pynative - } // namespace mindspore diff --git a/mindspore/ccsrc/session/CMakeLists.txt b/mindspore/ccsrc/session/CMakeLists.txt index 2e685b04f4..56a7327e80 100644 --- a/mindspore/ccsrc/session/CMakeLists.txt 
+++ b/mindspore/ccsrc/session/CMakeLists.txt @@ -1,22 +1,29 @@ -file(GLOB_RECURSE _SESSION_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "kernel_graph.cc" - "session_basic.cc" - "session_factory.cc" - "anf_runtime_algorithm.cc" +file(GLOB_RECURSE _SESSION_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "kernel_graph.cc" + "session_basic.cc" + "session_factory.cc" + "anf_runtime_algorithm.cc" +) + +if (ENABLE_GPU) + file(GLOB_RECURSE _GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "gpu_session.cc" + ) + list(APPEND _SESSION_SRC_LIST ${_GPU_SRC_LIST}) +endif () + +if (ENABLE_CPU) + file(GLOB_RECURSE _CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "cpu_session.cc" ) -#TODO : Not include session_context.cc -add_library(_mindspore_session_obj OBJECT ${_SESSION_ALL_SRC_FILES}) + list(APPEND _SESSION_SRC_LIST ${_CPU_SRC_LIST}) +endif () -if(ENABLE_D) - file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "ascend_session.cc" - ) - add_library(_mindspore_session_obj OBJECT ${_D_SRC_LIST}) -endif() +if (ENABLE_D) + file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "ascend_session.cc" + ) + list(APPEND _SESSION_SRC_LIST ${_D_SRC_LIST}) +endif () -if(ENABLE_GPU) - file(GLOB_RECURSE _C_EXPRESSION_GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "gpu_session.cc" - ) - add_library(_c_expression_gpu_session_obj OBJECT ${_C_EXPRESSION_GPU_SRC_LIST}) -endif() \ No newline at end of file +add_library(_mindspore_session_obj OBJECT ${_SESSION_SRC_LIST}) \ No newline at end of file diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/session/anf_runtime_algorithm.cc index 525ff44dd8..45588052b0 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/session/anf_runtime_algorithm.cc @@ -300,7 +300,12 @@ std::string AnfRuntimeAlgorithm::GetOutputFormat(const AnfNodePtr &node, size_t MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); 
MS_EXCEPTION_IF_NULL(build_info); - return build_info->GetOutputFormat(output_idx); + auto format = build_info->GetOutputFormat(output_idx); + if (format == kernel::KernelBuildInfo::kInvalidFormat) { + MS_LOG(EXCEPTION) << "Node [" << node->DebugString() << "]" + << " has a invalid output format"; + } + return format; } std::string AnfRuntimeAlgorithm::GetInputFormat(const AnfNodePtr &node, size_t input_idx) { @@ -314,7 +319,12 @@ std::string AnfRuntimeAlgorithm::GetInputFormat(const AnfNodePtr &node, size_t i MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); - return build_info->GetInputFormat(input_idx); + auto format = build_info->GetInputFormat(input_idx); + if (format == kernel::KernelBuildInfo::kInvalidFormat) { + MS_LOG(EXCEPTION) << "Node [" << node->DebugString() << "]" + << " has a invalid input format"; + } + return format; } KernelWithIndex AnfRuntimeAlgorithm::GetPrevNodeOutput(const AnfNodePtr &anf_node, size_t input_idx) { @@ -481,7 +491,12 @@ TypeId AnfRuntimeAlgorithm::GetOutputDeviceDataType(const AnfNodePtr &node, size MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); - return build_info->GetOutputDeviceType(output_idx); + auto dtype = build_info->GetOutputDeviceType(output_idx); + if (dtype == TypeId::kNumberTypeEnd) { + MS_LOG(EXCEPTION) << "Node [" << node->DebugString() << "]" + << " has a invalid dtype"; + } + return dtype; } TypeId AnfRuntimeAlgorithm::GetInputDeviceDataType(const AnfNodePtr &node, size_t input_idx) { @@ -494,7 +509,12 @@ TypeId AnfRuntimeAlgorithm::GetInputDeviceDataType(const AnfNodePtr &node, size_ MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); - return build_info->GetInputDeviceType(input_idx); + auto dtype = build_info->GetInputDeviceType(input_idx); + if (dtype == 
TypeId::kNumberTypeEnd) { + MS_LOG(EXCEPTION) << "Node [" << node->DebugString() << "]" + << " has a invalid dtype"; + } + return dtype; } TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputDeviceDataType(const AnfNodePtr &anf_node, size_t input_idx) { @@ -514,10 +534,6 @@ const DeviceAddress *AnfRuntimeAlgorithm::GetOutputAddr(const AnfNodePtr &node, MS_LOG(EXCEPTION) << node->DebugString() << "Invalid nop node"; } } - if (output_idx > GetOutputTensorNum(node)) { - MS_LOG(EXCEPTION) << "The index [" << output_idx << "] is out of range of the node's output size [ " - << GetOutputTensorNum(node) << "#node:[ " << node->DebugString() << "]"; - } auto kernel_info = node->kernel_info(); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetOutputAddr(output_idx); @@ -539,10 +555,6 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableOutputAddr(const AnfNodePtr &nod MS_LOG(EXCEPTION) << node->DebugString() << "Invalid nop node."; } } - if (output_idx > GetOutputTensorNum(node)) { - MS_LOG(EXCEPTION) << "The index [" << output_idx << "] is out of range of the node's output size [ " - << GetOutputTensorNum(node) << "#node:[ " << node->DebugString() << "]"; - } auto kernel_info = node->kernel_info(); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetMutableOutputAddr(output_idx); @@ -824,6 +836,8 @@ size_t AnfRuntimeAlgorithm::GetRealInputIndex(const mindspore::AnfNodePtr &anf_n MS_EXCEPTION_IF_NULL(anf_node); static std::map> spec_node_list = { {prim::kPrimConv2DBackpropInput->name(), {{0, 1}, {1, 0}}}, + {kFusionOpConv2DBackpropInputReluGradV2Name, {{0, 1}, {1, 0}, {2, 2}}}, + {kFusionOpConv2DBackpropInputAddNReluGradV2Name, {{0, 1}, {1, 0}, {2, 2}, {3, 3}}}, {prim::kPrimConv2DBackpropFilter->name(), {{0, 1}, {1, 0}}}, {prim::kPrimLogSoftmaxGrad->name(), {{0, 1}, {1, 0}}}, {prim::kPrimLayerNormGrad->name(), {{0, 1}, {1, 0}, {2, 2}, {3, 3}, {4, 4}}}, @@ -851,17 +865,12 @@ void AnfRuntimeAlgorithm::SetNodeInput(const CNodePtr &node, const AnfNodePtr &i bool 
AnfRuntimeAlgorithm::IsCommunicationOp(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_name = AnfAlgo::GetCNodeName(node); - auto kernel_type = AnfAlgo::GetKernelType(node); - if (kernel_name == kAllReduceOpName || kernel_type == HCCL_KERNEL) { - return true; + if (!node->isa()) { + return false; } - return false; -} - -bool AnfRuntimeAlgorithm::IsAllReduceOp(const AnfNodePtr &node) { - MS_EXCEPTION_IF_NULL(node); - if (node->isa() && AnfAlgo::GetCNodeName(node) == kAllReduceOpName) { + auto kernel_name = AnfAlgo::GetCNodeName(node); + if (kernel_name == kAllReduceOpName || kernel_name == kAllGatherOpName || kernel_name == kBroadcastOpName || + kernel_name == kReduceScatterOpName) { return true; } return false; diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.h b/mindspore/ccsrc/session/anf_runtime_algorithm.h index 78359cdd5a..a70a63b678 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.h +++ b/mindspore/ccsrc/session/anf_runtime_algorithm.h @@ -176,7 +176,6 @@ class AnfRuntimeAlgorithm { // get real input index for some tbe ops which input order is different between me and tbe impl static size_t GetRealInputIndex(const AnfNodePtr &anf_node, const size_t cur_index); static bool IsCommunicationOp(const AnfNodePtr &node); - static bool IsAllReduceOp(const AnfNodePtr &node); static bool IsGetNext(const NotNull &node); }; } // namespace session diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc index 11ae3da6f7..ace5e34335 100755 --- a/mindspore/ccsrc/session/ascend_session.cc +++ b/mindspore/ccsrc/session/ascend_session.cc @@ -15,6 +15,9 @@ */ #include "session/ascend_session.h" #include +#include +#include +#include #include "operator/ops.h" #include "ir/meta_tensor.h" #include "ir/anf.h" @@ -75,28 +78,15 @@ void DumpGraphInputArgs(const VectorRef &args) { void SetStreamDistinctionLabel(const KernelGraphPtr &graph, uint32_t label, bool is_override) { MS_EXCEPTION_IF_NULL(graph); - 
for (auto &node : graph->execution_order()) { - if (is_override || AnfAlgo::GetStreamDistinctionLabel(node.get()) == kInvalidDistincLabel) { - MS_EXCEPTION_IF_NULL(node); - AnfAlgo::SetStreamDistinctionLabel(label, node.get()); - } + if (is_override || graph->stream_distinction_label() == kInvalidDistincLabel) { + graph->set_stream_distinction_label(label); } } -GraphId GetDistinctionLabel(const KernelGraphPtr &graph) { - MS_EXCEPTION_IF_NULL(graph); - // if graph is empty,use graph id as distinction label - if (graph->execution_order().empty()) { - return graph->graph_id(); - } - // else use first node of execution order as label - return AnfAlgo::GetStreamDistinctionLabel(graph->execution_order()[0].get()); -} - std::vector GetRealArgs(const KernelGraphPtr graph, const VectorRef &args) { MS_EXCEPTION_IF_NULL(graph); std::vector graph_inputs = graph->inputs(); - auto valid_inputs = graph->ValidInputs(); + auto valid_inputs = graph->valid_inputs(); size_t real_args_size = 0; std::vector real_args = {}; for (size_t i = 0; i < args.size(); i++) { @@ -141,23 +131,9 @@ std::vector GetRealArgs(const KernelGraphPtr graph, const VectorRef &ar GraphId AscendSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) { MS_LOG(INFO) << "start"; - auto graph_id = graph_sum_; // construct graph, if successfully, graph_sum_ + 1 auto graph = ConstructKernelGraph(lst, outputs); - MS_EXCEPTION_IF_NULL(graph); - opt::AscendBackendIRFusionOptimization(graph); - // select kernel build info - SelectKernel(*graph); - // convert kernel Graph to model - predictmodel::StepConvertGraph(graph); - // optimize graph - HardwareOptimize(graph); - // init runtime resource - InitRuntimeResource(); - // assign static memory of parameters - auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); - MS_EXCEPTION_IF_NULL(runtime_instance); - runtime_instance->AssignStaticMemoryInput(graph.get()); + auto graph_id = 
graph->graph_id(); MS_LOG(INFO) << "Compile graph " << graph_id << " success"; return graph_id; } @@ -166,16 +142,36 @@ void AscendSession::BuildGraph(GraphId graph_id) { MS_LOG(INFO) << "start"; auto graph = GetGraph(graph_id); MS_EXCEPTION_IF_NULL(graph); + // resource initialize + InitRuntimeResource(); // multiple graph handle if (graph_id == final_graph_id_) { if (!graph->executable()) { return; } + // insert assigns to child graph + InsertAllAssigns(); + // insert switch and active to child graph + MergeSwitchCompile(); + // OptChildGraphs + auto graph_order = GetGraphOrder(final_graph_id_); + auto &graph_type = GetGraphOrderType(final_graph_id_); + for (size_t i = 0; i < graph_order.size(); i++) { + if (graph_type[i] == BRANCH_END || graph_type[i] == BRANCH_START) { + continue; + } + MS_LOG(INFO) << "Start build child graph " << graph_order[i]; + auto child_graph = GetGraph(graph_order[i]); + CompileChildGraph(child_graph); + } // merge child graph MergeGraphExecOrder(); } else { + auto single_graph = GetGraph(graph_id); + CompileChildGraph(single_graph); // set the distinction label of single graph - SetStreamDistinctionLabel(GetGraph(graph_id), graph_id, false); + single_graph->set_stream_distinction_label(graph_id); + single_graph->UpdateExecuteKernelStreamLabel(); } // adjust execution order because merge child graph and other special operations AdjustKernel(graph); @@ -197,9 +193,26 @@ void AscendSession::BuildGraph(GraphId graph_id) { // load task info to device if it is sink mode LoadTask(graph); } + // sync the inital const tensor to device + SyncInitialTenosrToDevice(); MS_LOG(INFO) << "end"; } +void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) { + MS_EXCEPTION_IF_NULL(child_graph); + opt::AscendBackendIRFusionOptimization(child_graph); + // select kernel build info + SelectKernel(*child_graph); + // convert kernel Graph to model + predictmodel::StepConvertGraph(child_graph); + // optimize graph + 
HardwareOptimize(child_graph); + // assign static memory of parameters + auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); + MS_EXCEPTION_IF_NULL(runtime_instance); + runtime_instance->AssignStaticMemoryInput(child_graph.get()); +} + void AscendSession::RunGraph(const GraphId &graph_id, const std::vector &inputs, VectorRef *const outputs) { MS_LOG(INFO) << "start"; @@ -249,12 +262,25 @@ void AscendSession::RunOpExecTask(const std::shared_ptr &kernel_gra MS_LOG(INFO) << "Finish!"; } +bool AscendSession::GraphCacheExist(const GraphInfo &graph_info) const { + if (run_op_graphs_.find(graph_info) != run_op_graphs_.end()) { + return true; + } + + return false; +} + void AscendSession::BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info, - std::vector *input_tensors) { - MS_EXCEPTION_IF_NULL(input_tensors); + const std::vector &input_tensors, + const std::vector &tensors_mask) { MS_LOG(INFO) << "Build op " << op_run_info.op_name << " start !"; + if (GraphCacheExist(graph_info)) { + MS_LOG(INFO) << "Build op " << op_run_info.op_name << " finish !"; + return; + } + // construct graph include one op - auto graph = ConstructSingleOpGraph(op_run_info, input_tensors); + auto graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask); MS_EXCEPTION_IF_NULL(graph); opt::RunOpAscendBackendIRFusionOptimization(graph); // kernel select @@ -267,6 +293,7 @@ void AscendSession::BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph RunOpAdjustKernel(graph); BuildKernel(graph); run_op_graphs_[graph_info] = graph; + MS_LOG(INFO) << "Build op " << op_run_info.op_name << " finish !"; } py::tuple AscendSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info, @@ -291,7 +318,6 @@ py::tuple AscendSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &gr } py::object tuple_obj = utils::cast(output_tensors).object_; py::tuple tuple_tensors = py::cast(tuple_obj); - 
run_op_graphs_.clear(); MS_LOG(INFO) << "Run op " << op_run_info.op_name << " finish!"; return tuple_tensors; } @@ -299,10 +325,25 @@ py::tuple AscendSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &gr // compile graph steps void AscendSession::SelectKernel(const KernelGraph &kernel_graph) const { MS_LOG(INFO) << "Start!"; + size_t raise_precision_count = 0; + size_t reduce_precision_count = 0; for (const auto &cnode : kernel_graph.execution_order()) { - device::ascend::SelectKernelInfo(cnode); + auto status = device::ascend::SelectKernelInfo(cnode); + if (status == kStatusRaisePrecision) { + raise_precision_count++; + } else if (status == kStatusReducePrecision) { + reduce_precision_count++; + } MS_LOG(INFO) << "Select ApplyKernel: " << cnode->DebugString(); } + if (raise_precision_count > 0) { + MS_LOG(WARNING) << "There has " << raise_precision_count + << " node/nodes used raise precision to selected the kernel!"; + } + if (reduce_precision_count > 0) { + MS_LOG(WARNING) << "There has " << reduce_precision_count + << " node/nodes used reduce precision to selected the kernel!"; + } MS_LOG(INFO) << "Finish!"; } @@ -445,11 +486,9 @@ void AscendSession::Dump(const std::shared_ptr &kernel_graph) const GraphId AscendSession::SetFinalGraphInput(const std::vector &args) { MS_LOG(INFO) << "Start! 
Args size " << args.size(); - auto final_graph = std::make_shared(); - final_graph_id_ = graph_sum_++; - graphs_[final_graph_id_] = final_graph; - final_graph->set_graph_id(final_graph_id_); - MS_LOG(INFO) << "Create a new final graph" << final_graph_id_ << "success"; + auto final_graph = NewKernelGraph(); + final_graph_id_ = final_graph->graph_id(); + MS_LOG(INFO) << "Create a new final graph" << final_graph_id_ << " success"; // init private variables and bind them with final_graph_id graph_execute_orders_[final_graph_id_] = std::vector(); graph_order_types_[final_graph_id_] = std::vector(); @@ -485,6 +524,46 @@ GraphId AscendSession::SetFinalGraphInput(const std::vector &args) { return final_graph_id_; } +AnfNodePtr AscendSession::CreateFakeOutput(GraphId fake_graph_id, const AnfNodePtr &true_output) { + auto fake_graph = GetGraph(fake_graph_id); + auto output_item_with_index = AnfAlgo::VisitKernelWithReturnType(true_output, 0); + auto create_parameter = [&](const AbstractBasePtr &abstract) -> AnfNodePtr { + auto parameter = fake_graph->NewParameter(); + MS_EXCEPTION_IF_NULL(parameter); + parameter->set_abstract(abstract); + auto new_parameter = fake_graph->NewParameter(parameter); + // Add new parameter to the graph input of fake_graph to sure that all parameters will be allocated memory. 
+ auto graph_inputs = fake_graph->MutableInputs(); + MS_EXCEPTION_IF_NULL(graph_inputs); + graph_inputs->push_back(new_parameter); + return new_parameter; + }; + auto create_parameter_from_cnode = [&](const AnfNodePtr &cnode, size_t output_idx) -> AnfNodePtr { + MS_EXCEPTION_IF_NULL(cnode); + auto abstract = cnode->abstract(); + MS_EXCEPTION_IF_NULL(abstract); + // create multiple parameters if is a tuple output real kernel + if (abstract->isa()) { + auto tuple_abstract = abstract->cast(); + MS_EXCEPTION_IF_NULL(tuple_abstract); + MS_LOG(INFO) << "tuple_size [" << tuple_abstract->size() << "]"; + return create_parameter((*tuple_abstract)[output_idx]); + } + return create_parameter(cnode->abstract()); + }; + if (AnfAlgo::CheckPrimitiveType(output_item_with_index.first, prim::kPrimMakeTuple)) { + std::vector make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple)}; + auto make_tuple = output_item_with_index.first->cast(); + MS_EXCEPTION_IF_NULL(make_tuple); + for (size_t i = 1; i < make_tuple->inputs().size(); i++) { + auto input = make_tuple->inputs()[i]; + make_tuple_inputs.push_back(CreateFakeOutput(fake_graph_id, input)); + } + return fake_graph->NewCNode(make_tuple_inputs); + } + return create_parameter_from_cnode(output_item_with_index.first, output_item_with_index.second); +} + void AscendSession::SetFinalGraphOutput(const BaseRef &output) { auto final_graph = GetGraph(final_graph_id_); MS_EXCEPTION_IF_NULL(final_graph); @@ -546,12 +625,6 @@ void AscendSession::InsertSwitchToGraph(GraphId condition_graph_id, GraphId true condition_graph->AddValueNodeToGraph(counter_const); // create a new switch op auto switch_primitive = std::make_shared("StreamSwitch"); - auto kernel_build_info_builder = std::make_shared(); - kernel_build_info_builder->SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); - kernel_build_info_builder->SetOutputsDeviceType(std::vector{kNumberTypeInt32}); - kernel_build_info_builder->SetFusionType(kernel::FusionType::OPAQUE); - 
kernel_build_info_builder->SetProcessor(kernel::Processor::AICORE); - kernel_build_info_builder->SetKernelType(KernelType::RT_KERNEL); auto cond_output_it = condition_output_.find(condition_graph_id); if (cond_output_it == condition_output_.end()) { MS_LOG(EXCEPTION) << "Can't find condition graph" << condition_graph_id; @@ -561,11 +634,9 @@ void AscendSession::InsertSwitchToGraph(GraphId condition_graph_id, GraphId true MS_EXCEPTION_IF_NULL(cond_output_kernel); std::vector inputs = {NewValueNode(switch_primitive), cond_output_kernel, counter_const}; CNodePtr switch_node = condition_graph->NewCNode(inputs); - AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), switch_node.get()); MS_EXCEPTION_IF_NULL(switch_node); switch_node->set_abstract(std::make_shared()); AnfAlgo::SetGraphId(condition_graph_id, switch_node.get()); - AnfAlgo::SetStreamDistinctionLabel(GetDistinctionLabel(GetGraph(condition_graph_id)), switch_node.get()); // set attr: cond_ RT_GREATER AnfAlgo::SetNodeAttr(kAttrSwitchCondition, MakeValue(static_cast(RT_GREATER)), switch_node); // set attr:data_type @@ -573,9 +644,9 @@ void AscendSession::InsertSwitchToGraph(GraphId condition_graph_id, GraphId true // set attr:true branch graph id ,which is same to stream distinction label AnfAlgo::SetNodeAttr(kAttrTrueBranchStream, MakeValue(true_graph_id), switch_node); // append switch at the end of condition graph - std::vector exec_order = condition_graph->execution_order(); - exec_order.push_back(switch_node); - condition_graph->set_execution_order(exec_order); + auto return_node = condition_graph->get_return(); + MS_EXCEPTION_IF_NULL(return_node); + InsertControlDependToGraph(condition_graph_id, return_node->input(1), switch_node); MS_LOG(INFO) << "Finish!"; } @@ -602,8 +673,14 @@ void AscendSession::CopyOutputOfIf(GraphId false_graph_id) { MS_EXCEPTION_IF_NULL(true_last); MS_EXCEPTION_IF_NULL(false_last); MS_LOG(INFO) << "The last graph of false branch is " << false_last_id; - // now only 
consider the single output - InsertMultipleAssignToGraph(true_last_id, true_last->output(), false_last->output()); + // create fake output + auto fake_output_graph = NewKernelGraph(); + graph_execute_order.push_back(fake_output_graph->graph_id()); + graph_order_type.push_back(COMMON_GRAPH); + fake_output_graph->set_output(CreateFakeOutput(fake_output_graph->graph_id(), final_graph->output())); + final_graph->set_output(fake_output_graph->output()); + InsertMultipleAssignToGraph(true_last_id, true_last->output(), final_graph->output()); + InsertMultipleAssignToGraph(false_last_id, false_last->output(), final_graph->output()); // insert stream active for loop sink auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); @@ -637,14 +714,14 @@ void AscendSession::SwitchCompile(GraphId cond_graph_id, GraphId true_graph_id, if (false_graph_id != kInvalidGraphId) { // false graph and condition in graph same stream auto condition_graph = GetGraph(cond_graph_id); - SetStreamDistinctionLabel(GetGraph(false_graph_id), GetDistinctionLabel(condition_graph), true); + SetStreamDistinctionLabel(GetGraph(false_graph_id), condition_graph->stream_distinction_label(), true); // if false graph is a condition graph and has been switch compiled before,it's false should be updated again auto cond_it = switches_.find(false_graph_id); while (cond_it != switches_.end() && cond_it->second.second != kInvalidGraphId) { cond_graph_id = cond_it->first; false_graph_id = cond_it->second.second; condition_graph = GetGraph(cond_graph_id); - SetStreamDistinctionLabel(GetGraph(false_graph_id), GetDistinctionLabel(condition_graph), true); + SetStreamDistinctionLabel(GetGraph(false_graph_id), condition_graph->stream_distinction_label(), true); cond_it = switches_.find(false_graph_id); } } @@ -678,7 +755,7 @@ void AscendSession::MergeSwitchCompile() { } // insert stream active to common graph if (prev_graph_id != kInvalidGraphId) { - InsertStreamActiveToGraph(prev_graph_id, 
GetDistinctionLabel(condition_graph)); + InsertStreamActiveToGraph(prev_graph_id, condition_graph->stream_distinction_label()); } // if this is a 'if' condition auto it = while_condition_graphs_.find(cond_graph_id); @@ -687,12 +764,39 @@ void AscendSession::MergeSwitchCompile() { } else { // if it is a while,insert a stream active to true graph GraphId from_graph = it->second; - InsertStreamActiveToGraph(from_graph, GetDistinctionLabel(condition_graph)); + InsertStreamActiveToGraph(from_graph, condition_graph->stream_distinction_label()); } } MS_LOG(INFO) << "Finish!"; } +void AscendSession::InsertAllAssigns() { + std::set> assigns; + for (auto assign : assigns_) { + auto front_anf = std::get<0>(assign); + auto to_graph_id = std::get<1>(assign); + auto input_idx = std::get<2>(assign); + auto to_graph = GetGraph(to_graph_id); + MS_EXCEPTION_IF_NULL(to_graph); + std::vector graph_inputs = to_graph->inputs(); + if (input_idx >= graph_inputs.size()) { + MS_LOG(EXCEPTION) << "input_index " << input_idx << " out of range size " << graph_inputs.size(); + } + auto backend_parameter = graph_inputs[input_idx]; + (void)assigns.insert(std::pair(front_anf, backend_parameter)); + } + // erase the repeat assign + for (auto &assign : assigns) { + auto front_anf = assign.first; + auto backend_parameter = assign.second; + auto from_graph_id = GetGraphIdByNode(front_anf); + auto from_graph = GetGraph(from_graph_id); + MS_EXCEPTION_IF_NULL(from_graph); + auto backend_arg = from_graph->GetBackendAnfByFrontAnf(front_anf); + InsertAssignToGraph(from_graph_id, backend_arg, backend_parameter); + } +} + // insert active to graph void AscendSession::SetActive(GraphId from, GraphId to) { if (while_condition_graphs_.find(to) != while_condition_graphs_.end()) { @@ -722,20 +826,21 @@ void AscendSession::SetActive(GraphId from, GraphId to) { while_condition_graphs_[to] = from; } -void AscendSession::SetChildGraphParameter(const AnfNodePtr &front_anf, const AnfNodePtr &backend_parameter) { +void 
AscendSession::SetChildGraphParameter(const AnfNodePtr &front_anf, GraphId to_graph_id, size_t input_idx) { MS_LOG(INFO) << "Start!"; - MS_EXCEPTION_IF_NULL(backend_parameter); MS_EXCEPTION_IF_NULL(front_anf); - if (!backend_parameter->isa()) { - MS_LOG(EXCEPTION) << "Backend parameter's type is not a parameter,but is " << backend_parameter->ToString(); - } auto from_graph_id = GetGraphIdByNode(front_anf); auto from_graph = GetGraph(from_graph_id); MS_EXCEPTION_IF_NULL(from_graph); - auto to_graph_id = AnfAlgo::GetGraphId(backend_parameter.get()); auto to_graph = GetGraph(to_graph_id); - auto backend_arg = from_graph->GetBackendAnfByFrontAnf(front_anf); MS_EXCEPTION_IF_NULL(to_graph); + std::vector graph_inputs = to_graph->inputs(); + if (input_idx >= graph_inputs.size()) { + MS_LOG(EXCEPTION) << "input_index " << input_idx << " out of range size " << graph_inputs.size(); + } + auto backend_parameter = graph_inputs[input_idx]; + MS_EXCEPTION_IF_NULL(backend_parameter); + auto backend_arg = from_graph->GetBackendAnfByFrontAnf(front_anf); MS_LOG(INFO) << "Set node[" << front_anf->DebugString() << "] of graph[" << from_graph_id << "]to node[" << backend_parameter->DebugString() << "] of graph[" << AnfAlgo::GetGraphId(backend_parameter.get()) << "]"; @@ -746,39 +851,21 @@ void AscendSession::SetChildGraphParameter(const AnfNodePtr &front_anf, const An // if arg is the the parameter of child graph,it is parameter of final graph too if (front_anf->isa()) { MS_EXCEPTION_IF_NULL(backend_arg); - if (!AnfAlgo::OutputAddrExist(backend_arg, 0)) { - // set parameter's addr in child graph to parameter in final graph - AnfAlgo::SetOutputAddr(AnfAlgo::GetMutableOutputAddr(backend_parameter, 0), 0, backend_arg.get()); - MS_LOG(INFO) << "Assign mem of node" << backend_parameter->DebugString() << " of graph " - << AnfAlgo::GetGraphId(backend_parameter.get()) << " to node" << backend_arg->DebugString() - << "of graph " << AnfAlgo::GetGraphId(backend_arg.get()); - return; - } - // if a 
parameter is a weight and not linked to any executable node,device type will be kTypeUnknown,set it's device - // type same to arg - if (AnfAlgo::GetOutputDeviceDataType(backend_parameter, 0) == kTypeUnknown) { - AnfAlgo::SetSelectKernelBuildInfo(AnfAlgo::GetSelectKernelBuildInfo(backend_arg), backend_parameter.get()); - } - // if front anf is a parameter,we can assign the value back,because backend_parameter won't be change in it's graph - // unless it's a weight.If backend_parameter is a weight,we should assign the value back. - AnfAlgo::SetOutputAddr(AnfAlgo::GetMutableOutputAddr(backend_arg, 0), 0, backend_parameter.get()); + MS_LOG(INFO) << "Reuse node [" << backend_arg->DebugString() << "], old node[" << backend_parameter->DebugString() + << "] will be replaced."; + to_graph->ReplaceNode(backend_parameter, backend_arg); return; } - InsertAssignToGraph(from_graph_id, backend_arg, backend_parameter); - MS_LOG(INFO) << "Finish!"; + MS_LOG(INFO) << "Assign of node" << backend_arg->DebugString() << " of graph " << from_graph_id << " to node" + << backend_parameter->DebugString() << "of graph " << to_graph_id; + (void)assigns_.insert(std::tuple(front_anf, to_graph_id, input_idx)); } -void AscendSession::SetChildGraphParameter(const tensor::TensorPtr &front_tensor, const AnfNodePtr &backend_parameter) { +void AscendSession::SetChildGraphParameter(const tensor::TensorPtr &front_tensor, GraphId to_graph_id, + size_t input_idx) { MS_LOG(INFO) << "Start!"; - // sync data from host to device - MS_EXCEPTION_IF_NULL(front_tensor); - size_t tensor_size = front_tensor->data().nbytes(); - auto addr = AnfAlgo::GetOutputAddr(backend_parameter, 0); - MS_EXCEPTION_IF_NULL(addr); - if (!addr->SyncHostToDevice(trans::GetRuntimePaddingShape(backend_parameter, 0), tensor_size, - front_tensor->data_type(), front_tensor->data_c(false))) { - MS_LOG(EXCEPTION) << "Tensor SyncHostToDevice fail!"; - } + std::pair graph_input_pair(to_graph_id, input_idx); + 
initial_tenosrs_[graph_input_pair] = front_tensor; MS_LOG(INFO) << "Finish!"; } @@ -800,45 +887,75 @@ void AscendSession::UpdateGraphOrder(GraphId to_graph_id) { } } +size_t AscendSession::SetChildGraphInput(const KernelGraphPtr &graph, const AnfNodePtr &node, size_t input_index) { + auto output_num = AnfAlgo::GetOutputTensorNum(node); + if (output_num > 1 && !AnfAlgo::CheckPrimitiveType(node, prim::kPrimTupleGetItem)) { + return input_index + output_num; + } + auto valid_inputs = graph->valid_inputs(); + if (valid_inputs[input_index]) { + SetChildGraphParameter(node, graph->graph_id(), input_index); + } else { + MS_LOG(DEBUG) << "Invalid input arg: " << node->DebugString(); + } + return ++input_index; +} + +size_t AscendSession::SetChildGraphInput(const KernelGraphPtr &graph, const ValuePtr &value, size_t input_index) { + MS_EXCEPTION_IF_NULL(value); + if (!value->isa()) { + MS_LOG(EXCEPTION) << "Value Node should be a tensor, unexpected value: " << value->ToString(); + } + SetChildGraphParameter(value->cast(), graph->graph_id(), input_index); + return ++input_index; +} + +size_t AscendSession::SetChildGraphInput(const KernelGraphPtr &graph, const VectorRef &vec_args, size_t input_index) { + auto index = input_index; + for (auto &arg : vec_args) { + if (utils::isa(arg)) { + // arg is a anf node + auto node = utils::cast(arg); + index = SetChildGraphInput(graph, node, input_index); + } else if (utils::isa(arg)) { + // arg is a tensor + auto value = utils::cast(arg); + index = SetChildGraphInput(graph, value, input_index); + } else { + MS_LOG(EXCEPTION) << "Unexpected arg type " << arg.ToString(); + } + } + return index; +} + void AscendSession::SetChildGraphInput(GraphId g, const VectorRef &args) { MS_LOG(INFO) << "Set input of graph " << g; auto to_graph = GetGraph(g); MS_EXCEPTION_IF_NULL(to_graph); DumpGraphInputArgs(args); UpdateGraphOrder(g); - std::vector graph_inputs = to_graph->inputs(); - auto valid_inputs = to_graph->ValidInputs(); + auto &graph_inputs = 
to_graph->inputs(); auto real_args = GetRealArgs(to_graph, args); size_t input_index = 0; for (size_t i = 0; i < real_args.size(); i++) { if (input_index >= graph_inputs.size()) { MS_LOG(EXCEPTION) << "input_index " << input_index << " out of range size " << graph_inputs.size(); } - if (utils::isa(real_args[i])) { + auto &real_arg = real_args[i]; + if (utils::isa(real_arg)) { // arg is a anf node - auto real_arg = utils::cast(real_args[i]); - auto real_arg_output_num = AnfAlgo::GetOutputTensorNum(real_arg); - if (!AnfAlgo::CheckPrimitiveType(real_arg, prim::kPrimTupleGetItem) && real_arg_output_num > 1) { - input_index += real_arg_output_num; - continue; - } - if (valid_inputs[input_index]) { - SetChildGraphParameter(real_arg, graph_inputs[input_index]); - } else { - MS_LOG(DEBUG) << "Invalid input arg" << real_arg->DebugString(); - } - input_index++; - } else if (utils::isa(args[i])) { - auto value = utils::cast(args[i]); - MS_EXCEPTION_IF_NULL(value); + auto node = utils::cast(real_arg); + input_index = SetChildGraphInput(to_graph, node, input_index); + } else if (utils::isa(real_arg)) { // arg is a tensor - if (!value->isa()) { - MS_LOG(EXCEPTION) << "Value Node should be a tensor, unexpected value: " << value->ToString(); - } - SetChildGraphParameter(value->cast(), graph_inputs[input_index]); - input_index++; + auto value = utils::cast(real_arg); + input_index = SetChildGraphInput(to_graph, value, input_index); + } else if (utils::isa(real_arg)) { + // arg is a VectorRef + auto vec_args = utils::cast(real_arg); + input_index = SetChildGraphInput(to_graph, vec_args, input_index); } else { - MS_LOG(EXCEPTION) << "Unexpected arg type " << args[i].ToString(); + MS_LOG(EXCEPTION) << "Unexpected arg type " << real_arg.ToString(); } } MS_LOG(INFO) << "Finish!"; @@ -860,8 +977,6 @@ GraphId AscendSession::GetGraphIdByNode(const AnfNodePtr &front_anf) const { void AscendSession::MergeGraphExecOrder() { MS_LOG(INFO) << "Start!"; - // insert switch to graph - 
MergeSwitchCompile(); // merge graph order auto &graph_order = GetGraphOrder(final_graph_id_); auto &graph_type = GetGraphOrderType(final_graph_id_); @@ -871,6 +986,13 @@ void AscendSession::MergeGraphExecOrder() { MS_LOG(WARNING) << "Graph output is a lonely variable not linked to any op!"; return; } + if (graph_order.size() > 1) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!context_ptr->enable_task_sink()) { + MS_LOG(EXCEPTION) << "Control sink network should run with task-sink mode!"; + } + } // if first graph is common,the final graph has no label,then set the stream of final graph same with the first graph SetStreamDistinctionLabel(final_graph, graph_order[0], false); std::vector final_exec_order = final_graph->execution_order(); @@ -885,7 +1007,11 @@ void AscendSession::MergeGraphExecOrder() { MS_EXCEPTION_IF_NULL(child_graph); auto exec_order = child_graph->execution_order(); MS_LOG(INFO) << "Merge graph,graph_id " << graph_id; - (void)std::copy(exec_order.begin(), exec_order.end(), std::back_inserter(final_exec_order)); + (void)std::transform(exec_order.begin(), exec_order.end(), std::back_inserter(final_exec_order), + [&](CNodePtr node) -> CNodePtr { + AnfAlgo::SetStreamDistinctionLabel(child_graph->stream_distinction_label(), node.get()); + return node; + }); // add all value nodes of child graphs to final graph for (auto &value_node : child_graph->graph_value_nodes()) { final_graph->AddValueNodeToGraph(value_node); @@ -924,15 +1050,9 @@ void AscendSession::InsertAssignToGraph(GraphId graph_id, const AnfNodePtr &from // generate a new cnode auto assign_node = graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(assign_node); - assign_node->set_abstract(std::make_shared()); - auto kernel_build_info_builder = std::make_shared(); - kernel_build_info_builder->SetKernelType(KernelType::RT_KERNEL); - AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), assign_node.get()); - 
AnfAlgo::SetStreamDistinctionLabel(GetDistinctionLabel(graph), assign_node.get()); + assign_node->set_abstract(to->abstract()); // append the assign at the end of from graph - auto exec_order = graph->execution_order(); - exec_order.push_back(assign_node); - graph->set_execution_order(exec_order); + InsertDependToGraph(graph_id, assign_node); } void AscendSession::InsertMultipleAssignToGraph(GraphId graph_id, const AnfNodePtr &from, const AnfNodePtr &to) { @@ -952,24 +1072,46 @@ void AscendSession::InsertMultipleAssignToGraph(GraphId graph_id, const AnfNodeP void AscendSession::InsertStreamActiveToGraph(GraphId graph_id, uint32_t actived_stream) { MS_LOG(INFO) << "Insert stream_active from " << graph_id << " to " << actived_stream; - auto from_graph = graphs_[graph_id]; + auto from_graph = GetGraph(graph_id); MS_EXCEPTION_IF_NULL(from_graph); std::vector inputs = {NewValueNode(std::make_shared("StreamActive"))}; auto active_node = from_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(active_node); active_node->set_abstract(std::make_shared()); - auto kernel_build_info_builder = std::make_shared(); - kernel_build_info_builder->SetKernelType(KernelType::RT_KERNEL); - AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), active_node.get()); // set the active stream id into the attr of active node std::vector active_index_value = {}; active_index_value.push_back(actived_stream); AnfAlgo::SetNodeAttr(kAttrActiveStreamList, MakeValue>(active_index_value), active_node); - AnfAlgo::SetStreamDistinctionLabel(GetDistinctionLabel(from_graph), active_node.get()); // append the active node at the end of from graph - auto exec_order = from_graph->execution_order(); - exec_order.push_back(active_node); - from_graph->set_execution_order(exec_order); + auto return_node = from_graph->get_return(); + MS_EXCEPTION_IF_NULL(return_node); + InsertControlDependToGraph(graph_id, return_node->input(1), active_node); +} + +void AscendSession::InsertDependToGraph(GraphId 
graph_id, const AnfNodePtr &attch_node) { + MS_LOG(INFO) << "Insert depend at the end of graph, the attach node is " << attch_node->DebugString(); + auto graph = GetGraph(graph_id); + MS_EXCEPTION_IF_NULL(graph); + std::vector inputs = {NewValueNode(std::make_shared("depend"))}; + auto return_node = graph->get_return(); + MS_EXCEPTION_IF_NULL(return_node); + inputs.push_back(return_node->input(1)); + inputs.push_back(attch_node); + auto depend_node = graph->NewCNode(inputs); + return_node->set_input(1, depend_node); +} + +void AscendSession::InsertControlDependToGraph(GraphId graph_id, const AnfNodePtr &first_node, + const AnfNodePtr &second_node) { + MS_LOG(INFO) << "Insert control depend at the end of graph, the first node is " << first_node->DebugString() + << ", the second node is " << second_node->DebugString(); + auto graph = GetGraph(graph_id); + MS_EXCEPTION_IF_NULL(graph); + std::vector inputs = {NewValueNode(std::make_shared("ControlDepend"))}; + inputs.push_back(first_node); + inputs.push_back(second_node); + auto control_depend = graph->NewCNode(inputs); + InsertDependToGraph(graph_id, control_depend); } size_t AscendSession::ExecOrderOfChildGraph(GraphId final_graph, GraphId child_graph) { @@ -998,5 +1140,29 @@ std::vector &AscendSession::GetGraphOrderType(GraphId final_graph_id) } return graph_type_iter->second; } + +void AscendSession::SyncInitialTenosrToDevice() { + for (auto &item : initial_tenosrs_) { + auto to_graph_id = item.first.first; + auto input_idx = item.first.second; + auto front_tensor = item.second; + auto to_graph = GetGraph(to_graph_id); + MS_EXCEPTION_IF_NULL(to_graph); + std::vector graph_inputs = to_graph->inputs(); + if (input_idx >= graph_inputs.size()) { + MS_LOG(EXCEPTION) << "input_index " << input_idx << " out of range size " << graph_inputs.size(); + } + auto backend_parameter = graph_inputs[input_idx]; + // sync data from host to device + MS_EXCEPTION_IF_NULL(front_tensor); + size_t tensor_size = 
front_tensor->data().nbytes(); + auto addr = AnfAlgo::GetOutputAddr(backend_parameter, 0); + MS_EXCEPTION_IF_NULL(addr); + if (!addr->SyncHostToDevice(trans::GetRuntimePaddingShape(backend_parameter, 0), tensor_size, + front_tensor->data_type(), front_tensor->data_c(false))) { + MS_LOG(EXCEPTION) << "Tensor SyncHostToDevice fail!"; + } + } +} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/ascend_session.h b/mindspore/ccsrc/session/ascend_session.h index 2d24691404..635003d97c 100755 --- a/mindspore/ccsrc/session/ascend_session.h +++ b/mindspore/ccsrc/session/ascend_session.h @@ -21,6 +21,9 @@ #include #include #include +#include +#include +#include #include "session/session_basic.h" #include "session/kernel_graph.h" #include "kernel/kernel.h" @@ -42,7 +45,7 @@ class AscendSession : public SessionBasic { void RunGraph(const GraphId &graph_id, const std::vector &inputs, VectorRef *outputs) override; void BuildGraph(GraphId) override; void BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info, - std::vector *input_tensors) override; + const std::vector &input_tensors, const std::vector &tensors_mask) override; py::tuple RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info, const std::vector &input_tensors) override; @@ -60,6 +63,8 @@ class AscendSession : public SessionBasic { GraphId GetFinalRunGraph() const override { return final_graph_id_; } // insert active to graph void SetActive(GraphId, GraphId) override; + // compile child graph when session have multiple child graphs + void CompileChildGraph(const KernelGraphPtr &child_graph); private: void InitRuntimeResource(); @@ -79,6 +84,10 @@ class AscendSession : public SessionBasic { void RunOpHardwareOptimize(const std::shared_ptr &kernel_graph) const; void RunOpExecTask(const std::shared_ptr &kernel_graph) const; + size_t SetChildGraphInput(const KernelGraphPtr &graph, const AnfNodePtr &node, size_t input_index); + size_t SetChildGraphInput(const 
KernelGraphPtr &graph, const ValuePtr &value, size_t input_index); + size_t SetChildGraphInput(const KernelGraphPtr &graph, const VectorRef &vec_args, size_t input_index); + // merge execution order list of child graphs void MergeGraphExecOrder(); // insert assion op to sync data bettween different graphs @@ -91,12 +100,16 @@ class AscendSession : public SessionBasic { size_t ExecOrderOfChildGraph(GraphId final_graph, GraphId child_graph); // handle condition graph from vm void InsertSwitchToGraph(GraphId condition_graph_id, GraphId true_graph_id); + // insert depend to graph, used to attch control nodes to graph + void InsertDependToGraph(GraphId graph_id, const AnfNodePtr &attch_node); + // insert depend to graph, used to attch control nodes to graph + void InsertControlDependToGraph(GraphId graph_id, const AnfNodePtr &first_node, const AnfNodePtr &second_node); // Get graph by graph id ,if not exist return null ptr KernelGraphPtr GetGraph(GraphId graph_id); // set child graph parameter if front arg is a anf - void SetChildGraphParameter(const AnfNodePtr &front_anf, const AnfNodePtr &backend_parameter); + void SetChildGraphParameter(const AnfNodePtr &front_anf, GraphId to_graph_id, size_t input_idx); // set child graph parameter if front arg is a tensor - void SetChildGraphParameter(const tensor::TensorPtr &front_tensor, const AnfNodePtr &backend_parameter); + void SetChildGraphParameter(const tensor::TensorPtr &front_tensor, GraphId to_graph_id, size_t input_idx); // update the execution order of all child graphs void UpdateGraphOrder(GraphId to_graph); // handle switch when merge @@ -107,6 +120,14 @@ class AscendSession : public SessionBasic { std::vector &GetGraphOrderType(GraphId final_graph_id); // copy output of if and else void CopyOutputOfIf(GraphId false_graph_id); + // check if graph cache exist + bool GraphCacheExist(const GraphInfo &graph_info) const; + // insert all assign to child graph + void InsertAllAssigns(); + // create fake output of final 
graph + AnfNodePtr CreateFakeOutput(GraphId final_graph_id, const AnfNodePtr &true_output); + // sync intial tensors' data to device + void SyncInitialTenosrToDevice(); // member variables // key is final_graph_id,value is child graph execute order of final graph @@ -118,6 +139,10 @@ class AscendSession : public SessionBasic { // record all conditions std::unordered_map> switches_; std::unordered_map condition_output_; + // share parameters + std::set> assigns_; + // initial tensors, these tensor will sync data to device before run graph + std::map, tensor::TensorPtr> initial_tenosrs_; // final_graph_id is used in every root graph has it's own session situation GraphId final_graph_id_; }; diff --git a/mindspore/ccsrc/device/cpu/cpu_session.cc b/mindspore/ccsrc/session/cpu_session.cc similarity index 96% rename from mindspore/ccsrc/device/cpu/cpu_session.cc rename to mindspore/ccsrc/session/cpu_session.cc index 1613f9f91e..e8830d730c 100644 --- a/mindspore/ccsrc/device/cpu/cpu_session.cc +++ b/mindspore/ccsrc/session/cpu_session.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/cpu/cpu_session.h" +#include "session/cpu_session.h" #include #include "ir/meta_tensor.h" #include "ir/anf.h" @@ -23,7 +23,7 @@ #include "session/anf_runtime_algorithm.h" #include "device/kernel_runtime.h" #include "predict/predict.h" -#include "device/cpu/cpu_kernel_factory.h" +#include "kernel/cpu/cpu_kernel_factory.h" namespace mindspore { namespace session { @@ -110,7 +110,7 @@ void CPUSession::BuildKernel(const KernelGraph *kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_node); std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); MS_LOG(INFO) << "Cpu building operator[" << kernel_name << "]."; - std::shared_ptr cpu_kernel = device::cpu::CPUKernelFactory::Get().Create(kernel_name); + std::shared_ptr cpu_kernel = kernel::CPUKernelFactory::Get().Create(kernel_name); if (cpu_kernel == nullptr) { MS_LOG(EXCEPTION) << "Operator[" << kernel_name << "] is not support."; } diff --git a/mindspore/ccsrc/device/cpu/cpu_session.h b/mindspore/ccsrc/session/cpu_session.h similarity index 100% rename from mindspore/ccsrc/device/cpu/cpu_session.h rename to mindspore/ccsrc/session/cpu_session.h diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/session/gpu_session.cc index f5e8c44231..3a80382e9b 100644 --- a/mindspore/ccsrc/session/gpu_session.cc +++ b/mindspore/ccsrc/session/gpu_session.cc @@ -20,7 +20,7 @@ #include "device/gpu/gpu_stream_assign.h" #include "pre_activate/common/optimizer.h" #include "pre_activate/common/pass_manager.h" -#include "pre_activate/pass/allreduce_fusion.h" +#include "pre_activate/pass/communication_op_fusion.h" #include "device/kernel_runtime_manager.h" #include "predict/predict.h" #include "common/utils.h" @@ -133,10 +133,9 @@ void GPUSession::RunGraph(const GraphId &graph_id, const std::vector *input_tensors) { + const std::vector &input_tensors, const std::vector &tensors_mask) { // Prepare the graph - MS_EXCEPTION_IF_NULL(input_tensors); - auto kernel_graph = ConstructSingleOpGraph(op_run_info, 
input_tensors); + auto kernel_graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask); MS_EXCEPTION_IF_NULL(kernel_graph); SelectKernel(kernel_graph); StartKernelRT(); diff --git a/mindspore/ccsrc/session/gpu_session.h b/mindspore/ccsrc/session/gpu_session.h index 470c9b4799..2a3cc04b09 100644 --- a/mindspore/ccsrc/session/gpu_session.h +++ b/mindspore/ccsrc/session/gpu_session.h @@ -40,7 +40,7 @@ class GPUSession : public SessionBasic { void RunGraph(const GraphId &graph_id, const std::vector &inputs, VectorRef *outputs) override; void BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info, - std::vector *input_tensors) override; + const std::vector &input_tensors, const std::vector &tensors_mask) override; py::tuple RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info, const std::vector &input_tensors) override; diff --git a/mindspore/ccsrc/session/kernel_graph.cc b/mindspore/ccsrc/session/kernel_graph.cc index 139539ccb2..95ac38c405 100755 --- a/mindspore/ccsrc/session/kernel_graph.cc +++ b/mindspore/ccsrc/session/kernel_graph.cc @@ -49,80 +49,81 @@ std::vector KernelGraph::outputs() const { return std::vector(); } -void KernelGraph::SetExecOrderByDefault() { - std::stack seed_nodes; - UpdateNodeEdgeList(&seed_nodes); - execution_order_.clear(); - std::unordered_set visited_nodes; - std::queue zero_input_nodes; - - auto visit_node_descendant = [&visited_nodes, this](const AnfNodePtr &node, std::queue *visit_queue) { - auto it = node_output_edges_.find(node); - if (it == node_output_edges_.end()) { - // value node and parameter has no input,no need to print log - if (node->isa()) { - MS_LOG(DEBUG) << "Can not find node [" << node->DebugString() << "]"; - } - return; +void KernelGraph::VisitNodeDescendants(const AnfNodePtr &node, std::queue *visit_queue, + std::unordered_set *visited_nodes) { + MS_EXCEPTION_IF_NULL(visit_queue); + MS_EXCEPTION_IF_NULL(visited_nodes); + auto it = node_output_edges_.find(node); + if (it == 
node_output_edges_.end()) { + // value node and parameter has no input,no need to print log + if (node->isa()) { + MS_LOG(DEBUG) << "Can not find node [" << node->DebugString() << "]"; } + return; + } - // visit all reduce node first, then other nodes - std::vector active_nodes; - for (const auto &output_edge : it->second) { - auto next_node = output_edge.first; - if (node_input_num_.find(next_node) == node_input_num_.end()) { - MS_EXCEPTION_IF_NULL(next_node); - MS_LOG(EXCEPTION) << "Can't find node[" << next_node->DebugString() << "]"; - } + // visit all reduce node first, then other nodes + std::vector active_nodes; + for (const auto &output_edge : it->second) { + auto next_node = output_edge.first; + if (node_input_num_.find(next_node) == node_input_num_.end()) { MS_EXCEPTION_IF_NULL(next_node); - MS_LOG(DEBUG) << "Decrease input:" << next_node->DebugString() << ",node:" << node->DebugString() - << ",num: " << node_input_num_[next_node] << ",decrease num:" << output_edge.second; - if (node_input_num_[next_node] < output_edge.second) { - MS_LOG(EXCEPTION) << "Input node:" << next_node->DebugString() << ",node_output_num" - << node_input_num_[next_node] << ",depend edge:" << output_edge.second; - } - node_input_num_[next_node] = node_input_num_[next_node] - output_edge.second; - // allreduce first - if (node_input_num_[next_node] == 0 && visited_nodes.find(next_node) == visited_nodes.end()) { - (void)visited_nodes.insert(next_node); - if (AnfAlgo::IsAllReduceOp(next_node)) { - MS_LOG(DEBUG) << "visit node:" << next_node->DebugString(); - visit_queue->push(next_node); - } else { - active_nodes.emplace_back(next_node); - } + MS_LOG(EXCEPTION) << "Can't find node[" << next_node->DebugString() << "]"; + } + MS_EXCEPTION_IF_NULL(next_node); + MS_LOG(DEBUG) << "Decrease input:" << next_node->DebugString() << ",node:" << node->DebugString() + << ",num: " << node_input_num_[next_node] << ",decrease num:" << output_edge.second; + if (node_input_num_[next_node] < 
output_edge.second) { + MS_LOG(EXCEPTION) << "Input node:" << next_node->DebugString() << ",node_output_num" << node_input_num_[next_node] + << ",depend edge:" << output_edge.second; + } + node_input_num_[next_node] = node_input_num_[next_node] - output_edge.second; + // allreduce first + if (node_input_num_[next_node] == 0 && visited_nodes->find(next_node) == visited_nodes->end()) { + (void)visited_nodes->insert(next_node); + if (AnfAlgo::IsCommunicationOp(next_node)) { + MS_LOG(DEBUG) << "visit node:" << next_node->DebugString(); + visit_queue->push(next_node); + } else { + active_nodes.emplace_back(next_node); } } + } - for (auto &node : active_nodes) { - MS_LOG(DEBUG) << "visit node:" << node->DebugString(); - visit_queue->push(node); - } - }; + for (auto &node : active_nodes) { + MS_LOG(DEBUG) << "visit node:" << node->DebugString(); + visit_queue->push(node); + } +} - AnfNodePtr last_allreduce_node = nullptr; - std::queue allreduce_descendants; - while (!seed_nodes.empty() || last_allreduce_node != nullptr) { +void KernelGraph::SetExecOrderByDefault() { + std::queue seed_nodes; + UpdateNodeEdgeList(&seed_nodes); + execution_order_.clear(); + std::unordered_set visited_nodes; + std::queue zero_input_nodes; + AnfNodePtr last_communication_node = nullptr; + std::queue communication_descendants; + while (!seed_nodes.empty() || last_communication_node != nullptr) { // seed nodes first, then visit last all reduce node descendant if (seed_nodes.empty()) { - visit_node_descendant(last_allreduce_node, &allreduce_descendants); - last_allreduce_node = nullptr; + VisitNodeDescendants(last_communication_node, &communication_descendants, &visited_nodes); + last_communication_node = nullptr; } else { - zero_input_nodes.push(seed_nodes.top()); + zero_input_nodes.push(seed_nodes.front()); seed_nodes.pop(); } - // all reduce node descendant first, then common queue - while (!zero_input_nodes.empty() || !allreduce_descendants.empty()) { + while (!zero_input_nodes.empty() || 
!communication_descendants.empty()) { AnfNodePtr node = nullptr; - bool is_allreduce_descendant = false; - if (allreduce_descendants.empty()) { + bool is_communication_descendant = false; + if (communication_descendants.empty()) { node = zero_input_nodes.front(); zero_input_nodes.pop(); } else { - node = allreduce_descendants.front(); - allreduce_descendants.pop(); - is_allreduce_descendant = true; + node = communication_descendants.front(); + communication_descendants.pop(); + is_communication_descendant = true; } // add execute node MS_EXCEPTION_IF_NULL(node); @@ -130,19 +131,18 @@ void KernelGraph::SetExecOrderByDefault() { execution_order_.push_back(node->cast()); } // for all reduce node, visit last all reduce node descendant - if (AnfAlgo::IsAllReduceOp(node)) { - if (last_allreduce_node != nullptr) { - visit_node_descendant(last_allreduce_node, &allreduce_descendants); + if (AnfAlgo::IsCommunicationOp(node)) { + if (last_communication_node != nullptr) { + VisitNodeDescendants(last_communication_node, &communication_descendants, &visited_nodes); } - last_allreduce_node = node; - } else if (is_allreduce_descendant) { - visit_node_descendant(node, &allreduce_descendants); + last_communication_node = node; + } else if (is_communication_descendant) { + VisitNodeDescendants(node, &communication_descendants, &visited_nodes); } else { - visit_node_descendant(node, &zero_input_nodes); + VisitNodeDescendants(node, &zero_input_nodes, &visited_nodes); } } } - CheckLoop(); } @@ -295,10 +295,7 @@ ValueNodePtr KernelGraph::NewValueNode(const ValueNodePtr &value_node) { // set the format of value_node to DEFAULT_FORMAT kernel_build_info_builder->SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); // set value node initial device data type = infer data type - std::vector types; - for (size_t index = 0; index < AnfAlgo::GetOutputTensorNum(value_node); ++index) { - types.push_back(kTypeUnknown); - } + std::vector types = std::vector(AnfAlgo::GetOutputTensorNum(value_node), 
kTypeUnknown); kernel_build_info_builder->SetOutputsDeviceType(types); AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), new_value_node.get()); AnfAlgo::SetGraphId(graph_id_, new_value_node.get()); @@ -330,10 +327,11 @@ void KernelGraph::FrontBackendlMapUpdate(const AnfNodePtr &old_backend_anf, cons MS_LOG(EXCEPTION) << "old can't be same with new"; } if (backend_front_anf_map_.find(old_backend_anf) == backend_front_anf_map_.end()) { - MS_LOG(EXCEPTION) << "old_backend_anf " << old_backend_anf->DebugString() << " is not exist in the map"; + MS_LOG(DEBUG) << "old_backend_anf " << old_backend_anf->DebugString() << " is not exist in the map"; + return; } if (front_backend_anf_map_.find(backend_front_anf_map_[old_backend_anf]) == front_backend_anf_map_.end()) { - MS_LOG(EXCEPTION) << "anf is not exist in the mape ,old " << old_backend_anf->DebugString(); + MS_LOG(EXCEPTION) << "anf is not exist in the map ,old " << old_backend_anf->DebugString(); } front_backend_anf_map_[backend_front_anf_map_[old_backend_anf]] = new_backend_anf; backend_front_anf_map_[new_backend_anf] = backend_front_anf_map_[old_backend_anf]; @@ -467,7 +465,7 @@ bool KernelGraph::HandleControlDependNode(const AnfNodePtr &node, std::queue *seed_nodes) { +void KernelGraph::UpdateNodeEdgeList(std::queue *seed_nodes) { node_output_edges_.clear(); node_input_num_.clear(); node_input_edges_.clear(); @@ -483,7 +481,6 @@ void KernelGraph::UpdateNodeEdgeList(std::stack *seed_nodes) { seed_nodes->push(node); continue; } - if (!node->isa()) { continue; } @@ -529,5 +526,44 @@ bool KernelGraph::RemoveValueNodeFromGraph(const ValueNodePtr &value_node) { } return false; } + +void KernelGraph::ReplaceNode(const AnfNodePtr &old_anf_node, AnfNodePtr new_anf_node) { + MS_EXCEPTION_IF_NULL(old_anf_node); + MS_EXCEPTION_IF_NULL(new_anf_node); + MS_EXCEPTION_IF_NULL(inputs_); + auto it = node_output_edges_.find(old_anf_node); + if (it == node_output_edges_.end()) { + MS_LOG(EXCEPTION) << "Can't find 
anf node in node_output_edges map"; + } + auto &outputs = it->second; + for (auto &output_node : outputs) { + auto output_cnode = output_node.first->cast(); + MS_EXCEPTION_IF_NULL(output_cnode); + auto &output_node_inputs = output_cnode->inputs(); + for (size_t i = 1; i < output_node_inputs.size(); i++) { + if (output_node_inputs[i] == old_anf_node) { + output_cnode->set_input(i, new_anf_node); + } + } + // update graph inputs + for (size_t i = 0; i < inputs_->size(); i++) { + if ((*inputs_)[i] == old_anf_node) { + (*inputs_)[i] = new_anf_node; + break; + } + } + } + // update front to backend map + FrontBackendlMapUpdate(old_anf_node, new_anf_node); + // update output depend relations + node_output_edges_[new_anf_node] = it->second; + (void)node_output_edges_.erase(old_anf_node); +} + +void KernelGraph::UpdateExecuteKernelStreamLabel() { + for (auto &kernel : execution_order_) { + AnfAlgo::SetStreamDistinctionLabel(stream_distinction_label_, kernel.get()); + } +} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/kernel_graph.h b/mindspore/ccsrc/session/kernel_graph.h index 54b16014a3..3425bde9c2 100755 --- a/mindspore/ccsrc/session/kernel_graph.h +++ b/mindspore/ccsrc/session/kernel_graph.h @@ -22,12 +22,12 @@ #include #include #include -#include #include #include #include "ir/func_graph.h" #include "ir/anf.h" #include "utils/graph_utils.h" +#include "device/kernel_info.h" namespace mindspore { namespace session { @@ -38,6 +38,7 @@ class KernelGraph : public FuncGraph { inputs_ = std::make_shared>(); execution_order_ = {}; executable_ = true; + stream_distinction_label_ = kInvalidDistincLabel; } ~KernelGraph() override = default; @@ -89,13 +90,23 @@ class KernelGraph : public FuncGraph { void set_executable(bool executable) { executable_ = executable; } // set invalid inputs for control sink std::vector *MutableValidInputs() { return &valid_inputs_; } - std::vector ValidInputs() { return valid_inputs_; } + std::vector 
valid_inputs() const { return valid_inputs_; } + // replace node in graph + void ReplaceNode(const AnfNodePtr &old_anf_node, AnfNodePtr new_anf_node); + // set stream label of graph + void set_stream_distinction_label(uint32_t stream_label) { stream_distinction_label_ = stream_label; } + // get stream label of graph + uint32_t stream_distinction_label() { return stream_distinction_label_; } + // refresh execute kernel stream label + void UpdateExecuteKernelStreamLabel(); private: // remove value node form graph bool RemoveValueNodeFromGraph(const ValueNodePtr &value_node); + void VisitNodeDescendants(const AnfNodePtr &node, std::queue *visit_queue, + std::unordered_set *visited_nodes); // update node edge list - void UpdateNodeEdgeList(std::stack *seed_nodes); + void UpdateNodeEdgeList(std::queue *seed_nodes); // add node depend edge by data edge or control depend void AddDependEdge(const AnfNodePtr &node, const AnfNodePtr &input, size_t depend_edge_num); // handle control depend @@ -107,6 +118,7 @@ class KernelGraph : public FuncGraph { std::shared_ptr> inputs_; std::vector execution_order_; uint32_t graph_id_; + uint32_t stream_distinction_label_; // record map bettween front anf and backend anf,use two map implement bidirectional map std::unordered_map front_backend_anf_map_; diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/session/session_basic.cc index 0ef0ad97ea..dd313c6059 100755 --- a/mindspore/ccsrc/session/session_basic.cc +++ b/mindspore/ccsrc/session/session_basic.cc @@ -125,8 +125,9 @@ BaseRef CreateOneTensor(const AnfNodePtr &node, size_t output_index, const Kerne // if in paynative mode,data only copyed to host when user want to print data auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); - if (ms_context->enable_pynative_infer()) { + if (ms_context->execution_mode() == kPynativeMode) { tensor->set_device_address(AnfAlgo::GetMutableOutputAddr(node, output_index)); + tensor->set_dirty(false); } else 
if (!address->SyncDeviceToHost(trans::GetRuntimePaddingShape(node, output_index), LongToSize(tensor->data().nbytes()), tensor->data_type(), tensor->data_c(true))) { @@ -180,116 +181,6 @@ BaseRef CreatTupleForOutput(const AnfNodePtr &anf, const KernelGraph &graph, return ret; } -bool RunOpConvertConstInputToAttr(const py::object &input_object, size_t input_index, const PrimitivePtr &op_prim, - const std::unordered_set &input_attrs) { - MS_EXCEPTION_IF_NULL(op_prim); - auto input_names_value = op_prim->GetAttr(kAttrInputNames); - if (input_names_value == nullptr) { - return false; - } - auto input_names_vec = GetValue>(input_names_value); - if (input_index >= input_names_vec.size()) { - MS_LOG(EXCEPTION) << "The input index: " << input_index << " is large than the input names vector size!"; - } - - if (input_attrs.find(input_index) != input_attrs.end()) { - ValuePtr value = parse::data_converter::PyDataToValue(input_object); - MS_EXCEPTION_IF_NULL(value); - auto input_name = input_names_vec[input_index]; - op_prim->set_attr(input_name, value); - return true; - } - return false; -} - -void PlantTensorTupleToVector(const py::tuple &tuple_inputs, const PrimitivePtr &op_prim, - std::vector *input_tensor) { - MS_EXCEPTION_IF_NULL(op_prim); - MS_EXCEPTION_IF_NULL(input_tensor); - for (const auto &input_object : tuple_inputs) { - if (!py::isinstance(input_object)) { - MS_LOG(EXCEPTION) << "The input object is not a tensor!"; - } - auto tensor = py::cast(input_object); - MS_EXCEPTION_IF_NULL(tensor); - input_tensor->push_back(tensor); - } - op_prim->set_attr(kAttrDynInputSizes, MakeValue(std::vector{SizeToInt(tuple_inputs.size())})); -} - -void ConvertValueTupleToTensor(const py::object &input_object, std::vector *input_tensor) { - MS_EXCEPTION_IF_NULL(input_tensor); - ValuePtr input_value = parse::data_converter::PyDataToValue(input_object); - MS_EXCEPTION_IF_NULL(input_value); - if (!input_value->isa()) { - MS_LOG(EXCEPTION) << "The input object is not a value tuple!"; - } 
- auto value_tuple = input_value->cast(); - MS_EXCEPTION_IF_NULL(value_tuple); - tensor::TensorPtr tensor_ptr = nullptr; - tensor_ptr = opt::CreateTupleTensor(value_tuple); - MS_EXCEPTION_IF_NULL(tensor_ptr); - input_tensor->push_back(tensor_ptr); -} - -void ConvertPyObjectToTensor(const py::object &input_object, const PrimitivePtr &op_prim, - std::vector *input_tensor) { - MS_EXCEPTION_IF_NULL(op_prim); - MS_EXCEPTION_IF_NULL(input_tensor); - tensor::TensorPtr tensor_ptr = nullptr; - if (py::isinstance(input_object)) { - tensor_ptr = py::cast(input_object); - } else if (py::isinstance(input_object)) { - tensor_ptr = std::make_shared(py::cast(input_object), kFloat32); - } else if (py::isinstance(input_object)) { - tensor_ptr = std::make_shared(py::cast(input_object), nullptr); - } else if (py::isinstance(input_object)) { - tensor_ptr = std::make_shared(py::cast(input_object), nullptr); - } else if (py::isinstance(input_object)) { - tensor_ptr = std::make_shared(py::cast(input_object), nullptr); - } else if (py::isinstance(input_object)) { - auto tuple_inputs = py::cast(input_object); - if (py::isinstance(tuple_inputs[0])) { - PlantTensorTupleToVector(tuple_inputs, op_prim, input_tensor); - } else { - ConvertValueTupleToTensor(input_object, input_tensor); - } - return; - } else { - MS_LOG(EXCEPTION) << "Run op inputs type is invalid!"; - } - MS_EXCEPTION_IF_NULL(tensor_ptr); - input_tensor->push_back(tensor_ptr); -} - -void ConvertInputPyobject(const OpRunInfo &op_run_info, const PrimitivePtr &op_prim, - std::vector *input_tensors, std::vector *tensors_mask) { - MS_EXCEPTION_IF_NULL(op_prim); - MS_EXCEPTION_IF_NULL(input_tensors); - MS_EXCEPTION_IF_NULL(tensors_mask); - if (op_run_info.op_inputs.size() != op_run_info.inputs_mask.size()) { - MS_LOG(EXCEPTION) << "Op input size " << op_run_info.op_inputs.size() << " should be equal to op input mask size " - << op_run_info.inputs_mask.size(); - } - opt::ConstInputToAttrInfoRegister reg; - bool reg_exist = 
opt::ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(op_run_info.op_name, ®); - size_t input_num = op_run_info.op_inputs.size(); - MS_LOG(INFO) << "py input size: " << input_num; - for (size_t index = 0; index < input_num; ++index) { - // convert const input to attr - if (reg_exist && - RunOpConvertConstInputToAttr(op_run_info.op_inputs[index], index, op_prim, reg.GetConstInputAttrInfo())) { - continue; - } - // convert const and tuple input to tensor - ConvertPyObjectToTensor(op_run_info.op_inputs[index], op_prim, input_tensors); - // make tensors, weight : 1, data : 0 - std::vector new_mask(input_tensors->size() - tensors_mask->size(), - py::cast(op_run_info.inputs_mask[index])); - tensors_mask->insert(tensors_mask->end(), new_mask.begin(), new_mask.end()); - } -} - ValueNodePtr CreateNewValueNode(const AnfNodePtr &anf, KernelGraph *graph) { auto value_node = anf->cast(); MS_EXCEPTION_IF_NULL(value_node); @@ -527,9 +418,8 @@ CNodePtr SessionBasic::CreateNewCNode(const CNodePtr &cnode, bool valid_input, K KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) { std::unordered_map other_graph_cnode; - auto graph = std::make_shared(); - graph->set_graph_id(graph_sum_); - MS_LOG(INFO) << "Create graph: " << graph_sum_; + auto graph = NewKernelGraph(); + MS_LOG(INFO) << "Create graph: " << graph->graph_id(); size_t from_other_graph_depend_num = 0; for (const auto &node : lst) { MS_EXCEPTION_IF_NULL(node); @@ -566,14 +456,12 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con } graph->SetExecOrderByDefault(); opt::BackendCommonOptimization(graph); - graphs_[graph_sum_++] = graph; return graph; } // run graph steps void SessionBasic::LoadInputData(const std::shared_ptr &kernel_graph, const std::vector &inputs_const) const { - MS_EXCEPTION_IF_NULL(kernel_graph); std::vector inputs(inputs_const); size_t input_ctrl_size = 1; MS_EXCEPTION_IF_NULL(context_); @@ -583,8 +471,7 @@ 
void SessionBasic::LoadInputData(const std::shared_ptr &kernel_grap MS_EXCEPTION_IF_NULL(kernel_graph); auto input_nodes = kernel_graph->inputs(); if ((inputs.size() + input_ctrl_size) - 1 != input_nodes.size()) { - MS_LOG(EXCEPTION) << "tensor input size:" << inputs.size() - << " is not equal graph inputs size:" << input_nodes.size() + MS_LOG(EXCEPTION) << "tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size() << ", input_ctrl_size:" << input_ctrl_size; } auto ms_context = MsContext::GetInstance(); @@ -603,7 +490,7 @@ void SessionBasic::LoadInputData(const std::shared_ptr &kernel_grap need_sync = true; } } else { - if (tensor->is_dirty() || !AnfAlgo::IsParameterWeight(pk_node)) { + if (tensor->is_dirty()) { need_sync = true; } else if (tensor->device_address() != device_address) { (void)tensor->data_sync(); @@ -700,14 +587,14 @@ void SessionBasic::Summary(KernelGraph *graph) { CNodePtr SessionBasic::ConstructOutput(const AnfNodePtrList &outputs, const std::shared_ptr &graph) { MS_EXCEPTION_IF_NULL(graph); std::vector output_args; + for (const auto &output : outputs) { + MS_LOG(INFO) << "output:" << output->DebugString(); + } auto FindEqu = [graph, outputs](const AnfNodePtr &out) -> AnfNodePtr { auto backend_anf = graph->GetBackendAnfByFrontAnf(out); if (backend_anf != nullptr) { return backend_anf; } - for (const auto &output : outputs) { - MS_LOG(INFO) << "output:" << output->DebugString(); - } MS_LOG(EXCEPTION) << "Can't find the node in the equiv map!"; }; output_args.push_back(NewValueNode(prim::kPrimMakeTuple)); @@ -750,26 +637,22 @@ void SessionBasic::CreateOutputNode(const CNodePtr &cnode, const std::shared_ptr } std::shared_ptr SessionBasic::ConstructSingleOpGraph(const OpRunInfo &op_run_info, - std::vector *input_tensors) { - MS_EXCEPTION_IF_NULL(input_tensors); + const std::vector &input_tensors, + const std::vector &tensors_mask) { auto graph = std::make_shared(); std::vector inputs; // set input[0] PrimitivePtr op_prim 
= op_run_info.py_primitive; - if (op_prim == nullptr) { - op_prim = std::make_shared(op_run_info.op_name); - } + MS_EXCEPTION_IF_NULL(op_prim); inputs.push_back(std::make_shared(op_prim)); // set input parameter - std::vector tensors_mask; - ConvertInputPyobject(op_run_info, op_prim, input_tensors, &tensors_mask); - MS_LOG(INFO) << "Input tensor size: " << input_tensors->size(); - if (input_tensors->size() != tensors_mask.size()) { - MS_LOG(EXCEPTION) << "Input tensors size " << input_tensors->size() << " should be equal to tensors mask size " + MS_LOG(INFO) << "Input tensor size: " << input_tensors.size(); + if (input_tensors.size() != tensors_mask.size()) { + MS_LOG(EXCEPTION) << "Input tensors size " << input_tensors.size() << " should be equal to tensors mask size " << tensors_mask.size(); } - for (size_t i = 0; i < input_tensors->size(); ++i) { - auto parameter = ConstructRunOpParameter(graph, input_tensors->at(i), tensors_mask[i]); + for (size_t i = 0; i < input_tensors.size(); ++i) { + auto parameter = ConstructRunOpParameter(graph, input_tensors.at(i), tensors_mask[i]); inputs.push_back(parameter); graph->MutableInputs()->push_back(parameter); } @@ -811,5 +694,12 @@ BaseRef SessionBasic::TransformBaseRefListToTuple(const BaseRef &base_ref) { MS_LOG(EXCEPTION) << "The output is not a base ref list or a tensor!"; } } + +KernelGraphPtr SessionBasic::NewKernelGraph() { + auto graph = std::make_shared(); + graph->set_graph_id(graph_sum_); + graphs_[graph_sum_++] = graph; + return graph; +} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/session_basic.h b/mindspore/ccsrc/session/session_basic.h index aa359c74d9..de443833d6 100755 --- a/mindspore/ccsrc/session/session_basic.h +++ b/mindspore/ccsrc/session/session_basic.h @@ -61,7 +61,8 @@ class SessionBasic { virtual void RunGraph(const GraphId &graph_id, const std::vector &inputs, VectorRef *outputs) = 0; - virtual void BuildOp(const OpRunInfo &, const GraphInfo &, std::vector 
*input_tensors) {} + virtual void BuildOp(const OpRunInfo &, const GraphInfo &, const std::vector &input_tensors, + const std::vector &tensors_mask) {} virtual py::tuple RunOp(const OpRunInfo &, const GraphInfo &, const std::vector &input_tensors) { return py::tuple(); @@ -99,9 +100,12 @@ class SessionBasic { CNodePtr ConstructOutput(const AnfNodePtrList &outputs, const std::shared_ptr &graph); // create a single run op graph std::shared_ptr ConstructSingleOpGraph(const OpRunInfo &op_run_info, - std::vector *input_tensor); + const std::vector &input_tensors, + const std::vector &tensors_mask); // trans BaseRef list to py::tuple BaseRef TransformBaseRefListToTuple(const BaseRef &base_ref); + // create a new kernel graph and update the graph sum + KernelGraphPtr NewKernelGraph(); std::unordered_map> graphs_; std::unordered_map> run_op_graphs_; diff --git a/mindspore/ccsrc/session/session_factory.h b/mindspore/ccsrc/session/session_factory.h index 476d9ff4a1..99db0afeb7 100644 --- a/mindspore/ccsrc/session/session_factory.h +++ b/mindspore/ccsrc/session/session_factory.h @@ -22,7 +22,6 @@ #include #include #include "common/utils.h" -#include "device/cpu/cpu_kernel.h" #include "session/session_basic.h" namespace mindspore { namespace session { diff --git a/mindspore/ccsrc/transform/CMakeLists.txt b/mindspore/ccsrc/transform/CMakeLists.txt index 718f53f627..d1b70c000d 100644 --- a/mindspore/ccsrc/transform/CMakeLists.txt +++ b/mindspore/ccsrc/transform/CMakeLists.txt @@ -1,5 +1,8 @@ -file(GLOB_RECURSE _TRANSFORM_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "*.cc" - ) +if (ENABLE_GE OR ENABLE_D) + file(GLOB_RECURSE _TRANSFORM_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") + add_library(_mindspore_transform_obj OBJECT ${_TRANSFORM_SRC_LIST}) -add_library(_mindspore_transform_obj OBJECT ${_TRANSFORM_ALL_SRC_FILES}) + if (NOT ENABLE_GE) + target_compile_definitions(_mindspore_transform_obj PRIVATE NO_GE_CLIENT) + endif() +endif () diff --git 
a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc old mode 100755 new mode 100644 index d15f4ae43a..0171752dfb --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/convert.cc @@ -125,6 +125,7 @@ const char kNameSplitD[] = "Split"; const char kNameBatchToSpaceNd[] = "BatchToSpaceNd"; const char kNameFloor[] = "Floor"; const char kNameNPUGetFloatStatus[] = "NPUGetFloatStatus"; +const char kNameAssign[] = "Assign"; const char kNameAssignAdd[] = "AssignAdd"; const char kNameAssignSub[] = "AssignSub"; const char kNameNPUAllocFloatStatus[] = "NPUAllocFloatStatus"; @@ -178,6 +179,7 @@ const char kNameBinaryCrossEntropyGrad[] = "BinaryCrossEntropyGrad"; const char kNameSparseApplyAdagrad[] = "SparseApplyAdagrad"; const char kNameSparseApplyFtrlD[] = "SparseApplyFtrlD"; const char kNameAcosh[] = "Acosh"; +const char kNameAcoshGrad[] = "AcoshGrad"; const char kNameFloorMod[] = "FloorMod"; const char kNameSpaceToDepth[] = "SpaceToDepth"; const char kNameDepthToSpace[] = "DepthToSpace"; @@ -220,7 +222,6 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma {prim::kPrimAssign->name(), ADPT_DESC(Assign)}, {prim::kPrimStateSetItem->name(), ADPT_DESC(Assign)}, {prim::kPrimReluGrad->name(), ADPT_DESC(ReluGrad)}, - {prim::kPrimFusedBatchNormGrad->name(), ADPT_DESC(FusedBatchNormGrad)}, {prim::kPrimBiasAddGrad->name(), ADPT_DESC(BiasAddGrad)}, {prim::kPrimConv2D->name(), ADPT_DESC(Conv2D)}, {prim::kPrimConv2DBackpropInput->name(), ADPT_DESC(Conv2DBackpropInputD)}, @@ -228,7 +229,6 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma {prim::kPrimDepthwiseConv2dNative->name(), ADPT_DESC(DepthwiseConv2D)}, {prim::kPrimDepthwiseConv2dNativeBackpropFilter->name(), ADPT_DESC(DepthwiseConv2DBackpropFilterD)}, {prim::kPrimDepthwiseConv2dNativeBackpropInput->name(), ADPT_DESC(DepthwiseConv2DBackpropInputD)}, - {prim::kPrimFusedBatchNorm->name(), ADPT_DESC(FusedBatchNorm, BatchNorm)}, {string(kNameBatchNorm), ADPT_DESC(BatchNorm)}, 
{string(kNameBatchNormGrad), ADPT_DESC(BatchNormGrad)}, {string(kNameReshape), ADPT_DESC(Reshape)}, @@ -376,6 +376,7 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma {string(kNameSparseApplyAdagrad), ADPT_DESC(SparseApplyAdagradD)}, {string(kNameSparseApplyFtrlD), ADPT_DESC(SparseApplyFtrlD)}, {string(kNameAcosh), ADPT_DESC(Acosh)}, + {string(kNameAcoshGrad), ADPT_DESC(AcoshGrad)}, {string(kNameFloorMod), ADPT_DESC(FloorMod)}, {string(kNameSpaceToDepth), ADPT_DESC(SpaceToDepth)}, {string(kNameDepthToSpace), ADPT_DESC(DepthToSpace)}, @@ -625,7 +626,7 @@ void DfGraphConvertor::InitParamWithData(const TensorOrderMap &tensors) { auto node_itor = params_.find(name); // if name not in params_, create a node in graph if (node_itor == params_.end()) { - MS_LOG(WARNING) << "" << name << " is not in params, and create a new node."; + MS_LOG(WARNING) << name << " is not in params, and create a new node."; ParameterPtr param = anf_graph_->add_parameter(); name = name + "_temp"; param->set_name(name); @@ -1158,6 +1159,8 @@ void DfGraphConvertor::SetOpControlInput(const AnfNodePtr node) { } } +const std::vector trans_var_list = {string(kNameAssign), string(kNameAssignAdd), string(kNameAssignSub)}; + void DfGraphConvertor::SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node) { OperatorPtr src = Convert(node); auto &inputs = node->inputs(); @@ -1170,6 +1173,26 @@ void DfGraphConvertor::SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node if (IsValueNode(pred)) { continue; } + // transform "Const" op to "Variable" op when the next node is "Assign" op. 
+ std::string c_name = GetCNodeFuncName(node); + auto pos = std::find(trans_var_list.begin(), trans_var_list.end(), c_name); + if (!training_ && pos != trans_var_list.end() && pred->isa()) { + std::string name = std::static_pointer_cast(pred)->name(); + auto op_itor = op_cache_.find(pred.get()); + if (op_itor == op_cache_.end()) { + MS_LOG(EXCEPTION) << "Can not find op for node " << pred->ToString() << "."; + } + if (op_itor->second != nullptr && + (op_itor->second->GetOpType() == "Constant" || op_itor->second->GetOpType() == "Const") && + vars_.find(name) != vars_.end()) { + auto variable = std::make_shared(name); + auto desc = vars_[name]->GetOutputDesc("y"); + (void)variable->update_output_desc_y(desc); + MS_LOG(DEBUG) << "Trans to variable, var = " << variable->GetName() << "."; + op_itor->second = variable; // replace parameter with variable + vars_[name] = variable; + } + } // find in out_hadnle_cache_ first auto it = out_handle_cache_.find(pred.get()); if (it != out_handle_cache_.end()) { diff --git a/mindspore/ccsrc/transform/convert.h b/mindspore/ccsrc/transform/convert.h index 5596e20f19..39efd5d287 100644 --- a/mindspore/ccsrc/transform/convert.h +++ b/mindspore/ccsrc/transform/convert.h @@ -252,7 +252,6 @@ class DfGraphConvertor { bool training_ = false; bool distribute_ = false; }; - } // namespace transform } // namespace mindspore diff --git a/mindspore/ccsrc/transform/op_adapter_util.cc b/mindspore/ccsrc/transform/op_adapter_util.cc index 203acac10f..cae43c13dc 100644 --- a/mindspore/ccsrc/transform/op_adapter_util.cc +++ b/mindspore/ccsrc/transform/op_adapter_util.cc @@ -42,10 +42,10 @@ std::vector ConvertAnyUtil(const ValuePtr &value, const std::string &na MS_LOG(EXCEPTION) << "Value should be ValueTuple, but got" << value->type_name(); } auto vec = value->cast(); - list.resize(vec->value().size()+2); + list.resize(vec->value().size() + 2); list[0] = 1; list[1] = 1; - (void)std::transform(vec->value().begin(), vec->value().end(), list.begin()+2, 
+ (void)std::transform(vec->value().begin(), vec->value().end(), list.begin() + 2, [](const ValuePtr &val) { return static_cast(GetValue(val)); }); } else { int64_t data = GetValue(value); diff --git a/mindspore/ccsrc/transform/op_declare.cc b/mindspore/ccsrc/transform/op_declare.cc index d6da49f85d..27c1d306aa 100644 --- a/mindspore/ccsrc/transform/op_declare.cc +++ b/mindspore/ccsrc/transform/op_declare.cc @@ -356,6 +356,11 @@ INPUT_MAP(Acosh) = {{1, INPUT_DESC(x)}}; ATTR_MAP(Acosh) = EMPTY_ATTR_MAP; OUTPUT_MAP(Acosh) = {{0, OUTPUT_DESC(y)}}; +// AcoshGrad +INPUT_MAP(AcoshGrad) = {{1, INPUT_DESC(y)}, {2, INPUT_DESC(dy)}}; +ATTR_MAP(AcoshGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(AcoshGrad) = {{0, OUTPUT_DESC(z)}}; + // Floor INPUT_MAP(Floor) = {{1, INPUT_DESC(x)}}; ATTR_MAP(Floor) = EMPTY_ATTR_MAP; @@ -670,28 +675,6 @@ INPUT_MAP(ReluGrad) = {{1, INPUT_DESC(gradients)}, {2, INPUT_DESC(features)}}; ATTR_MAP(ReluGrad) = EMPTY_ATTR_MAP; OUTPUT_MAP(ReluGrad) = {{0, OUTPUT_DESC(backprops)}}; -// FusedBatchNorm -INPUT_MAP(FusedBatchNorm) = { - {1, INPUT_DESC(x)}, {2, INPUT_DESC(scale)}, {3, INPUT_DESC(b)}, {4, INPUT_DESC(mean)}, {5, INPUT_DESC(variance)}}; -ATTR_MAP(FusedBatchNorm) = {{"mode", ATTR_DESC(mode, AnyTraits())}, - {"momentum", ATTR_DESC(moving_average_fraction, AnyTraits())}, - {"epsilon", ATTR_DESC(epsilon, AnyTraits())}}; -OUTPUT_MAP(FusedBatchNorm) = {{0, OUTPUT_DESC(y)}, - {1, OUTPUT_DESC(running_mean)}, - {2, OUTPUT_DESC(running_variance)}, - {3, OUTPUT_DESC(save_mean)}, - {4, OUTPUT_DESC(save_inv_variance)}}; - -// FusedBatchNromGrad -INPUT_MAP(FusedBatchNormGrad) = {{1, INPUT_DESC(dy)}, - {2, INPUT_DESC(x)}, - {3, INPUT_DESC(scale)}, - {4, INPUT_DESC(save_mean)}, - {5, INPUT_DESC(save_inv_variance)}}; -ATTR_MAP(FusedBatchNormGrad) = {{"momentum", ATTR_DESC(momentum, AnyTraits())}, - {"epsilon", ATTR_DESC(epsilon, AnyTraits())}}; -OUTPUT_MAP(FusedBatchNormGrad) = {{0, OUTPUT_DESC(dx)}, {1, OUTPUT_DESC(bn_scale)}, {2, OUTPUT_DESC(bn_bias)}}; - // BiasAddGrad 
INPUT_MAP(BiasAddGrad) = {{1, INPUT_DESC(x)}}; ATTR_MAP(BiasAddGrad) = {{"data_format", ATTR_DESC(data_format, AnyTraits())}}; @@ -872,8 +855,8 @@ ATTR_MAP(TransposeD) = EMPTY_ATTR_MAP; // DropOutGenMask INPUT_MAP(DropOutGenMask) = {{1, INPUT_DESC(shape)}, {2, INPUT_DESC(prob)}}; -ATTR_MAP(DropOutGenMask) = {{"seed", ATTR_DESC(seed, AnyTraits())}, - {"seed2", ATTR_DESC(seed2, AnyTraits())}}; +ATTR_MAP(DropOutGenMask) = {{"Seed0", ATTR_DESC(seed, AnyTraits())}, + {"Seed1", ATTR_DESC(seed2, AnyTraits())}}; OUTPUT_MAP(DropOutGenMask) = {{0, OUTPUT_DESC(y)}}; // Pack diff --git a/mindspore/ccsrc/transform/op_declare.h b/mindspore/ccsrc/transform/op_declare.h index ccc6578a61..d15a664256 100755 --- a/mindspore/ccsrc/transform/op_declare.h +++ b/mindspore/ccsrc/transform/op_declare.h @@ -82,10 +82,6 @@ DECLARE_OP_USE_OUTPUT(HcomAllGather) DECLARE_OP_ADAPTER(Variable) DECLARE_OP_ADAPTER(ReluGrad) DECLARE_OP_USE_OUTPUT(ReluGrad) -DECLARE_OP_ADAPTER(FusedBatchNorm) -DECLARE_OP_USE_OUTPUT(FusedBatchNorm) -DECLARE_OP_ADAPTER(FusedBatchNormGrad) -DECLARE_OP_USE_OUTPUT(FusedBatchNormGrad) DECLARE_OP_ADAPTER(BiasAddGrad) DECLARE_OP_USE_OUTPUT(BiasAddGrad) DECLARE_OP_ADAPTER(MaxPoolWithArgmax) @@ -329,13 +325,15 @@ DECLARE_OP_ADAPTER(Const) DECLARE_OP_USE_OUTPUT(Const) DECLARE_OP_ADAPTER(Cos) DECLARE_OP_USE_OUTPUT(Cos) + DECLARE_OP_ADAPTER(Acos) DECLARE_OP_USE_OUTPUT(Acos) - DECLARE_OP_ADAPTER(AcosGrad) DECLARE_OP_USE_OUTPUT(AcosGrad) DECLARE_OP_ADAPTER(Acosh) DECLARE_OP_USE_OUTPUT(Acosh) +DECLARE_OP_ADAPTER(AcoshGrad) +DECLARE_OP_USE_OUTPUT(AcoshGrad) DECLARE_OP_ADAPTER(Floor) DECLARE_OP_USE_OUTPUT(Floor) diff --git a/mindspore/ccsrc/transform/util.cc b/mindspore/ccsrc/transform/util.cc index b1120ade6d..3f856fe564 100644 --- a/mindspore/ccsrc/transform/util.cc +++ b/mindspore/ccsrc/transform/util.cc @@ -171,20 +171,17 @@ GeTensorPtr TransformUtil::ConvertTensor(const MeTensorPtr &tensor, const std::s MS_LOG(ERROR) << "The Me Tensor data type size is wrong, type size is: " << 
type_size; return nullptr; } - // get tensor buff size - size_t data_buff_size = 0; size_t elements_num = IntToSize(tensor->ElementsNum()); - if (elements_num > 0 && type_size > 0 && UINT_MAX / type_size >= elements_num) { - data_buff_size = elements_num * type_size; + if (UINT_MAX / type_size < elements_num) { + MS_LOG(ERROR) << "The required Me Tensor data buff size " << elements_num << " x " << type_size + << " overflowed UINT_MAX: " << UINT_MAX << "."; + return nullptr; } + + // get tensor buff size + size_t data_buff_size = elements_num * type_size; if (data_buff_size == 0) { - if (elements_num > 0 && type_size > 0 && UINT_MAX / type_size < elements_num) { - MS_LOG(ERROR) << "The required Me Tensor data buff size " << elements_num << " x " << type_size - << " overflowed UINT_MAX: " << UINT_MAX << "."; - } else { - MS_LOG(ERROR) << "The Me Tensor data buff size is 0."; - } - return nullptr; + MS_LOG(INFO) << "The Me Tensor data buff size is 0."; } // create ge tensor auto desc = GetGeTensorDesc(tensor->shape_c(), tensor->data_type(), format); diff --git a/mindspore/ccsrc/utils/CMakeLists.txt b/mindspore/ccsrc/utils/CMakeLists.txt index 1fcf5e0944..52d03c3723 100644 --- a/mindspore/ccsrc/utils/CMakeLists.txt +++ b/mindspore/ccsrc/utils/CMakeLists.txt @@ -1,3 +1,8 @@ -file(GLOB_RECURSE _UTILS_ALL_SRC_FILES *.cc) -#TODO : "utils/node_utils.cc" -add_library(_mindspore_utils_obj OBJECT ${_UTILS_ALL_SRC_FILES}) +file(GLOB_RECURSE _UTILS_SRC_LIST ./*.cc) + +if (NOT ENABLE_GE) + file(GLOB_RECURSE _UTILS_GE_SRC_FILES ./callbacks_ge.cc) + list(REMOVE_ITEM _UTILS_SRC_LIST ${_UTILS_GE_SRC_FILES}) +endif () + +add_library(_mindspore_utils_obj OBJECT ${_UTILS_SRC_LIST}) diff --git a/mindspore/ccsrc/utils/any.cc b/mindspore/ccsrc/utils/any.cc index 3cb89f5dd7..80b8d86658 100644 --- a/mindspore/ccsrc/utils/any.cc +++ b/mindspore/ccsrc/utils/any.cc @@ -26,7 +26,7 @@ bool AnyIsLiteral(const Any &any) { static const std::type_index typeid_float = std::type_index(typeid(float)); 
static const std::type_index typeid_bool = std::type_index(typeid(bool)); - std::type_index typeid_any = std::type_index(any.type()); + auto typeid_any = std::type_index(any.type()); return typeid_int == typeid_any || typeid_float == typeid_any || typeid_bool == typeid_any; } diff --git a/mindspore/ccsrc/utils/base_ref.h b/mindspore/ccsrc/utils/base_ref.h index 6e7911d0d9..74ccff8f80 100644 --- a/mindspore/ccsrc/utils/base_ref.h +++ b/mindspore/ccsrc/utils/base_ref.h @@ -228,6 +228,8 @@ T cast(const BaseRef &handle) { class VectorRef : public BaseRef { public: + using value_type = BaseRef; + VectorRef() {} explicit VectorRef(const std::vector &elements) : elements_(elements) {} VectorRef(const const_iterator &begin, const const_iterator &end) : elements_(begin, end) {} @@ -251,6 +253,13 @@ class VectorRef : public BaseRef { return elements_[dim]; } + BaseRef &operator[](const std::size_t &dim) { + if (dim >= size()) { + MS_LOG(EXCEPTION) << "Out of the size of the tuple."; + } + return elements_[dim]; + } + uint32_t type() const override { return tid(); } std::string ToString() const override; std::vector &elements() { return elements_; } diff --git a/mindspore/ccsrc/utils/callbacks.cc b/mindspore/ccsrc/utils/callbacks.cc index 06bf1c73ab..ad9751c332 100644 --- a/mindspore/ccsrc/utils/callbacks.cc +++ b/mindspore/ccsrc/utils/callbacks.cc @@ -26,7 +26,6 @@ namespace mindspore { namespace callbacks { - const char PYTHON_MOD_CALLBACK_MODULE[] = "mindspore.train.callback"; const char PYTHON_FUN_PROCESS_CHECKPOINT[] = "_checkpoint_cb_for_save_op"; const char PYTHON_FUN_PROCESS_SUMMARY[] = "_summary_cb_for_save_op"; diff --git a/mindspore/ccsrc/utils/callbacks_ge.cc b/mindspore/ccsrc/utils/callbacks_ge.cc index b4c9fda634..da817b3f78 100644 --- a/mindspore/ccsrc/utils/callbacks_ge.cc +++ b/mindspore/ccsrc/utils/callbacks_ge.cc @@ -24,7 +24,6 @@ namespace mindspore { namespace callbacks { - const char PYTHON_MOD_CALLBACK_MODULE[] = "mindspore.train.callback"; const char 
PYTHON_FUN_PROCESS_CHECKPOINT[] = "_checkpoint_cb_for_save_op"; const char PYTHON_FUN_PROCESS_SUMMARY[] = "_summary_cb_for_save_op"; diff --git a/mindspore/ccsrc/utils/callbacks_ge.h b/mindspore/ccsrc/utils/callbacks_ge.h index 08f5bb59db..f9088f3f5a 100644 --- a/mindspore/ccsrc/utils/callbacks_ge.h +++ b/mindspore/ccsrc/utils/callbacks_ge.h @@ -26,12 +26,10 @@ namespace mindspore { namespace callbacks { - using mindspore::tensor::TensorPtr; uint32_t CheckpointSaveCallback(uint32_t, const std::map &); uint32_t SummarySaveCallback(uint32_t, const std::map &); - } // namespace callbacks } // namespace mindspore diff --git a/mindspore/ccsrc/utils/context/ms_context.cc b/mindspore/ccsrc/utils/context/ms_context.cc index 0a2f065140..6da1de9cdb 100644 --- a/mindspore/ccsrc/utils/context/ms_context.cc +++ b/mindspore/ccsrc/utils/context/ms_context.cc @@ -69,7 +69,6 @@ MsContext::MsContext(const std::string &policy, const std::string &target) { enable_task_sink_ = true; ir_fusion_flag_ = true; enable_hccl_ = false; - enable_loop_sink_ = false; enable_mem_reuse_ = true; enable_gpu_summary_ = true; precompile_only_ = false; @@ -78,7 +77,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) { enable_dynamic_mem_pool_ = true; graph_memory_max_size_ = "0"; variable_memory_max_size_ = "0"; - MS_LOG(INFO) << "Create context with backend policy:" << policy << ", device target:" << target << "."; + enable_loop_sink_ = target == kAscendDevice || target == kDavinciDevice; } std::shared_ptr MsContext::GetInstance() { @@ -134,6 +133,7 @@ bool MsContext::set_device_target(const std::string &target) { } else { device_target_ = target; } + enable_loop_sink_ = device_target_ == kAscendDevice; MS_LOG(INFO) << "ms set context device target:" << target; return true; } @@ -438,4 +438,18 @@ bool MsContext::PynativeInitGe() { is_pynative_ge_init_ = true; return true; } + +bool MsContext::IsTsdOpened() { + if (tsd_ref_ > 0) { + return true; + } + return false; +} + +bool 
MsContext::IsGeInited() { + if (ge_ref_ > 0) { + return true; + } + return false; +} } // namespace mindspore diff --git a/mindspore/ccsrc/utils/context/ms_context.h b/mindspore/ccsrc/utils/context/ms_context.h index 1d84061a8a..b2d594d10e 100644 --- a/mindspore/ccsrc/utils/context/ms_context.h +++ b/mindspore/ccsrc/utils/context/ms_context.h @@ -82,8 +82,10 @@ class MsContext { bool OpenTsd(); bool CloseTsd(bool force = false); + bool IsTsdOpened(); bool InitGe(); bool FinalizeGe(bool force = false); + bool IsGeInited(); void set_enable_hccl(bool enable_hccl) { enable_hccl_ = enable_hccl; } bool enable_hccl() const { return enable_hccl_; } bool PynativeInitGe(); diff --git a/mindspore/ccsrc/utils/convert_utils.cc b/mindspore/ccsrc/utils/convert_utils.cc index e840ff8734..edbfe8dc4c 100644 --- a/mindspore/ccsrc/utils/convert_utils.cc +++ b/mindspore/ccsrc/utils/convert_utils.cc @@ -28,6 +28,7 @@ #include "ir/meta_tensor.h" #include "pipeline/parse/parse.h" +#include "pipeline/parse/parse_base.h" #include "ir/value.h" namespace mindspore { @@ -97,6 +98,15 @@ py::object ValuePtrToPyData(const ValuePtr &value) { i++; } ret = rets; + } else if (value->isa()) { + ret = parse::python_adapter::CallPyFn(parse::PYTHON_MOD_PARSE_MODULE, parse::PYTHON_PARSE_CLASS_ELLIPSIS); + } else if (value->isa()) { + auto slice = value->cast(); + auto start = ValuePtrToPyData(slice->start()); + auto end = ValuePtrToPyData(slice->stop()); + auto step = ValuePtrToPyData(slice->step()); + ret = parse::python_adapter::CallPyFn(parse::PYTHON_MOD_PARSE_MODULE, parse::PYTHON_PARSE_CLASS_SLICE, start, end, + step); } else if (value->isa()) { py::tuple v(1); v[0] = value->cast(); @@ -327,7 +337,7 @@ py::object VectorRefToPyData(const VectorRef &value_list) { py::object ret; MS_LOG(DEBUG) << "vector_ref"; size_t value_size = value_list.size(); - py::tuple ref_tuple = py::tuple(value_size); + auto ref_tuple = py::tuple(value_size); for (size_t i = 0; i < value_size; i++) { ref_tuple[i] = 
BaseRefToPyData(value_list[i]); } diff --git a/mindspore/ccsrc/utils/log_adapter.cc b/mindspore/ccsrc/utils/log_adapter.cc index 0cd9b64a9b..74a66f44d7 100644 --- a/mindspore/ccsrc/utils/log_adapter.cc +++ b/mindspore/ccsrc/utils/log_adapter.cc @@ -35,7 +35,7 @@ static std::string GetTime() { now_time.tm_hour, now_time.tm_min, now_time.tm_sec); #else struct timeval cur_time; - (void)gettimeofday(&cur_time, NULL); + (void)gettimeofday(&cur_time, nullptr); struct tm now; (void)localtime_r(&cur_time.tv_sec, &now); @@ -143,6 +143,7 @@ static std::string ExceptionTypeToString(ExceptionType type) { _TO_STRING(TimeOutError), _TO_STRING(ResourceUnavailable), _TO_STRING(NoPermissionError), + _TO_STRING(IndexError), _TO_STRING(ValueError), _TO_STRING(TypeError), }; @@ -179,7 +180,8 @@ void LogWriter::operator^(const LogStream &stream) const { std::ostringstream oss; oss << location_.file_ << ":" << location_.line_ << " " << location_.func_ << "] "; - if (exception_type_ != NoExceptionType && exception_type_ != TypeError && exception_type_ != ValueError) { + if (exception_type_ != NoExceptionType && exception_type_ != IndexError && exception_type_ != TypeError && + exception_type_ != ValueError) { oss << ExceptionTypeToString(exception_type_) << " "; } oss << msg.str(); @@ -187,6 +189,9 @@ void LogWriter::operator^(const LogStream &stream) const { trace::TraceGraphInfer(); trace::GetInferStackInfo(oss); + if (exception_type_ == IndexError) { + throw pybind11::index_error(oss.str()); + } if (exception_type_ == ValueError) { throw pybind11::value_error(oss.str()); } @@ -252,9 +257,13 @@ void mindspore_log_init(void) { if (mindspore::GetEnv("GLOG_logfile_mode").empty()) { FLAGS_logfile_mode = 0640; } + std::string logtostderr = mindspore::GetEnv("GLOG_logtostderr"); // default print log to screen - if (mindspore::GetEnv("GLOG_logtostderr").empty()) { + if (logtostderr.empty()) { + FLAGS_logtostderr = true; + } else if (logtostderr == "0" && 
mindspore::GetEnv("GLOG_log_dir").empty()) { FLAGS_logtostderr = true; + MS_LOG(WARNING) << "`GLOG_log_dir` is not set, output log to screen."; } #else mindspore::InitMsLogLevel(); diff --git a/mindspore/ccsrc/utils/log_adapter.h b/mindspore/ccsrc/utils/log_adapter.h index 2122870c3b..d7d8eff23e 100644 --- a/mindspore/ccsrc/utils/log_adapter.h +++ b/mindspore/ccsrc/utils/log_adapter.h @@ -54,6 +54,7 @@ enum ExceptionType { TimeOutError, ResourceUnavailable, NoPermissionError, + IndexError, ValueError, TypeError, }; diff --git a/mindspore/ccsrc/utils/node_strategy.proto b/mindspore/ccsrc/utils/node_strategy.proto index dc06482ba1..8ec25f21a6 100644 --- a/mindspore/ccsrc/utils/node_strategy.proto +++ b/mindspore/ccsrc/utils/node_strategy.proto @@ -33,6 +33,6 @@ message ParallelStrategyItem { } message ParallelStrategyMap { - required uint32 train_time = 1; + required uint32 current_stage = 1; repeated ParallelStrategyItem parallel_strategy_item = 2; } \ No newline at end of file diff --git a/mindspore/ccsrc/utils/profile.cc b/mindspore/ccsrc/utils/profile.cc index e9e7920e0c..9fb9dc9f1a 100644 --- a/mindspore/ccsrc/utils/profile.cc +++ b/mindspore/ccsrc/utils/profile.cc @@ -158,7 +158,7 @@ void Profile::Print(void) { std::ostringstream oss; PrintProfile(oss, *ctx_ptr_->time_info_); std::string text = oss.str(); - // the length of text is too long to use MS_LOGINFO, use printf to print it + // here use printf to output profile info, not use MS_LOG(INFO) since when open log, it affects performace (void)printf("%s", text.c_str()); (void)fflush(stdout); } @@ -358,7 +358,7 @@ void MsProfile::Print() { PrintTimeStat(oss, groups[i], prefix); } std::string text = oss.str(); - // the length of text is too long to use MS_LOGINFO, use printf to print it + // here use printf to output profile info, not use MS_LOG(INFO) since when open log, it affects performace (void)printf("\nTime group info:\n%s", text.c_str()); (void)fflush(stdout); } diff --git 
a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h index 6829a7e888..9fb62a5470 100644 --- a/mindspore/ccsrc/utils/utils.h +++ b/mindspore/ccsrc/utils/utils.h @@ -107,6 +107,7 @@ constexpr auto kLambNextMVOpName = "LambNextMV"; constexpr auto kConfusionTransposeDOpName = "ConfusionTransposeD"; constexpr auto kAdamApplyOneWithDecayOpName = "AdamApplyOneWithDecay"; constexpr auto kBatchNormOpName = "BatchNorm"; +constexpr auto kBatchNormGradOpName = "BatchNormGrad"; constexpr auto kAdamApplyOneOpName = "AdamApplyOne"; constexpr auto kDropoutGenMask = "DropoutGenMask"; constexpr auto kResizeNearestNeighborGrad = "ResizeNearestNeighborGrad"; @@ -120,8 +121,12 @@ constexpr auto kStreamActiveOpName = "StreamActive"; constexpr auto kAssignAddOpName = "AssignAdd"; constexpr auto kSendOpName = "Send"; constexpr auto kRecvOpName = "Recv"; -constexpr auto kReluV2OpName = "ReluV2"; +constexpr auto kReluV2OpName = "ReLUV2"; constexpr auto kReluGradV2OpName = "ReluGradV2"; +constexpr auto kAddNOpName = "AddN"; +constexpr auto kConv2DBackpropInputOpName = "Conv2DBackpropInput"; +constexpr auto kFusionOpConv2DBackpropInputReluGradV2Name = "FusionOp_Conv2DBackpropInput_ReluGradV2"; +constexpr auto kFusionOpConv2DBackpropInputAddNReluGradV2Name = "FusionOp_Conv2DBackpropInput_AddN_ReluGradV2"; // attr key name constexpr auto kAttrInputNames = "input_names"; @@ -155,6 +160,10 @@ constexpr auto kAttrOutputUsedNum = "output_used_num"; constexpr auto kAttrHasBias = "has_bias"; constexpr auto kAttrN = "n"; constexpr auto kAttrLabelForInsertStreamActive = "label_for_insert_stream_active"; +constexpr auto kAttrFusion = "fusion"; +constexpr auto kAttrGroup = "group"; +constexpr auto kAttrOp = "op"; +constexpr auto kAttrIsTraining = "is_training"; // attr value constexpr auto kValueTargetSwitch = "target_switch"; @@ -177,7 +186,10 @@ constexpr auto kControlDependBehindIndex = 2; // index define of depend constexpr auto kRealInputIndexInDepend = 1; constexpr auto 
kDependAttachNodeIndex = 2; - +// status of kernel select result +const int kStatusReducePrecision = -1; +const int kStatusRaisePrecision = 1; +const int kStatusAllMatched = 0; // format constexpr auto kOpFormat_DEFAULT = "DefaultFormat"; constexpr auto kOpFormat_NC1KHKWHWC0 = "NC1KHKWHWC0"; diff --git a/mindspore/ccsrc/vm/CMakeLists.txt b/mindspore/ccsrc/vm/CMakeLists.txt index 2df984a29f..c5408e683e 100644 --- a/mindspore/ccsrc/vm/CMakeLists.txt +++ b/mindspore/ccsrc/vm/CMakeLists.txt @@ -1,5 +1,2 @@ -file(GLOB_RECURSE _VM_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "*.cc" - ) - -add_library(_mindspore_vm_obj OBJECT ${_VM_ALL_SRC_FILES}) \ No newline at end of file +file(GLOB_RECURSE _VM_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +add_library(_mindspore_vm_obj OBJECT ${_VM_SRC_LIST}) diff --git a/mindspore/ccsrc/vm/backend.cc b/mindspore/ccsrc/vm/backend.cc index d754667cce..caf4eb3ee3 100644 --- a/mindspore/ccsrc/vm/backend.cc +++ b/mindspore/ccsrc/vm/backend.cc @@ -143,6 +143,66 @@ void MsBackend::SetSwitchGraph() { } } +// convert node from formal parameter to actual parameter, +// and actual parameter is graph user's formal parameter. +// get top while graph's parameter in recall while. 
+AnfNodePtr MsBackend::ConvertGraphInput(const FuncGraphPtr &func_graph, const AnfNodePtr &node) { + std::unordered_map params_index; + auto result = node; + auto graph = result->func_graph(); + while (func_graph != graph) { + auto iter = graph_user_inputs_.find(graph); + if (iter == graph_user_inputs_.end()) { + break; + } + + params_index.clear(); + auto ¶ms = graph->parameters(); + for (size_t i = 0; i < params.size(); ++i) { + params_index[params[i]] = i; + } + + graph = iter->second.first; + auto &inputs = iter->second.second; + result = inputs[params_index[result]]; + } + return result; +} + +void MsBackend::SetGraphUserInputs(const FuncGraphPtr &func_graph, const FuncGraphPtr &user, + const AnfNodePtrList &inputs) { + if (graph_user_inputs_.find(func_graph) != graph_user_inputs_.end()) { + return; + } + graph_user_inputs_[func_graph] = {user, inputs}; +} + +void MsBackend::RecallGraphInput(const FuncGraphPtr &func_graph, const VectorRef &args, const BaseRef &c) { + std::unordered_map params_index; + auto ¶ms = func_graph->parameters(); + for (size_t i = 0; i < params.size(); ++i) { + params_index[params[i]] = i; + } + + // recall all child graphs in this while + auto &graph_inputs = graph_inputs_[c]; + for (auto &iter : graph_inputs) { + auto &graph = iter.first; + auto &old_args = iter.second; + auto &result = graph_id_map_[graph]; + auto &inputs = result.inputs; + for (size_t i = 0; i < inputs.size(); ++i) { + auto input = ConvertGraphInput(func_graph, inputs[i]); + auto it = params_index.find(input); + if (it != params_index.end()) { + old_args[i] = args[it->second]; + } + } + sess_->SetChildGraphInput(graph, old_args); + } + graph_inputs_.erase(c); +} + // compile set input output VectorRef MsBackend::MsSimuRunGraph(const GraphId &g, const VectorRef &args) { MS_LOG(DEBUG) << "set graph input:" << g; @@ -150,13 +210,20 @@ VectorRef MsBackend::MsSimuRunGraph(const GraphId &g, const VectorRef &args) { sess_->SetChildGraphInput(g, args); if (is_switch_call_) 
{ - bool curr_cond = simu_cond_map_[curr_switch_].curr_cond; - MS_LOG(DEBUG) << "switch call MsSimuRunGraph:" << curr_cond; - if (0 == simu_cond_map_[curr_switch_].cond_graph_map.count(curr_cond)) { - MS_LOG(DEBUG) << "switch call MsSimuRunGraph:" << curr_cond << ", " << g; - simu_cond_map_[curr_switch_].cond_graph_map[curr_cond] = g; - SetSwitchGraph(); + if (!curr_switch_.is_null()) { + // push this {g, args} to all user while graph_inputs for nest while, + // when current condition recall over delete this cond in graph_inputs. + for (auto &iter : graph_inputs_) { + iter.second.push_back({g, args}); + } + if (graph_inputs_.find(curr_switch_) == graph_inputs_.end()) { + graph_inputs_[curr_switch_].push_back({g, args}); + } } + bool curr_cond = simu_cond_map_[curr_switch_].curr_cond; + MS_LOG(DEBUG) << "switch call MsSimuRunGraph:" << curr_cond << ", " << g; + simu_cond_map_[curr_switch_].cond_graph_map[curr_cond] = g; + SetSwitchGraph(); } std::vector outputs; @@ -205,42 +272,17 @@ VectorRef MsBackend::MsRunGraph(const GraphId &g, const VectorRef &args) { return outputs; } -void MsBackend::SetSimuCondFlag(const BaseRef &c, int flag) { - MS_LOG(DEBUG) << "while set cond :" << c.ToString() << ", " << simu_cond_map_.size(); - - if (simu_cond_map_.find(c) == simu_cond_map_.end()) { - MS_LOG(EXCEPTION) << "error c not find"; - } - simu_cond_map_[c].flag = flag; -} - -int MsBackend::GetSimuCondFlag(const BaseRef &c) { - BaseRef cond = c; - if (cond.is_null()) { - MS_LOG(DEBUG) << "get curr_switch"; - cond = curr_switch_; - } - if (simu_cond_map_.find(cond) == simu_cond_map_.end()) { - MS_LOG(ERROR) << "error c not find"; - return -1; - } - return simu_cond_map_[cond].flag; -} - SwitchCondStatus MsBackend::SetSimuCond(const BaseRef &c, bool value) { MS_LOG(DEBUG) << "set cond :" << c.ToString() << ", " << simu_cond_map_.size(); CondGraph cond_graph; cond_graph.curr_cond = value; if (simu_cond_map_.find(c) == simu_cond_map_.end()) { - cond_graph.flag = 0; 
simu_cond_map_[c] = cond_graph; } if (simu_cond_map_[c].cond_graph_map.count(value)) { - if (value == true) { - return kCondAlreadyRun; - } + return kCondAlreadyRun; } simu_cond_map_[c].curr_cond = value; MS_LOG(DEBUG) << "end set cond "; diff --git a/mindspore/ccsrc/vm/backend.h b/mindspore/ccsrc/vm/backend.h index b950e7adcb..769dab473e 100644 --- a/mindspore/ccsrc/vm/backend.h +++ b/mindspore/ccsrc/vm/backend.h @@ -16,9 +16,11 @@ #ifndef MINDSPORE_CCSRC_VM_BACKEND_H_ #define MINDSPORE_CCSRC_VM_BACKEND_H_ -#include +#include #include +#include #include +#include #include "ir/anf.h" #include "vm/segment_runner.h" @@ -45,6 +47,8 @@ class Backend { virtual bool GetCond(const BaseRef &c, bool *value); virtual void SetSwitchGraph() {} virtual void SetSwitchActive(const BaseRef &, bool) {} + virtual void RecallGraphInput(const FuncGraphPtr &, const VectorRef &, const BaseRef &) {} + virtual void SetGraphUserInputs(const FuncGraphPtr &, const FuncGraphPtr &, const AnfNodePtrList &) {} void set_curr_switch(const BaseRef &value) { curr_switch_ = value; @@ -54,8 +58,6 @@ class Backend { BaseRef curr_switch() { return curr_switch_; } virtual void Link(GraphId) {} virtual LinConvertResult GetMultiGraphRun(const FuncGraphPtr &) { return LinConvertResult(); } - virtual void SetSimuCondFlag(const BaseRef &, int) {} - virtual int GetSimuCondFlag(const BaseRef &) { return 0; } LinConvertResult multi_result() { return multi_result_; } void set_multi_result(const LinConvertResult &value) { multi_result_ = value; } @@ -75,11 +77,11 @@ class Backend { bool simu_flag_; LinConvertResult multi_result_; AnfNodePtr final_output_; + std::unordered_map> graph_user_inputs_; }; struct CondGraph { bool curr_cond; - int flag; std::unordered_map cond_graph_map; }; @@ -97,15 +99,17 @@ class MsBackend : public Backend { void SetSwitchGraph() override; void SetSwitchActive(const BaseRef &c, bool cond) override; + void RecallGraphInput(const FuncGraphPtr &, const VectorRef &, const BaseRef &) 
override; + void SetGraphUserInputs(const FuncGraphPtr &, const FuncGraphPtr &, const AnfNodePtrList &) override; void Link(GraphId) override; + AnfNodePtr ConvertGraphInput(const FuncGraphPtr &, const AnfNodePtr &); LinConvertResult GetMultiGraphRun(const FuncGraphPtr &g) override; - void SetSimuCondFlag(const BaseRef &c, int flag) override; - int GetSimuCondFlag(const BaseRef &c) override; private: session::SessionPtr sess_; std::unordered_map simu_cond_map_; std::unordered_map graph_id_map_; + std::unordered_map>, BaseRefHash> graph_inputs_; }; } // namespace compile } // namespace mindspore diff --git a/mindspore/ccsrc/vm/transform.cc b/mindspore/ccsrc/vm/transform.cc index 1c3c917dae..9147f75fb2 100644 --- a/mindspore/ccsrc/vm/transform.cc +++ b/mindspore/ccsrc/vm/transform.cc @@ -390,6 +390,16 @@ void CompileGraph::AddTailCall(const AnfNodePtr &fn, size_t size) { void CompileGraph::AddPartial(const CNodePtr &node) { auto inputs = node->inputs(); VectorRef args; + auto fn = inputs[1]; + if (!IsValueNode(fn)) { + MS_LOG(EXCEPTION) << "The type of 1st input of node must be FuncGraph"; + } + if (backend_->is_multi_graph_sink()) { + auto func_graph = GetValueNode(fn); + args.emplace_back(func_graph); + AnfNodePtrList outs(inputs.begin() + 2, inputs.end()); + backend_->SetGraphUserInputs(func_graph, node->func_graph(), outs); + } for (size_t i = 1; i < inputs.size(); i++) { args.emplace_back(Ref(inputs[i])); } @@ -442,12 +452,17 @@ void CompileGraph::AddPrimitive(const CNodePtr &node, const PrimitivePtr &prim) } int CompileGraph::AddCall(const FuncGraphPtr &graph, const CNodePtr &node) { - auto node_inputs = node->inputs(); - AnfNodePtr fn = node_inputs[0]; + auto inputs = node->inputs(); + AnfNodePtr fn = inputs[0]; + if (backend_->is_multi_graph_sink() && IsValueNode(fn)) { + auto func_graph = GetValueNode(fn); + AnfNodePtrList outs(inputs.begin() + 1, inputs.end()); + backend_->SetGraphUserInputs(func_graph, node->func_graph(), outs); + } (void)Ref(fn); - size_t 
size = node_inputs.size(); + size_t size = inputs.size(); for (size_t i = size - 1; i > 0; i--) { - AddInput(node_inputs[i]); + AddInput(inputs[i]); } if (node == graph->output()) { AddTailCall(fn, size); @@ -471,7 +486,8 @@ void CompileGraph::AddExternal(const LinConvertResult &result) { } void TraverseGraphMap( - const FuncGraphManagerPtr &manager_ptr, FuncGraphTransaction *const tr, const FuncGraphToAnfNodeCounterMap &cts, + const FuncGraphManagerPtr &manager_ptr, FuncGraphTransaction *const tr, + const FuncGraphToAnfNodeCounterMap &cts, const std::function(const PrimitivePtr, const AbstractFunctionPtr)> &get_prim_graph) { MS_EXCEPTION_IF_NULL(manager_ptr); MS_EXCEPTION_IF_NULL(tr); diff --git a/mindspore/ccsrc/vm/vm.cc b/mindspore/ccsrc/vm/vm.cc index 95ceceb67f..cf52aafdfe 100644 --- a/mindspore/ccsrc/vm/vm.cc +++ b/mindspore/ccsrc/vm/vm.cc @@ -32,7 +32,8 @@ namespace compile { // Arguments: // fn_: Callable function. // args_: Sequence of function args. -StructPartial::StructPartial(int fn, const VectorRef &args) : fn_(fn), args_(args) {} +// fg_: Graph of function. 
+StructPartial::StructPartial(int fn, const VectorRef &args, const FuncGraphPtr &fg) : fn_(fn), args_(args), fg_(fg) {} std::ostream &operator<<(std::ostream &os, const StructPartial &other) { os << "partial(" << other.fn_ << ", " << other.args_.ToString() << ")"; @@ -40,7 +41,7 @@ std::ostream &operator<<(std::ostream &os, const StructPartial &other) { } bool operator==(const StructPartial &lhs, const StructPartial &rhs) { - return (lhs.fn_ == rhs.fn_ && lhs.args_ == rhs.args_); + return (lhs.fn_ == rhs.fn_ && lhs.args_ == rhs.args_ && lhs.fg_ == rhs.fg_); } StructSimuSwitch::StructSimuSwitch(const BaseRef &fn, const BaseRef &value) : fn_(fn), value_(value) {} @@ -216,8 +217,8 @@ void FinalVM::InstCall(const VectorRef &args) { MS_LOG(DEBUG) << "Start"; const size_t args_size = 1; if (args.size() != args_size) { - MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is " - << args.size() << "."; + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is " << args.size() + << "."; return; } @@ -232,8 +233,8 @@ void FinalVM::InstTailCall(const VectorRef &args) { MS_LOG(DEBUG) << "Start"; const size_t args_size = 3; if (args.size() != args_size) { - MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " - << args.size() << "."; + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " << args.size() + << "."; return; } @@ -242,16 +243,6 @@ void FinalVM::InstTailCall(const VectorRef &args) { int nargs = utils::cast(args[2]); auto new_jmp = Ref(jmp); - - if (backend_->simu_flag()) { - if (backend_->GetSimuCondFlag(BaseRef()) == 2) { - MS_LOG(DEBUG) << "invoke while call tail first"; - Pop(height); - Push(1); - Popp(); - return; - } - } MoveStack(nargs, height); MS_LOG(DEBUG) << "TailCall pushp:" << pc_ << ", jmp:" << jmp; DoJmp(new_jmp); @@ -261,7 +252,7 @@ void 
FinalVM::InstTailCall(const VectorRef &args) { void FinalVM::InstSwitchReturn(const VectorRef &args) { MS_LOG(DEBUG) << "Start"; if (args.size() != 1) { - MS_LOG(ERROR) << "" << __FUNCTION__ << " requires one parameter, while the input size is " << args.size() << "."; + MS_LOG(ERROR) << __FUNCTION__ << " requires one parameter, while the input size is " << args.size() << "."; return; } Pop(1); @@ -272,8 +263,8 @@ void FinalVM::InstReturn(const VectorRef &args) { MS_LOG(DEBUG) << "Start"; const size_t args_size = 2; if (args.size() != args_size) { - MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " - << args.size() << "."; + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " << args.size() + << "."; return; } @@ -291,11 +282,33 @@ void FinalVM::InstReturn(const VectorRef &args) { MS_LOG(DEBUG) << "End"; } -void FinalVM::InstPartial(const VectorRef &args) { - MS_LOG(DEBUG) << "Start"; +void FinalVM::InstSimuPartial(const VectorRef &args) { + const size_t args_size = 2; + if (args.size() < args_size) { + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " or more parameters, while the input size is " + << args.size() << "."; + return; + } + + auto &node = args[0]; + if (!utils::isa(node)) { + MS_LOG(ERROR) << "The type of 1st input of node must be FuncGraph"; + return; + } + auto fg = utils::cast(node); + int fn_ = utils::cast(args[1]); + auto fn = utils::cast(Ref(fn_)); + MS_LOG(DEBUG) << "Partial argssize:" << args.size(); + std::vector outs(args.size() - 2); + (void)std::transform(args.begin() + 2, args.end(), outs.begin(), + [&, this](const BaseRef &a) { return Ref(utils::cast(a)); }); + Push(std::make_shared(fn, VectorRef(outs), fg)); +} + +void FinalVM::InstRealPartial(const VectorRef &args) { const size_t args_size = 1; if (args.size() < args_size) { - MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " or more parameters, 
while the input size is " + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " or more parameters, while the input size is " << args.size() << "."; return; } @@ -304,18 +317,26 @@ void FinalVM::InstPartial(const VectorRef &args) { auto fn = utils::cast(Ref(fn_)); MS_LOG(DEBUG) << "Partial argssize:" << args.size(); std::vector outs(args.size() - 1); - (void)std::transform(args.begin() + 1, args.end(), outs.begin(), [&, this](const BaseRef &a) { return Ref(utils::cast(a)); }); Push(std::make_shared(fn, VectorRef(outs))); +} + +void FinalVM::InstPartial(const VectorRef &args) { + MS_LOG(DEBUG) << "Start"; + if (backend_->is_multi_graph_sink()) { + InstSimuPartial(args); + } else { + InstRealPartial(args); + } MS_LOG(DEBUG) << "End"; } void FinalVM::InstSimuSwitch(const VectorRef &args) { const size_t args_size = 4; if (args.size() != args_size) { - MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " - << args.size() << "."; + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " << args.size() + << "."; return; } bool cond = utils::cast(args[0]); @@ -328,48 +349,62 @@ void FinalVM::InstSimuSwitch(const VectorRef &args) { bool bool_value = cond; SwitchCondStatus cond_stat = backend_->SetSimuCond(c, bool_value); - int cond_flag = backend_->GetSimuCondFlag(c); - MS_LOG(DEBUG) << "Simu switch cond:" << cond << ", " << cond_flag << ", " << c.cast()->DebugString(); - if (cond_flag == 2) { - Popp(); - Popp(); - backend_->SetSimuCondFlag(c, 0); - return; - } - if (cond_stat == kCondAlreadyRun) { MS_LOG(DEBUG) << "switch alreay run bool while true jmp"; - if (cond_flag == 0) { - MS_LOG(DEBUG) << "switch second run bool while true jmp"; - backend_->SetSwitchActive(c, true); - Push(std::make_shared(Ref(vtrue), c)); - Pushsp(); - backend_->SetSimuCondFlag(c, 1); - return; - } else if (cond_flag == 1) { - MS_LOG(DEBUG) << "switch first run bool while if jmp"; - 
Push(std::make_shared(Ref(vfalse), c)); - (void)backend_->SetSimuCond(c, false); - backend_->SetSimuCondFlag(c, 2); - return; - } else { - MS_LOG(EXCEPTION) << "error cond not find"; - return; + BaseRef jmp = Ref(vtrue); + if (utils::isa(jmp)) { + auto new_jmp = utils::cast>(jmp); + backend_->RecallGraphInput(new_jmp->fg_, new_jmp->args_, c); } + cond_jmp_[c] = Ref(vfalse); + Push(static_cast(cond_stat)); + Popp(); + backend_->SetSwitchActive(c, bool_value); + return; } if (bool_value) { Push(std::make_shared(Ref(vtrue), c)); Pushsp(); } else { + MergeJmpArgs(Ref(vfalse), c); Push(std::make_shared(Ref(vfalse), c)); } } +void FinalVM::MergeJmpArgs(const BaseRef &jmp, const BaseRef &c) { + auto iter = cond_jmp_.find(c); + if (iter == cond_jmp_.end()) { + return; + } + auto old_jmp = utils::cast>(iter->second); + auto new_jmp = utils::cast>(jmp); + auto &old_args = old_jmp->args_; + auto &new_args = new_jmp->args_; + for (size_t i = 0; i < new_args.size(); ++i) { + auto &old_arg = old_args[i]; + auto &new_arg = new_args[i]; + if (utils::isa(old_arg)) { + auto old_vec_ref = utils::cast(old_arg); + if (utils::isa(new_arg)) { + auto new_vec_ref = utils::cast(new_arg); + std::copy(new_vec_ref.begin(), new_vec_ref.end(), std::back_inserter(old_vec_ref)); + } + new_arg = old_vec_ref; + } else if (utils::isa(new_arg)) { + auto new_vec_ref = utils::cast(new_arg); + new_vec_ref.push_back(old_arg); + new_arg = new_vec_ref; + } else { + new_arg = VectorRef({new_arg, old_arg}); + } + } +} + void FinalVM::InstRealSwitch(const VectorRef &args) { const size_t args_size = 3; if (args.size() != args_size) { - MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " - << args.size() << "."; + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " << args.size() + << "."; return; } @@ -378,7 +413,7 @@ void FinalVM::InstRealSwitch(const VectorRef &args) { int vfalse = utils::cast(args[2]); 
BaseRef c = Ref(cond); - MS_LOG(DEBUG) << "" << vtrue << " false:" << vfalse << " InstSwitch: " << c.ToString(); + MS_LOG(DEBUG) << vtrue << " false:" << vfalse << " InstSwitch: " << c.ToString(); bool bool_value = false; if (backend_->GetCond(c, &bool_value)) { MS_LOG(DEBUG) << "Cond:" << bool_value; @@ -399,6 +434,7 @@ void FinalVM::InstSwitch(const VectorRef &args) { } else { InstRealSwitch(args); } + MS_LOG(DEBUG) << "End"; } void FinalVM::InstTuple(const VectorRef &args) { @@ -417,8 +453,8 @@ void FinalVM::InstPush(const VectorRef &args) { MS_LOG(DEBUG) << "Start"; const size_t args_size = 1; if (args.size() != args_size) { - MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is " - << args.size() << "."; + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is " << args.size() + << "."; return; } @@ -431,8 +467,8 @@ void FinalVM::InstInput(const VectorRef &args) { MS_LOG(DEBUG) << "Start"; const size_t args_size = 1; if (args.size() != args_size) { - MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is " - << args.size() << "."; + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is " << args.size() + << "."; return; } @@ -445,13 +481,13 @@ void FinalVM::InstPadStack(const VectorRef &args) { MS_LOG(DEBUG) << "Start"; const size_t args_size = 1; if (args.size() != args_size) { - MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is " - << args.size() << "."; + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is " << args.size() + << "."; return; } int sz = utils::cast(args[0]); - MS_LOG(DEBUG) << "" << insts_stack_.size() << " need padstack " << sz << " sp_ " << sp_; + MS_LOG(DEBUG) << insts_stack_.size() << " need padstack " << sz << " sp_ " << sp_; size_t stack_size = 
insts_stack_.size(); int need = sz - (static_cast(stack_size) - sp_); if (need > 0) { @@ -501,7 +537,7 @@ void FinalVM::InstPushPrim(const VectorRef &args) { MS_LOG(DEBUG) << "Start: " << args.size(); const size_t args_size = 2; if (args.size() < args_size) { - MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " or more parameters, while the input size is " + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " or more parameters, while the input size is " << args.size() << "."; return; } diff --git a/mindspore/ccsrc/vm/vm.h b/mindspore/ccsrc/vm/vm.h index eab726a9b7..a02eced44c 100644 --- a/mindspore/ccsrc/vm/vm.h +++ b/mindspore/ccsrc/vm/vm.h @@ -27,6 +27,9 @@ #include #include #include +#include + +#include "ir/anf.h" #include "utils/base_ref.h" namespace mindspore { @@ -60,13 +63,14 @@ const std::vector inst_str{"call", "tail_call", "return", "partial class StructPartial : public Base { public: // Initialize StructPartial. - StructPartial(int fn, const VectorRef &args); + StructPartial(int fn, const VectorRef &args, const FuncGraphPtr &fg = nullptr); virtual ~StructPartial() = default; MS_DECLARE_PARENT(StructPartial, Base) int fn_; VectorRef args_; + FuncGraphPtr fg_; }; std::ostream &operator<<(std::ostream &os, const StructPartial &other); @@ -98,6 +102,8 @@ class FinalVM { void InstTailCall(const VectorRef &args); void InstReturn(const VectorRef &args); void InstPartial(const VectorRef &args); + void InstSimuPartial(const VectorRef &args); + void InstRealPartial(const VectorRef &args); void InstSwitch(const VectorRef &args); void InstSimuSwitch(const VectorRef &args); void InstRealSwitch(const VectorRef &args); @@ -120,6 +126,7 @@ class FinalVM { void Pushsp(); void Popsp(); void DoJmp(const BaseRef &jmp); + void MergeJmpArgs(const BaseRef &jmp, const BaseRef &c); private: InstSet insts_; @@ -128,6 +135,7 @@ class FinalVM { std::stack retsp_; int pc_; int sp_; + std::unordered_map cond_jmp_; BackendPtr backend_; const 
InstFunctionMap inst_function_map = { {Instruction::kCall, [this](const VectorRef &args) { InstCall(args); }}, diff --git a/mindspore/ccsrc/vm/vmimpl.cc b/mindspore/ccsrc/vm/vmimpl.cc index 017121f334..d83bb8f190 100644 --- a/mindspore/ccsrc/vm/vmimpl.cc +++ b/mindspore/ccsrc/vm/vmimpl.cc @@ -445,7 +445,7 @@ BaseRef RunOperation(const PrimitivePtr &prim, const VectorRef &args) { MS_LOG(DEBUG) << "operation start " << prim->name(); auto func = operation != nullptr ? operation->GetComputeFunction() : prim->GetComputeFunction(); if (py::isinstance(func)) { - MS_LOG(EXCEPTION) << "" << prim->name() << " 's compute function is not implemented"; + MS_LOG(EXCEPTION) << prim->name() << " 's compute function is not implemented"; } py::tuple py_args = py::tuple(args.size()); diff --git a/mindspore/common/api.py b/mindspore/common/api.py index b5450bc5a3..3710e40996 100644 --- a/mindspore/common/api.py +++ b/mindspore/common/api.py @@ -22,7 +22,7 @@ from mindspore import context from mindspore import log as logger from mindspore.parallel._utils import _get_parallel_mode from .._c_expression import generate_key, Executor_, Tensor, MetaTensor -from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_ge +from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_backend from .tensor import Tensor as MsTensor # store ms_function class compiled pipeline cache @@ -70,12 +70,11 @@ def _wrap_func(fn): def _convert_data(data): if isinstance(data, Tensor) and not isinstance(data, MsTensor): return MsTensor(data) + if isinstance(data, tuple): + return tuple(_convert_data(x) for x in data) + if isinstance(data, list): + return list(_convert_data(x) for x in data) return data - - if isinstance(results, tuple): - return tuple(_convert_data(x) for x in results) - if isinstance(results, list): - return list(_convert_data(x) for x in results) return _convert_data(results) return wrapper @@ -184,7 +183,7 
@@ class _MindSporeFunction: @_wrap_func def __call__(self, *args): - init_ge() + init_backend() converted, arguments_dict, parse_method = _convert_function_arguments(self.fn, *args) if not converted: raise RuntimeError('Process function parameter is failure') @@ -328,7 +327,7 @@ class _Executor: raise TypeError('Parameters need OrderedDict type, but got {}'. format(type(params))) - def compile(self, obj, *args, phase='predict', params=None): + def compile(self, obj, *args, phase='predict', params=None, do_convert=True): """ Compiles graph. @@ -337,6 +336,7 @@ class _Executor: args (tuple): Function or cell input arguments. phase (str): The name of compile phase. Default: 'predict'. params (OrderedDict): The parameters dictionary used for init data graph. Default: None. + do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph. Return: Str, the full phase of the cell. @@ -368,7 +368,8 @@ class _Executor: if graph is None: logger.error("%r graph compile failed.", phase) - + if not do_convert: + return phase, True if not enable_debug_runtime or enable_ge: if _get_parallel_mode() in ["auto_parallel", "semi_auto_parallel"]: obj.parameter_layout_dict = self._executor.get_parameter_layout(phase) diff --git a/mindspore/common/dtype.py b/mindspore/common/dtype.py index 702e01effb..e6b9779f39 100644 --- a/mindspore/common/dtype.py +++ b/mindspore/common/dtype.py @@ -85,13 +85,16 @@ list_ = typing.List() tuple_ = typing.Tuple() tensor = typing.TensorType() function = typing.Function() +function_type = typing.Function symbolic_key = typing.SymbolicKeyType() env_type = typing.EnvType() +env_type_type = typing.EnvType type_type = typing.TypeType() type_none = typing.TypeNone() string = typing.String() type_refkey = typing.RefKeyType() tensor_type = typing.TensorType +anything_type = typing.TypeAnything number_type = (int8, int16, diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py index 70b8b169ca..5504f2b483 100644 --- 
a/mindspore/common/tensor.py +++ b/mindspore/common/tensor.py @@ -74,6 +74,17 @@ class Tensor(Tensor_): out = tensor_operator_registry.get('__add__')(self, other) return out + def __eq__(self, other): + if not isinstance(other, Tensor): + return False + x = self.asnumpy() + y = other.asnumpy() + out = np.equal(x, y) + return Tensor(np.array(out)) + + def __hash__(self): + return hash(id(self)) + def __mul__(self, other): check_type('tensor input_data', other, (Tensor, float, int)) out = tensor_operator_registry.get('__mul__')(self, other) diff --git a/mindspore/communication/__init__.py b/mindspore/communication/__init__.py index 65078f6820..26acc53d91 100644 --- a/mindspore/communication/__init__.py +++ b/mindspore/communication/__init__.py @@ -17,12 +17,12 @@ Collective communication interface. """ from .management import GlobalComm, init, release, get_rank, get_group_size, get_world_rank_from_group_rank, \ - get_group_rank_from_world_rank, create_group, HCCL_WORLD_COMM_GROUP, NCCL_WORLD_COMM_GROUP, get_group, \ + get_group_rank_from_world_rank, create_group, HCCL_WORLD_COMM_GROUP, NCCL_WORLD_COMM_GROUP, \ get_local_rank, get_local_rank_size, destroy_group __all__ = [ "GlobalComm", "init", "release", "get_rank", "get_group_size", "get_world_rank_from_group_rank", - "get_group_rank_from_world_rank", "create_group", "HCCL_WORLD_COMM_GROUP", "NCCL_WORLD_COMM_GROUP", "get_group", + "get_group_rank_from_world_rank", "create_group", "HCCL_WORLD_COMM_GROUP", "NCCL_WORLD_COMM_GROUP", "get_local_rank", "get_local_rank_size", "destroy_group" ] diff --git a/mindspore/communication/management.py b/mindspore/communication/management.py index 7208538a07..1cd60fe2e5 100755 --- a/mindspore/communication/management.py +++ b/mindspore/communication/management.py @@ -21,7 +21,7 @@ from ._comm_helper import Backend, _get_rank_helper, _get_size_helper, \ from .._c_expression import init_hccl, finalize_hccl, init_gpu_collective -__all__ = ["init", "release", "get_rank", 
"get_local_rank", "get_group_size", "get_group", +__all__ = ["init", "release", "get_rank", "get_local_rank", "get_group_size", "get_local_rank_size", "get_world_rank_from_group_rank", "get_group_rank_from_world_rank", "create_group", "destroy_group", "HCCL_WORLD_COMM_GROUP", "NCCL_WORLD_COMM_GROUP"] @@ -30,7 +30,7 @@ DEFAULT_WORLD_COMM_GROUP = HCCL_WORLD_COMM_GROUP DEFAULT_BACKEND = Backend("hccl") -def get_group(group): +def _get_group(group): """Get the global world group if the group is default world comm group.""" if group == DEFAULT_WORLD_COMM_GROUP: return GlobalComm.WORLD_COMM_GROUP @@ -100,7 +100,7 @@ def get_rank(group=GlobalComm.WORLD_COMM_GROUP): ValueError: If backend is invalid. RuntimeError: If hccl/nccl is not available or nccl not supports. """ - return _get_rank_helper(group=get_group(group), backend=GlobalComm.BACKEND) + return _get_rank_helper(group=_get_group(group), backend=GlobalComm.BACKEND) def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP): @@ -121,7 +121,7 @@ def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP): ValueError: If backend is invalid. RuntimeError: If hccl/nccl is not available or nccl not supports. """ - return _get_local_rank_helper(group=get_group(group), backend=GlobalComm.BACKEND) + return _get_local_rank_helper(group=_get_group(group), backend=GlobalComm.BACKEND) def get_group_size(group=GlobalComm.WORLD_COMM_GROUP): @@ -139,7 +139,7 @@ def get_group_size(group=GlobalComm.WORLD_COMM_GROUP): ValueError: If backend is invalid. RuntimeError: If hccl/nccl is not available or nccl not supports. """ - return _get_size_helper(group=get_group(group), backend=GlobalComm.BACKEND) + return _get_size_helper(group=_get_group(group), backend=GlobalComm.BACKEND) def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP): @@ -160,7 +160,7 @@ def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP): ValueError: If backend is invalid. RuntimeError: If hccl/nccl is not available or nccl not supports. 
""" - return _get_local_size_helper(group=get_group(group), backend=GlobalComm.BACKEND) + return _get_local_size_helper(group=_get_group(group), backend=GlobalComm.BACKEND) def get_world_rank_from_group_rank(group, group_rank_id): diff --git a/mindspore/context.py b/mindspore/context.py index 159522a87a..74acd7cd01 100644 --- a/mindspore/context.py +++ b/mindspore/context.py @@ -225,14 +225,6 @@ class _Context: if not success: raise RuntimeError("Device id set failed!!!") - @property - def enable_hccl(self): - return self._context_handle.get_hccl_flag() - - @enable_hccl.setter - def enable_hccl(self, hccl): - self._context_handle.set_hccl_flag(hccl) - @property def enable_ir_fusion(self): return self._context_handle.get_ir_fusion_flag() @@ -404,7 +396,7 @@ def _context(): @args_type_check(device_num=int, global_rank=int, mirror_mean=bool, cast_before_mirror=bool, parallel_mode=str, - parameter_broadcast=bool) + parameter_broadcast=bool, strategy_ckpt_load_file=str, strategy_ckpt_save_file=str) def set_auto_parallel_context(**kwargs): """ Set auto parallel context. @@ -415,8 +407,11 @@ def set_auto_parallel_context(**kwargs): Args: device_num (int): Available device number, the value must be in [1, 4096]. Default: 1. global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0. - mirror_mean (bool): Whether to perform mean operator after all-reduce of mirror. Default: False. - cast_before_mirror (bool): Insert Mirror Op after the cast if this flag is True. Default: True. + mirror_mean (bool): Whether to perform mean operator after all-reduce of mirror. + "stand_alone" do not support mirror_mean. Default: False. + cast_before_mirror (bool): Insert Mirror Op after the cast if this flag is True. + "stand_alone", "data_parallel" and "hybrid_parallel" do not support + cast_before_mirror. Default: True. parallel_mode (str): There are five kinds of parallel modes, "stand_alone", "data_parallel", "hybrid_parallel", "semi_auto_parallel" and "auto_parallel". 
Default: "stand_alone". @@ -433,6 +428,8 @@ def set_auto_parallel_context(**kwargs): parameter_broadcast (bool): Indicating whether to broadcast parameters before training. "stand_alone", "semi_auto_parallel" and "auto_parallel" do not support parameter broadcast. Default: False. + strategy_ckpt_load_file (str): The path to load parallel strategy checkpoint. Default: '' + strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: '' Raises: ValueError: If input key is not attribute in auto parallel context. @@ -444,6 +441,8 @@ def set_auto_parallel_context(**kwargs): >>> context.set_auto_parallel_context(cast_before_mirror=False) >>> context.set_auto_parallel_context(parallel_mode="auto_parallel") >>> context.set_auto_parallel_context(parameter_broadcast=False) + >>> context.set_auto_parallel_context(strategy_ckpt_load_file="./strategy_stage1.ckpt") + >>> context.set_auto_parallel_context(strategy_ckpt_save_file="./strategy_stage1.ckpt") """ _set_auto_parallel_context(**kwargs) @@ -474,12 +473,14 @@ def reset_auto_parallel_context(): - cast_before_mirror: True. - parallel_mode: "stand_alone". - parameter_broadcast: False. + - strategy_ckpt_load_file: "". + - strategy_ckpt_save_file: "". """ _reset_auto_parallel_context() @args_type_check(mode=int, precompile_only=bool, device_target=str, - device_id=int, enable_ir_fusion=bool, save_graphs=bool, enable_hccl=bool, + device_id=int, enable_ir_fusion=bool, save_graphs=bool, enable_task_sink=bool, save_graphs_path=str, enable_loop_sink=bool, enable_mem_reuse=bool, save_ms_model=bool, save_ms_model_path=str, enable_gpu_summary=bool, enable_auto_mixed_precision=bool, enable_dump=bool, save_dump_path=str, @@ -487,7 +488,7 @@ def reset_auto_parallel_context(): variable_memory_max_size=str) def set_context(**kwargs): """ - Set context for running environment. + Sets context for running environment. Context should be configured before running your program. 
If there is no configuration, the "Ascend" device target will be used by default. GRAPH_MODE or @@ -512,8 +513,7 @@ def set_context(**kwargs): while device_num_per_host should no more than 4096. Default: 0. enable_ir_fusion (bool): Whether to enable ir fusion. Default: True. save_graphs (bool): Whether to save graphs. Default: False. - enable_hccl (bool): Whether to enable hccl. Default: False. - enable_loop_sink (bool): Whether to enable loop sink. Default: False. + enable_loop_sink (bool): Whether to enable loop sink. Default: True. enable_task_sink (bool): Whether to enable task sink. Default: True. enable_mem_reuse (bool): Whether to enable memory reuse. Default: True. save_ms_model (bool): Whether to save lite model converted by graph. Default: False. @@ -524,10 +524,12 @@ def set_context(**kwargs): reserve_class_name_in_scope (bool) : Whether to save the network class name in the scope. Default: True. enable_reduce_precision (bool): Whether to enable precision reduction. Default: True. enable_dump (bool): Whether to enable dump. Default: False. - save_dump_path (str): Set path to dump data. Default: ".". + save_dump_path (str): When the program is executed on Ascend, operators can dump data here. + The root dump path is configured in /home/HwHiAiUser/ide_daemon/ide_daemon.cfg. + So the real dump path is "{configured root dump path}/{`save_dump_path`}". Default: ".". enable_dynamic_memory (bool): Whether to enable dynamic memory. Default: False. - graph_memory_max_size (str): Set graph memory max size. Default: "26GB". - variable_memory_max_size (str): Set variable memory max size. Default: "5GB". + graph_memory_max_size (str): Sets graph memory max size. Default: "26GB". + variable_memory_max_size (str): Sets variable memory max size. Default: "5GB". Raises: ValueError: If input key is not an attribute in context. 
diff --git a/mindspore/dataset/__init__.py b/mindspore/dataset/__init__.py index 1b0397ae26..54068eb762 100644 --- a/mindspore/dataset/__init__.py +++ b/mindspore/dataset/__init__.py @@ -21,7 +21,7 @@ can also create samplers with this module to sample data. from .core.configuration import config from .engine.datasets import StorageDataset, TFRecordDataset, ImageFolderDatasetV2, MnistDataset, MindDataset, \ GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CelebADataset, TextFileDataset, \ - Schema, Shuffle, zip + Schema, Shuffle, zip, RandomDataset from .engine.samplers import DistributedSampler, PKSampler, RandomSampler, SequentialSampler, SubsetRandomSampler, \ WeightedRandomSampler, Sampler from .engine.serializer_deserializer import serialize, deserialize, show diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index 1648734704..e3ef94e480 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -30,7 +30,9 @@ from enum import Enum from importlib import import_module import threading +import copy import numpy as np + from mindspore._c_dataengine import DataType, TFReaderOp, ImageFolderOp, CifarOp, MnistOp, ManifestOp, \ MindRecordOp, TextFileOp, CBatchInfo from mindspore._c_expression import typing @@ -206,7 +208,6 @@ class Dataset: Add a blocking condition to the input Dataset Args: - input_dataset (Dataset): Input dataset to apply flow control num_batch (int): the number of batches without blocking at the start of each epoch condition_name (str): The condition name that is used to toggle sending next row callback (function): The callback funciton that will be invoked when sync_update is called @@ -428,11 +429,11 @@ class Dataset: If input_columns not provided or empty, all columns will be used. Args: - predicate: python callable which returns a boolean value. 
- input_columns: (list[str]): List of names of the input columns, when - default=None, the predicate will be applied on all columns in the dataset. + predicate(callable): python callable which returns a boolean value. + input_columns: (list[str], optional): List of names of the input columns, when + default=None, the predicate will be applied on all columns in the dataset. num_parallel_workers (int, optional): Number of workers to process the Dataset - in parallel (default=None). + in parallel (default=None). Returns: FilterDataset, dataset filter. @@ -454,7 +455,7 @@ class Dataset: The order of using repeat and batch reflects the number of batches. Recommend that repeat operation should be used after batch operation. If dataset_sink_mode is False, here repeat operation is invalid. - If dataset_sink_mode is True, repeat count should be euqal to the epoch of training. Otherwise, + If dataset_sink_mode is True, repeat count should be equal to the epoch of training. Otherwise, errors could occur since the amount of data is not the amount training requires. 
Args: @@ -919,10 +920,13 @@ class Dataset: def sync_update(self, condition_name, num_batch=None, data=None): """ - condition_name (str): The condition name that is used to toggle sending next row - step_size (int or None): The number of steps(rows) that are released - when pass_rows is None, will update the same number as sync_wait specified - data (dict or None): The data passed to the callback + Release a blocking condition and trigger callback with given data + + Args: + condition_name (str): The condition name that is used to toggle sending next row + num_batch (int or None): The number of batches(rows) that are released + When num_batch is None, it will default to the number specified by the sync_wait operator + data (dict or None): The data passed to the callback """ notifiers_dict = self.get_sync_notifiers() if condition_name not in notifiers_dict: @@ -1376,6 +1380,23 @@ class MapDataset(DatasetOp): """ return self.input[0].get_dataset_size() + def __deepcopy__(self, memodict): + if id(self) in memodict: + return memodict[id(self)] + cls = self.__class__ + new_op = cls.__new__(cls) + memodict[id(self)] = new_op + new_op.input = copy.deepcopy(self.input, memodict) + new_op.input_columns = copy.deepcopy(self.input_columns, memodict) + new_op.output_columns = copy.deepcopy(self.output_columns, memodict) + new_op.columns_order = copy.deepcopy(self.columns_order, memodict) + new_op.num_parallel_workers = copy.deepcopy(self.num_parallel_workers, memodict) + new_op.output = copy.deepcopy(self.output, memodict) + new_op.input_indexs = copy.deepcopy(self._input_indexs, memodict) + new_op.python_multiprocessing = copy.deepcopy(self.python_multiprocessing, memodict) + new_op.operations = self.operations + return new_op + # Iterator bootstrap will be called on iterator construction. # A deep copy of Dataset object is created prior of iterator_bootstrap. # This method will create per iterator process pool and bind pyfunc execution to the pool. 
@@ -2483,19 +2504,19 @@ class GeneratorDataset(SourceDataset): Iterable source is required to return a tuple of numpy array as a row of the dataset on iter(source).next(). Random accessible source is required to return a tuple of numpy array as a row of the dataset on source[idx]. - column_names (list[str]): List of column names of the dataset. + column_names (list[str], optional): List of column names of the dataset (default=None). Users are required to + provide either column_names or schema. column_types (list[mindspore.dtype], optional): List of column data types of the dataset (default=None). If provided, sanity check will be performed on generator output. - schema (Schema/String, optional): Path to the json schema file or schema object (default=None). - If the schema is not provided, the meta data from column_names and column_types is considered the schema. + schema (Schema/String, optional): Path to the json schema file or schema object (default=None). Users are + required to provide either column_names or schema. If both are provided, schema will be used. num_samples (int, optional): The number of samples to be included in the dataset (default=None, all images). num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1). shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required. (default=None, expected order behavior shown in the table). sampler (Sampler/Iterable, optional): Object used to choose samples from the dataset. Random accessible input is - required. - (default=None, expected order behavior shown in the table). + required (default=None, expected order behavior shown in the table). num_shards (int, optional): Number of shards that the dataset should be divided into (default=None). This argument should be specified only when 'num_samples' is "None". Random accessible input is required. 
shard_id (int, optional): The shard ID within num_shards (default=None). This argument should be specified only @@ -2535,8 +2556,8 @@ class GeneratorDataset(SourceDataset): """ @check_generatordataset - def __init__(self, source, column_names, column_types=None, schema=None, num_samples=None, num_parallel_workers=1, - shuffle=None, sampler=None, num_shards=None, shard_id=None): + def __init__(self, source, column_names=None, column_types=None, schema=None, num_samples=None, + num_parallel_workers=1, shuffle=None, sampler=None, num_shards=None, shard_id=None): super().__init__(num_parallel_workers) self.sampler = _select_sampler(num_samples, sampler, shuffle, num_shards, shard_id) if self.sampler is not None and hasattr(source, "__getitem__"): @@ -2569,6 +2590,8 @@ class GeneratorDataset(SourceDataset): # Random accessible input is also iterable self.source = (lambda: _iter_fn(source, num_samples)) + if column_names is not None and not isinstance(column_names, list): + column_names = [column_names] self.column_names = column_names if column_types is not None: @@ -2576,6 +2599,16 @@ class GeneratorDataset(SourceDataset): else: self.column_types = column_types + if schema is not None: + self.schema = schema + if not isinstance(schema, Schema): + self.schema = Schema(schema) + self.column_names = [] + self.column_types = [] + for col in self.schema.columns: + self.column_names.append(col["name"]) + self.column_types.append(DataType(col["type"])) + def get_args(self): args = super().get_args() args["source"] = self.source @@ -2599,6 +2632,23 @@ class GeneratorDataset(SourceDataset): else: raise ValueError('set dataset_size with negative value {}'.format(value)) + def __deepcopy__(self, memodict): + if id(self) in memodict: + return memodict[id(self)] + cls = self.__class__ + new_op = cls.__new__(cls) + memodict[id(self)] = new_op + new_op.input = copy.deepcopy(self.input, memodict) + new_op.output = copy.deepcopy(self.output, memodict) + new_op.num_parallel_workers = 
copy.deepcopy(self.num_parallel_workers, memodict) + new_op.column_types = copy.deepcopy(self.column_types, memodict) + new_op.column_names = copy.deepcopy(self.column_names, memodict) + + new_op.source = self.source + new_op.sampler = self.sampler + + return new_op + class TFRecordDataset(SourceDataset): """ @@ -3109,6 +3159,57 @@ class Cifar100Dataset(SourceDataset): return get_num_rows(num_rows, self.num_shards) +class RandomDataset(SourceDataset): + """ + A source dataset that generates random data. + + Args: + num_samples (int): number of samples to generate. + schema (str or Schema, optional): Path to the json schema file or schema object (default=None). + If the schema is not provided, the meta data from the TFRecord file is considered the schema. + columns_list (list[str], optional): List of columns to be read (default=None, read all columns) + num_parallel_workers (int, optional): number of workers to read the data + (default=None, number set in the config). + """ + + def __init__(self, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None): + super().__init__(num_parallel_workers) + schema_obj = None + if (schema is not None) and (not isinstance(schema, Schema)): + schema_obj = Schema(schema) # read the schema file and convert to schema object to validate it + self.schema = schema + self.columns_list = columns_list + self.num_samples = num_samples + if schema_obj is not None and num_samples is None: + self.num_samples = schema_obj.num_rows + + def get_args(self): + args = super().get_args() + if self.schema is not None: + if isinstance(self.schema, Schema): + self.schema.datasetType = 'Random' + if self.num_samples is not None: + self.schema.num_rows = self.num_samples + args["schema_json_string"] = self.schema.to_json() + else: + args["schema_file_path"] = self.schema + args["schema"] = self.schema + if self.columns_list is not None: + args["columns_list"] = self.columns_list + if self.num_samples is not None: + args["num_samples"] 
= self.num_samples + return args + + def get_dataset_size(self): + """ + Get the number of batches in an epoch. + + Return: + Number, number of batches. + """ + return self.num_samples + + class Schema: """ Class to represent a schema of dataset. diff --git a/mindspore/dataset/engine/iterators.py b/mindspore/dataset/engine/iterators.py index 81bad14810..e32c188d00 100644 --- a/mindspore/dataset/engine/iterators.py +++ b/mindspore/dataset/engine/iterators.py @@ -17,6 +17,7 @@ from abc import abstractmethod import copy import weakref +from importlib import import_module from mindspore._c_dataengine import DEPipeline from mindspore._c_dataengine import OpName @@ -24,14 +25,29 @@ from mindspore._c_dataengine import OpName from mindspore import log as logger from . import datasets as de +try: + context = import_module("mindspore.context") +except ModuleNotFoundError: + context = None + ITERATORS_LIST = list() def _cleanup(): + """Release all the Iterator.""" for itr_ref in ITERATORS_LIST: - itr = itr_ref() - if itr is not None: - itr.release() + if context: + device_type = context.get_context("device_target") + if device_type == "GPU": + itr_ref.release() + else: + itr = itr_ref() + if itr is not None: + itr.release() + else: + itr = itr_ref() + if itr is not None: + itr.release() def alter_tree(node): @@ -85,7 +101,14 @@ class Iterator: """ def __init__(self, dataset): - ITERATORS_LIST.append(weakref.ref(self)) + if context: + device_type = context.get_context("device_target") + if device_type == "GPU": + ITERATORS_LIST.append(self) + else: + ITERATORS_LIST.append(weakref.ref(self)) + else: + ITERATORS_LIST.append(weakref.ref(self)) # create a copy of tree and work on it. 
self.dataset = copy.deepcopy(dataset) self.dataset = alter_tree(self.dataset) @@ -169,6 +192,8 @@ class Iterator: op_type = OpName.CIFAR100 elif isinstance(dataset, de.CelebADataset): op_type = OpName.CELEBA + elif isinstance(dataset, de.RandomDataset): + op_type = OpName.RANDOMDATA elif isinstance(dataset, de.TextFileDataset): op_type = OpName.TEXTFILE else: @@ -248,7 +273,7 @@ class Iterator: return self.depipeline.GetNumClasses() def __deepcopy__(self, memo): - return Iterator(copy.deepcopy(self.dataset, memo)) + return self class DictIterator(Iterator): diff --git a/mindspore/dataset/engine/samplers.py b/mindspore/dataset/engine/samplers.py index 82759989cb..972f0af191 100644 --- a/mindspore/dataset/engine/samplers.py +++ b/mindspore/dataset/engine/samplers.py @@ -19,8 +19,8 @@ SequentialSampler, SubsetRandomSampler, WeightedRandomSampler. User can also define custom sampler by extending from Sampler class. """ -import mindspore._c_dataengine as cde import numpy as np +import mindspore._c_dataengine as cde class Sampler: @@ -137,6 +137,7 @@ class DistributedSampler(BuiltinSampler): self.shard_id = shard_id self.shuffle = shuffle self.seed = 0 + super().__init__() def create(self): # each time user calls create_dict_iterator() (to do repeat) sampler would get a different seed to shuffle @@ -152,6 +153,7 @@ class PKSampler(BuiltinSampler): num_val (int): Number of elements to sample for each class. num_class (int, optional): Number of classes to sample (default=None, all classes). shuffle (bool, optional): If true, the class IDs are shuffled (default=False). + class_column (str, optional): Name of column to classify dataset(default='label'), for MindDataset. Examples: >>> import mindspore.dataset as ds @@ -168,7 +170,7 @@ class PKSampler(BuiltinSampler): ValueError: If shuffle is not boolean. 
""" - def __init__(self, num_val, num_class=None, shuffle=False): + def __init__(self, num_val, num_class=None, shuffle=False, class_column='label'): if num_val <= 0: raise ValueError("num_val should be a positive integer value, but got num_val={}".format(num_val)) @@ -180,12 +182,18 @@ class PKSampler(BuiltinSampler): self.num_val = num_val self.shuffle = shuffle + self.class_column = class_column # work for minddataset + super().__init__() def create(self): return cde.PKSampler(self.num_val, self.shuffle) def _create_for_minddataset(self): - return cde.MindrecordPkSampler(self.num_val, self.shuffle) + if not self.class_column or not isinstance(self.class_column, str): + raise ValueError("class_column should be a not empty string value, \ + but got class_column={}".format(self.class_column)) + return cde.MindrecordPkSampler(self.num_val, self.class_column, self.shuffle) + class RandomSampler(BuiltinSampler): """ @@ -220,6 +228,7 @@ class RandomSampler(BuiltinSampler): self.replacement = replacement self.num_samples = num_samples + super().__init__() def create(self): # If num_samples is not specified, then call constructor #2 @@ -270,6 +279,7 @@ class SubsetRandomSampler(BuiltinSampler): indices = [indices] self.indices = indices + super().__init__() def create(self): return cde.SubsetRandomSampler(self.indices) @@ -317,6 +327,7 @@ class WeightedRandomSampler(BuiltinSampler): self.weights = weights self.num_samples = num_samples self.replacement = replacement + super().__init__() def create(self): return cde.WeightedRandomSampler(self.weights, self.num_samples, self.replacement) diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index dabeb2d424..4f1bb2c2d7 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -555,16 +555,23 @@ def check_generatordataset(method): except TypeError: raise TypeError("source should be callable, iterable or random accessible") - # check column_names; 
required argument + # check column_names or schema; required argument column_names = param_dict.get('column_names') - if column_names is None: - raise ValueError("column_names is not provided.") + schema = param_dict.get('schema') + if column_names is None and schema is None: + raise ValueError("Neither columns_names not schema are provided.") + + if schema is not None: + if not isinstance(schema, datasets.Schema) and not isinstance(schema, str): + raise ValueError("schema should be a path to schema file or a schema object.") # check optional argument nreq_param_int = ["num_samples", "num_parallel_workers", "num_shards", "shard_id"] check_param_type(nreq_param_int, param_dict, int) nreq_param_list = ["column_types"] check_param_type(nreq_param_list, param_dict, list) + nreq_param_bool = ["shuffle"] + check_param_type(nreq_param_bool, param_dict, bool) num_shards = param_dict.get("num_shards") shard_id = param_dict.get("shard_id") @@ -587,6 +594,11 @@ def check_generatordataset(method): except TypeError: raise TypeError("sampler should be either iterable or from mindspore.dataset.samplers") + if sampler is not None and not hasattr(source, "__getitem__"): + raise ValueError("sampler is not supported if source does not have attribute '__getitem__'") + if num_shards is not None and not hasattr(source, "__getitem__"): + raise ValueError("num_shards is not supported if source does not have attribute '__getitem__'") + return method(*args, **kwargs) return new_method @@ -700,9 +712,11 @@ def check_map(method): nreq_param_list = ['columns_order'] nreq_param_int = ['num_parallel_workers'] nreq_param_columns = ['input_columns', 'output_columns'] + nreq_param_bool = ['python_multiprocessing'] check_param_type(nreq_param_list, param_dict, list) check_param_type(nreq_param_int, param_dict, int) + check_param_type(nreq_param_bool, param_dict, bool) for param_name in nreq_param_columns: param = param_dict.get(param_name) if param is not None: @@ -906,6 +920,8 @@ def 
check_textfiledataset(method): check_param_type(nreq_param_int, param_dict, int) + check_sampler_shuffle_shard_options(param_dict) + return method(*args, **kwargs) return new_method diff --git a/mindspore/dataset/transforms/vision/c_transforms.py b/mindspore/dataset/transforms/vision/c_transforms.py index 07011b1d53..1806d22446 100644 --- a/mindspore/dataset/transforms/vision/c_transforms.py +++ b/mindspore/dataset/transforms/vision/c_transforms.py @@ -89,8 +89,8 @@ class Normalize(cde.NormalizeOp): Normalize the input image with respect to mean and standard deviation. Args: - mean (list): List of mean values for each channel, w.r.t channel order. - std (list): List of standard deviations for each channel, w.r.t. channel order. + mean (sequence): List or tuple of mean values for each channel, w.r.t channel order. + std (sequence): List or tuple of standard deviations for each channel, w.r.t. channel order. """ @check_normalize_c @@ -109,6 +109,7 @@ class RandomCrop(cde.RandomCropOp): If size is an int, a square crop of size (size, size) is returned. If size is a sequence of length 2, it should be (height, width). padding (int or sequence, optional): The number of pixels to pad the image (default=None). + If padding is not None, pad image firstly with padding values. If a single number is provided, it pads all borders with this value. If a tuple or list of 2 values are provided, it pads the (left and top) with the first value and (right and bottom) with the second value. @@ -454,8 +455,19 @@ class UniformAugment(cde.UniformAugOp): Tensor operation to perform randomly selected augmentation Args: - operations: list of python operations. + operations: list of C++ operations (python OPs are not accepted). NumOps (int): number of OPs to be selected and applied. 
+ + Examples: + >>> transforms_list = [c_transforms.RandomHorizontalFlip(), + >>> c_transforms.RandomVerticalFlip(), + >>> c_transforms.RandomColorAdjust(), + >>> c_transforms.RandomRotation(degrees=45)] + >>> uni_aug = c_transforms.UniformAugment(operations=transforms_list, num_ops=2) + >>> transforms_all = [c_transforms.Decode(), c_transforms.Resize(size=[224, 224]), + >>> uni_aug, F.ToTensor()] + >>> ds_ua = ds.map(input_columns="image", + >>> operations=transforms_all, num_parallel_workers=1) """ @check_uniform_augmentation diff --git a/mindspore/dataset/transforms/vision/py_transforms.py b/mindspore/dataset/transforms/vision/py_transforms.py index 51bea80b21..e96efe192b 100644 --- a/mindspore/dataset/transforms/vision/py_transforms.py +++ b/mindspore/dataset/transforms/vision/py_transforms.py @@ -225,8 +225,8 @@ class Normalize: The values of the array need to be in range [0.0, 1.0]. Args: - mean (list): List of mean values for each channel, w.r.t channel order. - std (list): List of standard deviations for each channel, w.r.t. channel order. + mean (sequence): List or tuple of mean values for each channel, w.r.t channel order. + std (sequence): List or tuple of standard deviations for each channel, w.r.t. channel order. Examples: >>> py_transforms.ComposeOp([py_transforms.Decode(), @@ -262,6 +262,7 @@ class RandomCrop: If size is an int, a square crop of size (size, size) is returned. If size is a sequence of length 2, it should be (height, width). padding (int or sequence, optional): The number of pixels to pad the image (default=None). + If padding is not None, pad image firstly with padding values. If a single number is provided, it pads all borders with this value. If a tuple or list of 2 values are provided, it pads the (left and top) with the first value and (right and bottom) with the second value. 
@@ -1354,7 +1355,7 @@ class RandomSharpness: Examples: >>> py_transforms.ComposeOp([py_transforms.Decode(), - >>> py_transforms.RandomColor(0.5,1.5), + >>> py_transforms.RandomSharpness(0.5,1.5), >>> py_transforms.ToTensor()]) """ @@ -1485,4 +1486,4 @@ class UniformAugment: Returns: img (PIL Image), Transformed image. """ - return util.uniform_augment(img, self.transforms, self.num_ops) + return util.uniform_augment(img, self.transforms.copy(), self.num_ops) diff --git a/mindspore/dataset/transforms/vision/validators.py b/mindspore/dataset/transforms/vision/validators.py index 713d9c5714..aff2d3bc0c 100644 --- a/mindspore/dataset/transforms/vision/validators.py +++ b/mindspore/dataset/transforms/vision/validators.py @@ -17,11 +17,12 @@ import numbers from functools import wraps +from mindspore._c_dataengine import TensorOp + from .utils import Inter, Border from ...transforms.validators import check_pos_int32, check_pos_float32, check_value, check_uint8, FLOAT_MAX_INTEGER, \ check_bool, check_2tuple, check_range, check_list, check_type, check_positive, INT32_MAX - def check_inter_mode(mode): if not isinstance(mode, Inter): raise ValueError("Invalid interpolation mode.") @@ -119,7 +120,7 @@ def check_degrees(degrees): degrees = (-degrees, degrees) elif isinstance(degrees, (list, tuple)): if len(degrees) != 2: - raise ValueError("If degrees is a sequence, the length must be 2.") + raise TypeError("If degrees is a sequence, the length must be 2.") else: raise TypeError("Degrees must be a single non-negative number or a sequence") return degrees @@ -836,8 +837,8 @@ def check_uniform_augmentation(method): if not isinstance(operations, list): raise ValueError("operations is not a python list") for op in operations: - if not callable(op): - raise ValueError("non-callable op in operations list") + if not isinstance(op, TensorOp): + raise ValueError("operations list only accepts C++ operations.") kwargs["num_ops"] = num_ops kwargs["operations"] = operations diff --git 
a/mindspore/mindrecord/tools/imagenet_to_mr.py b/mindspore/mindrecord/tools/imagenet_to_mr.py index 8c8de689c1..e941e76477 100644 --- a/mindspore/mindrecord/tools/imagenet_to_mr.py +++ b/mindspore/mindrecord/tools/imagenet_to_mr.py @@ -111,6 +111,9 @@ class ImageNetToMR: image_file = open(file_name, "rb") image_bytes = image_file.read() image_file.close() + if not image_bytes: + logger.warning("The image file: {} is invalid.".format(file_name)) + continue data["data"] = image_bytes yield data diff --git a/mindspore/model_zoo/Bert_NEZHA/bert_for_pre_training.py b/mindspore/model_zoo/Bert_NEZHA/bert_for_pre_training.py index 046b2adbe2..53a0d03933 100644 --- a/mindspore/model_zoo/Bert_NEZHA/bert_for_pre_training.py +++ b/mindspore/model_zoo/Bert_NEZHA/bert_for_pre_training.py @@ -445,5 +445,5 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell): succ = False else: succ = self.optimizer(grads) - ret = (loss, cond) + ret = (loss, cond, scaling_sens) return F.depend(ret, succ) diff --git a/mindspore/model_zoo/alexnet.py b/mindspore/model_zoo/alexnet.py index 8cd316229c..7ad1c8e37b 100644 --- a/mindspore/model_zoo/alexnet.py +++ b/mindspore/model_zoo/alexnet.py @@ -15,6 +15,7 @@ """Alexnet.""" import mindspore.nn as nn from mindspore.common.initializer import TruncatedNormal +from mindspore.ops import operations as P def conv(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="valid"): weight = weight_variable() @@ -44,7 +45,7 @@ class AlexNet(nn.Cell): self.conv4 = conv(384, 384, 3, pad_mode="same") self.conv5 = conv(384, 256, 3, pad_mode="same") self.relu = nn.ReLU() - self.max_pool2d = nn.MaxPool2d(kernel_size=3, stride=2) + self.max_pool2d = P.MaxPool(ksize=3, strides=2) self.flatten = nn.Flatten() self.fc1 = fc_with_initialize(6*6*256, 4096) self.fc2 = fc_with_initialize(4096, 4096) diff --git a/mindspore/model_zoo/mobilenet.py b/mindspore/model_zoo/mobilenet.py new file mode 100644 index 0000000000..1d4f1b10b5 --- /dev/null +++ 
b/mindspore/model_zoo/mobilenet.py @@ -0,0 +1,284 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""MobileNetV2 model define""" +import numpy as np +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.ops.operations import TensorAdd +from mindspore import Parameter, Tensor +from mindspore.common.initializer import initializer + +__all__ = ['MobileNetV2', 'mobilenet_v2'] + + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class GlobalAvgPooling(nn.Cell): + """ + Global avg pooling definition. + + Args: + + Returns: + Tensor, output tensor. 
+ + Examples: + >>> GlobalAvgPooling() + """ + def __init__(self): + super(GlobalAvgPooling, self).__init__() + self.mean = P.ReduceMean(keep_dims=False) + + def construct(self, x): + x = self.mean(x, (2, 3)) + return x + + +class DepthwiseConv(nn.Cell): + """ + Depthwise Convolution warpper definition. + + Args: + in_planes (int): Input channel. + kernel_size (int): Input kernel size. + stride (int): Stride size. + pad_mode (str): pad mode in (pad, same, valid) + channel_multiplier (int): Output channel multiplier + has_bias (bool): has bias or not + + Returns: + Tensor, output tensor. + + Examples: + >>> DepthwiseConv(16, 3, 1, 'pad', 1, channel_multiplier=1) + """ + def __init__(self, in_planes, kernel_size, stride, pad_mode, pad, channel_multiplier=1, has_bias=False): + super(DepthwiseConv, self).__init__() + self.has_bias = has_bias + self.in_channels = in_planes + self.channel_multiplier = channel_multiplier + self.out_channels = in_planes * channel_multiplier + self.kernel_size = (kernel_size, kernel_size) + self.depthwise_conv = P.DepthwiseConv2dNative(channel_multiplier=channel_multiplier, kernel_size=kernel_size, + stride=stride, pad_mode=pad_mode, pad=pad) + self.bias_add = P.BiasAdd() + weight_shape = [channel_multiplier, in_planes, *self.kernel_size] + self.weight = Parameter(initializer('ones', weight_shape), name='weight') + + if has_bias: + bias_shape = [channel_multiplier * in_planes] + self.bias = Parameter(initializer('zeros', bias_shape), name='bias') + else: + self.bias = None + + def construct(self, x): + output = self.depthwise_conv(x, self.weight) + if self.has_bias: + output = self.bias_add(output, self.bias) + return output + + +class ConvBNReLU(nn.Cell): + """ + Convolution/Depthwise fused with Batchnorm and ReLU block definition. + + Args: + in_planes (int): Input channel. + out_planes (int): Output channel. + kernel_size (int): Input kernel size. + stride (int): Stride size for the first convolutional layer. Default: 1. 
+ groups (int): channel group. Convolution is 1 while Depthiwse is input channel. Default: 1. + + Returns: + Tensor, output tensor. + + Examples: + >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1) + """ + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + super(ConvBNReLU, self).__init__() + padding = (kernel_size - 1) // 2 + if groups == 1: + conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad_mode='pad', + padding=padding) + else: + conv = DepthwiseConv(in_planes, kernel_size, stride, pad_mode='pad', pad=padding) + layers = [conv, nn.BatchNorm2d(out_planes), nn.ReLU6()] + self.features = nn.SequentialCell(layers) + + def construct(self, x): + output = self.features(x) + return output + + +class InvertedResidual(nn.Cell): + """ + Mobilenetv2 residual block definition. + + Args: + inp (int): Input channel. + oup (int): Output channel. + stride (int): Stride size for the first convolutional layer. Default: 1. + expand_ratio (int): expand ration of input channel + + Returns: + Tensor, output tensor. + + Examples: + >>> ResidualBlock(3, 256, 1, 1) + """ + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), + # pw-linear + nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, has_bias=False), + nn.BatchNorm2d(oup), + ]) + self.conv = nn.SequentialCell(layers) + self.add = TensorAdd() + self.cast = P.Cast() + + def construct(self, x): + identity = x + x = self.conv(x) + if self.use_res_connect: + return self.add(identity, x) + return x + + +class MobileNetV2(nn.Cell): + """ + MobileNetV2 architecture. + + Args: + class_num (Cell): number of classes. 
+ width_mult (int): Channels multiplier for round to 8/16 and others. Default is 1. + has_dropout (bool): Is dropout used. Default is false + inverted_residual_setting (list): Inverted residual settings. Default is None + round_nearest (list): Channel round to . Default is 8 + Returns: + Tensor, output tensor. + + Examples: + >>> MobileNetV2(num_classes=1000) + """ + def __init__(self, num_classes=1000, width_mult=1., + has_dropout=False, inverted_residual_setting=None, round_nearest=8): + super(MobileNetV2, self).__init__() + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + # setting of inverted residual blocks + self.cfgs = inverted_residual_setting + if inverted_residual_setting is None: + self.cfgs = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # building first layer + input_channel = _make_divisible(input_channel * width_mult, round_nearest) + self.out_channels = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) + features = [ConvBNReLU(3, input_channel, stride=2)] + # building inverted residual blocks + for t, c, n, s in self.cfgs: + output_channel = _make_divisible(c * width_mult, round_nearest) + for i in range(n): + stride = s if i == 0 else 1 + features.append(block(input_channel, output_channel, stride, expand_ratio=t)) + input_channel = output_channel + # building last several layers + features.append(ConvBNReLU(input_channel, self.out_channels, kernel_size=1)) + # make it nn.CellList + self.features = nn.SequentialCell(features) + # mobilenet head + head = ([GlobalAvgPooling(), nn.Dense(self.out_channels, num_classes, has_bias=True)] if not has_dropout else + [GlobalAvgPooling(), nn.Dropout(0.2), nn.Dense(self.out_channels, num_classes, has_bias=True)]) + self.head = nn.SequentialCell(head) + + self._initialize_weights() + + def construct(self, x): + x = self.features(x) + x = self.head(x) + return x + + def 
_initialize_weights(self): + """ + Initialize weights. + + Args: + + Returns: + None. + + Examples: + >>> _initialize_weights() + """ + for _, m in self.cells_and_names(): + if isinstance(m, (nn.Conv2d, DepthwiseConv)): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. / n), + m.weight.data.shape()).astype("float32"))) + if m.bias is not None: + m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape(), dtype="float32"))) + elif isinstance(m, nn.BatchNorm2d): + m.gamma.set_parameter_data(Tensor(np.ones(m.gamma.data.shape(), dtype="float32"))) + m.beta.set_parameter_data(Tensor(np.zeros(m.beta.data.shape(), dtype="float32"))) + elif isinstance(m, nn.Dense): + m.weight.set_parameter_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape()).astype("float32"))) + if m.bias is not None: + m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape(), dtype="float32"))) + + +def mobilenet_v2(**kwargs): + """ + Constructs a MobileNet V2 model + """ + return MobileNetV2(**kwargs) diff --git a/mindspore/model_zoo/resnet.py b/mindspore/model_zoo/resnet.py index 9d010eede1..001e1db0cf 100755 --- a/mindspore/model_zoo/resnet.py +++ b/mindspore/model_zoo/resnet.py @@ -168,7 +168,7 @@ class ResNet(nn.Cell): self.conv1 = _conv7x7(3, 64, stride=2) self.bn1 = _bn(64) self.relu = P.ReLU() - self.maxpool = P.MaxPoolWithArgmax(padding="same", ksize=3, strides=2) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") self.layer1 = self._make_layer(block, layer_nums[0], @@ -227,7 +227,7 @@ class ResNet(nn.Cell): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) - c1, argmax = self.maxpool(x) + c1 = self.maxpool(x) c2 = self.layer1(c1) c3 = self.layer2(c2) @@ -260,3 +260,23 @@ def resnet50(class_num=10): [256, 512, 1024, 2048], [1, 2, 2, 2], class_num) + +def resnet101(class_num=1001): + """ + Get ResNet101 neural network. + + Args: + class_num (int): Class number. 
+ + Returns: + Cell, cell instance of ResNet101 neural network. + + Examples: + >>> net = resnet101(1001) + """ + return ResNet(ResidualBlock, + [3, 4, 23, 3], + [64, 256, 512, 1024], + [256, 512, 1024, 2048], + [1, 2, 2, 2], + class_num) diff --git a/mindspore/model_zoo/vgg.py b/mindspore/model_zoo/vgg.py index 6fcd075cc8..66a73a2e50 100644 --- a/mindspore/model_zoo/vgg.py +++ b/mindspore/model_zoo/vgg.py @@ -14,7 +14,6 @@ # ============================================================================ """VGG.""" import mindspore.nn as nn -from mindspore.ops import operations as P from mindspore.common.initializer import initializer import mindspore.common.dtype as mstype @@ -62,9 +61,9 @@ class Vgg(nn.Cell): def __init__(self, base, num_classes=1000, batch_norm=False, batch_size=1): super(Vgg, self).__init__() + _ = batch_size self.layers = _make_layer(base, batch_norm=batch_norm) - self.reshape = P.Reshape() - self.shp = (batch_size, -1) + self.flatten = nn.Flatten() self.classifier = nn.SequentialCell([ nn.Dense(512 * 7 * 7, 4096), nn.ReLU(), @@ -74,7 +73,7 @@ class Vgg(nn.Cell): def construct(self, x): x = self.layers(x) - x = self.reshape(x, self.shp) + x = self.flatten(x) x = self.classifier(x) return x @@ -87,20 +86,19 @@ cfg = { } -def vgg16(batch_size=1, num_classes=1000): +def vgg16(num_classes=1000): """ Get Vgg16 neural network with batch normalization. Args: - batch_size (int): Batch size. Default: 1. num_classes (int): Class numbers. Default: 1000. Returns: Cell, cell instance of Vgg16 neural network with batch normalization. 
Examples: - >>> vgg16(batch_size=1, num_classes=1000) + >>> vgg16(num_classes=1000) """ - net = Vgg(cfg['16'], num_classes=num_classes, batch_norm=True, batch_size=batch_size) + net = Vgg(cfg['16'], num_classes=num_classes, batch_norm=True) return net diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py index a694489f5a..9cea668471 100755 --- a/mindspore/nn/cell.py +++ b/mindspore/nn/cell.py @@ -22,7 +22,7 @@ from ..common import dtype as mstype from ..common.api import _executor from .._checkparam import _check_str_by_regular from ..common.parameter import Parameter, ParameterTuple -from .._c_expression import init_ge +from .._c_expression import init_backend from ..ops.primitive import Primitive from ..parallel._tensor import _load_tensor_by_layout from ..parallel._utils import _get_parallel_mode @@ -56,7 +56,7 @@ class Cell: >>> def construct(self, x): >>> return self.relu(x) """ - def __init__(self, auto_prefix=True): + def __init__(self, auto_prefix=True, flags=None): self._params = OrderedDict() self._cells = OrderedDict() self.training = False @@ -66,7 +66,7 @@ class Cell: self._phase = 'train' self._parameter_layout_dict = {} self._create_time = int(time.time() * 1e9) - init_ge() + init_backend() # call gc to release GE session resources used by non-used cell objects gc.collect() self._construct_inputs_num = 0 @@ -74,6 +74,8 @@ class Cell: if _get_parallel_mode() in ["auto_parallel", "semi_auto_parallel"]: self._get_construct_inputs_number_and_name() self._parallel_inputs_run = None + if flags: + self.add_flags(**flags) @property def create_time(self): @@ -607,6 +609,11 @@ class Cell: cell.add_flags_recursive(**flags) return self + def get_flags(self): + if not hasattr(self, "_mindspore_flags"): + self._mindspore_flags = {} + return self._mindspore_flags + def to_float(self, dst_type): """ Add cast on all inputs of cell and child cells to run with certain float type. 
diff --git a/mindspore/nn/dynamic_lr.py b/mindspore/nn/dynamic_lr.py index 00e6a45901..6eeba415a7 100644 --- a/mindspore/nn/dynamic_lr.py +++ b/mindspore/nn/dynamic_lr.py @@ -32,6 +32,7 @@ def piecewise_constant_lr(milestone, learning_rates): Args: milestone (Union[list[int], tuple[int]]): A list of milestone. This list is a monotone increasing list. + Every element is a milestone step, and must be greater than 0. learning_rates (Union[list[float], tuple[float]]): A list of learning rates. Returns: @@ -40,7 +41,7 @@ def piecewise_constant_lr(milestone, learning_rates): Examples: >>> milestone = [2, 5, 10] >>> learning_rates = [0.1, 0.05, 0.01] - >>> lr = piecewise_constant_lr(milestone, learning_rates) + >>> piecewise_constant_lr(milestone, learning_rates) [0.1, 0.1, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01] """ validator.check_value_type('milestone', milestone, (tuple, list), None) @@ -100,7 +101,7 @@ def exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, >>> total_step = 6 >>> step_per_epoch = 2 >>> decay_epoch = 1 - >>> lr = exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch) + >>> exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch) [0.1, 0.1, 0.09000000000000001, 0.09000000000000001, 0.08100000000000002, 0.08100000000000002] """ _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair) @@ -142,7 +143,7 @@ def natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, >>> total_step = 6 >>> step_per_epoch = 2 >>> decay_epoch = 2 - >>> lr = natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True) + >>> natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True) [0.1, 0.1, 0.1, 0.1, 0.016529888822158657, 0.016529888822158657] """ _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair) @@ -185,7 +186,7 @@ 
def inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, deca >>> total_step = 6 >>> step_per_epoch = 1 >>> decay_epoch = 1 - >>> lr = inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True) + >>> inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True) [0.1, 0.06666666666666667, 0.05, 0.04, 0.03333333333333333, 0.028571428571428574] """ _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair) @@ -227,7 +228,7 @@ def cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch): >>> total_step = 6 >>> step_per_epoch = 2 >>> decay_epoch = 2 - >>> lr = cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch) + >>> cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch) [0.1, 0.1, 0.05500000000000001, 0.05500000000000001, 0.01, 0.01] """ validator.check_float_positive('min_lr', min_lr, None) @@ -282,7 +283,7 @@ def polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_e >>> step_per_epoch = 2 >>> decay_epoch = 2 >>> power = 0.5 - >>> lr = polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power) + >>> polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power) [0.1, 0.1, 0.07363961030678928, 0.07363961030678928, 0.01, 0.01] """ validator.check_float_positive('learning_rate', learning_rate, None) diff --git a/mindspore/nn/layer/basic.py b/mindspore/nn/layer/basic.py index 2449eea9b4..9c8de85a68 100644 --- a/mindspore/nn/layer/basic.py +++ b/mindspore/nn/layer/basic.py @@ -21,8 +21,10 @@ from mindspore._checkparam import check_int_positive, check_bool from mindspore.ops import operations as P from mindspore.ops import functional as F from mindspore.ops.functional import identity +from mindspore.ops.operations import _inner_ops as inner from mindspore.common.parameter import Parameter from 
mindspore._extends import cell_attr_register +from mindspore.common.api import ms_function from ..cell import Cell from .activation import get_activation from ..._checkparam import Validator as validator @@ -261,7 +263,9 @@ class ClipByNorm(Cell): self.expand_dims = P.ExpandDims() self.dtype = P.DType() + @ms_function def construct(self, x, clip_norm): + """add ms_function decorator for pynative mode""" mul_x = F.square(x) l2sum = self.cast(self.reduce_sum(mul_x, self.axis), mstype.float32) cond = self.greater_(l2sum, self.zero) @@ -477,7 +481,7 @@ class Unfold(Cell): """ def __init__(self, ksizes, strides, rates, padding="valid"): super(Unfold, self).__init__() - self.extract_image_patches = P.ExtractImagePatches(ksizes, strides, rates, padding) + self.extract_image_patches = inner.ExtractImagePatches(ksizes, strides, rates, padding) self.transpose = P.Transpose() self.format_NHWC = (0, 2, 3, 1) self.format_NCHW = (0, 3, 1, 2) diff --git a/mindspore/nn/layer/image.py b/mindspore/nn/layer/image.py index b46ac4cd6e..f06c5fd30a 100644 --- a/mindspore/nn/layer/image.py +++ b/mindspore/nn/layer/image.py @@ -58,6 +58,7 @@ class ImageGradients(Cell): super(ImageGradients, self).__init__() def construct(self, images): + _check_input_4d(F.shape(images), "images", self.cls_name) batch_size, depth, height, width = P.Shape()(images) dy = images[:, :, 1:, :] - images[:, :, :height - 1, :] dy_last = P.Fill()(P.DType()(images), (batch_size, depth, 1, width), 0) @@ -95,6 +96,11 @@ def _gauss_kernel_helper(filter_size): g = Tensor(g) return filter_size, g +@constexpr +def _check_input_4d(input_shape, param_name, func_name): + if len(input_shape) != 4: + raise ValueError(f"{func_name} {param_name} should be 4d, but got shape {input_shape}") + return True class SSIM(Cell): r""" @@ -146,6 +152,9 @@ class SSIM(Cell): self.mean = P.DepthwiseConv2dNative(channel_multiplier=1, kernel_size=filter_size) def construct(self, img1, img2): + _check_input_4d(F.shape(img1), "img1", 
self.cls_name) + _check_input_4d(F.shape(img2), "img2", self.cls_name) + P.SameTypeShape()(img1, img2) max_val = _convert_img_dtype_to_float32(self.max_val, self.max_val) img1 = _convert_img_dtype_to_float32(img1, self.max_val) img2 = _convert_img_dtype_to_float32(img2, self.max_val) @@ -236,6 +245,9 @@ class PSNR(Cell): self.max_val = max_val def construct(self, img1, img2): + _check_input_4d(F.shape(img1), "img1", self.cls_name) + _check_input_4d(F.shape(img2), "img2", self.cls_name) + P.SameTypeShape()(img1, img2) max_val = _convert_img_dtype_to_float32(self.max_val, self.max_val) img1 = _convert_img_dtype_to_float32(img1, self.max_val) img2 = _convert_img_dtype_to_float32(img2, self.max_val) diff --git a/mindspore/nn/layer/lstm.py b/mindspore/nn/layer/lstm.py index 84c156a1c2..bdc49739ac 100755 --- a/mindspore/nn/layer/lstm.py +++ b/mindspore/nn/layer/lstm.py @@ -149,7 +149,7 @@ class LSTM(Cell): if self.batch_first: x = self.transpose1(x, (1, 0, 2)) h0, c0 = hx - output, hn, cn, _ = self.lstm(x, h0, c0, self.weight) + output, hn, cn, _, _ = self.lstm(x, h0, c0, self.weight) if self.batch_first: output = self.transpose2(output, (1, 0, 2)) return (output, (hn, cn)) diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py index ddd1bab1bf..fd9279cf04 100644 --- a/mindspore/nn/layer/normalization.py +++ b/mindspore/nn/layer/normalization.py @@ -62,6 +62,7 @@ class _BatchNorm(Cell): self.beta = Parameter(initializer( beta_init, num_features), name="beta", requires_grad=affine) self.group = check_int_positive(device_num_each_group) + self.is_global = False if self.group != 1: self.rank_id = get_rank() self.rank_size = get_group_size() @@ -74,17 +75,24 @@ class _BatchNorm(Cell): management.create_group('group' + str(i), self.rank_list[i]) self.all_reduce = P.AllReduce(P.ReduceOp.SUM, 'group' + str(i)).add_prim_attr('fusion', 1) self.shape = P.Shape() - self.reduce_mean = P.ReduceMean() + self.reduce_mean = P.ReduceMean(keep_dims=True) 
self.square = P.Square() + self.sqrt = P.Sqrt() + self.cast = P.Cast() + self.dtype = P.DType() + self.reshape = P.Reshape() + self.is_ascend = context.get_context("device_target") == "Ascend" if context.get_context("enable_ge"): self.is_ge_backend = True self.momentum = Tensor(1.0 - momentum, mstype.float32) - self.bn_train = P.BatchNorm(is_training=True, - epsilon=self.eps) else: self.is_ge_backend = False self.momentum = 1.0 - momentum + if self.is_ge_backend or self.is_ascend: + self.bn_train = P.BatchNorm(is_training=True, + epsilon=self.eps) + else: self.bn_train = P.FusedBatchNorm(mode=1, epsilon=self.eps, momentum=self.momentum) @@ -112,43 +120,47 @@ class _BatchNorm(Cell): group_list = [list(i) for i in world_rank_list] return group_list + + + def _global_sync(self, x, axes, re_shape): + """calculate global batch normalization output""" + x_mean = self.reduce_mean(x, axes) + x_mean_square = self.reduce_mean(self.square(x), axes) + global_batch_mean = self.all_reduce(x_mean) / self.group + global_batch_mean_square = self.all_reduce(x_mean_square) / self.group + global_mean = global_batch_mean + global_var = global_batch_mean_square - self.square(global_mean) + var_sqrt = self.sqrt(global_var + self.eps) + mean_first = (x - global_mean) / var_sqrt + y = mean_first * self.reshape(self.gamma, re_shape) + self.reshape(self.beta, re_shape) + + mean_sub = self.sub_mean(self.reshape(self.moving_mean, re_shape), global_mean) + tmp_mean = self.mul_mean(mean_sub, self.cast(self.momentum, self.dtype(mean_sub))) + mean_sub2 = self.sub_var(self.reshape(self.moving_mean, re_shape), global_var) + tmp_variance = self.mul_var(mean_sub2, self.cast(self.momentum, self.dtype(mean_sub2))) + y = F.depend(y, self.assign_sub_mean(self.reshape(self.moving_mean, re_shape), tmp_mean)) + y = F.depend(y, self.assign_sub_var(self.reshape(self.moving_variance, re_shape), tmp_variance)) + return y + def construct(self, x): if self.training and self.use_batch_statistics: - if 
self.is_ge_backend: - if self.is_global: - x_mean = self.reduce_mean(x) - x_mean_square = self.reduce_mean(self.square(x)) - global_batch_mean = self.all_reduce(x_mean) / self.group - global_batch_mean_square = self.all_reduce(x_mean_square) / self.group - global_mean = global_batch_mean - global_var = global_batch_mean_square - self.square(global_batch_mean) - y, batch_mean, batch_var, _, _ = \ - self.bn_train(x, - self.gamma, - self.beta, - None, - None) - - mean_sub = self.sub_mean(self.moving_mean, global_mean) - temp_mean = self.mul_mean(mean_sub, self.momentum) - mean_sub2 = self.sub_var(self.moving_variance, global_var) - temp_variance = self.mul_var(mean_sub2, self.momentum) - y = F.depend(y, self.assign_sub_mean(self.moving_mean, temp_mean)) - y = F.depend(y, self.assign_sub_var(self.moving_variance, temp_variance)) - else: - y, batch_mean, batch_var, _, _ = \ - self.bn_train(x, - self.gamma, - self.beta, - None, - None) - - mean_sub = self.sub_mean(self.moving_mean, batch_mean) - temp_mean = self.mul_mean(mean_sub, self.momentum) - mean_sub2 = self.sub_var(self.moving_variance, batch_var) - temp_variance = self.mul_var(mean_sub2, self.momentum) - y = F.depend(y, self.assign_sub_mean(self.moving_mean, temp_mean)) - y = F.depend(y, self.assign_sub_var(self.moving_variance, temp_variance)) + if self.is_ge_backend and self.is_global: + axes, re_shape = _shape_infer(F.shape(x), self.num_features) + y = self._global_sync(x, axes, re_shape) + elif self.is_ge_backend or self.is_ascend: + y, batch_mean, batch_var, _, _ = \ + self.bn_train(x, + self.gamma, + self.beta, + None, + None) + + mean_sub = self.sub_mean(self.moving_mean, batch_mean) + temp_mean = self.mul_mean(mean_sub, self.momentum) + mean_sub2 = self.sub_var(self.moving_variance, batch_var) + temp_variance = self.mul_var(mean_sub2, self.momentum) + y = F.depend(y, self.assign_sub_mean(self.moving_mean, temp_mean)) + y = F.depend(y, self.assign_sub_var(self.moving_variance, temp_variance)) else: y = 
self.bn_train(x, self.gamma, @@ -172,6 +184,17 @@ def _channel_check(channel, num_channel): if channel != num_channel: raise ValueError("the input channel is not equal with num_channel") +@constexpr +def _shape_infer(x_shape, num_feature): + """global batch normalization shape and axes infer""" + if len(x_shape) == 4: + axes = (0, 2, 3) + re_shape = (1, num_feature, 1, 1) + else: + axes = (0,) + re_shape = (1, num_feature) + return axes, re_shape + class BatchNorm1d(_BatchNorm): r""" Batch normalization layer over a 2D input. @@ -329,7 +352,7 @@ class GlobalBatchNorm(_BatchNorm): Args: num_features (int): `C` from an expected input of size (N, C, H, W). - device_num_each_group (int): The number of device in each group. + device_num_each_group (int): The number of devices in each group. eps (float): A value added to the denominator for numerical stability. Default: 1e-5. momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. Default: 0.9. @@ -402,9 +425,8 @@ class LayerNorm(Cell): y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta Args: - normalized_shape (Union(tuple[int], list[int]): The normalization is performed over axes - `begin_norm_axis ... R - 1` and centering and scaling parameters are calculated over - `begin_params_axis ... R - 1`. + normalized_shape (Union(tuple[int], list[int]): The normalization is performed over axis + `begin_norm_axis ... R - 1`. begin_norm_axis (int): It first normalization dimension: normalization will be performed along dimensions `begin_norm_axis: rank(inputs)`, the value should be in [-1, rank(input)). Default: -1. begin_params_axis (int): The first parameter(beta, gamma)dimension: scale and centering parameters @@ -475,6 +497,12 @@ class GroupNorm(Cell): num_channels (int): The number of channels per group. eps (float): A value added to the denominator for numerical stability. Default: 1e-5. 
affine (bool): A bool value, this layer will has learnable affine parameters when set to true. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform', + 'he_uniform', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform', + 'he_uniform', etc. Default: 'zeros'. Inputs: - **input_x** (Tensor) - The input feature with shape [N, C, H, W]. diff --git a/mindspore/nn/layer/pooling.py b/mindspore/nn/layer/pooling.py index 6cf06de029..0569a8ada6 100644 --- a/mindspore/nn/layer/pooling.py +++ b/mindspore/nn/layer/pooling.py @@ -19,7 +19,6 @@ from mindspore._checkparam import Validator as validator from ... import context from ..cell import Cell from ..._checkparam import Rel -from ..._checkparam import ParamValidator class _PoolNd(Cell): @@ -265,11 +264,11 @@ class AvgPool1d(_PoolNd): stride=1, pad_mode="valid"): super(AvgPool1d, self).__init__(kernel_size, stride, pad_mode) - ParamValidator.check_type('kernel_size', kernel_size, [int,]) - ParamValidator.check_type('stride', stride, [int,]) - self.pad_mode = ParamValidator.check_string('pad_mode', pad_mode.upper(), ['VALID', 'SAME']) - ParamValidator.check_integer("kernel_size", kernel_size, 1, Rel.GE) - ParamValidator.check_integer("stride", stride, 1, Rel.GE) + validator.check_value_type('kernel_size', kernel_size, [int], self.cls_name) + validator.check_value_type('stride', stride, [int], self.cls_name) + self.pad_mode = validator.check_string('pad_mode', pad_mode.upper(), ['VALID', 'SAME'], self.cls_name) + validator.check_integer("kernel_size", kernel_size, 1, Rel.GE, self.cls_name) + validator.check_integer("stride", stride, 1, Rel.GE, self.cls_name) self.kernel_size = (1, kernel_size) self.stride = (1, 
stride) self.avg_pool = P.AvgPool(ksize=self.kernel_size, diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py index 4e88c3ef93..1a386556d9 100755 --- a/mindspore/nn/optim/adam.py +++ b/mindspore/nn/optim/adam.py @@ -31,8 +31,8 @@ _learning_rate_update_func = ['linear', 'cos', 'sin'] adam_opt = C.MultitypeFuncGraph("adam_opt") -@adam_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor") -def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, param, m, v, gradient): +@adam_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool") +def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, param, m, v, gradient, decay_flag): """ Update parameters. @@ -57,20 +57,22 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, param, m, v, grad op_reshape = P.Reshape() op_shape = P.Shape() - param = op_cast(param, mstype.float32) - m = op_cast(m, mstype.float32) - v = op_cast(v, mstype.float32) - gradient = op_cast(gradient, mstype.float32) + param_fp32 = op_cast(param, mstype.float32) + m_fp32 = op_cast(m, mstype.float32) + v_fp32 = op_cast(v, mstype.float32) + gradient_fp32 = op_cast(gradient, mstype.float32) - next_m = op_mul(beta1, m) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient) + next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32) - next_v = op_mul(beta2, v) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2, op_square(gradient)) + next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) + - beta2, op_square(gradient_fp32)) update = next_m / (op_sqrt(next_v) + eps) - update = update + op_mul(weight_decay_tensor, param) + if decay_flag: + update = update + op_mul(weight_decay_tensor, param_fp32) update_with_lr = op_mul(lr, update) - next_param = param - op_reshape(update_with_lr, 
op_shape(param)) + next_param = param_fp32 - op_reshape(update_with_lr, op_shape(param_fp32)) next_v = F.depend(next_v, F.assign(param, next_param)) next_v = F.depend(next_v, F.assign(m, next_m)) @@ -90,6 +92,17 @@ def _check_param_value(beta1, beta2, eps, weight_decay, prim_name): validator.check_number_range("weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, prim_name) +def _check_learning_rate_value(learning_rate, end_learning_rate, decay_steps, power, prim_name): + """Check the type of inputs.""" + validator.check_float_positive('learning_rate', learning_rate, prim_name) + validator.check_float_legal_value('learning_rate', learning_rate, prim_name) + validator.check_float_positive('end_learning_rate', end_learning_rate, prim_name) + validator.check_float_legal_value('end_learning_rate', end_learning_rate, prim_name) + validator.check_float_positive('power', power, prim_name) + validator.check_float_legal_value('power', power, prim_name) + validator.check_integer('decay_steps', decay_steps, 0, Rel.GT, prim_name) + + @adam_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Number", "Tensor", "Tensor", "Tensor", "Tensor") def _run_opt_with_one_number(opt, lr, beta1_power, beta2_power, beta1, beta2, eps, gradient, params, moment1, @@ -126,10 +139,19 @@ class Adam(Optimizer): Args: params (list[Parameter]): A list of parameter, which will be updated. The element in `params` should be class mindspore.Parameter. - learning_rate (float): The Learning rate. - beta1 (float): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0). - beta2 (float): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0). - eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0. + learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. 
When the learning_rate is + Iterable or a Tensor and the dims of the Tensor is 1, + use dynamic learning rate, then the i-th step will + take the i-th value as the learning rate. + When the learning_rate is float or learning_rate is a Tensor + but the dims of the Tensor is 0, use fixed learning rate. + Other cases are not supported. Default: 1e-3. + beta1 (float): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0). Default: + 0.9. + beta2 (float): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0). Default: + 0.999. + eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0. Default: + 1e-8. use_locking (bool): Whether to enable a lock to protect updating variable tensors. If True, updating of the var, m, and v tensors will be protected by a lock. If False, the result is unpredictable. Default: False. @@ -137,8 +159,10 @@ class Adam(Optimizer): If True, updates the gradients using NAG. If False, updates the gradients without using NAG. Default: False. weight_decay (float): Weight decay (L2 penalty). Default: 0.0. - loss_scale (float): A floating point value for the loss scale. Default: 1.0. - Should be equal to or greater than 1. + loss_scale (float): A floating point value for the loss scale. Should be equal to or greater than 1. Default: + 1.0. + decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default: + lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name. Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. @@ -206,7 +230,13 @@ class AdamWeightDecay(Optimizer): Args: params (list[Parameter]): A list of parameter, which will be updated. The element in `params` should be class mindspore.Parameter. - learning_rate (float): A floating point value for the learning rate. Default: 1e-3. 
+ learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is + Iterable or a Tensor and the dims of the Tensor is 1, + use dynamic learning rate, then the i-th step will + take the i-th value as the learning rate. + When the learning_rate is float or learning_rate is a Tensor + but the dims of the Tensor is 0, use fixed learning rate. + Other cases are not supported. Default: 1e-3. beta1 (float): The exponential decay rate for the 1st moment estimates. Default: 0.9. Should be in range (0.0, 1.0). beta2 (float): The exponential decay rate for the 2nd moment estimates. Default: 0.999. @@ -214,6 +244,8 @@ class AdamWeightDecay(Optimizer): eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6. Should be greater than 0. weight_decay (float): Weight decay (L2 penalty). Default: 0.0. + decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default: + lambda x: 'beta' not in x.name and 'gamma' not in x.name. Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. 
@@ -227,10 +259,10 @@ class AdamWeightDecay(Optimizer): >>> optim = nn.AdamWeightDecay(params=net.trainable_params()) >>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None) """ - def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0): + def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0, + decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name): super(AdamWeightDecay, self).__init__(learning_rate, params) _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name) - self.lr = Tensor(np.array([learning_rate]).astype(np.float32)) self.beta1 = Tensor(np.array([beta1]).astype(np.float32)) self.beta2 = Tensor(np.array([beta2]).astype(np.float32)) self.eps = Tensor(np.array([eps]).astype(np.float32)) @@ -239,13 +271,15 @@ class AdamWeightDecay(Optimizer): self.params = self.parameters self.moments1 = self.params.clone(prefix="adam_m", init='zeros') self.moments2 = self.params.clone(prefix="adam_v", init='zeros') + self.decay_flag = tuple(decay_filter(x) for x in self.params) self.hyper_map = C.HyperMap() def construct(self, gradients): - updated_velocity = self.hyper_map(F.partial(adam_opt, self.beta1, self.beta2, self.eps, self.lr, + lr = self.get_lr() + updated_velocity = self.hyper_map(F.partial(adam_opt, self.beta1, self.beta2, self.eps, lr, self.weight_decay_tensor), - self.params, self.moments1, self.moments2, gradients) + self.params, self.moments1, self.moments2, gradients, self.decay_flag) return updated_velocity @@ -268,6 +302,8 @@ class AdamWeightDecayDynamicLR(Optimizer): eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6. Should be greater than 0. weight_decay (float): Weight decay (L2 penalty). Default: 0.0. + decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default: + lambda x: 'beta' not in x.name and 'gamma' not in x.name. 
Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. @@ -290,10 +326,11 @@ class AdamWeightDecayDynamicLR(Optimizer): beta1=0.9, beta2=0.999, eps=1e-6, - weight_decay=0.0): + weight_decay=0.0, + decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name): super(AdamWeightDecayDynamicLR, self).__init__(learning_rate, params) _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name) - + _check_learning_rate_value(learning_rate, end_learning_rate, decay_steps, power, self.cls_name) # turn them to scalar when me support scalar/tensor mix operations self.global_step = Parameter(initializer(0, [1]), name="global_step") self.decay_steps = Tensor(np.array([decay_steps]).astype(np.float32)) @@ -307,7 +344,7 @@ class AdamWeightDecayDynamicLR(Optimizer): self.params = self.parameters self.moments1 = self.params.clone(prefix="adam_m", init='zeros') self.moments2 = self.params.clone(prefix="adam_v", init='zeros') - + self.decay_flag = tuple(decay_filter(x) for x in self.params) self.hyper_map = C.HyperMap() self.min = P.Minimum() self.pow = P.Pow() @@ -319,7 +356,7 @@ class AdamWeightDecayDynamicLR(Optimizer): lr = self.diff_learning_rate * self.pow(self.one - p, self.power) + self.end_learning_rate updated_velocity = self.hyper_map(F.partial(adam_opt, self.beta1, self.beta2, self.eps, lr, self.weight_decay_tensor), - self.params, self.moments1, self.moments2, gradients) + self.params, self.moments1, self.moments2, gradients, self.decay_flag) added_global_step = self.global_step + self.one F.control_depend(lr, added_global_step) diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py index 2bc329f42d..ccc1b3f10b 100644 --- a/mindspore/nn/optim/ftrl.py +++ b/mindspore/nn/optim/ftrl.py @@ -14,8 +14,6 @@ # ============================================================================ """FTRL""" from mindspore.ops import functional as F, composite as C, operations as P -from 
mindspore.common.initializer import initializer -from mindspore.common.parameter import Parameter from mindspore.common import Tensor import mindspore.common.dtype as mstype from mindspore._checkparam import Validator as validator @@ -23,6 +21,8 @@ from mindspore._checkparam import Rel from .optimizer import Optimizer, apply_decay, grad_scale ftrl_opt = C.MultitypeFuncGraph("ftrl_opt") + + @ftrl_opt.register("Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor") def _tensor_run_opt(opt, learning_rate, l1, l2, lr_power, linear, gradient, weight, moment): """Apply ftrl optimizer to the weight parameter.""" @@ -30,8 +30,10 @@ def _tensor_run_opt(opt, learning_rate, l1, l2, lr_power, linear, gradient, weig success = F.depend(success, opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power)) return success + def _check_param(initial_accum, learning_rate, lr_power, l1, l2, use_locking, loss_scale=1.0, weight_decay=0.0, prim_name=None): + """Check param.""" validator.check_value_type("initial_accum", initial_accum, [float], prim_name) validator.check_number("initial_accum", initial_accum, 0.0, Rel.GE, prim_name) @@ -104,7 +106,7 @@ class FTRL(Optimizer): self.lr_power = lr_power self.reciprocal_scale = 1.0 / loss_scale self.weight_decay = weight_decay - self.decay_tf = tuple((lambda:True)() for x in self.parameters) + self.decay_tf = tuple((lambda: True)() for x in self.parameters) self.hyper_map = C.HyperMap() self.opt = P.ApplyFtrl(use_locking=use_locking) self.one = Tensor(1, mstype.int32) @@ -118,5 +120,6 @@ class FTRL(Optimizer): if self.reciprocal_scale != 1.0: grads = self.hyper_map(F.partial(grad_scale, self.reciprocal_scale), grads) lr = self.learning_rate - success = self.hyper_map(F.partial(ftrl_opt, self.opt, lr, self.l1, self.l2, self.lr_power), linear, grads, params, moments) + success = self.hyper_map(F.partial(ftrl_opt, self.opt, lr, self.l1, self.l2, self.lr_power), + linear, grads, params, moments) 
return success diff --git a/mindspore/nn/optim/lamb.py b/mindspore/nn/optim/lamb.py index afcbf8cda4..e026b1c560 100755 --- a/mindspore/nn/optim/lamb.py +++ b/mindspore/nn/optim/lamb.py @@ -67,23 +67,23 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, para op_fill = P.Fill() op_dtype = P.DType() - param = op_cast(param, mstype.float32) - m = op_cast(m, mstype.float32) - v = op_cast(v, mstype.float32) - gradient = op_cast(gradient, mstype.float32) + param_fp32 = op_cast(param, mstype.float32) + m_fp32 = op_cast(m, mstype.float32) + v_fp32 = op_cast(v, mstype.float32) + gradient_fp32 = op_cast(gradient, mstype.float32) - next_m = op_mul(beta1, m) + op_mul(op_cast(num_one, mstype.float32) - beta1, gradient) + next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(num_one, mstype.float32) - beta1, gradient_fp32) - next_v = op_mul(beta2, v) + op_mul(op_cast(num_one, mstype.float32) - beta2, op_square(gradient)) + next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(num_one, mstype.float32) - beta2, op_square(gradient_fp32)) next_mm = next_m / (op_cast(num_one, mstype.float32) - op_pow(beta1, op_cast(global_step + num_one, mstype.float32))) next_vv = next_v / (op_cast(num_one, mstype.float32) - op_pow(beta2, op_cast(global_step + num_one, mstype.float32))) - w_norm = op_norm(param) - g_norm = op_norm(gradient) + w_norm = op_norm(param_fp32) + g_norm = op_norm(gradient_fp32) - g_norm_hat = op_norm(op_mul(next_mm, op_rsqrt(next_vv + eps)) + weight_decay_tensor * param) + g_norm_hat = op_norm(op_mul(next_mm, op_rsqrt(next_vv + eps)) + weight_decay_tensor * param_fp32) zeros = F.zeros_like_tensor(w_norm) ones = op_fill(op_dtype(w_norm), op_shape(w_norm), 1.0) trust_ratio = op_select( @@ -95,11 +95,11 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, para update = next_mm / (op_sqrt(next_vv) + eps) if decay_flag: - update = update + op_mul(weight_decay_tensor, param) + update = update + op_mul(weight_decay_tensor, param_fp32) 
update_with_lr = op_mul(op_mul(trust_ratio, lr), update) - next_param = param - op_reshape(update_with_lr, op_shape(param)) + next_param = param_fp32 - op_reshape(update_with_lr, op_shape(param_fp32)) next_v = F.depend(next_v, F.assign(param, next_param)) next_v = F.depend(next_v, F.assign(m, next_m)) @@ -110,18 +110,20 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, para def _check_param_value(decay_steps, warmup_steps, start_learning_rate, end_learning_rate, power, beta1, beta2, eps, weight_decay, prim_name): - """Check the type of inputs.""" - validator.check_value_type("decay_steps", decay_steps, [int], prim_name) - validator.check_value_type("warmup_steps", warmup_steps, [int], prim_name) - validator.check_value_type("start_learning_rate", start_learning_rate, [float], prim_name) - validator.check_value_type("end_learning_rate", end_learning_rate, [float], prim_name) - validator.check_value_type("power", power, [float], prim_name) + _ = warmup_steps + validator.check_float_positive('start_learning_rate', start_learning_rate, prim_name) + validator.check_float_legal_value('start_learning_rate', start_learning_rate, prim_name) + validator.check_float_positive('end_learning_rate', end_learning_rate, prim_name) + validator.check_float_legal_value('end_learning_rate', end_learning_rate, prim_name) + validator.check_float_positive('power', power, prim_name) + validator.check_float_legal_value('power', power, prim_name) + validator.check_integer('decay_steps', decay_steps, 0, Rel.GT, prim_name) + validator.check_integer('warmup_steps', warmup_steps, 0, Rel.GE, prim_name) validator.check_value_type("beta1", beta1, [float], prim_name) validator.check_value_type("beta2", beta2, [float], prim_name) validator.check_value_type("eps", eps, [float], prim_name) validator.check_value_type("weight_dacay", weight_decay, [float], prim_name) - validator.check_number_range("decay_steps", decay_steps, 1, float("inf"), Rel.INC_LEFT, prim_name) 
validator.check_number_range("beta1", beta1, 0.0, 1.0, Rel.INC_NEITHER, prim_name) validator.check_number_range("beta2", beta2, 0.0, 1.0, Rel.INC_NEITHER, prim_name) validator.check_number_range("eps", eps, 0.0, float("inf"), Rel.INC_NEITHER, prim_name) diff --git a/mindspore/nn/optim/momentum.py b/mindspore/nn/optim/momentum.py index c69e226df9..67de590c5f 100755 --- a/mindspore/nn/optim/momentum.py +++ b/mindspore/nn/optim/momentum.py @@ -56,7 +56,7 @@ class Momentum(Optimizer): - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. Outputs: - Tensor[bool], the value is True. + tuple[bool], all elements are True. Raises: ValueError: If the momentum is less than 0.0. diff --git a/mindspore/nn/optim/optimizer.py b/mindspore/nn/optim/optimizer.py index bab539461e..34abc2b1c2 100755 --- a/mindspore/nn/optim/optimizer.py +++ b/mindspore/nn/optim/optimizer.py @@ -46,8 +46,8 @@ class Optimizer(Cell): learning_rate (float): A floating point value for the learning rate. Should be greater than 0. parameters (list): A list of parameter, which will be updated. The element in `parameters` should be class mindspore.Parameter. - weight_decay (float): A floating point value for the weight decay. If the type of `weight_decay` - input is int, it will be convertd to float. Default: 0.0. + weight_decay (float): A floating point value for the weight decay. It should be equal to or greater than 0. + If the type of `weight_decay` input is int, it will be convertd to float. Default: 0.0. loss_scale (float): A floating point value for the loss scale. It should be greater than 0. If the type of `loss_scale` input is int, it will be convertd to float. Default: 1.0. decay_filter (Function): A function to determine whether to apply weight decay on parameters. 
Default: lambda @@ -87,21 +87,15 @@ class Optimizer(Cell): if isinstance(weight_decay, int): weight_decay = float(weight_decay) - - validator.check_float_legal_value('weight_decay', weight_decay, None) + validator.check_value_type("weight_decay", weight_decay, [float], None) + validator.check_number_range("weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, None) if isinstance(loss_scale, int): loss_scale = float(loss_scale) + validator.check_value_type("loss_scale", loss_scale, [float], None) + validator.check_number_range("loss_scale", loss_scale, 0.0, float("inf"), Rel.INC_NEITHER, None) - validator.check_float_legal_value('loss_scale', loss_scale, None) - - if loss_scale <= 0.0: - raise ValueError("Loss scale should be greater than 0, but got {}".format(loss_scale)) self.loss_scale = loss_scale - - if weight_decay < 0.0: - raise ValueError("Weight decay should be equal or greater than 0, but got {}".format(weight_decay)) - self.learning_rate = Parameter(learning_rate, name="learning_rate") self.parameters = ParameterTuple(parameters) self.reciprocal_scale = 1.0 / loss_scale diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py index a8f118b709..b1271587b4 100644 --- a/mindspore/nn/optim/rmsprop.py +++ b/mindspore/nn/optim/rmsprop.py @@ -15,6 +15,7 @@ """rmsprop""" from mindspore.ops import functional as F, composite as C, operations as P from mindspore._checkparam import Validator as validator +from mindspore._checkparam import Rel from .optimizer import Optimizer rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt") @@ -91,14 +92,16 @@ class RMSProp(Optimizer): take the i-th value as the learning rate. When the learning_rate is float or learning_rate is a Tensor but the dims of the Tensor is 0, use fixed learning rate. - Other cases are not supported. - decay (float): Decay rate. - momentum (float): Hyperparameter of type float, means momentum for the moving average. 
- epsilon (float): Term added to the denominator to improve numerical stability. Should be greater than 0. + Other cases are not supported. Default: 0.1. + decay (float): Decay rate. Should be equal to or greater than 0. Default: 0.9. + momentum (float): Hyperparameter of type float, means momentum for the moving average. Should be equal to or + greater than 0. Default: 0.0. + epsilon (float): Term added to the denominator to improve numerical stability. Should be greater than + 0. Default: 1e-10. use_locking (bool): Enable a lock to protect the update of variable and accumlation tensors. Default: False. - centered (bool): If True, gradients are normalized by the estimated variance of the gradient. Default: False - loss_scale (float): A floating point value for the loss scale. Default: 1.0. - weight_decay (float): Weight decay (L2 penalty). Default: 0.0. + centered (bool): If True, gradients are normalized by the estimated variance of the gradient. Default: False. + loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0. + weight_decay (float): Weight decay (L2 penalty). Should be equal to or greater than 0. Default: 0.0. decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default: lambda x: 'beta' not in x.name and 'gamma' not in x.name. 
@@ -118,17 +121,15 @@ class RMSProp(Optimizer): use_locking=False, centered=False, loss_scale=1.0, weight_decay=0.0, decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name): super(RMSProp, self).__init__(learning_rate, params, weight_decay, loss_scale, decay_filter) - - if isinstance(momentum, float) and momentum < 0.0: - raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum)) - - if decay < 0.0: - raise ValueError("decay should be at least 0.0, but got dampening {}".format(decay)) - self.decay = decay - self.epsilon = epsilon - + validator.check_value_type("decay", decay, [float], self.cls_name) + validator.check_number_range("decay", decay, 0.0, float("inf"), Rel.INC_LEFT, self.cls_name) + validator.check_value_type("momentum", momentum, [float], self.cls_name) + validator.check_number_range("momentum", momentum, 0.0, float("inf"), Rel.INC_LEFT, self.cls_name) + validator.check_value_type("epsilon", epsilon, [float], self.cls_name) + validator.check_number_range("epsilon", epsilon, 0.0, float("inf"), Rel.INC_NEITHER, self.cls_name) validator.check_value_type("use_locking", use_locking, [bool], self.cls_name) validator.check_value_type("centered", centered, [bool], self.cls_name) + self.centered = centered if centered: self.opt = P.ApplyCenteredRMSProp(use_locking) @@ -137,11 +138,10 @@ class RMSProp(Optimizer): self.opt = P.ApplyRMSProp(use_locking) self.momentum = momentum - self.ms = self.parameters.clone(prefix="mean_square", init='zeros') self.moment = self.parameters.clone(prefix="moment", init='zeros') self.hyper_map = C.HyperMap() - + self.epsilon = epsilon self.decay = decay def construct(self, gradients): diff --git a/mindspore/nn/optim/sgd.py b/mindspore/nn/optim/sgd.py index cda5aa904a..388fe5db47 100755 --- a/mindspore/nn/optim/sgd.py +++ b/mindspore/nn/optim/sgd.py @@ -42,13 +42,19 @@ class SGD(Optimizer): Args: params (list[Parameter]): A list of parameter, which will be updated. 
The element in `params` should be class mindspore.Parameter. - learning_rate (float): A floating point value for the learning rate. Default: 0.1. - momentum (float): A floating point value the momentum. Default: 0. - dampening (float): A floating point value of dampening for momentum. Default: 0. - weight_decay (float): Weight decay (L2 penalty). Default: 0. + learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is + Iterable or a Tensor and the dims of the Tensor is 1, + use dynamic learning rate, then the i-th step will + take the i-th value as the learning rate. + When the learning_rate is float or learning_rate is a Tensor + but the dims of the Tensor is 0, use fixed learning rate. + Other cases are not supported. Default: 0.1. + momentum (float): A floating point value the momentum. Default: 0.0. + dampening (float): A floating point value of dampening for momentum. Default: 0.0. + weight_decay (float): Weight decay (L2 penalty). Default: 0.0. nesterov (bool): Enables the Nesterov momentum. Default: False. loss_scale (float): A floating point value for the loss scale, which should be larger - than 0.0. Default: 1.0. + than 0.0. Default: 1.0. Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. 
diff --git a/mindspore/nn/wrap/cell_wrapper.py b/mindspore/nn/wrap/cell_wrapper.py index 641558921a..60718ec2b1 100644 --- a/mindspore/nn/wrap/cell_wrapper.py +++ b/mindspore/nn/wrap/cell_wrapper.py @@ -219,7 +219,7 @@ class DataWrapper(Cell): """ def __init__(self, network, dataset_types, dataset_shapes, queue_name): - super(DataWrapper, self).__init__(auto_prefix=False) + super(DataWrapper, self).__init__(auto_prefix=False, flags=network.get_flags()) self.get_next = P.GetNext(dataset_types, dataset_shapes, len(dataset_types), queue_name) self.network = network @@ -304,15 +304,19 @@ class WithEvalCell(Cell): >>> eval_net = nn.WithEvalCell(net, loss_fn) """ - def __init__(self, network, loss_fn): + def __init__(self, network, loss_fn, add_cast_fp32=False): super(WithEvalCell, self).__init__(auto_prefix=False) self._network = network self._loss_fn = loss_fn + self.add_cast_fp32 = add_cast_fp32 + def construct(self, data, label): outputs = self._network(data) - label = _mp_cast_helper(mstype.float32, label) - loss = self._loss_fn(F.cast(outputs, mstype.float32), label) + if self.add_cast_fp32: + label = _mp_cast_helper(mstype.float32, label) + outputs = F.cast(outputs, mstype.float32) + loss = self._loss_fn(outputs, label) return loss, outputs, label diff --git a/mindspore/nn/wrap/grad_reducer.py b/mindspore/nn/wrap/grad_reducer.py index 01346698ee..ee57297fe0 100644 --- a/mindspore/nn/wrap/grad_reducer.py +++ b/mindspore/nn/wrap/grad_reducer.py @@ -130,7 +130,7 @@ class DistributedGradReducer(Cell): >>> >>> device_id = int(os.environ["DEVICE_ID"]) >>> context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True, - >>> device_id=int(device_id), enable_hccl=True) + >>> device_id=int(device_id)) >>> init() >>> context.reset_auto_parallel_context() >>> context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL) diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py index 35d37b3ada..b9281a7456 
100644 --- a/mindspore/ops/_grad/grad_array_ops.py +++ b/mindspore/ops/_grad/grad_array_ops.py @@ -211,11 +211,11 @@ def get_bprop_slice(self): def bprop(x, begin, size, out, dout): dx = P.Pad(_slice_grad_pad(begin, size, shape_op(x)))(dout) - return (dx,) + return (dx, zeros_like(begin), zeros_like(size)) def bprop_gpu(x, begin, size, out, dout): dx = dx = G.SliceGrad()(dout, x, begin, size) - return (dx,) + return (dx, zeros_like(begin), zeros_like(size)) if context.get_context('device_target') == "GPU": return bprop_gpu @@ -262,7 +262,7 @@ def get_bprop_gather_v2(self): # Example: out_shape:(3,2,3) axis 2 -> (1,2,0) perm_2 = _generate_inverse_index(x_shp, axis) params_grad = transpose(params_grad, perm_2) - return params_grad, zeros_like(indices) + return params_grad, zeros_like(indices), zeros_like(axis) return bprop diff --git a/mindspore/ops/_grad/grad_comm_ops.py b/mindspore/ops/_grad/grad_comm_ops.py index 3a31c8aeec..97b8b3fdf3 100644 --- a/mindspore/ops/_grad/grad_comm_ops.py +++ b/mindspore/ops/_grad/grad_comm_ops.py @@ -67,11 +67,29 @@ def get_bprop_broad_cast(self): @bprop_getters.register(AllGather) def get_bprop_all_gather(self): """Generate bprop for AllGather""" - reduce_scatter_grad = ReduceScatter(ReduceOp.SUM, self.group) + all_gather_grad = ReduceScatter(ReduceOp.SUM, self.group) + if self.instance_name: + instance_name = "grad" + self.instance_name + all_gather_grad.set_prim_instance_name(instance_name) + + def bprop(x, out, dout): + dx = all_gather_grad(dout) + return (dx,) + + return bprop + + +@bprop_getters.register(ReduceScatter) +def get_bprop_reduce_scatter(self): + """Generate bprop for ReduceScatter""" + reduce_scatter_grad = AllGather(self.group) if self.instance_name: instance_name = "grad" + self.instance_name reduce_scatter_grad.set_prim_instance_name(instance_name) + if self.op != ReduceOp.SUM: + raise RuntimeError("The reducescatter bprop only support ReduceOp.SUM until now.") + def bprop(x, out, dout): dx = 
reduce_scatter_grad(dout) return (dx,) diff --git a/mindspore/ops/_grad/grad_math_ops.py b/mindspore/ops/_grad/grad_math_ops.py index c334050218..6e253b56e9 100755 --- a/mindspore/ops/_grad/grad_math_ops.py +++ b/mindspore/ops/_grad/grad_math_ops.py @@ -255,13 +255,10 @@ def get_bprop_floordiv(self): @bprop_getters.register(P.FloorMod) def get_bprop_floormod(self): """Grad definition for `FloorMod` operation.""" - div_op = P.FloorMod() - neg = P.Neg() - mul_op = P.Mul() def bprop(x, y, out, dout): - bc_x = div_op(dout, y) - bc_y = neg(mul_op(bc_x, out)) + bc_x = dout + bc_y = -dout * (x // y) return binop_grad_common(x, y, bc_x, bc_y) return bprop @@ -412,6 +409,7 @@ def get_bprop_reducesum(self): def get_bprop_cumsum(self): """Grad definition for `CumSum` operation.""" cumsum = P.CumSum(exclusive=self.exclusive, reverse=not self.reverse) + def bprop(x, axis, out, dout): return cumsum(dout, axis), zeros_like(axis) return bprop @@ -505,7 +503,7 @@ def get_bprop_reducemax(self): def bprop(x, axis, out, dout): dx = _min_or_max_grad(x, axis, out, dout) - return (dx,) + return (dx, zeros_like(axis)) return bprop @@ -528,7 +526,7 @@ def get_bprop_reducemin(self): def bprop(x, axis, out, dout): dx = _min_or_max_grad(x, axis, out, dout) - return (dx,) + return (dx, zeros_like(axis)) return bprop @@ -727,7 +725,7 @@ def get_bprop_acosh(self): input_grad = G.AcoshGrad() def bprop(x, out, dout): - dx = input_grad(x, dout) + dx = input_grad(out, dout) return (dx,) return bprop @@ -787,9 +785,10 @@ def get_bprop_atan2(self): """Generate bprop for Atan2""" square = P.Square() + def bprop(x, y, out, dout): tmp = dout / (square(x) + square(y)) - dx = tmp * y - dy = tmp * (-x) - return (dx, dy) + bc_dx = tmp * y + bc_dy = tmp * (-x) + return binop_grad_common(x, y, bc_dx, bc_dy) return bprop diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py index 6db059a7bb..153abc0fb6 100755 --- a/mindspore/ops/_grad/grad_nn_ops.py +++ 
b/mindspore/ops/_grad/grad_nn_ops.py @@ -18,6 +18,7 @@ from mindspore.common import dtype as mstype from .. import functional as F from .. import operations as P from ..operations import _grad_ops as G +from ..operations import _inner_ops as inner from ..composite.multitype_ops.zeros_like_impl import zeros_like from .grad_base import bprop_getters @@ -29,6 +30,7 @@ def get_bprop_bias_add(self): def bprop(x, w, out, dout): return dout, bias_grad(dout) + return bprop @@ -49,18 +51,19 @@ def get_bprop_conv2d(self): dx = input_grad(dout, w, get_shape(x)) dw = filter_grad(dout, x, get_shape(w)) return dx, dw + return bprop -@bprop_getters.register(P.ExtractImagePatches) +@bprop_getters.register(inner.ExtractImagePatches) def get_bprop_extract_image_patches(self): """Grad definition for `ExtractImagePatches` operation.""" get_shape = P.Shape() reshape = P.Reshape() - extract_image_patches = P.ExtractImagePatches(ksizes=self.ksizes, - strides=self.strides, - rates=self.rates, - padding=self.padding) + extract_image_patches = inner.ExtractImagePatches(ksizes=self.ksizes, + strides=self.strides, + rates=self.rates, + padding=self.padding) concat = P.Concat(axis=-1) expand_dims = P.ExpandDims() scatter_nd = P.ScatterNd() @@ -104,6 +107,7 @@ def get_bprop_extract_image_patches(self): dx = transpose(dx, (2, 0, 1, 3)) return (dx,) + return bprop @@ -124,6 +128,7 @@ def get_bprop_depthwise_conv2d_native(self): dx = input_grad(get_shape(x), w, dout) dw = filter_grad(x, get_shape(w), dout) return dx, dw + return bprop @@ -133,11 +138,12 @@ def get_bprop_max_pool_with_argmax(self): maxpool_grad = G.MaxPoolGradWithArgmax( ksize=self.ksize, strides=self.strides, - padding=self.padding,) + padding=self.padding) def bprop(x, out, dout): dx = maxpool_grad(x, dout[0], out[1]) return (dx,) + return bprop @@ -152,6 +158,7 @@ def get_bprop_max_pool_grad(self): def bprop(x, out, dout): dx = maxpool_grad(x, out, dout) return (dx,) + return bprop @@ -192,6 +199,7 @@ def 
get_bprop_dropout_gen_mask(self): def bprop(shape, keep_prob, out, dout): return (zeros_like(shape), zeros_like(keep_prob)) + return bprop @@ -202,6 +210,7 @@ def get_bprop_dropout_do_mask(self): def bprop(x, y, keep_prob, out, dout): return (do_mask(dout, y, keep_prob), zeros_like(y), zeros_like(keep_prob)) + return bprop @@ -213,6 +222,7 @@ def get_bprop_relu(self): def bprop(x, out, dout): dx = input_grad(dout, out) return (dx,) + return bprop @@ -224,6 +234,7 @@ def get_bprop_relu6(self): def bprop(x, out, dout): dx = input_grad(dout, x) return (dx,) + return bprop @@ -236,6 +247,7 @@ def get_bprop_relu_v2(self): mask = out[1] dx = input_grad(dout[0], mask) return (dx,) + return bprop @@ -247,6 +259,7 @@ def get_bprop_hswish(self): def bprop(x, out, dout): dx = input_grad(dout, x) return (dx,) + return bprop @@ -258,6 +271,7 @@ def get_bprop_hsigmoid(self): def bprop(x, out, dout): dx = input_grad(dout, x) return (dx,) + return bprop @@ -267,8 +281,9 @@ def get_bprop_elu(self): input_grad = G.EluGrad() def bprop(x, out, dout): - dx = input_grad(dout, x) + dx = input_grad(dout, out) return (dx,) + return bprop @@ -280,6 +295,7 @@ def get_bprop_sigmoid(self): def bprop(x, out, dout): dx = input_grad(out, dout) return (dx,) + return bprop @@ -294,6 +310,7 @@ def get_bprop_softmax(self): def bprop(x, out, dout): dx = mul(sub(dout, sum_func(mul(dout, out), axis)), out) return (dx,) + return bprop @@ -305,6 +322,7 @@ def get_bprop_log_softmax(self): def bprop(x, out, dout): dx = logsoftmax_grad(out, dout) return (dx,) + return bprop @@ -316,6 +334,7 @@ def get_bprop_tanh(self): def bprop(x, out, dout): dx = logsoftmax_grad(out, dout) return (dx,) + return bprop @@ -327,6 +346,7 @@ def get_bprop_gelu(self): def bprop(x, out, dout): dx = input_grad(dout, x, out) return (dx,) + return bprop @@ -343,6 +363,7 @@ def get_bprop_fused_batch_norm(self): dscale = out[1] dbias = out[2] return dx, dscale, dbias, zeros_like(mean), zeros_like(variance) + return bprop @@ -364,6 
+385,7 @@ def get_bprop_batch_norm(self): dscale = out[1] dbias = out[2] return dx, dscale, dbias, zeros_like(mean), zeros_like(variance) + return bprop @@ -375,6 +397,7 @@ def get_bprop_layer_norm(self): def bprop(x, gamma, beta, out, dout): dx, d_gamma, d_beta = layer_norm_grad(x, dout[0], out[2], out[1], gamma) return dx, d_gamma, d_beta + return bprop @@ -386,6 +409,7 @@ def get_bprop_l2normalize(self): def bprop(x, out, dout): dx = input_grad(x, out, dout) return (dx,) + return bprop @@ -398,6 +422,7 @@ def get_bprop_softmax_cross_entropy_with_logits(self): grad = out[1] grad = grad * expand(dout[0], -1) return grad, zeros_like(labels) + return bprop @@ -415,6 +440,7 @@ def get_bprop_sparse_softmax_cross_entropy_with_logits(self): grad = F.depend(grad, out) grad = grad * dout return grad, zeros_like(labels) + return bprop @@ -426,6 +452,7 @@ def get_bprop_resize_bilinear(self): def bprop(x, out, dout): dx = resize_grad(dout, x) return (dx,) + return bprop @@ -434,7 +461,8 @@ def get_bprop_onehot(self): """Grad definition for `OneHot` operation.""" def bprop(indices, depth, on_value, off_value, out, dout): - return zeros_like(indices), zeros_like(depth) + return zeros_like(indices), zeros_like(depth), zeros_like(on_value), zeros_like(off_value) + return bprop @@ -451,6 +479,7 @@ def get_bprop_top_kv2(self): updates = dout[0] shapes = shape_op(input_x) return scatter(indices, updates, shapes), zeros_like(k) + return bprop @@ -516,6 +545,7 @@ def get_bprop_lstm(self): dx, dhx, dcx = lstm_grad_data(y, dy, dhy, dcy, w, hx, cx, reserve, state) dw = lstm_grad_weight(F.depend(x, dx), hx, y, reserve, state) return dx, dhx, dcx, dw + return bprop @@ -527,6 +557,7 @@ def get_bprop_sigmoid_crossentropy_with_logits(self): def bprop(x, y, out, dout): dx = op(x, y, dout) return (dx, zeros_like(y)) + return bprop @@ -543,6 +574,7 @@ def get_bprop_pad(self): shp = shape_op(x) dx = P.Slice()(dout, begin, shp) return (dx,) + return bprop @@ -554,6 +586,7 @@ def 
get_bprop_mirror_pad(self): def bprop(x, paddings, out, dout): dx = mirror_pad_grad(dout, paddings, x) return (dx, zeros_like(paddings)) + return bprop diff --git a/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py b/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py index ef397ea0a7..17e45a327a 100644 --- a/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py +++ b/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py @@ -20,7 +20,6 @@ squeeze_grad_op_info = AkgRegOp("SqueezeGrad") \ .input(0, "y_grad") \ .output(0, "output") \ .attr("x_shape", "required", "listInt") \ - .attr("axis", "optional", "listInt") \ .dtype_format(DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.F32_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/__init__.py b/mindspore/ops/_op_impl/tbe/__init__.py index 10b916a2c8..9dbe53049b 100644 --- a/mindspore/ops/_op_impl/tbe/__init__.py +++ b/mindspore/ops/_op_impl/tbe/__init__.py @@ -139,8 +139,14 @@ from .smooth_l1_loss_grad import _smooth_l1_loss_grad_tbe from .fused_mul_add import _fused_mul_add_tbe from .fused_mul_add_n import _fused_mul_add_n_tbe from .fused_mul_apply_momentum import _fused_mul_apply_momentum_tbe -from .fill_d import _fill_d_op_tbe +from .fill import _fill_op_tbe from .erf import _erf_op_tbe from .depthwise_conv2d import _depthwise_conv2d_tbe from .depthwise_conv2d_backprop_filter import _depthwise_conv2d_backprop_filter_tbe from .depthwise_conv2d_backprop_input import _depthwise_conv2d_backprop_input_tbe +from .greater_equal import _greater_equal_tbe +from .not_equal import _not_equal_tbe +from .floor_mod import _floor_mod_tbe +from .scatter_nd_update import _scatter_nd_update_tbe +from .avg_pool import _avg_pool_tbe +from .avg_pool_grad import _avg_pool_grad_tbe diff --git a/mindspore/ops/_op_impl/tbe/apply_momentum.py b/mindspore/ops/_op_impl/tbe/apply_momentum.py index 42ce9d0e41..deb8f0d387 100644 --- a/mindspore/ops/_op_impl/tbe/apply_momentum.py +++ 
b/mindspore/ops/_op_impl/tbe/apply_momentum.py @@ -30,22 +30,23 @@ apply_momentum_op_info = TBERegOp("ApplyMomentum") \ .input(3, "grad", False, "required", "all") \ .input(4, "momentum", False, "required", "all") \ .output(0, "var", False, "required", "all") \ + .output(1, "accum", False, "required", "all") \ .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, - DataType.F16_Default, DataType.F16_Default) \ + DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_5HD, - DataType.F16_Default, DataType.F16_5HD) \ + DataType.F16_Default, DataType.F16_5HD, DataType.F16_5HD) \ .dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_C1HWNCoC0, - DataType.F16_Default, DataType.F16_C1HWNCoC0) \ + DataType.F16_Default, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \ .dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_FracZ, - DataType.F16_Default, DataType.F16_FracZ) \ + DataType.F16_Default, DataType.F16_FracZ, DataType.F16_FracZ) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_5HD, - DataType.F32_Default, DataType.F32_5HD) \ + DataType.F32_Default, DataType.F32_5HD, DataType.F32_5HD) \ .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_C1HWNCoC0, - DataType.F32_Default, DataType.F32_C1HWNCoC0) \ + DataType.F32_Default, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \ .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_FracZ, - DataType.F32_Default, DataType.F32_FracZ) \ + DataType.F32_Default, 
DataType.F32_FracZ, DataType.F32_FracZ) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/assign.py b/mindspore/ops/_op_impl/tbe/assign.py index 41a9a0fecd..2fbd152c78 100644 --- a/mindspore/ops/_op_impl/tbe/assign.py +++ b/mindspore/ops/_op_impl/tbe/assign.py @@ -27,6 +27,7 @@ assign_op_info = TBERegOp("Assign") \ .input(1, "value", False, "required", "all") \ .output(0, "y", False, "required", "all") \ .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default, DataType.BOOL_Default) \ .dtype_format(DataType.I8_5HD, DataType.I8_5HD, DataType.I8_5HD) \ .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.U8_Default) \ .dtype_format(DataType.U8_5HD, DataType.U8_5HD, DataType.U8_5HD) \ diff --git a/mindspore/ops/_op_impl/tbe/avg_pool.py b/mindspore/ops/_op_impl/tbe/avg_pool.py new file mode 100644 index 0000000000..5db5947b01 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/avg_pool.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""AvgPool op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +avg_pool_op_info = TBERegOp("AvgPool") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("avg_pool.so") \ + .compute_cost(10) \ + .kernel_name("avg_pool") \ + .partial_flag(True) \ + .attr("ksize", "required", "listInt", "all") \ + .attr("strides", "required", "listInt", "all") \ + .attr("padding", "required", "str", "all") \ + .attr("data_format", "optional", "str", "all") \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .get_op_info() + + +@op_info_register(avg_pool_op_info) +def _avg_pool_tbe(): + """AvgPool TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/avg_pool_grad.py b/mindspore/ops/_op_impl/tbe/avg_pool_grad.py new file mode 100644 index 0000000000..693636edcd --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/avg_pool_grad.py @@ -0,0 +1,42 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""AvgPoolGrad op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +avg_pool_grad_op_info = TBERegOp("AvgPoolGrad") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("avg_pool_grad_d.so") \ + .compute_cost(10) \ + .kernel_name("avg_pool_grad_d") \ + .partial_flag(True) \ + .attr("x_origin", "required", "listInt", "all") \ + .attr("ksize", "required", "listInt", "all") \ + .attr("strides", "required", "listInt", "all") \ + .attr("padding", "required", "str", "all") \ + .attr("data_format", "optional", "str", "all") \ + .input(0, "input_grad", False, "required", "all") \ + .input(1, "mean_matrix", False, "optional", "all") \ + .input(2, "kernel_matrix", False, "optional", "all") \ + .output(0, "out_grad", True, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_C1HWNCoC0, DataType.F16_5HD) \ + .get_op_info() + + +@op_info_register(avg_pool_grad_op_info) +def _avg_pool_grad_tbe(): + """AvgPoolGrad TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/batchnorm.py b/mindspore/ops/_op_impl/tbe/batchnorm.py index 6dd79245a3..ddb24ac3e7 100644 --- a/mindspore/ops/_op_impl/tbe/batchnorm.py +++ b/mindspore/ops/_op_impl/tbe/batchnorm.py @@ -36,19 +36,18 @@ batch_norm_op_info = TBERegOp("BatchNorm") \ .output(2, "batch_variance", False, "required", "all") \ .output(3, "reserve_space_1", False, "optional", "all") \ .output(4, "reserve_space_2", False, "optional", "all") \ - .output(5, "reserve_space_3", False, "optional", "all") \ .dtype_format(DataType.F16_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F16_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F16_5HD, DataType.F32_5HD, 
DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F16_5HD, DataType.F32_5HD, DataType.F32_5HD, - DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ + DataType.F32_5HD, DataType.F32_5HD) \ .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, - DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, - DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \ + DataType.F32_5HD, DataType.F32_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/fill_d.py b/mindspore/ops/_op_impl/tbe/fill.py similarity index 97% rename from mindspore/ops/_op_impl/tbe/fill_d.py rename to mindspore/ops/_op_impl/tbe/fill.py index 97c6b73cf5..90301f123b 100644 --- a/mindspore/ops/_op_impl/tbe/fill_d.py +++ b/mindspore/ops/_op_impl/tbe/fill.py @@ -16,7 +16,7 @@ """FillD op""" from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType -fill_d_op_info = TBERegOp("FillD") \ +fill_d_op_info = TBERegOp("Fill") \ .fusion_type("ELEMWISE") \ .async_flag(False) \ .binfile_name("fill_d.so") \ @@ -50,6 +50,6 @@ fill_d_op_info = TBERegOp("FillD") \ @op_info_register(fill_d_op_info) -def _fill_d_op_tbe(): +def _fill_op_tbe(): """FillD TBE register""" return diff --git a/mindspore/ops/_op_impl/tbe/floor_mod.py b/mindspore/ops/_op_impl/tbe/floor_mod.py new file mode 100644 index 0000000000..031f160e0a --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/floor_mod.py @@ -0,0 +1,38 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""FloorMod op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +floor_mod_op_info = TBERegOp("FloorMod") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("floor_mod.so") \ + .compute_cost(10) \ + .kernel_name("floor_mod") \ + .partial_flag(True) \ + .input(0, "x1", False, "required", "all") \ + .input(1, "x2", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .get_op_info() + + +@op_info_register(floor_mod_op_info) +def _floor_mod_tbe(): + """FloorMod TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/greater_equal.py b/mindspore/ops/_op_impl/tbe/greater_equal.py new file mode 100644 index 0000000000..5609f15f18 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/greater_equal.py @@ -0,0 +1,45 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""GreaterEqual op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +greater_equal_op_info = TBERegOp("GreaterEqual") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("greater_equal.so") \ + .compute_cost(10) \ + .kernel_name("greater_equal") \ + .partial_flag(True) \ + .input(0, "x1", False, "required", "all") \ + .input(1, "x2", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.I8_5HD, DataType.I8_5HD, DataType.BOOL_5HD) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.U8_5HD, DataType.U8_5HD, DataType.BOOL_5HD) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.I32_5HD, DataType.I32_5HD, DataType.BOOL_5HD) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.BOOL_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.BOOL_5HD) \ + .get_op_info() + + +@op_info_register(greater_equal_op_info) +def _greater_equal_tbe(): + """GreaterEqual TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/not_equal.py b/mindspore/ops/_op_impl/tbe/not_equal.py new file mode 100644 index
0000000000..bd801d9a40 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/not_equal.py @@ -0,0 +1,45 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""NotEqual op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +not_equal_op_info = TBERegOp("NotEqual") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("not_equal.so") \ + .compute_cost(10) \ + .kernel_name("not_equal") \ + .partial_flag(True) \ + .input(0, "x1", False, "required", "all") \ + .input(1, "x2", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.I8_5HD, DataType.I8_5HD, DataType.BOOL_5HD) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.U8_5HD, DataType.U8_5HD, DataType.BOOL_5HD) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.I32_5HD, DataType.I32_5HD, DataType.BOOL_5HD) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.BOOL_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD, 
DataType.BOOL_5HD) \ + .get_op_info() + + +@op_info_register(not_equal_op_info) +def _not_equal_tbe(): + """NotEqual TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/reduce_mean.py b/mindspore/ops/_op_impl/tbe/reduce_mean.py index 47548e9036..67b96933a1 100644 --- a/mindspore/ops/_op_impl/tbe/reduce_mean.py +++ b/mindspore/ops/_op_impl/tbe/reduce_mean.py @@ -31,6 +31,7 @@ reduce_mean_op_info = TBERegOp("ReduceMean") \ .dtype_format(DataType.U8_Default, DataType.U8_Default) \ .dtype_format(DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/relu6_grad.py b/mindspore/ops/_op_impl/tbe/relu6_grad.py index eaf3449fe7..5a9af9b425 100644 --- a/mindspore/ops/_op_impl/tbe/relu6_grad.py +++ b/mindspore/ops/_op_impl/tbe/relu6_grad.py @@ -17,7 +17,7 @@ from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType relu6_grad_op_info = TBERegOp("ReLU6Grad") \ - .fusion_type("ELEMWISE") \ + .fusion_type("OPAQUE") \ .async_flag(False) \ .binfile_name("relu6_grad.so") \ .compute_cost(10) \ diff --git a/mindspore/ops/_op_impl/tbe/scatter_nd.py b/mindspore/ops/_op_impl/tbe/scatter_nd.py index 6c9eae3ad4..168b34582f 100644 --- a/mindspore/ops/_op_impl/tbe/scatter_nd.py +++ b/mindspore/ops/_op_impl/tbe/scatter_nd.py @@ -37,5 +37,5 @@ scatter_nd_op_info = TBERegOp("ScatterNd") \ @op_info_register(scatter_nd_op_info) def _scatter_nd_tbe(): - """Conv2D TBE register""" + """ScatterNd TBE register""" return diff --git a/mindspore/ops/_op_impl/tbe/scatter_nd_update.py b/mindspore/ops/_op_impl/tbe/scatter_nd_update.py new file mode 100644 index 0000000000..df0996f26f --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/scatter_nd_update.py @@ -0,0 +1,42 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except
in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ScatterNdUpdate op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +scatter_nd_update_op_info = TBERegOp("ScatterNdUpdate") \ + .fusion_type("ELEMWISE") \ + .async_flag(False) \ + .binfile_name("scatter_nd_update.so") \ + .compute_cost(10) \ + .kernel_name("scatter_nd_update") \ + .partial_flag(True) \ + .attr("use_locking", "optional", "bool", "all") \ + .input(0, "var", False, "required", "all") \ + .input(1, "indices", False, "required", "all") \ + .input(2, "updates", False, "required", "all") \ + .output(0, "var", False, "required", "all") \ + .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I8_Default, DataType.I32_Default, DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.U8_Default, DataType.I32_Default, DataType.U8_Default, DataType.U8_Default) \ + .dtype_format(DataType.BOOL_Default, DataType.I32_Default, DataType.BOOL_Default, DataType.BOOL_Default) \ + .get_op_info() + + +@op_info_register(scatter_nd_update_op_info) +def _scatter_nd_update_tbe(): + """ScatterNdUpdate TBE register""" + return diff --git a/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py b/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py index b3687c553c..d008f96648 100644 ---
a/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py +++ b/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py @@ -15,13 +15,60 @@ """constexpr util""" +from functools import reduce +import numpy as np from ...primitive import constexpr +from ....common.tensor import Tensor +from ....common import dtype as mstype +from ...._extends.utils import Slice, Ellipsis_ + +@constexpr +def check_equal(param1, param2, msg="{},{}"): + """Checks whether the two parameters are equal or not.""" + if param1 != param2: + raise ValueError(msg.format(param1, param2)) + return param1 + + +@constexpr +def check_ellipsis_shape_size(data_shape, value_shape, data_size, value_size): + """Checks the shape and size of the tensor and value.""" + if data_shape == value_shape or data_size == value_size or value_size == 1: + return True + raise ValueError("The value(shape={}), can not assign to tensor(shape={}).".format(value_shape, data_shape)) + + +@constexpr +def check_tensor_setitem_index(index, element_type=None): + """Checks tuple index type of tensor assignment.""" + if index is None: + raise IndexError("Tensor's index cannot be None.") + # eg. Tensor[Slice] = u + if isinstance(index, Slice): + return True + # eg. Tensor[tuple] = u + if isinstance(index, tuple): + if not index: + raise IndexError("Tensor's index cannot be empty.") + # eg. Tensor[tuple(Slice...)] = u + if isinstance(index[0], (Slice, Ellipsis_, int)): + return True + raise IndexError("Index of type '{}' is not supported yet.".format(type(index[0]))) + # eg. Tensor[Tensor[dtype=bool]] = u + if index == mstype.tensor: + if element_type is None or element_type != mstype.bool_: + raise TypeError( + "The index of tensor should be a bool type tensor. " + "{} type is not supported yet.".format(element_type)) + return True + + raise IndexError("Index of type '{}' is not supported yet.".format(type(index))) @constexpr def is_same_type(inst, type_): """ - Check whether an object is an instance of a target type.
+ Checks whether an object is an instance of a target type. Inputs: inst (mindspore.dtype): Inspected type. @@ -33,13 +80,147 @@ def is_same_type(inst, type_): return inst == type_ +def slice_expand(input_slices, shape): + """ + Converts slice to indices. + + Inputs: + slices (Union[Slice, tuple[Slice]]): Slice tuple or slice. + shape (tuple): The shape of a tensor is an integer element tuple. + + Outputs: + tuple[list], This is expressed as (begins, ends, strides). + """ + begin = [] + end = [] + strides = [] + index = 0 + slices = None + # Slice or tuple(Slice...) + if isinstance(input_slices, Slice): + slices = (input_slices,) + elif isinstance(input_slices, (tuple, list)) and input_slices and isinstance(input_slices[0], (Slice, Ellipsis_)): + is_have_ellipsis = False + for _, element in enumerate(input_slices): + if isinstance(element, Ellipsis_): + is_have_ellipsis = True + break + if is_have_ellipsis: + slices = ellipsis2slice(input_slices, shape) + else: + slices = input_slices + else: + raise IndexError("Tensor's index type is not supported yet.") + + for s in slices: + start = 0 if (s.start is None) else s.start + stop = shape[index] if (s.end is None) else s.end + step = 1 if (s.step is None) else s.step + begin.append(start) + end.append(stop) + strides.append(step) + index += 1 + while index < len(shape): + begin.append(0) + end.append(shape[index]) + strides.append(1) + index += 1 + return begin, end, strides + + +def ellipsis2slice(input_, shape): + """Converts ellipsis to slice.""" + input_slice = input_ + result = [] + if isinstance(input_, Ellipsis_): + input_slice = (input_,) + ell_count = 0 + for _, element in enumerate(input_slice): + if not isinstance(element, Ellipsis_): + result.append(element) + continue + ell_count += 1 + if ell_count > 1: + raise IndexError("There cannot be more than one ellipsis (...)
in the index of the tensor, " + "but it is currently {}".format(input_slice)) + for _ in range(len(shape) - len(input_slice) + 1): + result.append(Slice(None, None, None)) + return tuple(result) + + @constexpr -def error_msg(msg="", format_values=""): +def slice2indices(input_slices, shape): """ - Used to throw exception information. + Converts slice to indices. Inputs: - msg (str): information content. + slices (Union[Slice, tuple[Slice]]): Slice tuple or slice. + shape (tuple): The shape of a tensor is an integer element tuple. + + Outputs: + Tensor, the shape is (n, 1). """ + begin, end, strides = slice_expand(input_slices, shape) + np_r = [] + for i, element in enumerate(shape): + s = begin[i] if (begin[i] >= 0) else (element + begin[i]) + e = end[i] if (end[i] >= 0) else (element + end[i]) + np_r.append(np.r_[s:e:strides[i]]) + # Reference: np.ravel_multi_index((np.ix_(np.r_[1:3:1], np.r_[0:4:1], np.r_[4:0:-1])), a.shape) + np_ix = np.ix_(*np_r) + ravel = np.ravel_multi_index(np_ix, shape) + ravel = Tensor(ravel.reshape(-1, 1), dtype=mstype.int32) + return ravel + +@constexpr +def check_indices(indices_size, index): + """Checks indices whether is empty.""" + if indices_size < 1: + raise IndexError("The tensor's index is unreasonable. 
index:{}".format(index)) + return indices_size - raise ValueError(msg.format(*format_values)) + +@constexpr +def check_indices_value_size(indices_size, value_size): + """Checks if the sizes are already matched.""" + if value_size < 1: + raise ValueError("The value assigned to tensor cannot be empty.") + if value_size > 1: + if value_size != indices_size: + raise ValueError( + "The value given to tensor does not match the index size," + " value size:{}, indices size:{}".format(value_size, indices_size)) + return value_size + +@constexpr +def integer_to_indices(index, shape): + """Converts int or tuple[int] to indices.""" + size = reduce(lambda x, y: x * y, shape) + range_ = np.arange(size).reshape(shape) + value = range_[index] + value = value.reshape(-1, 1) + return Tensor(value, dtype=mstype.int32) + +@constexpr +def tuple_element_is_slice(indexs): + """Judges tuple element type.""" + if not indexs: + raise IndexError("Tensor's index cannot be empty.") + if isinstance(indexs, tuple): + for _, ele in enumerate(indexs): + if not isinstance(ele, Slice): + return False + return True + return False + +@constexpr +def tuple_element_is_int(indexs): + """Judges tuple element type.""" + if not indexs: + raise IndexError("Tensor's index cannot be empty.") + if isinstance(indexs, tuple): + for _, ele in enumerate(indexs): + if not isinstance(ele, int): + return False + return True + return False diff --git a/mindspore/ops/composite/multitype_ops/getitem_impl.py b/mindspore/ops/composite/multitype_ops/getitem_impl.py index 56617c06a8..540dd28b37 100644 --- a/mindspore/ops/composite/multitype_ops/getitem_impl.py +++ b/mindspore/ops/composite/multitype_ops/getitem_impl.py @@ -147,6 +147,21 @@ def _tensor_getitem_by_number(data, number_index): return _tensor_slice(data, number_index) +@getitem.register("Tensor", "None") +def _tensor_getitem_by_none(data, index): + """ + Getting item of tensor by None. + + Inputs: + data (Tensor): A tensor. + index (None): None.
+ + Outputs: + Tensor, element type is as same as the element type of data. + """ + return _tensor_slice(data, index) + + @getitem.register("Tensor", "Slice") def _tensor_getitem_by_slice(data, slice_index): """ diff --git a/mindspore/ops/composite/multitype_ops/setitem_impl.py b/mindspore/ops/composite/multitype_ops/setitem_impl.py index 31c96932c5..2f44bdc5ba 100644 --- a/mindspore/ops/composite/multitype_ops/setitem_impl.py +++ b/mindspore/ops/composite/multitype_ops/setitem_impl.py @@ -25,15 +25,14 @@ setitem = base.MultitypeFuncGraph('setitem') @setitem.register("List", "Number", "String") def _list_setitem_with_string(data, number_index, value): """ - Assign value to list. + Assigns value to list. Inputs: data (list): Data of type lis. number_index (Number): Index of data. - value (String): Value given. Outputs: - List, type is same as the element type of data. + list, type is same as the element type of data. """ return F.list_setitem(data, number_index, value) @@ -41,7 +40,7 @@ def _list_setitem_with_string(data, number_index, value): @setitem.register("List", "Number", "Number") def _list_setitem_with_number(data, number_index, value): """ - Assign value to list. + Assigns value to list. Inputs: data (list): Data of type lis. @@ -49,7 +48,7 @@ def _list_setitem_with_number(data, number_index, value): value (Number): Value given. Outputs: - List, type is same as the element type of data. + list, type is same as the element type of data. """ return F.list_setitem(data, number_index, value) @@ -57,7 +56,7 @@ def _list_setitem_with_number(data, number_index, value): @setitem.register("List", "Number", "Tensor") def _list_setitem_with_Tensor(data, number_index, value): """ - Assign value to list. + Assigns value to list. Inputs: data (list): Data of type lis. @@ -65,7 +64,7 @@ def _list_setitem_with_Tensor(data, number_index, value): value (Tensor): Value given. Outputs: - List, type is same as the element type of data. 
+ list, type is same as the element type of data. """ return F.list_setitem(data, number_index, value) @@ -73,15 +72,15 @@ def _list_setitem_with_Tensor(data, number_index, value): @setitem.register("List", "Number", "List") def _list_setitem_with_List(data, number_index, value): """ - Assign value to list. + Assigns value to list. Inputs: data (list): Data of type lis. number_index (Number): Index of data. - value (List): Value given. + value (list): Value given. Outputs: - List, type is same as the element type of data. + list, type is same as the element type of data. """ return F.list_setitem(data, number_index, value) @@ -89,15 +88,15 @@ def _list_setitem_with_List(data, number_index, value): @setitem.register("Dictionary", "String", "Tensor") def _dict_setitem_with_tensor(data, key, value): """ - Assign value to dictionary. + Assigns value to dictionary. Inputs: - data (Dictionary): Data of type dict. + data (dict): Data of type dict. key (str): Key of the data. value (Tensor): Value given. Outputs: - Dict, type is as same as the element type of data. + dict, type is as same as the element type of data. """ return F.dict_setitem(data, key, value) @@ -105,15 +104,15 @@ def _dict_setitem_with_tensor(data, key, value): @setitem.register("Dictionary", "String", "Number") def _dict_setitem_with_number(data, key, value): """ - Assign value to dictionary. + Assigns value to dictionary. Inputs: - data (Dictionary): Data of type dict. + data (dict): Data of type dict. key (str): Key of the data. value (Number): Value given. Outputs: - Dict, type is as same as the element type of data. + dict, type is as same as the element type of data. """ return F.dict_setitem(data, key, value) @@ -138,25 +137,23 @@ def _tensor_setitem_by_tensor_v1(data, index, value_tensor): Outputs: Tensor, element type and shape is same as data. 
""" + result = None index_dtype = F.dtype(index) index_shape = F.shape(index) - is_bool = mult_util.is_same_type(index_dtype, mstype.bool_) - if not is_bool: - return mult_util.error_msg( - "The tensor index should be a bool type tensor. {} type tensor is not supported yet.", (index_dtype,)) - data_shape = F.shape(data) - if index_shape != data_shape: - return mult_util.error_msg( - "The tensor(shape={}) and tensor index(shape={}) should be the same shape.", (data_shape, index_shape)) - size = F.size(value_tensor) - if size != 1: - return mult_util.error_msg( - "When assign value is a tensor, its size should be 1, but current size is {}.", (size,)) - dtype = F.dtype(data) - u_cast = F.cast(value_tensor, dtype) - one_data = F.ones_like(data) - u = F.tensor_mul(one_data, u_cast) - return F.select(index, u, data) + check_result = mult_util.check_tensor_setitem_index(mstype.tensor, index_dtype) + if check_result: + data_shape = F.shape(data) + data_shape = mult_util.check_equal(data_shape, index_shape, + "The tensor(shape={}) and tensor index(shape={}) should be the same shape.") + size = F.size(value_tensor) + size = mult_util.check_equal(1, size, + "When assign value is a tensor, its size should be {}, but current size is {}.") + dtype = F.dtype(data) + u_cast = F.cast(value_tensor, dtype) + one_data = F.ones_like(data) + u = F.tensor_mul(one_data, u_cast) + result = F.select(index, u, data) + return result @setitem.register("Tensor", "Tensor", "Number") @@ -179,16 +176,216 @@ def _tensor_setitem_by_tensor_v2(data, index, value): Outputs: Tensor, element type and shape is same as data. """ + result = None index_dtype = F.dtype(index) index_shape = F.shape(index) - is_bool = mult_util.is_same_type(index_dtype, mstype.bool_) - if not is_bool: - return mult_util.error_msg( - "The tensor index should be a bool type tensor. 
{} type tensor is not supported yet.", (index_dtype,)) - shape = F.shape(data) - if index_shape != shape: - return mult_util.error_msg( - "The tensor(shape={}) and tensor index(shape={}) should be the same shape.", (shape, index_shape)) - dtype = F.dtype(data) - u = F.fill(dtype, shape, value) - return F.select(index, u, data) + check_result = mult_util.check_tensor_setitem_index(mstype.tensor, index_dtype) + if check_result: + shape = F.shape(data) + shape = mult_util.check_equal( + shape, index_shape, "The tensor(shape={}) and tensor index(shape={}) should be the same shape.") + dtype = F.dtype(data) + u = F.fill(dtype, shape, value) + result = F.select(index, u, data) + return result + + +@setitem.register("Tensor", "Slice", "Tensor") +def _tensor_setitem_with_slice_v3(data, input_slice, value): + """ + Tensor assignment. + + Note: + Syntax support: A[Slice] = U + Restraint condition: A is a Tensor + Slice like "1:3" + U is a Tensor(size=1) or Tensor(size>1) + + Inputs: + data (Tensor): Assigned tensor. + input_slice (Slice): Slice expression. + value (Number): Assignment value. + + Outputs: + Tensor, element type and shape is same as data. + """ + return _tensor_assgin_tensor(data, input_slice, value) + + +@setitem.register("Tensor", "Tuple", "Tensor") +def _tensor_setitem_with_slice_v4(data, input_slice, value): + """ + Tensor assignment. + + Note: + Syntax support: A[tuple(Slice)] = U, and A[tuple(Number)] = U + Restraint condition: A is a Tensor + Slice like "1:3, ::, :4:-1" + U is a Tensor(size=1) or Tensor(size>1) + + Inputs: + data (Tensor): Assigned tensor. + input_slice (Union[tuple[Slice], tuple[Number]]): Slice expression. + value (Number): Assignment value. + + Outputs: + Tensor, element type and shape is same as data. 
+ """ + return _tensor_assgin_tensor(data, input_slice, value) + + +def _tensor_assgin_tensor(data, input_slice, value): + """Assigns a tensor value to the tensor by slice.""" + result = None + check_result = mult_util.check_tensor_setitem_index(input_slice) + if check_result: + data_shape = F.shape(data) + indices = mult_util.slice2indices(input_slice, data_shape) + is_tuple_int = mult_util.tuple_element_is_int(input_slice) + if is_tuple_int: + indices = mult_util.integer_to_indices(input_slice, data_shape) + result = _tensor_indices_tensor(data, data_shape, input_slice, indices, value) + return result + + +def _tensor_indices_tensor(data, data_shape, index, indices, value): + """Assigns a tensor value to the tensor.""" + data_size = F.size(data) + data_dtype = F.dtype(data) + indices_size = F.size(indices) + indices_size = mult_util.check_indices(indices_size, index) + update = F.fill(mstype.int32, (indices_size,), 1) + condition_1d = F.scatter_nd(indices, update, (data_size,)) + condition = F.reshape(condition_1d, data_shape) + condition = F.cast(condition, mstype.bool_) + value_fill = None + value_size = F.size(value) + + value_size = mult_util.check_indices_value_size(indices_size, value_size) + if value_size == 1: + value_fill = F.fill(data_dtype, (indices_size,), 1) + value = F.cast(value, data_dtype) + value_fill = F.tensor_mul(value_fill, value) + elif value_size > 1: + value_fill = F.reshape(value, (indices_size,)) + value_1d = F.scatter_nd(indices, value_fill, (data_size,)) + u = F.reshape(value_1d, data_shape) + return F.select(condition, u, data) + +@setitem.register("Tensor", "Slice", "Number") +def _tensor_setitem_with_slice_v1(data, input_slice, value): + """ + Tensor assignment. + + Note: + Syntax support: A[Slice] = u + Restraint condition: A is a Tensor. + Slice like "1:3" + u is a scalar + + Inputs: + data (Tensor): Assigned tensor. + input_slice (Slice): slice expression. + value (Number): Assignment value. 
+ + Outputs: + Tensor, element type and shape is same as data. + """ + return _tensor_assgin_number(data, input_slice, value) + + +@setitem.register("Tensor", "Tuple", "Number") +def _tensor_setitem_with_slice_v2(data, input_slice, value): + """ + Tensor assignment. + + Note: + Syntax support: A[tuple(Slice)] = u, and A[tuple(Number)] = u + Restraint condition: A is a Tensor. + Slice like "1:3, ::, :4:-1" + u is a scalar + + Inputs: + data (Tensor): Assigned tensor. + input_slice (Union[tuple[Slice], tuple[Number]]): slice expression. + value (Number): Assignment value. + + Outputs: + Tensor, element type and shape is same as data. + """ + return _tensor_assgin_number(data, input_slice, value) + + +def _tensor_assgin_number(data, input_slice, value): + """Givens a scalar assign to tensor by slice""" + check_result = mult_util.check_tensor_setitem_index(input_slice) + result = None + if check_result: + data_shape = F.shape(data) + indices = mult_util.slice2indices(input_slice, data_shape) + is_tuple_int = mult_util.tuple_element_is_int(input_slice) + if is_tuple_int: + indices = mult_util.integer_to_indices(input_slice, data_shape) + result = _tensor_indices_number(data, data_shape, input_slice, indices, value) + return result + + +def _tensor_indices_number(data, data_shape, index, indices, value): + """Assigns a scalar value to the tensor.""" + data_size = F.size(data) + data_dtype = F.dtype(data) + indices_size = F.size(indices) + indices_size = mult_util.check_indices(indices_size, index) + update = F.fill(mstype.int32, (indices_size,), 1) + condition_1d = F.scatter_nd(indices, update, (data_size,)) + condition = F.reshape(condition_1d, data_shape) + condition = F.cast(condition, mstype.bool_) + value_fill = F.fill(data_dtype, (indices_size,), value) + value_1d = F.scatter_nd(indices, value_fill, (data_size,)) + u = F.reshape(value_1d, data_shape) + return F.select(condition, u, data) + + +@setitem.register("Tensor", "Number", "Number") +def 
_tensor_setitem_with_int_v1(data, index, value): + """Syntax: A[1] = 3""" + data_shape = F.shape(data) + indices = mult_util.integer_to_indices(index, data_shape) + return _tensor_indices_number(data, data_shape, index, indices, value) + + +@setitem.register("Tensor", "Number", "Tensor") +def _tensor_setitem_with_int_v2(data, index, value): + """Syntax: A[1] = Tensor""" + data_shape = F.shape(data) + indices = mult_util.integer_to_indices(index, data_shape) + return _tensor_indices_tensor(data, data_shape, index, indices, value) + + +@setitem.register("Tensor", "Ellipsis", "Number") +def _tensor_setitem_with_ellipsis_v1(data, index, value): + """Syntax: A[...] = number.""" + data_shape = F.shape(data) + data_dtype = F.dtype(data) + return F.fill(data_dtype, data_shape, value) + + +@setitem.register("Tensor", "Ellipsis", "Tensor") +def _tensor_setitem_with_ellipsis_v2(data, index, value): + """Syntax: A[...] = Tensor.""" + result = None + data_shape = F.shape(data) + data_dtype = F.dtype(data) + data_size = F.size(data) + value_shape = F.shape(value) + value_size = F.size(value) + check_result = mult_util.check_ellipsis_shape_size(data_shape, value_shape, data_size, value_size) + if check_result: + if data_size == value_size: + result = F.reshape(value, data_shape) + result = F.cast(result, data_dtype) + elif value_size == 1: + param1 = F.fill(data_dtype, data_shape, 1) + param2 = F.cast(value, data_dtype) + result = F.tensor_mul(param1, param2) + return result diff --git a/mindspore/ops/composite/multitype_ops/zeros_like_impl.py b/mindspore/ops/composite/multitype_ops/zeros_like_impl.py index 1c1a4f1d12..1308bfd62a 100644 --- a/mindspore/ops/composite/multitype_ops/zeros_like_impl.py +++ b/mindspore/ops/composite/multitype_ops/zeros_like_impl.py @@ -31,6 +31,10 @@ def _zeros_like_scala(x): """Returns 0 which has the same dtype as x where x is a scalar.""" return 0 +@zeros_like_leaf.register("Bool") +def _zeros_like_bool(x): + """Returns False if x is a bool.""" + 
return False newenv = base.EnvInstance_() diff --git a/mindspore/ops/functional.py b/mindspore/ops/functional.py index c5b8752ae2..5f7cabc54d 100644 --- a/mindspore/ops/functional.py +++ b/mindspore/ops/functional.py @@ -56,6 +56,7 @@ tensor_pow = P.Pow() tensor_mod = P.FloorMod() strided_slice = P.StridedSlice() same_type_shape = P.SameTypeShape() +check_bprop = P.CheckBprop() equal = P.Equal() not_equal = P.NotEqual() assign_sub = P.AssignSub() @@ -68,6 +69,7 @@ tuple_to_array = P.TupleToArray() scalar_cast = P.ScalarCast() print_ = P.Print() expand_dims = P.ExpandDims() +scatter_nd = P.ScatterNd() tuple_setitem = Primitive('tuple_setitem') tuple_getitem = Primitive('tuple_getitem') @@ -143,3 +145,5 @@ stop_gradient = Primitive("stop_gradient") tensor_operator_registry.register('__add__', tensor_add) tensor_operator_registry.register('__mul__', tensor_mul) tensor_operator_registry.register('__div__', tensor_div) +#ms cannot support Tensor(True) compare +tensor_operator_registry.register('__eq__', equal) diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index c75c2031d7..d83f5accd0 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -57,7 +57,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm, Gelu, Elu, GetNext, L2Normalize, LayerNorm, L2Loss, LogSoftmax, - MaxPool, ExtractImagePatches, + MaxPool, AvgPool, Conv2DBackpropInput, ConfusionMulGrad, MaxPoolWithArgmax, OneHot, Pad, MirrorPad, PReLU, ReLU, ReLU6, ReLUV2, HSwish, HSigmoid, ResizeBilinear, Sigmoid, @@ -67,7 +67,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm, SparseSoftmaxCrossEntropyWithLogits, Tanh, TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl, ApplyRMSProp, ApplyCenteredRMSProp) -from .other_ops import Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, CheckValid, MakeRefKey +from .other_ops import Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, CheckValid, 
MakeRefKey, CheckBprop from . import _quant_ops from ._quant_ops import * @@ -89,7 +89,6 @@ __all__ = [ 'Sqrt', 'Square', 'Conv2D', - 'ExtractImagePatches', 'Flatten', 'MaxPoolWithArgmax', 'FusedBatchNorm', @@ -179,6 +178,7 @@ __all__ = [ 'GeSwitch', 'Merge', 'SameTypeShape', + 'CheckBprop', 'CheckValid', 'BoundingBoxEncode', 'BoundingBoxDecode', diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py index 782784ca00..76d464de16 100644 --- a/mindspore/ops/operations/_grad_ops.py +++ b/mindspore/ops/operations/_grad_ops.py @@ -59,6 +59,23 @@ class ACosGrad(PrimitiveWithInfer): return x +class AcoshGrad(PrimitiveWithInfer): + """Performs grad of Acosh operation.""" + + @prim_attr_register + def __init__(self): + """init AcoshGrad""" + + def infer_shape(self, x, dout): + validator.check("x shape", x, "dout shape", dout, Rel.EQ, self.name) + return x + + def infer_dtype(self, x, dout): + args = {"x": x, "dout": dout} + validator.check_tensor_type_same(args, mstype.number_type, self.name) + return x + + class BatchNormGrad(PrimitiveWithInfer): """Performs grad of BatchNorm operation.""" @@ -652,6 +669,9 @@ class PReLUGrad(PrimitiveWithInfer): r""" Gradients of PReLU operation. + Note: + 1-dimensional input_x is not supported. + Inputs: - **y_backprop** (Tensor) - Representing the backprop of the next layer. - **input_x** (Tensor) - Should be the input `input_x` of forward operator PRelu. 
@@ -666,6 +686,8 @@ class PReLUGrad(PrimitiveWithInfer): pass def infer_shape(self, y_backprop_shape, A_shape, w_shape): + if len(A_shape) == 1: + raise ValueError(f'For \'{self.name}\' input_x rank 1 is not supported.') return y_backprop_shape, w_shape def infer_dtype(self, y_backprop_dtype, A_dtype, w_dtype): diff --git a/mindspore/ops/operations/_inner_ops.py b/mindspore/ops/operations/_inner_ops.py new file mode 100644 index 0000000000..632f9c0a20 --- /dev/null +++ b/mindspore/ops/operations/_inner_ops.py @@ -0,0 +1,98 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Inner operators.""" + +from ..._checkparam import Validator as validator +from ...common import dtype as mstype +from ..primitive import PrimitiveWithInfer, prim_attr_register + + +class ExtractImagePatches(PrimitiveWithInfer): + """ + Extract patches from images. + The input tensor must be a 4-D tensor and the data format is NHWC. + + Args: + ksizes (Union[tuple[int], list[int]]): The size of sliding window, should be a tuple or list of int, + and the format is [1, ksize_row, ksize_col, 1]. + strides (Union[tuple[int], list[int]]): Distance between the centers of the two consecutive patches, + should be a tuple or list of int, and the format is [1, stride_row, stride_col, 1]. 
+ rates (Union[tuple[int], list[int]]): In each extracted patch, the gap between the corresponding dim + pixel positions, should be a tuple or list of int, and the format is [1, rate_row, rate_col, 1]. + padding (str): The type of padding algorithm, is a string whose value is "same" or "valid", + not case sensitive. Default: "valid". + + - same: Means that the patch can take the part beyond the original image, and this part is filled with 0. + + - valid: Means that the patch area taken must be completely contained in the original image. + + Inputs: + - **input_x** (Tensor) - A 4-D tensor whose shape is [in_batch, in_row, in_col, in_depth] and + data type is int8, float16, uint8. + + Outputs: + Tensor, a 4-D tensor whose data type is same as 'input_x', + and the shape is [out_batch, out_row, out_col, out_depth], the out_batch is same as the in_batch. + """ + + @prim_attr_register + def __init__(self, ksizes, strides, rates, padding="valid"): + """init""" + def _check_tuple_or_list(arg_name, arg_val, prim_name): + validator.check_value_type(f"{arg_name}s", ksizes, [tuple, list], self.name) + if len(arg_val) != 4 or arg_val[0] != 1 or arg_val[3] != 1: + raise ValueError(f"For \'{prim_name}\' the format of {arg_name}s should be [1, {arg_name}_row, " + f"{arg_name}_col, 1], but got {arg_val}.") + if not isinstance(arg_val[1], int) or not isinstance(arg_val[2], int) or arg_val[1] < 1 or arg_val[2] < 1: + raise ValueError(f"For '{prim_name}' the {arg_name}_row and {arg_name}_col in {arg_name}s should be an " + f"positive integer number, but got {arg_name}_row is {arg_val[1]}, {arg_name}_col " + f"is {arg_val[2]}") + + _check_tuple_or_list("ksize", ksizes, self.name) + _check_tuple_or_list("stride", strides, self.name) + _check_tuple_or_list("rate", rates, self.name) + self.padding = validator.check_string('padding', padding.upper(), ['VALID', 'SAME'], self.name) + self.add_prim_attr("padding", self.padding) + + def infer_shape(self, input_x): + """infer shape""" + 
in_batch, in_row, in_col, in_depth = input_x + _, ksize_row, ksize_col, _ = self.ksizes + _, stride_row, stride_col, _ = self.strides + _, rate_row, rate_col, _ = self.rates + if len(input_x) != 4: + raise ValueError("The `input_x` should be a 4-D tensor, " + f"but got a {len(input_x)}-D tensor whose shape is {input_x}") + + out_batch = in_batch + out_depth = ksize_row * ksize_col * in_depth + + if self.padding == "VALID": + out_row = \ + (in_row - (ksize_row + (ksize_row - 1) * (rate_row - 1))) // stride_row + 1 + out_col = \ + (in_col - (ksize_col + (ksize_col - 1) * (rate_col - 1))) // stride_col + 1 + else: + out_row = (in_row - 1) // stride_row + 1 + out_col = (in_col - 1) // stride_col + 1 + + out_shape = [out_batch, out_row, out_col, out_depth] + return out_shape + + def infer_dtype(self, input_x): + """infer dtype""" + validator.check_tensor_type_same({"input_x": input_x}, (mstype.int8, mstype.float16, mstype.float32), self.name) + return input_x diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index 21dbf81730..aca87cab66 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -24,7 +24,9 @@ import itertools import numbers import numpy as np -from ..._checkparam import ParamValidator as validator +from ..._c_expression import signature_rw as sig_rw +from ..._c_expression import signature_kind as sig_kind +from ..._checkparam import Validator as validator from ..._checkparam import Rel from ...common import dtype as mstype from ...common.tensor import Tensor @@ -32,12 +34,12 @@ from ..operations.math_ops import _infer_shape_reduce from .._utils import _get_concat_offset from ..primitive import Primitive, PrimitiveWithInfer, prim_attr_register -def _check_infer_attr_reduce(axis, keep_dims): - validator.check_type('keep_dims', keep_dims, [bool]) - validator.check_type('axis', axis, [int, tuple]) +def _check_infer_attr_reduce(axis, keep_dims, prim_name): + 
validator.check_value_type('keep_dims', keep_dims, [bool], prim_name) + validator.check_value_type('axis', axis, [int, tuple], prim_name) if isinstance(axis, tuple): for index, value in enumerate(axis): - validator.check_type('axis[%d]' % index, value, [int]) + validator.check_value_type('axis[%d]' % index, value, [int], prim_name) class ExpandDims(PrimitiveWithInfer): @@ -74,13 +76,11 @@ class ExpandDims(PrimitiveWithInfer): self.init_prim_io_names(inputs=['x', 'axis'], outputs=['output']) def __infer__(self, x, axis): - validator.check_subclass("input_x", x['dtype'], mstype.tensor) + validator.check_subclass("input_x", x['dtype'], mstype.tensor, self.name) x_shape = list(x['shape']) axis_v = axis['value'] rank = len(x_shape) - validator.check_const_input('axis', axis_v) - validator.check_type("axis", axis_v, [int]) - validator.check_int_range('axis', axis_v, -rank - 1, rank, Rel.INC_BOTH) + validator.check_int_range('axis', axis_v, -rank - 1, rank, Rel.INC_BOTH, self.name) if axis_v < 0: axis_v = rank + 1 + axis_v x_shape.insert(axis_v, 1) @@ -110,7 +110,7 @@ class DType(PrimitiveWithInfer): """init DType""" def __infer__(self, x): - validator.check_subclass("input_x", x['dtype'], mstype.tensor) + validator.check_subclass("input_x", x['dtype'], mstype.tensor, self.name) out = {'shape': (), 'dtype': mstype.type_type, 'value': x['dtype'].element_type()} @@ -144,19 +144,17 @@ class SameTypeShape(PrimitiveWithInfer): def __call__(self, x, y): """run in PyNative mode""" - if x.dtype() != y.dtype(): - raise TypeError(f"The {x} and {y} should be same dtype.") - if x.shape() != y.shape(): - raise TypeError(f"The {x} and {y} should have same shape.") + validator.check_subclass('x', x.dtype(), mstype.tensor, self.name) + validator.check_subclass('y', y.dtype(), mstype.tensor, self.name) + validator.check('x dtype', x.dtype(), 'y dtype', y.dtype(), Rel.EQ, self.name, TypeError) + validator.check('x shape', x.shape(), 'y shape', y.shape(), Rel.EQ, self.name) return x def 
__infer__(self, x, y): - if x['dtype'] != y['dtype']: - raise TypeError(f"The {x} and {y} should be same dtype," - f" but got {x['dtype']} {y['dtype']}.") - if x['shape'] != y['shape']: - raise ValueError(f"The {x} and {y} should be same shape," - f" but got {x['shape']} {y['shape']}.") + validator.check_subclass('x', x['dtype'], mstype.tensor, self.name) + validator.check_subclass('y', y['dtype'], mstype.tensor, self.name) + validator.check('x dtype', x['dtype'], 'y dtype', y['dtype'], Rel.EQ, self.name, TypeError) + validator.check('x shape', x['shape'], 'y shape', y['shape'], Rel.EQ, self.name) return x @@ -191,8 +189,8 @@ class Cast(PrimitiveWithInfer): src_type = x['dtype'] dst_type = t['value'] - validator.check_subclass("input_x", src_type, [mstype.tensor, mstype.number]) - validator.check_subclass("type", dst_type, mstype.number, with_type_of=False) + validator.check_subclass("input_x", src_type, [mstype.tensor, mstype.number], self.name) + validator.check_subclass("type", dst_type, mstype.number, self.name) if isinstance(src_type, type(mstype.tensor)): src_type = x['dtype'].element_type() @@ -238,8 +236,8 @@ class IsSubClass(PrimitiveWithInfer): sub_type_t = sub_type['value'] type_v = type_['value'] - validator.check_type("sub_type", sub_type_t, [mstype.Type]) - validator.check_type("type_", type_v, [mstype.Type]) + validator.check_value_type("sub_type", sub_type_t, [mstype.Type], self.name) + validator.check_value_type("type_", type_v, [mstype.Type], self.name) value = mstype.issubclass_(sub_type_t, type_v) @@ -273,8 +271,8 @@ class IsInstance(PrimitiveWithInfer): sub_type_t = inst['dtype'] type_v = type_['value'] - validator.check_const_input("inst", inst['value']) - validator.check_type("type_", type_v, [mstype.Type]) + validator.check_const_input("inst", inst['value'], self.name) + validator.check_value_type("type_", type_v, [mstype.Type], self.name) value = mstype.issubclass_(sub_type_t, type_v) @@ -316,14 +314,13 @@ class Reshape(PrimitiveWithInfer): 
def __infer__(self, x, shape): shape_v = shape['value'] x_shp = x['shape'] - validator.check_subclass("x", x['dtype'], mstype.tensor) - validator.check_const_input("shape", shape_v) - validator.check_type("shape", shape_v, [tuple]) + validator.check_subclass("x", x['dtype'], mstype.tensor, self.name) + validator.check_value_type("shape", shape_v, [tuple], self.name) shape_v = list(shape_v) neg_index = -1 dim_prod = 1 for i, shp_i in enumerate(shape_v): - validator.check_type("shape[%d]" % i, shp_i, [int]) + validator.check_value_type("shape[%d]" % i, shp_i, [int], self.name) if shp_i == -1: if neg_index != -1: raise ValueError(f'The shape can only has one -1 at most, but {shape_v}.') @@ -332,7 +329,7 @@ class Reshape(PrimitiveWithInfer): dim_prod *= shp_i arr_prod = np.prod(x_shp) if dim_prod <= 0 or arr_prod % dim_prod != 0: - raise ValueError(f'The product of shape should > 0 and' + raise ValueError(f'For \'{self.name}\' the product of shape should > 0 and' f' can be divided by prod of input {arr_prod},' f' but shape {shape}, product of shape {dim_prod}.') @@ -340,7 +337,7 @@ class Reshape(PrimitiveWithInfer): shape_v[neg_index] = int(arr_prod / dim_prod) dim_prod *= shape_v[neg_index] if dim_prod != arr_prod: - raise ValueError(f'The shape arg for reshape must match array''s size' + raise ValueError(f'For \'{self.name}\' The shape arg for reshape must match array''s size' f' input shape {arr_prod}, shape {dim_prod}.') value = None @@ -406,10 +403,10 @@ class Squeeze(PrimitiveWithInfer): def __init__(self, axis=()): """init Squeeze""" self.init_prim_io_names(inputs=['x'], outputs=['output']) - validator.check_type('axis', axis, [int, tuple]) + validator.check_value_type('axis', axis, [int, tuple], self.name) if isinstance(axis, tuple): - for item in axis: - validator.check_type("item", item, [int]) + for idx, item in enumerate(axis): + validator.check_value_type("axis[%d]" % idx, item, [int], self.name) else: self.axis = (axis,) self.add_prim_attr("axis", 
(axis,)) @@ -422,14 +419,14 @@ class Squeeze(PrimitiveWithInfer): ret = [d for d in x_shape if d != 1] else: for a in axis: - validator.check_int_range('axis or its elements', a, -ndim, ndim - 1, Rel.INC_BOTH) + validator.check_int_range('axis or its elements', a, -ndim, ndim - 1, Rel.INC_BOTH, self.name) if x_shape[a] != 1: raise ValueError('Cannot select an axis to squeeze out which has size not equal to one.') ret = [x_shape[i] for i in range(ndim) if not (i in axis or (i - ndim) in axis)] return ret def infer_dtype(self, x_dtype): - validator.check_subclass("x", x_dtype, mstype.tensor) + validator.check_subclass("x", x_dtype, mstype.tensor, self.name) return x_dtype @@ -467,14 +464,13 @@ class Transpose(PrimitiveWithInfer): if len(x_shape) != len(p_value): raise ValueError('The dimension of x and perm must be equal.') - validator.check_const_input("perm", p_value) - validator.check_type("p_value", p_value, [tuple]) - validator.check_subclass("x_type", x_type, mstype.tensor) + validator.check_value_type("p_value", p_value, [tuple], self.name) + validator.check_subclass("x_type", x_type, mstype.tensor, self.name) tmp = list(p_value) for i, dim in enumerate(p_value): - validator.check_integer("perm[%d]" % i, dim, 0, Rel.GE) - validator.check_integer("perm[%d]" % i, dim, len(p_value), Rel.LT) + validator.check_integer("perm[%d]" % i, dim, 0, Rel.GE, self.name) + validator.check_integer("perm[%d]" % i, dim, len(p_value), Rel.LT, self.name) tmp.remove(dim) if dim in tmp: raise ValueError('The value of perm is wrong.') @@ -517,15 +513,13 @@ class GatherV2(PrimitiveWithInfer): self.init_prim_io_names(inputs=['params', 'indices', 'axis'], outputs=['output']) def __infer__(self, params, indices, axis): - validator.check_subclass("params", params['dtype'], mstype.tensor) - validator.check_subclass("indices", indices['dtype'], mstype.tensor) - validator.check_subclass("axis", axis['dtype'], mstype.int_) - validator.check_typename("element of indices", indices['dtype'], 
mstype.int_type) - validator.check_const_input("axis", axis['value']) + validator.check_subclass("params", params['dtype'], mstype.tensor, self.name) + validator.check_tensor_type_same({"indices": indices['dtype']}, mstype.int_type, self.name) + validator.check_subclass("axis", axis['dtype'], mstype.int_, self.name) axis_v = axis['value'] params_shp = params['shape'] rank = len(params_shp) - validator.check_int_range("axis", axis_v, -rank, rank, Rel.INC_LEFT) + validator.check_int_range("axis", axis_v, -rank, rank, Rel.INC_LEFT, self.name) if axis_v < 0: axis_v += rank out_shape = params_shp[:axis_v] + indices['shape'] + params_shp[axis_v + 1:] @@ -564,19 +558,20 @@ class Split(PrimitiveWithInfer): @prim_attr_register def __init__(self, axis=0, output_num=1): """init Split""" - validator.check_type("axis", axis, [int]) - validator.check_type("output_num", output_num, [int]) + validator.check_value_type("axis", axis, [int], self.name) + validator.check_value_type("output_num", output_num, [int], self.name) self.axis = axis self.output_num = output_num def __infer__(self, x): - validator.check_subclass("x", x['dtype'], mstype.tensor) + validator.check_subclass("x", x['dtype'], mstype.tensor, self.name) x_shape = list(x['shape']) dim = len(x_shape) - validator.check_int_range('axis value', self.axis, -dim, dim, Rel.INC_LEFT) - validator.check_integer("output_num", self.output_num, 0, Rel.GT) + validator.check_int_range('axis value', self.axis, -dim, dim, Rel.INC_LEFT, self.name) + validator.check_integer("output_num", self.output_num, 0, Rel.GT, self.name) output_valid_check = x_shape[self.axis] % self.output_num - validator.check_integer("the dimension which to split divides output_num", output_valid_check, 0, Rel.EQ) + validator.check_integer("the dimension which to split divides output_num", output_valid_check, 0, Rel.EQ, + self.name) x_shape[self.axis] = int(x_shape[self.axis] / self.output_num) out_shapes = [] out_dtypes = [] @@ -615,7 +610,7 @@ class 
Rank(PrimitiveWithInfer): """init Rank""" def __infer__(self, x): - validator.check_subclass("x", x['dtype'], mstype.tensor) + validator.check_subclass("x", x['dtype'], mstype.tensor, self.name) out = {'shape': None, 'dtype': None, 'value': len(x['shape'])} @@ -633,34 +628,29 @@ class TruncatedNormal(PrimitiveWithInfer): dtype (:class:`mindspore.dtype`): Data type. Default: mindspore.float32. Inputs: - - **shape** (Tensor) - Shape of output tensor. The shape is a 1-D tensor, and type is int. + - **shape** (tuple[int]) - Shape of output tensor, is a tuple of positive int. Outputs: Tensor, type of output tensor is same as attribute `dtype`. Examples: - >>> input_shape = Tensor(np.array([1, 2, 3])) + >>> shape = (1, 2, 3) >>> truncated_normal = P.TruncatedNormal() - >>> output = truncated_normal(input_shape) + >>> output = truncated_normal(shape) """ @prim_attr_register def __init__(self, seed=0, dtype=mstype.float32): """init TruncatedNormal""" - validator.check_type('seed', seed, [int]) - validator.check_typename('dtype', dtype, mstype.number_type) + validator.check_value_type('seed', seed, [int], self.name) + validator.check_type_same({'dtype': dtype}, mstype.number_type, self.name) def __infer__(self, shape): - shape_t = shape['value'] - validator.check_subclass("shape", shape['dtype'], mstype.tensor) - shape_n = shape_t.asnumpy() - if shape_n.ndim != 1: - raise ValueError('The rank of input shape must be 1.') - if shape_n.dtype not in (np.int32, np.int64): - raise TypeError('The type of input shape must be int32 or int64.') - for i, item in enumerate(shape_n): - validator.check_integer(f"shape[{i}]", item.item(), 0, Rel.GT) - out = {'shape': tuple(shape_n), + shape_value = shape['value'] + validator.check_value_type("shape", shape_value, [tuple], self.name) + for i, value in enumerate(shape_value): + validator.check_integer(f'{i}th value of shape', value, 0, Rel.GT, self.name) + out = {'shape': shape_value, 'dtype': mstype.tensor_type(self.dtype), 'value': None} 
return out @@ -691,7 +681,7 @@ class Size(PrimitiveWithInfer): def __infer__(self, x): size = 1 - validator.check_subclass("x", x['dtype'], mstype.tensor) + validator.check_subclass("x", x['dtype'], mstype.tensor, self.name) shp = x['shape'] if not shp: size = 0 @@ -727,25 +717,20 @@ class Fill(PrimitiveWithInfer): """init Fill""" def __infer__(self, dtype, dims, x): - validator.check_const_input("type", dtype['value']) - validator.check_const_input("shape", dims['value']) - validator.check_const_input("value", x['value']) - validator.check_type("shape", dims['value'], [tuple]) - validator.check_type("value", x['value'], [numbers.Number, bool]) - for item in dims['value']: - validator.check_type("item", item, [int]) - validator.check_integer("item", item, 0, Rel.GT) - x_dtype = dtype['value'] + validator.check_value_type("shape", dims['value'], [tuple], self.name) + validator.check_value_type("value", x['value'], [numbers.Number, bool], self.name) + for idx, item in enumerate(dims['value']): + validator.check_integer("dims[%d]" % idx, item, 0, Rel.GT, self.name) valid_types = [mstype.bool_, mstype.int8, mstype.int32, mstype.int64, mstype.uint8, mstype.uint32, mstype.uint64, mstype.float16, mstype.float32, mstype.float64] - validator.check_typename("value", x_dtype, valid_types) - x_nptype = mstype.dtype_to_nptype(x_dtype) + validator.check_type_same({"value": dtype['value']}, valid_types, self.name) + x_nptype = mstype.dtype_to_nptype(dtype['value']) ret = np.full(dims['value'], x['value'], x_nptype) out = { 'value': Tensor(ret), 'shape': dims['value'], - 'dtype': x_dtype, + 'dtype': x['dtype'], } return out @@ -776,8 +761,7 @@ class OnesLike(PrimitiveWithInfer): return x_shape def infer_dtype(self, x_dtype): - validator.check_subclass("x", x_dtype, mstype.tensor) - validator.check_typename('x_dtype', x_dtype, mstype.number_type + (mstype.bool_,)) + validator.check_tensor_type_same({'x': x_dtype}, mstype.number_type + (mstype.bool_,), self.name) return x_dtype @@ 
-808,8 +792,7 @@ class ZerosLike(PrimitiveWithInfer): return x_shape def infer_dtype(self, x_dtype): - validator.check_subclass("x", x_dtype, mstype.tensor) - validator.check_typename('x_dtype', x_dtype, mstype.number_type + (mstype.bool_,)) + validator.check_tensor_type_same({'x': x_dtype}, mstype.number_type + (mstype.bool_,), self.name) return x_dtype @@ -834,14 +817,13 @@ class TupleToArray(PrimitiveWithInfer): """init TupleToArray""" def infer_value(self, x): - validator.check_const_input("x", x) - validator.check_type("x", x, [tuple]) - validator.check("size of x", len(x), '', 0, Rel.GT) + validator.check_value_type("x", x, [tuple], self.name) + validator.check("size of x", len(x), '', 0, Rel.GT, self.name) dtype = type(x[0]) for i, item in enumerate(x): - validator.check_type(f"x[{i}]", item, [numbers.Number]) + validator.check_value_type(f"x[{i}]", item, [numbers.Number], self.name) if not all(isinstance(item, dtype) for item in x): - raise TypeError("All elements of input x must be have same type.") + raise TypeError("For \'{self.name}\' all elements of input x must be have same type.") if isinstance(x[0], int): ret = np.array(x, np.int32) else: @@ -871,8 +853,7 @@ class ScalarToArray(PrimitiveWithInfer): pass def infer_value(self, x): - validator.check_const_input("x", x) - validator.check_type("x", x, [int, float]) + validator.check_value_type("x", x, [int, float], self.name) if isinstance(x, int): ret = np.array(x, np.int32) else: @@ -903,9 +884,8 @@ class ScalarToTensor(PrimitiveWithInfer): pass def infer_value(self, x, dtype=mstype.float32): - validator.check_const_input("x", x) - validator.check_type("x", x, [int, float]) - validator.check_subclass("dtype", dtype, mstype.number, with_type_of=False) + validator.check_value_type("x", x, [int, float], self.name) + validator.check_subclass("dtype", dtype, mstype.number, self.name) data_type = mstype.dtype_to_nptype(dtype) return Tensor(np.array(x, data_type)) @@ -947,15 +927,14 @@ class 
InvertPermutation(PrimitiveWithInfer): def __infer__(self, x): x_shp = x['shape'] x_value = x['value'] - validator.check_const_input("shape", x_shp) - validator.check_type("shape", x_shp, [tuple]) + validator.check_value_type("shape", x_shp, [tuple], self.name) z = [x_value[i] for i in range(len(x_value))] z.sort() y = [None]*len(x_value) for i, value in enumerate(x_value): - validator.check_type("input[%d]" % i, value, [int]) - validator.check(f'value', z[i], f'index', i) + validator.check_value_type("input[%d]" % i, value, [int], self.name) + validator.check(f'value', z[i], f'index', i, Rel.EQ, self.name) y[value] = i z.append(value) return {'shape': x_shp, @@ -990,8 +969,8 @@ class Argmax(PrimitiveWithInfer): def __init__(self, axis=-1, output_type=mstype.int64): """init Argmax""" self.init_prim_io_names(inputs=['x'], outputs=['output']) - validator.check_type("axis", axis, [int]) - validator.check_typename('output_type', output_type, [mstype.int32, mstype.int64]) + validator.check_value_type("axis", axis, [int], self.name) + validator.check_type_same({'output': output_type}, [mstype.int32, mstype.int64], self.name) self.axis = axis self.add_prim_attr('output_type', output_type) @@ -1000,14 +979,13 @@ class Argmax(PrimitiveWithInfer): if axis is None: axis = 0 x_rank = len(x_shape) - validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT) + validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT, self.name) axis = axis + x_rank if axis < 0 else axis ouput_shape = [x_shape[i] for i in range(x_rank) if i != axis] return ouput_shape def infer_dtype(self, x_dtype): - validator.check_subclass("input_x", x_dtype, mstype.tensor) - validator.check_typename('input_x', x_dtype, [mstype.float32, mstype.float16]) + validator.check_subclass("input_x", x_dtype, mstype.tensor, self.name) return mstype.tensor_type(self.output_type) @@ -1039,7 +1017,8 @@ class Argmin(PrimitiveWithInfer): def __init__(self, axis=-1, output_type=mstype.int64): """init 
Argmin""" self.init_prim_io_names(inputs=['x'], outputs=['output']) - validator.check_type("axis", axis, [int]) + validator.check_value_type("axis", axis, [int], self.name) + validator.check_type_name("output_type", output_type, [mstype.int32, mstype.int64], self.name) self.axis = axis self.add_prim_attr('output_type', output_type) @@ -1048,13 +1027,13 @@ class Argmin(PrimitiveWithInfer): if axis is None: axis = 0 x_rank = len(x_shape) - validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT) + validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT, self.name) axis = axis + x_rank if axis < 0 else axis ouput_shape = [x_shape[i] for i in range(x_rank) if i != axis] return ouput_shape def infer_dtype(self, x_dtype): - validator.check_subclass("input_x", x_dtype, mstype.tensor) + validator.check_subclass("input_x", x_dtype, mstype.tensor, self.name) return mstype.tensor_type(self.output_type) @@ -1091,17 +1070,17 @@ class ArgMaxWithValue(PrimitiveWithInfer): """init ArgMaxWithValue""" self.axis = axis self.keep_dims = keep_dims - _check_infer_attr_reduce(axis, keep_dims) + _check_infer_attr_reduce(axis, keep_dims, self.name) def infer_shape(self, x_shape): axis = self.axis x_rank = len(x_shape) - validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT) + validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT, self.name) ouput_shape = _infer_shape_reduce(x_shape, self.axis, self.keep_dims, self.name) return ouput_shape, ouput_shape def infer_dtype(self, x_dtype): - validator.check_subclass("input_x", x_dtype, mstype.tensor) + validator.check_subclass("input_x", x_dtype, mstype.tensor, self.name) return mstype.tensor_type(mstype.int32), x_dtype @@ -1137,17 +1116,17 @@ class ArgMinWithValue(PrimitiveWithInfer): """init ArgMinWithValue""" self.axis = axis self.keep_dims = keep_dims - _check_infer_attr_reduce(axis, keep_dims) + _check_infer_attr_reduce(axis, keep_dims, self.name) def infer_shape(self, x_shape): 
axis = self.axis x_rank = len(x_shape) - validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT) + validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT, self.name) ouput_shape = _infer_shape_reduce(x_shape, self.axis, self.keep_dims, self.name) return ouput_shape, ouput_shape def infer_dtype(self, x_dtype): - validator.check_subclass("input_x", x_dtype, mstype.tensor) + validator.check_subclass("input_x", x_dtype, mstype.tensor, self.name) return mstype.tensor_type(mstype.int32), x_dtype @@ -1187,13 +1166,11 @@ class Tile(PrimitiveWithInfer): def __infer__(self, x, multiples): multiples_v = multiples['value'] x_shp = x['shape'] - validator.check_const_input("shape", multiples_v) - validator.check_type("shape", multiples_v, [tuple]) + validator.check_value_type("shape", multiples_v, [tuple], self.name) for i, multiple in enumerate(multiples_v): - validator.check_type("multiples[%d]" % i, multiple, [int]) - validator.check_typename('x', x['dtype'], - [mstype.int16, mstype.int32, mstype.bool_, - mstype.float16, mstype.float32]) + validator.check_value_type("multiples[%d]" % i, multiple, [int], self.name) + valid_types = [mstype.int16, mstype.int32, mstype.bool_, mstype.float16, mstype.float32] + validator.check_tensor_type_same({'x': x['dtype']}, valid_types, self.name) len_sub = len(multiples_v) - len(x_shp) multiples_w = None if len_sub == 0: @@ -1203,7 +1180,8 @@ class Tile(PrimitiveWithInfer): x_shp.insert(0, 1) multiples_w = multiples_v elif len_sub < 0: - raise ValueError("The length of multiples can not be smaller than the length of dimension in input_x.") + raise ValueError(f'For \'{self.name}\' the length of multiples can not be smaller than ' + f'the length of dimension in input_x.') for i, a in enumerate(multiples_w): x_shp[i] *= a value = None @@ -1235,8 +1213,8 @@ class UnsortedSegmentSum(PrimitiveWithInfer): Tensor, the shape is :math:`(z, x_{N+1}, ..., x_R)`. 
Examples: - >>> input_x = [1, 2, 3, 4] - >>> segment_ids = [0, 0, 1, 2] + >>> input_x = Tensor([1, 2, 3, 4], mindspore.float) + >>> segment_ids = Tensor([0, 0, 1, 2], mindspore.int32) >>> num_segments = 4 >>> P.UnsortedSegmentSum()(input_x, segment_ids, num_segments) [3, 3, 4, 0] @@ -1250,23 +1228,23 @@ class UnsortedSegmentSum(PrimitiveWithInfer): def __infer__(self, x, segment_ids, num_segments): x_type = x['dtype'] x_shp = x['shape'] - validator.check_subclass("input_x", x_type, mstype.tensor) - validator.check_type("x_shape", x_shp, [list]) + validator.check_subclass("input_x", x_type, mstype.tensor, self.name) + validator.check_value_type("x_shape", x_shp, [list], self.name) x_shp_len = len(x_shp) - validator.check_integer("rank of input_x", x_shp_len, 0, Rel.GT) + validator.check_integer("rank of input_x", x_shp_len, 0, Rel.GT, self.name) segment_ids_shp = segment_ids['shape'] segment_ids_type = segment_ids['dtype'] - validator.check_subclass("segment_ids", segment_ids_type, mstype.tensor) - validator.check_type("segment_ids", segment_ids_shp, [list]) + validator.check_subclass("segment_ids", segment_ids_type, mstype.tensor, self.name) + validator.check_value_type("segment_ids", segment_ids_shp, [list], self.name) segment_ids_shp_len = len(segment_ids_shp) - validator.check_integer("rank of segment_ids", segment_ids_shp_len, 0, Rel.GT) + validator.check_integer("rank of segment_ids", segment_ids_shp_len, 0, Rel.GT, self.name) validator.check(f'rank of input_x', len(x_shp), - 'rank of segments_id', len(segment_ids_shp), Rel.GE) + 'rank of segments_id', len(segment_ids_shp), Rel.GE, self.name) for i, value in enumerate(segment_ids_shp): - validator.check("ids[%d]" % i, value, 'input[%d]' % i, x_shp[i]) + validator.check("ids[%d]" % i, value, 'input[%d]' % i, x_shp[i], Rel.EQ, self.name) num_segments_v = num_segments['value'] - validator.check_type('num_segments', num_segments_v, [int]) - validator.check_integer("num_segments", num_segments_v, 0, Rel.GT) + 
validator.check_value_type('num_segments', num_segments_v, [int], self.name) + validator.check_integer("num_segments", num_segments_v, 0, Rel.GT, self.name) shp = [num_segments_v] shp += x_shp[segment_ids_shp_len:] out = {'shape': shp, @@ -1310,7 +1288,7 @@ class Concat(PrimitiveWithInfer): def __init__(self, axis=0): """init Tile""" self.__setattr_flag__ = True - validator.check_type("axis", axis, [int]) + validator.check_value_type("axis", axis, [int], self.name) def __infer__(self, input_x): axis = self.axis @@ -1327,25 +1305,25 @@ class Concat(PrimitiveWithInfer): return out -def _get_pack_shape(x_shape, x_type, axis): +def _get_pack_shape(x_shape, x_type, axis, prim_name): """for pack output shape""" - validator.check_type("shape", x_shape, [tuple, list]) - validator.check_integer("len of input_x shape", len(x_shape), 0, Rel.GT) - validator.check_subclass("shape0", x_type[0], mstype.tensor) - validator.check_integer("len of input_x0 shape", len(x_shape[0]), 0, Rel.GT) + validator.check_value_type("shape", x_shape, [tuple, list], prim_name) + validator.check_integer("len of input_x", len(x_shape), 1, Rel.GT, prim_name) + validator.check_subclass("input_x[0]", x_type[0], mstype.tensor, prim_name) + validator.check_integer("len of input_x0 shape", len(x_shape[0]), 0, Rel.GT, prim_name) rank_base = len(x_shape[0]) N = len(x_shape) out_shape = x_shape[0] - validator.check_int_range('axis', axis, -rank_base - 1, rank_base, Rel.INC_BOTH) + validator.check_int_range('axis', axis, -rank_base - 1, rank_base, Rel.INC_BOTH, prim_name) if axis < 0: axis = axis + rank_base + 1 for i in range(1, N): v = x_shape[i] - validator.check('len of x_shape[%d]' % i, len(v), 'len of rank_base', rank_base) - validator.check('x_type[%d]' % i, x_type[i], 'base', x_type[0]) + validator.check('len of x_shape[%d]' % i, len(v), 'len of rank_base', rank_base, Rel.EQ, prim_name) + validator.check('x_type[%d]' % i, x_type[i], 'base', x_type[0], Rel.EQ, prim_name, TypeError) for j in 
range(rank_base): if v[j] != x_shape[0][j]: - raise ValueError("Pack evaluator element %d shape in input can not pack with first element" % i) + raise ValueError(f"For \'{prim_name}\' element {i} shape in input can not pack with first element") out_shape.insert(axis, N) return out_shape @@ -1368,6 +1346,12 @@ class Pack(PrimitiveWithInfer): Outputs: Tensor. A packed Tensor with the same type as `input_x`. + Raises: + TypeError: If the data types of elements in input_x are not the same. + ValueError: If length of input_x is not greater than 1; + or if axis is out of the range [-(R+1), R+1); + or if the shapes of elements in input_x are not the same. + Examples: >>> data1 = Tensor(np.array([0, 1]).astype(np.float32)) >>> data2 = Tensor(np.array([2, 3]).astype(np.float32)) @@ -1380,14 +1364,14 @@ class Pack(PrimitiveWithInfer): def __init__(self, axis=0): """init Pack""" self.__setattr_flag__ = True - validator.check_type("axis", axis, [int]) + validator.check_value_type("axis", axis, [int], self.name) self.axis = axis def __infer__(self, value): x_shape = value['shape'] x_type = value['dtype'] self.add_prim_attr('num', len(x_shape)) - all_shape = _get_pack_shape(x_shape, x_type, self.axis) + all_shape = _get_pack_shape(x_shape, x_type, self.axis, self.name) out = {'shape': all_shape, 'dtype': x_type[0], 'value': None} @@ -1408,8 +1392,6 @@ class Unpack(PrimitiveWithInfer): Args: axis (int): Dimension along which to pack. Default: 0. Negative values wrap around. The range is [-R, R). - num (int): The number of tensors to be unpacked to. Default : "None". - If `num` is not specified, it is inferred from the shape of `input_x`. Inputs: - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`. @@ -1419,8 +1401,7 @@ class Unpack(PrimitiveWithInfer): A tuple of Tensors, the shape of each objects is same. Raises: - ValueError: If axis is out of the range [-len(input_x.shape()), len(input_x.shape())), - or if len(input_x.shape[axis]) not equal to num. 
+ ValueError: If axis is out of the range [-len(input_x.shape()), len(input_x.shape())). Examples: >>> unpack = P.Unpack() @@ -1433,22 +1414,23 @@ class Unpack(PrimitiveWithInfer): def __init__(self, axis=0): """init Unpack""" self.__setattr_flag__ = True - validator.check_type("axis", axis, [int]) + validator.check_value_type("axis", axis, [int], self.name) self.axis = axis def __infer__(self, x): - validator.check_subclass("x", x['dtype'], mstype.tensor) + validator.check_subclass("x", x['dtype'], mstype.tensor, self.name) x_shape = list(x['shape']) dim = len(x_shape) - validator.check_int_range('axis value', self.axis, -dim, dim, Rel.INC_LEFT) + validator.check_int_range('axis value', self.axis, -dim, dim, Rel.INC_LEFT, self.name) if self.axis < 0: self.axis = self.axis + dim output_num = x_shape[self.axis] - validator.check_type("num", output_num, [int]) - validator.check_integer("output_num", output_num, 0, Rel.GT) + validator.check_value_type("num", output_num, [int], self.name) + validator.check_integer("output_num", output_num, 0, Rel.GT, self.name) self.add_prim_attr('num', output_num) output_valid_check = x_shape[self.axis] - output_num - validator.check_integer("The dimension which to unpack divides output_num", output_valid_check, 0, Rel.EQ) + validator.check_integer("The dimension which to unpack divides output_num", output_valid_check, 0, Rel.EQ, + self.name) out_shapes = [] out_dtypes = [] out_shape = x_shape[:self.axis] + x_shape[self.axis + 1:] @@ -1490,8 +1472,8 @@ class Slice(PrimitiveWithInfer): def __infer__(self, x, begin, size): x_shape = x['shape'] x_shp_len = len(x_shape) - validator.check_const_input('begin', begin['value']) - validator.check_const_input('size', size['value']) + validator.check_const_input('begin', begin['value'], self.name) + validator.check_const_input('size', size['value'], self.name) begin_v, size_v = begin['value'], size['value'] if begin_v is None or size_v is None: return {'shape': None, @@ -1503,7 +1485,8 @@ class 
Slice(PrimitiveWithInfer): for i in range(x_shp_len): if x_shape[i] < begin_v[i] + size_v[i]: y = begin_v[i] + size_v[i] - raise ValueError("Slice shape can not bigger than orign shape %d, %d." % (x_shape[i], y)) + raise ValueError("For '%s' slice shape can not bigger than orign shape %d, %d." % + (self.name, x_shape[i], y)) return {'shape': size_v, 'dtype': x['dtype'], 'value': None} @@ -1569,11 +1552,11 @@ class Select(PrimitiveWithInfer): def infer_dtype(self, cond_type, x_type, y_type): self.add_prim_attr('T', x_type) - validator.check_subclass("x_type", x_type, mstype.tensor) - validator.check_subclass("y_type", y_type, mstype.tensor) - validator.check_typename("cond_type", cond_type, [mstype.bool_]) + validator.check_subclass("x_type", x_type, mstype.tensor, self.name) + validator.check_subclass("y_type", y_type, mstype.tensor, self.name) + validator.check_tensor_type_same({"cond": cond_type}, [mstype.bool_], self.name) if x_type != y_type: - raise TypeError('The x_type %s must be the same as y_type %s.' % (x_type, y_type)) + raise TypeError('\'%s\' the x_type %s must be the same as y_type %s.' 
% (self.name, x_type, y_type)) return x_type @@ -1641,27 +1624,24 @@ class StridedSlice(PrimitiveWithInfer): shrink_axis_mask=0): """init StrideSlice""" self.init_prim_io_names(inputs=['x', 'begin', 'end', 'strides'], outputs=['output']) - validator.check_type('begin_mask', begin_mask, [int]) - validator.check_type('end_mask', end_mask, [int]) - validator.check_type('ellipsis_mask', ellipsis_mask, [int]) - validator.check_type('new_axis_mask', new_axis_mask, [int]) - validator.check_type('shrink_axis_mask', shrink_axis_mask, [int]) + validator.check_value_type('begin_mask', begin_mask, [int], self.name) + validator.check_value_type('end_mask', end_mask, [int], self.name) + validator.check_value_type('ellipsis_mask', ellipsis_mask, [int], self.name) + validator.check_value_type('new_axis_mask', new_axis_mask, [int], self.name) + validator.check_value_type('shrink_axis_mask', shrink_axis_mask, [int], self.name) def __infer__(self, x, begin, end, strides): - begin_shape, end_shape, strides_shape = begin['shape'], end['shape'], strides['shape'] - if begin_shape != strides_shape or end_shape != strides_shape: - raise ValueError("The shape of begin, end and strides in 'StridedSlice' must be equal.") - - validator.check_const_input("begin", begin['value']) - validator.check_const_input("end", end['value']) - validator.check_const_input("strides", strides['value']) - validator.check_type("begin", begin['value'], [tuple]) - validator.check_type("end", end['value'], [tuple]) - validator.check_type("strides", strides['value'], [tuple]) + begin_v, end_v, strides_v = begin['value'], end['value'], strides['value'] + validator.check_value_type("begin", begin_v, [tuple], self.name) + validator.check_value_type("end", end_v, [tuple], self.name) + validator.check_value_type("strides", strides_v, [tuple], self.name) x_shape = x['shape'] x_shp_len = len(x_shape) - begin_v, end_v, strides_v = begin['value'], end['value'], strides['value'] + if len(begin_v) != x_shp_len or len(end_v) != 
x_shp_len or len(strides_v) != x_shp_len: + raise ValueError(f"For \'{self.name}\' the length of begin index{begin_v}, end index{end_v} and " + f"strides{strides_v} must be equal to the dims({x_shp_len}) of input.") + ret_shape = [] append_dimensions = [] shrink_pos = bin(self.shrink_axis_mask)[::-1] @@ -1673,8 +1653,8 @@ class StridedSlice(PrimitiveWithInfer): append_dimensions.append(x_shape[x_shp_len - 1 - len(append_dimensions)]) continue if i < (len(shrink_pos) - 2) and shrink_pos[i] == '1': - validator.check_integer(f'begin[{i}]', begin_v[i], -x_shape[i], Rel.GE) - validator.check_integer(f'begin[{i}]', begin_v[i], x_shape[i], Rel.LT) + validator.check_integer(f'begin[{i}]', begin_v[i], -x_shape[i], Rel.GE, self.name) + validator.check_integer(f'begin[{i}]', begin_v[i], x_shape[i], Rel.LT, self.name) continue begin_idx = begin_v[i] @@ -1684,9 +1664,9 @@ class StridedSlice(PrimitiveWithInfer): begin_idx = 0 if self.end_mask: end_idx = x_shape[i] - validator.check_integer(f'begin[{i}]', begin_idx, x_shape[i], Rel.LE) - validator.check_integer(f'end[{i}]', end_idx, x_shape[i], Rel.LE) - validator.check_integer(f'strides[{i}]', strides_idx, 0, Rel.NE) + validator.check_integer(f'begin[{i}]', begin_idx, x_shape[i], Rel.LE, self.name) + validator.check_integer(f'end[{i}]', end_idx, x_shape[i], Rel.LE, self.name) + validator.check_integer(f'strides[{i}]', strides_idx, 0, Rel.NE, self.name) if strides_idx > 0: # If sliced forward , end_idx >= begin_idx validator.check(f'begin[{i}]', begin_idx, f'end[{i}]', end_idx, Rel.LE) @@ -1740,7 +1720,7 @@ class Diag(PrimitiveWithInfer): """init Diag""" def infer_dtype(self, x_type): - validator.check_subclass('input_x', x_type, mstype.tensor) + validator.check_subclass('input_x', x_type, mstype.tensor, self.name) return x_type def infer_shape(self, x_shape): @@ -1752,7 +1732,9 @@ class Diag(PrimitiveWithInfer): def infer_value(self, x): if x is None: return None - validator.check("input x rank", len(x.shape()), "", 1) + # do 
constant-folding only when x rank is 1 + if len(x.shape()) != 1: + return None ret = np.diag(x.asnumpy()) return Tensor(ret) @@ -1778,7 +1760,7 @@ class DiagPart(PrimitiveWithInfer): >>> [0, 0, 3, 0], >>> [0, 0, 0, 4]]) >>> diag_part = P.DiagPart() - >>> diag_part(x) + >>> diag_part(input_x) [1, 2, 3, 4] """ @@ -1787,13 +1769,13 @@ class DiagPart(PrimitiveWithInfer): """init DiagPart""" def infer_dtype(self, x_type): - validator.check_subclass('input_x', x_type, mstype.tensor) + validator.check_subclass('input_x', x_type, mstype.tensor, self.name) return x_type def infer_shape(self, x_shape): if len(x_shape)%2 != 0 or \ not x_shape: - raise ValueError(f"DiagPart input rank must be non-zero and even, but got rank {len(x_shape)}, " + raise ValueError(f"For \'{self.name}\' input rank must be non-zero and even, but got rank {len(x_shape)}, " f"with shapes {x_shape}") length = len(x_shape) // 2 ret_shape = x_shape[0:length] @@ -1802,7 +1784,9 @@ class DiagPart(PrimitiveWithInfer): def infer_value(self, x): if x is None: return None - validator.check("x rank", len(x.shape()), "", 2) + # do constant-folding only when x rank is 2 + if len(x.shape()) != 2: + return None ret = np.diag(x.asnumpy()) return Tensor(ret) @@ -1830,12 +1814,10 @@ class Eye(PrimitiveWithInfer): """init Eye""" def infer_value(self, n, m, t): - validator.check_type("n", n, [int]) - validator.check_integer("n", n, 0, Rel.GT) - validator.check_type("m", m, [int]) - validator.check_integer("m", m, 0, Rel.GT) + validator.check_integer("n", n, 0, Rel.GT, self.name) + validator.check_integer("m", m, 0, Rel.GT, self.name) args = {"dtype": t} - validator.check_type_same(args, mstype.number_type + (mstype.bool_,)) + validator.check_type_same(args, mstype.number_type + (mstype.bool_,), self.name) np_type = mstype.dtype_to_nptype(t) ret = np.eye(n, m, dtype=np_type) return Tensor(ret) @@ -1870,16 +1852,15 @@ class ScatterNd(PrimitiveWithInfer): def __infer__(self, indices, update, shape): shp = shape['value'] - 
validator.check_subclass("indices_dtype", indices['dtype'], mstype.tensor) - validator.check_subclass("update_dtype", update['dtype'], mstype.tensor) - validator.check_typename("indices_dtype", indices['dtype'], mstype.int_type) - validator.check_type("shape", shp, [tuple]) + validator.check_subclass("update_dtype", update['dtype'], mstype.tensor, self.name) + validator.check_tensor_type_same({"indices": indices['dtype']}, mstype.int_type, self.name) + validator.check_value_type("shape", shp, [tuple], self.name) for i, x in enumerate(shp): - validator.check_integer("shape[%d]" % i, x, 0, Rel.GT) + validator.check_integer("shape[%d]" % i, x, 0, Rel.GT, self.name) indices_shape, update_shape = indices["shape"], update["shape"] if indices_shape[0] != update_shape[0]: - raise ValueError('The indices_shape[0] and update_shape[0] must be equal.') + raise ValueError(f'For \'{self.name}\' The indices_shape[0] and update_shape[0] must be equal.') return {'shape': shp, 'dtype': update['dtype'], @@ -1914,10 +1895,15 @@ class ResizeNearestNeighbor(PrimitiveWithInfer): @prim_attr_register def __init__(self, size, align_corners=False): """Init ResizeNearestNeighbor""" + validator.check_value_type("size", size, [tuple, list], self.name) + validator.check_value_type("align_corners", align_corners, [bool], self.name) + validator.check_integer("length of size", len(size), 2, Rel.EQ, self.name) + for i, value in enumerate(size): + validator.check_integer(f'{i}th value of size', value, 0, Rel.GE, self.name) self.init_prim_io_names(inputs=['image_in'], outputs=['image_out']) def infer_shape(self, x): - validator.check('the dimension of input_x', len(x), '', 2, Rel.GE) + validator.check('the dimension of input_x', len(x), '', 2, Rel.GE, self.name) return tuple(x)[:-2] + tuple(self.size) def infer_dtype(self, x): @@ -1951,13 +1937,12 @@ class GatherNd(PrimitiveWithInfer): def infer_shape(self, x_shape, indices_shape): validator.check('the dimension of x', len(x_shape), - 'the dimension 
of indices', indices_shape[-1], Rel.GE) + 'the dimension of indices', indices_shape[-1], Rel.GE, self.name) return indices_shape[:-1] + x_shape[indices_shape[-1]:] def infer_dtype(self, x_dtype, indices_dtype): - validator.check_subclass("x_dtype", x_dtype, mstype.tensor) - validator.check_subclass("indices_dtype", indices_dtype, mstype.tensor) - validator.check_typename("indices_dtype", indices_dtype, mstype.int_type) + validator.check_subclass("x_dtype", x_dtype, mstype.tensor, self.name) + validator.check_tensor_type_same({"indices": indices_dtype}, mstype.int_type, self.name) return x_dtype @@ -1985,6 +1970,11 @@ class ScatterNdUpdate(PrimitiveWithInfer): >>> op = P.ScatterNdUpdate() >>> output = op(input_x, indices, update) """ + __mindspore_signature__ = ( + ('input_x', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD), + ('indices', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD), + ('value', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD) + ) @prim_attr_register def __init__(self, use_locking=True): @@ -1999,12 +1989,9 @@ class ScatterNdUpdate(PrimitiveWithInfer): return x_shape def infer_dtype(self, x_dtype, indices_dtype, value_dtype): - validator.check_subclass("x_dtype", x_dtype, mstype.tensor) - validator.check_subclass("indices_dtype", indices_dtype, mstype.tensor) - validator.check_subclass("value_dtype", value_dtype, mstype.tensor) - validator.check_typename('indices_dtype', indices_dtype, mstype.int_type) - args = {"x_dtype": x_dtype, "value_dtype": value_dtype} - validator.check_type_same(args, (mstype.bool_,) + mstype.number_type) + validator.check_tensor_type_same({'indices': indices_dtype}, mstype.int_type, self.name) + args = {"x": x_dtype, "value": value_dtype} + validator.check_tensor_type_same(args, (mstype.bool_,) + mstype.number_type, self.name) return x_dtype @@ -2042,7 +2029,7 @@ class SpaceToDepth(PrimitiveWithInfer): def __init__(self, block_size): """Init SpaceToDepth""" self.init_prim_io_names(inputs=['x'], outputs=['y']) - 
validator.check_type('block_size', block_size, [int]) + validator.check_value_type('block_size', block_size, [int], self.name) validator.check('block_size', block_size, '', 2, Rel.GE) self.block_size = block_size self.add_prim_attr("data_format", "NCHW") @@ -2052,7 +2039,7 @@ class SpaceToDepth(PrimitiveWithInfer): out_shape = copy.deepcopy(x_shape) for i in range(2): if out_shape[i+2] % self.block_size != 0: - raise ValueError(f'SpaceToDepth input shape[{i+2}] {out_shape[i+2]} should be ' + raise ValueError(f'For \'{self.name}\' input shape[{i+2}] {out_shape[i+2]} should be ' f'fully divided by block_size {self.block_size}') out_shape[i+2] //= self.block_size @@ -2060,7 +2047,7 @@ class SpaceToDepth(PrimitiveWithInfer): return out_shape def infer_dtype(self, x_dtype): - validator.check_subclass("x_dtype", x_dtype, mstype.tensor) + validator.check_subclass("x_dtype", x_dtype, mstype.tensor, self.name) return x_dtype @@ -2100,8 +2087,8 @@ class DepthToSpace(PrimitiveWithInfer): def __init__(self, block_size): """Init DepthToSpace""" self.init_prim_io_names(inputs=['x'], outputs=['y']) - validator.check_type('block_size', block_size, [int]) - validator.check('block_size', block_size, '', 2, Rel.GE) + validator.check_value_type('block_size', block_size, [int], self.name) + validator.check('block_size', block_size, '', 2, Rel.GE, self.name) self.block_size = block_size self.add_prim_attr("data_format", "NCHW") @@ -2111,12 +2098,13 @@ class DepthToSpace(PrimitiveWithInfer): for i in range(2): out_shape[i+2] *= self.block_size - validator.check('x_shape[1] % (block_size*block_size)', x_shape[1] % (self.block_size*self.block_size), '', 0) + validator.check_integer('x_shape[1] % (block_size*block_size)', x_shape[1] % (self.block_size*self.block_size), + 0, Rel.EQ, self.name) out_shape[1] //= self.block_size * self.block_size return out_shape def infer_dtype(self, x_dtype): - validator.check_subclass("x_dtype", x_dtype, mstype.tensor) + validator.check_subclass("x_dtype", 
x_dtype, mstype.tensor, self.name) return x_dtype @@ -2163,27 +2151,26 @@ class SpaceToBatch(PrimitiveWithInfer): @prim_attr_register def __init__(self, block_size, paddings): """Init SpaceToBatch""" - validator.check_type('block_size', block_size, [int]) - validator.check('block_size', block_size, '', 1, Rel.GT) + validator.check_value_type('block_size', block_size, [int], self.name) + validator.check('block_size', block_size, '', 1, Rel.GT, self.name) self.block_size = block_size - validator.check('paddings shape', np.array(paddings).shape, '', (2, 2)) + validator.check('paddings shape', np.array(paddings).shape, '', (2, 2), Rel.EQ, self.name) for elem in itertools.chain(*paddings): - validator.check_type('paddings element', elem, [int]) + validator.check_value_type('paddings element', elem, [int], self.name) self.paddings = paddings def infer_dtype(self, x_dtype): - validator.check_subclass("input_x", x_dtype, mstype.tensor) - validator.check_typename('input_x', x_dtype, mstype.number_type) + validator.check_tensor_type_same({'input_x': x_dtype}, mstype.number_type, self.name) return x_dtype def infer_shape(self, x_shape): - validator.check('rank of input_x', len(x_shape), '', 4) + validator.check_integer('rank of input_x', len(x_shape), 4, Rel.EQ, self.name) out_shape = copy.deepcopy(x_shape) for i in range(2): padded = out_shape[i+2] + self.paddings[i][0] + \ self.paddings[i][1] if padded % self.block_size != 0: - raise ValueError(f'padded[{i}] {padded} should be divisible by ' + raise ValueError(f'For \'{self.name}\' padded[{i}] {padded} should be divisible by ' f'block_size {self.block_size}') out_shape[i+2] = padded // self.block_size out_shape[0] *= self.block_size * self.block_size @@ -2231,17 +2218,16 @@ class BatchToSpace(PrimitiveWithInfer): @prim_attr_register def __init__(self, block_size, crops): """Init BatchToSpace""" - validator.check_type('block_size', block_size, [int]) - validator.check('block_size', block_size, '', 1, Rel.GT) + 
validator.check_value_type('block_size', block_size, [int], self.name) + validator.check('block_size', block_size, '', 1, Rel.GT, self.name) self.block_size = block_size validator.check('crops shape', np.array(crops).shape, '', (2, 2)) for elem in itertools.chain(*crops): - validator.check_type('crops element', elem, [int]) + validator.check_value_type('crops element', elem, [int], self.name) self.crops = crops def infer_dtype(self, x_dtype): - validator.check_subclass("input_x", x_dtype, mstype.tensor) - validator.check_typename('input_x', x_dtype, mstype.number_type) + validator.check_tensor_type_same({'input_x': x_dtype}, mstype.number_type, self.name) return x_dtype def infer_shape(self, x_shape): @@ -2250,11 +2236,11 @@ class BatchToSpace(PrimitiveWithInfer): for i in range(2): x_block_prod = out_shape[i+2] * self.block_size crops_sum = self.crops[i][0] + self.crops[i][1] - validator.check("x block shape prod", x_block_prod, 'crops sum', crops_sum, Rel.GT) + validator.check("x block shape prod", x_block_prod, 'crops sum', crops_sum, Rel.GT, self.name) out_shape[i+2] = x_block_prod - crops_sum block_size_prod = self.block_size * self.block_size if out_shape[0] % block_size_prod != 0: - raise ValueError(f'input_x dimension 0 {out_shape[0]} should be divisible by ' + raise ValueError(f'For \'{self.name}\' input_x dimension 0 {out_shape[0]} should be divisible by ' f'block_size_prod {block_size_prod}') out_shape[0] = out_shape[0] // block_size_prod return out_shape diff --git a/mindspore/ops/operations/comm_ops.py b/mindspore/ops/operations/comm_ops.py index fbad5b49d3..5fb5f3ed95 100644 --- a/mindspore/ops/operations/comm_ops.py +++ b/mindspore/ops/operations/comm_ops.py @@ -17,7 +17,7 @@ from ..._checkparam import Validator as validator from ..._checkparam import Rel -from ...communication.management import get_rank, get_group_size, GlobalComm, get_group +from ...communication.management import get_rank, get_group_size, GlobalComm, _get_group from ...common 
import dtype as mstype from ..primitive import PrimitiveWithInfer, prim_attr_register @@ -45,7 +45,6 @@ class AllReduce(PrimitiveWithInfer): Note: The operation of AllReduce does not support "prod" currently. - The input of AllReduce does not support dtype "Bool". Tensor must have same shape and format in all processes participating in the collective. Args: @@ -88,10 +87,10 @@ class AllReduce(PrimitiveWithInfer): raise TypeError("The operation of AllReduce should be str.") if op == ReduceOp.PROD: raise RuntimeError("The operation of AllReduce 'prod' is not supported yet.") - if not isinstance(get_group(group), str): + if not isinstance(_get_group(group), str): raise TypeError("The group of AllReduce should be str.") self.op = op - self.add_prim_attr('group', get_group(group)) + self.add_prim_attr('group', _get_group(group)) self.add_prim_attr('fusion', 0) def vm_impl(self, x): @@ -103,7 +102,7 @@ class AllReduce(PrimitiveWithInfer): return x_shape def infer_dtype(self, x_dtype): - if x_dtype == mstype.bool_: + if x_dtype.element_type() == mstype.bool_: raise TypeError("AllReduce does not support 'Bool' as the dtype of input!") return x_dtype @@ -149,19 +148,19 @@ class AllGather(PrimitiveWithInfer): @prim_attr_register def __init__(self, group=GlobalComm.WORLD_COMM_GROUP): - validator.check_value_type('group', get_group(group), (str,), self.name) - self.rank = get_rank(get_group(group)) - self.rank_size = get_group_size(get_group(group)) + validator.check_value_type('group', _get_group(group), (str,), self.name) + self.rank = get_rank(_get_group(group)) + self.rank_size = get_group_size(_get_group(group)) validator.check('rank', self.rank, 'rank_size', self.rank_size, Rel.LT, self.name) self.add_prim_attr('rank_size', self.rank_size) - self.add_prim_attr('group', get_group(group)) + self.add_prim_attr('group', _get_group(group)) def infer_shape(self, x_shape): x_shape[0] = x_shape[0] * self.rank_size return x_shape def infer_dtype(self, x_dtype): - if x_dtype == 
mstype.bool_: + if x_dtype.element_type() == mstype.bool_: raise TypeError(f"{self.name} does not support 'Bool' as the dtype of input!") return x_dtype @@ -176,6 +175,7 @@ class ReduceScatter(PrimitiveWithInfer): Note: The back propagation of the op is not surported yet. Stay tuned for more. Tensor must have the same shape and format in all processes participating in the collective. + Args: op (str): Specifies an operation used for element-wise reductions, like sum, max, avg. Default: ReduceOp.SUM. @@ -205,11 +205,11 @@ class ReduceScatter(PrimitiveWithInfer): @prim_attr_register def __init__(self, op=ReduceOp.SUM, group=GlobalComm.WORLD_COMM_GROUP): validator.check_value_type('op', op, (type(ReduceOp.SUM),), self.name) - validator.check_value_type('group', get_group(group), (str,), self.name) + validator.check_value_type('group', _get_group(group), (str,), self.name) self.op = op - self.rank_size = get_group_size(get_group(group)) + self.rank_size = get_group_size(_get_group(group)) self.add_prim_attr('rank_size', self.rank_size) - self.add_prim_attr('group', get_group(group)) + self.add_prim_attr('group', _get_group(group)) def infer_shape(self, x_shape): if x_shape[0] % self.rank_size != 0: @@ -218,7 +218,7 @@ class ReduceScatter(PrimitiveWithInfer): return x_shape def infer_dtype(self, x_dtype): - if x_dtype == mstype.bool_: + if x_dtype.element_type() == mstype.bool_: raise TypeError(f"{self.name} does not support 'Bool' as the dtype of input!") return x_dtype @@ -268,15 +268,18 @@ class Broadcast(PrimitiveWithInfer): @prim_attr_register def __init__(self, root_rank, group=GlobalComm.WORLD_COMM_GROUP): validator.check_value_type('root_rank', root_rank, (int,), self.name) - validator.check_value_type('group', get_group(group), (str,), self.name) - self.add_prim_attr('group', get_group(group)) + validator.check_value_type('group', _get_group(group), (str,), self.name) + self.add_prim_attr('group', _get_group(group)) def infer_shape(self, x_shape): return 
x_shape def infer_dtype(self, x_dtype): - if x_dtype == mstype.bool_: - raise TypeError(f"{self.name} does not support 'Bool' as the dtype of input!") + if not isinstance(x_dtype, tuple): + raise TypeError(f"{self.name}'s input should be a tuple!") + for _ele in x_dtype: + if _ele.element_type() == mstype.bool_: + raise TypeError(f"{self.name} does not support 'Bool' as the dtype of input!") return x_dtype @@ -306,11 +309,11 @@ class _AlltoAll(PrimitiveWithInfer): @prim_attr_register def __init__(self, split_count, split_dim, concat_dim, group=GlobalComm.WORLD_COMM_GROUP): """init AlltoAll""" - validator.check_value_type('group', get_group(group), (str,), self.name) + validator.check_value_type('group', _get_group(group), (str,), self.name) self.split_count = split_count self.split_dim = split_dim self.concat_dim = concat_dim - self.add_prim_attr('group', get_group(group)) + self.add_prim_attr('group', _get_group(group)) def infer_shape(self, x_shape): x_shape[self.concat_dim] = x_shape[self.concat_dim] * self.split_count @@ -318,7 +321,7 @@ class _AlltoAll(PrimitiveWithInfer): return x_shape def infer_dtype(self, x_dtype): - if x_dtype == mstype.bool_: + if x_dtype.element_type() == mstype.bool_: raise TypeError(f"{self.name} does not support 'Bool' as the dtype of input!") return x_dtype diff --git a/mindspore/ops/operations/debug_ops.py b/mindspore/ops/operations/debug_ops.py index 21c9c519b9..6887c778ed 100644 --- a/mindspore/ops/operations/debug_ops.py +++ b/mindspore/ops/operations/debug_ops.py @@ -45,6 +45,9 @@ class ScalarSummary(Primitive): def __init__(self): """init""" + def __call__(self, *args, **kwargs): + pass + class ImageSummary(Primitive): """ @@ -70,6 +73,9 @@ class ImageSummary(Primitive): def __init__(self): """init""" + def __call__(self, *args, **kwargs): + pass + class TensorSummary(Primitive): """ @@ -97,6 +103,9 @@ class TensorSummary(Primitive): def __init__(self): """init""" + def __call__(self, *args, **kwargs): + pass + class 
HistogramSummary(Primitive): """ @@ -189,7 +198,11 @@ class Print(PrimitiveWithInfer): Output tensor or string to stdout. Note: - The print operation cannot support float64 and bool types currently. + The print operation cannot support the following cases currently. + + 1. The type of tensor is float64 or bool. + + 2. The data of tensor is a scalar type. Inputs: - **input_x** (Union[Tensor, str]) - The graph node to attach to. The input supports diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 6eebde3a84..f380c4620a 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -1251,7 +1251,8 @@ class Acosh(PrimitiveWithInfer): Compute inverse hyperbolic cosine of x element-wise. Inputs: - - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. + - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`, + and the data type of 'input_x' is number, the element in 'input_x' should be greater than or equal to 1. Outputs: Tensor, has the same shape as `input_x`. @@ -2038,7 +2039,7 @@ class Atan2(_MathBinaryOp): r""" Returns arctangent of input_x/input_y element-wise. - It returns :math:`\theta\ \in\ (-\frac{\pi}{2}, \frac{\pi}{2})` + It returns :math:`\theta\ \in\ [-\pi, \pi]` such that :math:`x = r*\sin(\theta), y = r*\cos(\theta)`, where :math:`r = \sqrt{x^2 + y^2}`. Inputs: diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index 0687806bb2..2a2dbe08a8 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -111,6 +111,12 @@ class Softmax(PrimitiveWithInfer): Outputs: Tensor, with the same type and shape as the logits. 
+ + Examples: + >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32) + >>> softmax = P.Softmax() + >>> softmax(input_x) + [0.01165623, 0.03168492, 0.08612854, 0.23412167, 0.6364086] """ @prim_attr_register @@ -155,6 +161,12 @@ class LogSoftmax(PrimitiveWithInfer): Outputs: Tensor, with the same type and shape as the logits. + + Examples: + >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32) + >>> log_softmax = P.LogSoftmax() + >>> log_softmax(input_x) + [-4.4519143, -3.4519143, -2.4519143, -1.4519144, -0.4519144] """ @prim_attr_register @@ -296,7 +308,8 @@ class Elu(PrimitiveWithInfer): The data type of input tensor should be float. Args: - alpha (float): The coefficient of negative factor whose type is float. Default: 1.0. + alpha (float): The coefficient of negative factor whose type is float, + only support '1.0' currently. Default: 1.0. Inputs: - **input_x** (Tensor) - The input tensor whose data type should be float. @@ -316,6 +329,7 @@ class Elu(PrimitiveWithInfer): def __init__(self, alpha=1.0): """Init Elu""" validator.check_value_type("alpha", alpha, [float], self.name) + validator.check_number("alpha", alpha, 1.0, Rel.EQ, self.name) def infer_shape(self, input_x): return input_x @@ -375,6 +389,11 @@ class Sigmoid(PrimitiveWithInfer): Outputs: Tensor, with the same type and shape as the input_x. + Examples: + >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32) + >>> sigmoid = P.Sigmoid() + >>> sigmoid(input_x) + [0.73105866, 0.880797, 0.9525742, 0.98201376, 0.9933071] """ @prim_attr_register @@ -438,6 +457,12 @@ class Tanh(PrimitiveWithInfer): Outputs: Tensor, with the same type and shape as the input_x. 
+ + Examples: + >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32) + >>> tanh = P.Tanh() + >>> tanh(input_x) + [0.7615941, 0.9640276, 0.9950548, 0.9993293, 0.99990916] """ @prim_attr_register @@ -490,6 +515,15 @@ class FusedBatchNorm(Primitive): - **updated_bias** (Tensor) - Tensor of shape :math:`(C,)`. - **updated_moving_mean** (Tensor) - Tensor of shape :math:`(C,)`. - **updated_moving_variance** (Tensor) - Tensor of shape :math:`(C,)`. + + Examples: + >>> input_x = Tensor(np.ones([128, 64, 32, 64]), mindspore.float32) + >>> scale = Tensor(np.ones([64]), mindspore.float32) + >>> bias = Tensor(np.ones([64]), mindspore.float32) + >>> mean = Tensor(np.ones([64]), mindspore.float32) + >>> variance = Tensor(np.ones([64]), mindspore.float32) + >>> op = P.FusedBatchNorm() + >>> output = op(input_x, scale, bias, mean, variance) """ @prim_attr_register @@ -537,6 +571,16 @@ class BatchNorm(PrimitiveWithInfer): - **updated_bias** (Tensor) - Tensor of shape :math:`(C,)`. - **reserve_space_1** (Tensor) - Tensor of shape :math:`(C,)`. - **reserve_space_2** (Tensor) - Tensor of shape :math:`(C,)`. + - **reserve_space_3** (Tensor) - Tensor of shape :math:`(C,)`. + + Examples: + >>> input_x = Tensor(np.ones([128, 64, 32, 64]), mindspore.float32) + >>> scale = Tensor(np.ones([64]), mindspore.float32) + >>> bias = Tensor(np.ones([64]), mindspore.float32) + >>> mean = Tensor(np.ones([64]), mindspore.float32) + >>> variance = Tensor(np.ones([64]), mindspore.float32) + >>> batch_norm = P.BatchNorm() + >>> output = batch_norm(input_x, scale, bias, mean, variance) """ @prim_attr_register @@ -623,6 +667,12 @@ class Conv2D(PrimitiveWithInfer): Outputs: Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
+ + Examples: + >>> input = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32) + >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32) + >>> conv2d = P.Conv2D(out_channel=32, kernel_size=3) + >>> conv2d(input, weight) """ @prim_attr_register @@ -652,6 +702,7 @@ class Conv2D(PrimitiveWithInfer): self.add_prim_attr('data_format', "NCHW") self.out_channel = validator.check_integer('out_channel', out_channel, 0, Rel.GT, self.name) self.group = validator.check_integer('group', group, 0, Rel.GT, self.name) + self.add_prim_attr('offset_a', 0) def infer_shape(self, x_shape, w_shape): validator.check_integer("weight rank", len(w_shape), 4, Rel.EQ, self.name) @@ -731,10 +782,17 @@ class DepthwiseConv2dNative(PrimitiveWithInfer): Inputs: - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - **weight** (Tensor) - Set size of kernel is :math:`(K_1, K_2)`, then the shape is - :math:`(\text{channel_multiplier}, C_{in}, K_1, K_2)`. + :math:`(K, C_{in}, K_1, K_2)`, `K` must be 1. Outputs: Tensor of shape :math:`(N, C_{in} * \text{channel_multiplier}, H_{out}, W_{out})`. 
+ + Examples: + >>> input = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32) + >>> weight = Tensor(np.ones([1, 32, 3, 3]), mindspore.float32) + >>> depthwise_conv2d = P.DepthwiseConv2dNative(channel_multiplier = 3, kernel_size = (3, 3)) + >>> output = depthwise_conv2d(input, weight) + >>> assert output.shape() == (10, 96, 30, 30) """ @prim_attr_register @@ -751,8 +809,15 @@ class DepthwiseConv2dNative(PrimitiveWithInfer): self.init_prim_io_names(inputs=['x', 'w'], outputs=['output']) self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name) self.stride = _check_positive_int_or_tuple('stride', stride, self.name) + if self.stride[0] != self.stride[1]: + raise ValueError("The height and width of stride should be equal," + f"but got height:{self.stride[0]}, width:{self.stride[1]}") self.add_prim_attr('stride', (1, 1, self.stride[0], self.stride[1])) + self.dilation = _check_positive_int_or_tuple('dilation', dilation, self.name) + if self.dilation[0] != self.dilation[1]: + raise ValueError("The height and width of dilation should be equal," + f"but got height:{self.dilation[0]}, width:{self.dilation[1]}") self.add_prim_attr('dilation', (1, 1, self.dilation[0], self.dilation[1])) validator.check_value_type('pad', pad, (int,), self.name) self.pad_mode = validator.check_string('pad_mode', pad_mode, ['valid', 'same', 'pad'], self.name) @@ -769,13 +834,11 @@ class DepthwiseConv2dNative(PrimitiveWithInfer): validator.check("x_shape[1]", x_shape[1], "w_shape[1]", w_shape[1], Rel.EQ, self.name) validator.check('kernel_size', self.kernel_size, 'w_shape[2:4]', tuple(w_shape[2:4]), Rel.EQ, self.name) - kernel_size_h = w_shape[2] - kernel_size_w = w_shape[3] - stride_h = self.stride[2] - stride_w = self.stride[3] - dilation_h = self.dilation[2] - dilation_w = self.dilation[3] - + kernel_size_n, _, kernel_size_h, kernel_size_w = w_shape + _, _, stride_h, stride_w = self.stride + _, _, dilation_h, dilation_w = self.dilation + if kernel_size_n != 1: + 
raise ValueError(f"The batch of input weight should be 1, but got {kernel_size_n}") if self.pad_mode == "valid": h_out = math.ceil((x_shape[2] - dilation_h * (kernel_size_h - 1)) / stride_h) w_out = math.ceil((x_shape[3] - dilation_w * (kernel_size_w - 1)) / stride_w) @@ -1067,6 +1130,13 @@ class Conv2DBackpropInput(PrimitiveWithInfer): Returns: Tensor, the gradients of convolution. + + Examples: + >>> dout = Tensor(np.ones([10, 32, 30, 30]), mindspore.float32) + >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32) + >>> x = Tensor(np.ones([10, 32, 32, 32])) + >>> conv2d_backprop_input = P.Conv2DBackpropInput(out_channel=32, kernel_size=3) + >>> conv2d_backprop_input(dout, weight, F.shape(x)) """ @prim_attr_register @@ -1153,6 +1223,12 @@ class BiasAdd(PrimitiveWithInfer): Outputs: Tensor, with the same shape and type as `input_x`. + + Examples: + >>> input_x = Tensor(np.arange(6).reshape((2, 3)), mindspore.float32) + >>> bias = Tensor(np.random.random(3).reshape((3,)), mindspore.float32) + >>> bias_add = P.BiasAdd() + >>> bias_add(input_x, bias) """ @prim_attr_register @@ -1196,8 +1272,8 @@ class TopK(PrimitiveWithInfer): >>> input_x = Tensor([1, 2, 3, 4, 5], mindspore.float16) >>> k = 3 >>> values, indices = topk(input_x, k) - >>> assert values == Tensor(np.array([5, 4, 3])) - >>> assert indices == Tensor(np.array([4, 3, 2])) + >>> assert values == Tensor(np.array([5, 4, 3]), mstype.float16) + >>> assert indices == Tensor(np.array([4, 3, 2]), mstype.int32) """ @prim_attr_register @@ -1239,6 +1315,14 @@ class SoftmaxCrossEntropyWithLogits(PrimitiveWithInfer): Outputs: Tuple of 2 Tensor, the loss shape is `(N,)`, and the dlogits with the same shape as `logits`. 
+ + Examples: + >>> logits = Tensor([[2, 4, 1, 4, 5], [2, 1, 2, 4, 3]], mindspore.float32) + >>> labels = Tensor([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0]], mindspore.float32) + >>> softmax_cross = P.SoftmaxCrossEntropyWithLogits() + >>> loss, backprop = softmax_cross(logits, labels) + ([0.5899297, 0.52374405], [[0.02760027, 0.20393994, 0.01015357, 0.20393994, -0.44563377], + [0.08015892, 0.02948882, 0.08015892, -0.4077012, 0.21789455]]) """ @prim_attr_register @@ -1283,6 +1367,9 @@ class SparseSoftmaxCrossEntropyWithLogits(PrimitiveWithInfer): Outputs: Tensor, if `is_grad` is False, the output tensor is the value of loss which is a scalar tensor; if `is_grad` is True, the output tensor is the gradient of input with the same shape as `logits`. + + Examples: + Please refer to the usage in nn.SoftmaxCrossEntropyWithLogits source code. """ @prim_attr_register @@ -1340,8 +1427,11 @@ class ApplyMomentum(PrimitiveWithInfer): def __init__(self, use_nesterov=False, use_locking=False, gradient_scale=1.0): self.init_prim_io_names(inputs=['variable', 'accumulation', 'learning_rate', 'gradient', 'momentum'], outputs=['output']) + self.is_tbe = context.get_context("device_target") == "Ascend" def infer_shape(self, v_shape, a_shape, l_shape, g_shape, m_shape): + if self.is_tbe: + return v_shape, v_shape return v_shape def infer_dtype(self, v_dtype, a_dtype, l_dtype, g_dtype, m_dtype): @@ -1352,6 +1442,8 @@ class ApplyMomentum(PrimitiveWithInfer): validator.check_scalar_or_tensor_type_same({"l_dtype": l_dtype}, valid_types, self.name) validator.check_scalar_or_tensor_type_same({"g_dtype": g_dtype}, valid_types, self.name) validator.check_scalar_or_tensor_type_same({"m_dtype": m_dtype}, valid_types, self.name) + if self.is_tbe: + return g_dtype, g_dtype return g_dtype @@ -1379,6 +1471,13 @@ class SmoothL1Loss(PrimitiveWithInfer): Outputs: Tensor, with the same type and shape as `prediction`. 
+ + Examples: + >>> loss = P.SmoothL1Loss() + >>> input_data = Tensor(np.array([1, 2, 3]), mindspore.float32) + >>> target_data = Tensor(np.array([1, 2, 2]), mindspore.float32) + >>> loss(input_data, target_data) + [0, 0, 0.5] """ @prim_attr_register @@ -1616,9 +1715,11 @@ class ApplyCenteredRMSProp(PrimitiveWithInfer): "mean_square": mean_square_dtype, "moment": moment_dtype, "grad": grad_dtype} validator.check_tensor_type_same(args, mstype.number_type, self.name) - args = {"learning_rate": learning_rate_dtype, "rho": rho_dtype, 'momentum': momentum_dtype, - "epsilon": epsilon_dtype} - validator.check_scalar_or_tensor_type_same(args, [mstype.float16, mstype.float32], self.name) + valid_types = [mstype.float16, mstype.float32] + args_rho = {"rho": rho_dtype, 'momentum': momentum_dtype, "epsilon": epsilon_dtype} + validator.check_type_same(args_rho, valid_types, self.name) + args_lr = {"learning_rate": learning_rate_dtype, "rho": rho_dtype} + validator.check_scalar_or_tensor_type_same(args_lr, valid_types, self.name, allow_mix=True) return var_dtype @@ -1630,7 +1731,7 @@ class LayerNorm(Primitive): `Layer Normalization `_. .. math:: - y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta + y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon. @@ -1655,6 +1756,15 @@ class LayerNorm(Primitive): The shape is :math:`(N, C)`. - **updated_gamma** (Tensor) - Tensor of shape :math:`(C,)`. - **updated_beta** (Tensor) - Tensor of shape :math:`(C,)`.
+ + Examples: + >>> input_x = Tensor(np.array([[1, 2, 3], [1, 2, 3]]), mindspore.float32) + >>> gamma = Tensor(np.ones([3]), mindspore.float32) + >>> beta = Tensor(np.ones([3]), mindspore.float32) + >>> layer_norm = P.LayerNorm() + >>> output = layer_norm(input_x, gamma, beta) + ([[-0.22474492, 1., 2.2247488], [-0.22474492, 1., 2.2247488]], + [[2.], [2.]], [[0.6666667], [0.6666667]]) """ @prim_attr_register @@ -1981,6 +2091,9 @@ class PReLU(PrimitiveWithInfer): where :math:`x_i` is an element of an channel of the input. + Note: + 1-dimensional input_x is not supported. + Inputs: - **input_x** (Tensor) - Float tensor, representing the output of the preview layer. - **weight** (Tensor) - Float Tensor, w > 0, there is only two shapes are legitimate, @@ -2000,14 +2113,13 @@ class PReLU(PrimitiveWithInfer): input_x_dim = len(input_x_shape) weight_dim = len(weight_shape) + if input_x_dim == 1: + raise ValueError(f'For \'{self.name}\' input_x rank 1 is not supported.') + if weight_dim != 1: raise ValueError(f'For \'{self.name}\' weight_dim must be 1, while weight_dim is {weight_dim}.') - if input_x_dim == 1 and weight_shape[0] != 1: - raise ValueError(f'For \'{self.name}\' when input_x_dim is 1, weight_shape[0] must be 1, ' - f'while weight_shape[0] is {weight_shape[0]}.') - - if input_x_dim != 1 and weight_shape[0] != input_x_shape[1] and weight_shape[0] != 1: + if weight_shape[0] != input_x_shape[1] and weight_shape[0] != 1: raise ValueError(f'For \'{self.name}\' channel of input_x and weight must be matched,' f' while channel of input_x is {input_x_shape[1]},' f' weight_shape[0] is {weight_shape[0]}.') @@ -2015,8 +2127,9 @@ class PReLU(PrimitiveWithInfer): return input_x_shape def infer_dtype(self, input_x_dtype, weight_dtype): - args = {"input_x": input_x_dtype, "weight": weight_dtype} - validator.check_tensor_type_same(args, (mstype.float16, mstype.float32), self.name) + valid_types = (mstype.float16, mstype.float32) + validator.check_tensor_type_same({"input_x": 
input_x_dtype}, valid_types, self.name) + validator.check_tensor_type_same({"weight": weight_dtype}, valid_types, self.name) return input_x_dtype @@ -2063,7 +2176,7 @@ class LSTM(PrimitiveWithInfer): return (y_shape, h_shape, c_shape, reserved_shape, state_shape) def infer_dtype(self, x_dtype, h_dtype, c_dtype, w_dtype): - args = {'x': x_dtype, 'h': h_dtype, 'c': c_dtype, 'w': w_dtype} + args = {'x': x_dtype, 'h': h_dtype, 'c': c_dtype, 'w': w_dtype} validator.check_tensor_type_same(args, (mstype.float32, mstype.float16), self.name) return (x_dtype, x_dtype, x_dtype, x_dtype, x_dtype) @@ -2087,6 +2200,12 @@ class SigmoidCrossEntropyWithLogits(PrimitiveWithInfer): Outputs: Tensor, with the same shape and type as input `logits`. + + Examples: + >>> logits = Tensor(np.random.randn(2, 3).astype(np.float16)) + >>> labels = Tensor(np.random.randn(2, 3).astype(np.float16)) + >>> sigmoid = P.SigmoidCrossEntropyWithLogits() + >>> sigmoid(logits, labels) """ @prim_attr_register @@ -2312,11 +2431,13 @@ class Adam(PrimitiveWithInfer): Inputs: - **var** (Tensor) - Weights to be updated. - - **m** (Tensor) - The 1st moment vector in the updating formula. + - **m** (Tensor) - The 1st moment vector in the updating formula. Has the same type as `var`. - **v** (Tensor) - the 2nd moment vector in the updating formula. + Mean square gradients, has the same type as `var`. - **beta1_power** (float) - :math:`beta_1^t` in the updating formula. - **beta2_power** (float) - :math:`beta_2^t` in the updating formula. - - **lr** (float) - :math:`l` in the updating formula. + - **lr** (Union[float, Tensor, Iterable]) - :math:`l` in the updating formula. + Iterable type is used for the dynamic learning rate. - **beta1** (float) - The exponential decay rate for the 1st moment estimates. - **beta2** (float) - The exponential decay rate for the 2nd moment estimates. - **epsilon** (float) - Term added to the denominator to improve numerical stability. 
@@ -2328,6 +2449,9 @@ class Adam(PrimitiveWithInfer): - **var** (Tensor) - The same shape and data type as `var`. - **m** (Tensor) - The same shape and data type as `m`. - **v** (Tensor) - The same shape and data type as `v`. + + Examples: + Please refer to the usage in nn.Adam. """ @prim_attr_register @@ -2435,10 +2559,15 @@ class SparseApplyAdagrad(PrimitiveWithInfer): The shape of `indices` must be the same as `grad` in first dimension, the type must be int32. Outputs: - Tuple of 2 Tensor, the updated parameters. + Tensor, has the same shape and type as `var`. - - **var** (Tensor) - The same shape and data type as `var`. - - **accum** (Tensor) - The same shape and data type as `accum`. + Examples: + >>> var = Tensor(np.random.random((3, 3)), mindspore.float32) + >>> accum = Tensor(np.random.random((3, 3)), mindspore.float32) + >>> grad = Tensor(np.random.random((3, 3)), mindspore.float32) + >>> indices = Tensor(np.ones((3,), np.int32)) + >>> sparse_apply_ada_grad = P.SparseApplyAdagrad(0.5) + >>> sparse_apply_ada_grad(var, accum, grad, indices) """ @prim_attr_register @@ -2481,6 +2610,27 @@ class LARSUpdate(PrimitiveWithInfer): Outputs: Tensor, representing the new gradient. 
+ + Examples: + >>> from mindspore import Tensor + >>> from mindspore.ops import operations as P + >>> from mindspore.ops import functional as F + >>> import mindspore.nn as nn + >>> import numpy as np + >>> class Net(nn.Cell): + >>> def __init__(self): + >>> super(Net, self).__init__() + >>> self.lars = P.LARSUpdate() + >>> self.reduce = P.ReduceSum() + >>> def construct(self, weight, gradient): + >>> w_square_sum = self.reduce(F.square(weight)) + >>> grad_square_sum = self.reduce(F.square(gradient)) + >>> grad_t = self.lars(weight, gradient, w_square_sum, grad_square_sum, 0.0, 1.0) + >>> return grad_t + >>> weight = np.random.random(size=(2, 3)).astype(np.float32) + >>> gradient = np.random.random(size=(2, 3)).astype(np.float32) + >>> net = Net() + >>> ms_output = net(Tensor(weight), Tensor(gradient)) """ @prim_attr_register @@ -2566,82 +2716,6 @@ class ApplyFtrl(PrimitiveWithInfer): return var_type -class ExtractImagePatches(PrimitiveWithInfer): - """ - Extract patches from images. - The input tensor must be a 4-D tensor and the data format is NHWC. - - Args: - ksizes (Union[tuple[int], list[int]]): The size of sliding window, should be a tuple or list of int, - and the format is [1, ksize_row, ksize_col, 1]. - strides (Union[tuple[int], list[int]]): Distance between the centers of the two consecutive patches, - should be a tuple or list of int, and the format is [1, stride_row, stride_col, 1]. - rates (Union[tuple[int], list[int]]): In each extracted patch, the gap between the corresponding dim - pixel positions, should be a tuple or list of int, and the format is [1, rate_row, rate_col, 1]. - padding (str): The type of padding algorithm, is a string whose value is "same" or "valid", - not case sensitive. Default: "valid". - - - same: Means that the patch can take the part beyond the original image, and this part is filled with 0. - - - valid: Means that the patch area taken must be completely contained in the original image. 
- - Inputs: - - **input_x** (Tensor) - A 4-D tensor whose shape is [in_batch, in_row, in_col, in_depth] and - data type is int8, float16, uint8. - - Outputs: - Tensor, a 4-D tensor whose data type is same as 'input_x', - and the shape is [out_batch, out_row, out_col, out_depth], the out_batch is same as the in_batch. - """ - - @prim_attr_register - def __init__(self, ksizes, strides, rates, padding="valid"): - """init""" - def _check_tuple_or_list(arg_name, arg_val, prim_name): - validator.check_value_type(f"{arg_name}s", ksizes, [tuple, list], self.name) - if len(arg_val) != 4 or arg_val[0] != 1 or arg_val[3] != 1: - raise ValueError(f"For \'{prim_name}\' the format of {arg_name}s should be [1, {arg_name}_row, " - f"{arg_name}_col, 1], but got {arg_val}.") - if not isinstance(arg_val[1], int) or not isinstance(arg_val[2], int) or arg_val[1] < 1 or arg_val[2] < 1: - raise ValueError(f"For '{prim_name}' the {arg_name}_row and {arg_name}_col in {arg_name}s should be an " - f"positive integer number, but got {arg_name}_row is {arg_val[1]}, {arg_name}_col " - f"is {arg_val[2]}") - - _check_tuple_or_list("ksize", ksizes, self.name) - _check_tuple_or_list("stride", strides, self.name) - _check_tuple_or_list("rate", rates, self.name) - self.padding = validator.check_string('padding', padding.upper(), ['VALID', 'SAME'], self.name) - self.add_prim_attr("padding", self.padding) - - def infer_shape(self, input_x): - in_batch, in_row, in_col, in_depth = input_x - _, ksize_row, ksize_col, _ = self.ksizes - _, stride_row, stride_col, _ = self.strides - _, rate_row, rate_col, _ = self.rates - if len(input_x) != 4: - raise ValueError("The `input_x` should be a 4-D tensor, " - f"but got a {len(input_x)}-D tensor whose shape is {input_x}") - - out_batch = in_batch - out_depth = ksize_row * ksize_col * in_depth - - if self.padding == "VALID": - out_row = \ - (in_row - (ksize_row + (ksize_row - 1) * (rate_row - 1))) // stride_row + 1 - out_col = \ - (in_col - (ksize_col + (ksize_col - 
1) * (rate_col - 1))) // stride_col + 1 - else: - out_row = (in_row - 1) // stride_row + 1 - out_col = (in_col - 1) // stride_col + 1 - - out_shape = [out_batch, out_row, out_col, out_depth] - return out_shape - - def infer_dtype(self, input_x): - validator.check_tensor_type_same({"input_x": input_x}, (mstype.int8, mstype.float16, mstype.float32), self.name) - return input_x - - class ConfusionMulGrad(PrimitiveWithInfer): """ `output0` is the result of which input0 dot multily input1. @@ -2673,8 +2747,8 @@ class ConfusionMulGrad(PrimitiveWithInfer): """ @prim_attr_register - def __init__(self, axis = (), keep_dims = False): - self.init_prim_io_names(inputs = ["input0", "input1", "input2"], outputs = ["output0", "output1"]) + def __init__(self, axis=(), keep_dims=False): + self.init_prim_io_names(inputs=["input0", "input1", "input2"], outputs=["output0", "output1"]) self.axis_ = validator.check_value_type("axis", axis, [int, tuple, list], self.name) self.keep_dims_ = validator.check_value_type("keep_dims", keep_dims, [bool], self.name) diff --git a/mindspore/ops/operations/other_ops.py b/mindspore/ops/operations/other_ops.py index 12a8a2cfde..f2c0fccca9 100644 --- a/mindspore/ops/operations/other_ops.py +++ b/mindspore/ops/operations/other_ops.py @@ -76,8 +76,13 @@ class BoundingBoxEncode(PrimitiveWithInfer): Tensor, encoded bounding boxes. Examples: + >>> anchor_box = Tensor([[4,1,2,1],[2,2,2,3]],mindspore.float32) + >>> groundtruth_box = Tensor([[3,1,2,2],[1,2,1,4]],mindspore.float32) >>> boundingbox_encode = P.BoundingBoxEncode(means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0)) - >>> delta_box = boundingbox_encode(anchor_box, groundtruth_box) + >>> boundingbox_encode(anchor_box, groundtruth_box) + [[5.0000000e-01 5.0000000e-01 -6.5504000e+04 6.9335938e-01] + [-1.0000000e+00 2.5000000e-01 0.0000000e+00 4.0551758e-01]] + """ @prim_attr_register @@ -118,9 +123,14 @@ class BoundingBoxDecode(PrimitiveWithInfer): Tensor, decoded boxes. 
Examples: + >>> anchor_box = Tensor([[4,1,2,1],[2,2,2,3]],mindspore.float32) + >>> deltas = Tensor([[3,1,2,2],[1,2,1,4]],mindspore.float32) >>> boundingbox_decode = P.BoundingBoxDecode(means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0), >>> max_shape=(768, 1280), wh_ratio_clip=0.016) - >>> bbox = boundingbox_decode(anchor_box, deltas) + >>> boundingbox_decode(anchor_box, deltas) + [[4.1953125 0. 0. 5.1953125] + [2.140625 0. 3.859375 60.59375]] + """ @prim_attr_register @@ -269,3 +279,66 @@ class MakeRefKey(Primitive): def __call__(self): pass + + +class CheckBprop(PrimitiveWithInfer): + """ + Checks whether data type and shape of corresponding element from tuple x and y are the same. + + Raises: + TypeError: If not the same. + + Inputs: + - **input_x** (tuple[Tensor]) - The input_x contains the outputs of bprop to be checked. + - **input_y** (tuple[Tensor]) - The input_y contains the inputs of bprop to check against. + + Outputs: + (tuple[Tensor]), the input_x, + if data type and shape of corresponding elements from `input_x` and `input_y` are the same. 
+ + Examples: + >>> input_x = (Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32),) + >>> input_y = (Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32),) + >>> out = P.CheckBprop()(input_x, input_y) + """ + + @prim_attr_register + def __init__(self): + """init CheckBprop""" + + def infer_shape(self, xshapes, yshapes): + tips = f'Bprop of {self.prim_to_check}' + if len(xshapes) < len(yshapes): + raise TypeError(f"{tips}, the size of output should be {len(yshapes)}," + f" but got {len(xshapes)}.") + checking_range = len(yshapes) + for i in range(checking_range): + xshape = xshapes[i] + yshape = yshapes[i] + if not xshape or not yshape: + continue + if xshape != yshape: + raise TypeError(f"{tips}, the shape of {i}th output should be {yshape}," + f" but got {xshape}.") + return xshapes + + def infer_dtype(self, xdtypes, ydtypes): + tips = f'Bprop of {self.prim_to_check}' + if len(xdtypes) < len(ydtypes): + raise TypeError(f"{tips}, the size of output should be {len(ydtypes)}," + f" but got {len(xdtypes)}.") + checking_range = len(ydtypes) + for i in range(checking_range): + xdtype = xdtypes[i] + ydtype = ydtypes[i] + if isinstance(xdtype, mstype.anything_type) or isinstance(ydtype, mstype.anything_type): + continue + if isinstance(ydtype, mstype.function_type): + if not isinstance(xdtype, mstype.env_type_type): + raise TypeError(f"{tips}, the dtype of {i}th output should be {mstype.env_type_type}," + f" but got {xdtype}.") + continue + if xdtype != ydtype: + raise TypeError(f"{tips}, the dtype of {i}th output should be {ydtype}," + f" but got {xdtype}.") + return xdtypes diff --git a/mindspore/ops/primitive.py b/mindspore/ops/primitive.py index d281b4f76c..78e8778c52 100644 --- a/mindspore/ops/primitive.py +++ b/mindspore/ops/primitive.py @@ -88,6 +88,8 @@ class Primitive(Primitive_): for name in self.attrs: value = self.attrs[name] cloned.add_prim_attr(name, value) + if hasattr(self, 'instance_name'): + cloned.set_prim_instance_name(self.instance_name) return 
cloned def add_prim_attr(self, name, value): @@ -327,6 +329,10 @@ def _run_op(obj, op_name, args): if hasattr(arg, '__parameter__'): op_inputs.append(arg.default_input) op_mask[i] = 1 + elif isinstance(arg, tuple): + convert = lambda x: x.default_input if hasattr(x, '__parameter__') else x + args_ = tuple(convert(x) for x in arg) + op_inputs.append(args_) else: op_inputs.append(arg) output = real_run_op(obj, op_name, tuple(op_inputs), tuple(op_mask)) diff --git a/mindspore/parallel/_auto_parallel_context.py b/mindspore/parallel/_auto_parallel_context.py index bf4b99085e..f3f8d443e9 100644 --- a/mindspore/parallel/_auto_parallel_context.py +++ b/mindspore/parallel/_auto_parallel_context.py @@ -208,6 +208,36 @@ class _AutoParallelContext: self.check_context_handle() return self._context_handle.get_parameter_broadcast() + def set_strategy_ckpt_load_file(self, strategy_ckpt_load_file): + """ + Set strategy checkpoint load path. + + Args: + strategy_ckpt_load_file (str): Path to load parallel strategy checkpoint. + """ + self.check_context_handle() + self._context_handle.set_strategy_ckpt_load_file(strategy_ckpt_load_file) + + def get_strategy_ckpt_load_file(self): + """Get strategy checkpoint load path.""" + self.check_context_handle() + return self._context_handle.get_strategy_ckpt_load_file() + + def set_strategy_ckpt_save_file(self, strategy_ckpt_save_file): + """ + Set strategy checkpoint save path. + + Args: + strategy_ckpt_save_file (str): Path to save parallel strategy checkpoint.
+ """ + self.check_context_handle() + self._context_handle.set_strategy_ckpt_save_file(strategy_ckpt_save_file) + + def get_strategy_ckpt_save_file(self): + """Get strategy checkpoint save path.""" + self.check_context_handle() + return self._context_handle.get_strategy_ckpt_save_file() + def get_parameter_broadcast_is_set(self): """Get parameter broadcast is set or not.""" self.check_context_handle() @@ -259,6 +289,23 @@ class _AutoParallelContext: self.check_context_handle() return self._context_handle.get_all_reduce_fusion_split_sizes() + def set_enable_all_reduce_fusion(self, enable_all_reduce_fusion): + """ + Set enable/disable all reduce fusion. + + Args: + enable_all_reduce_fusion (bool): Enable/disable all reduce fusion. + """ + self.check_context_handle() + if not isinstance(enable_all_reduce_fusion, bool): + raise TypeError('enable_all_reduce_fusion is invalid type') + self._context_handle.set_enable_all_reduce_fusion(enable_all_reduce_fusion) + + def get_enable_all_reduce_fusion(self): + """Get all reduce fusion flag.""" + self.check_context_handle() + return self._context_handle.get_enable_all_reduce_fusion() + def get_device_num_is_set(self): """Get device number is set or not.""" self.check_context_handle() @@ -298,7 +345,9 @@ _set_auto_parallel_context_func_map = { "cast_before_mirror": auto_parallel_context().set_cast_before_mirror, "loss_repeated_mean": auto_parallel_context().set_loss_repeated_mean, "parallel_mode": auto_parallel_context().set_parallel_mode, - "parameter_broadcast": auto_parallel_context().set_parameter_broadcast} + "parameter_broadcast": auto_parallel_context().set_parameter_broadcast, + "strategy_ckpt_load_file": auto_parallel_context().set_strategy_ckpt_load_file, + "strategy_ckpt_save_file": auto_parallel_context().set_strategy_ckpt_save_file} _get_auto_parallel_context_func_map = { @@ -308,11 +357,14 @@ _get_auto_parallel_context_func_map = { "cast_before_mirror": auto_parallel_context().get_cast_before_mirror, 
"loss_repeated_mean": auto_parallel_context().get_loss_repeated_mean, "parallel_mode": auto_parallel_context().get_parallel_mode, - "parameter_broadcast": auto_parallel_context().get_parameter_broadcast} + "parameter_broadcast": auto_parallel_context().get_parameter_broadcast, + "strategy_ckpt_load_file": auto_parallel_context().get_strategy_ckpt_load_file, + "strategy_ckpt_save_file": auto_parallel_context().get_strategy_ckpt_save_file} @args_type_check(device_num=int, global_rank=int, mirror_mean=bool, cast_before_mirror=bool, - loss_repeated_mean=bool, parallel_mode=str, parameter_broadcast=bool) + loss_repeated_mean=bool, parallel_mode=str, parameter_broadcast=bool, + strategy_ckpt_load_file=str, strategy_ckpt_save_file=str) def _set_auto_parallel_context(**kwargs): """ Set auto parallel context. @@ -343,6 +395,8 @@ def _set_auto_parallel_context(**kwargs): parameter_broadcast (bool): Indicating whether to broadcast parameters before training. "stand_alone", "semi_auto_parallel" and "auto_parallel" do not support parameter broadcast. Default: False. + strategy_ckpt_load_file (str): The path to load parallel strategy checkpoint. Default: '' + strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: '' Raises: ValueError: If input key is not attribute in auto parallel context. @@ -383,5 +437,7 @@ def _reset_auto_parallel_context(): - cast_before_mirror: True. - parallel_mode: "stand_alone". - parameter_broadcast: False. 
+ - strategy_ckpt_load_file: "" + - strategy_ckpt_save_file: "" """ auto_parallel_context().reset() diff --git a/mindspore/parallel/_cost_model_context.py b/mindspore/parallel/_cost_model_context.py index 54cca5516b..2790aed855 100644 --- a/mindspore/parallel/_cost_model_context.py +++ b/mindspore/parallel/_cost_model_context.py @@ -214,6 +214,31 @@ class _CostModelContext: raise ValueError("Context handle is none in context!!!") return self._context_handle.get_costmodel_communi_bias() + def set_multi_subgraphs(self, multi_subgraph): + """ + Set the flag of ANF graph containing multiple subgraphs. + + Args: + multi_subgraph (bool): A parameter used in marking the multi-subgraphs flag. + + Raises: + ValueError: If context handle is none. + """ + if self._context_handle is None: + raise ValueError("Context handle is none in context!!!") + self._context_handle.set_multi_subgraphs(multi_subgraph) + + def get_multi_subgraphs(self): + """ + Get the flag of ANF graph containing multiple subgraphs. + + Raises: + ValueError: If context handle is none. + """ + if self._context_handle is None: + raise ValueError("Context handle is none in context!!!") + return self._context_handle.get_multi_subgraphs() + def set_costmodel_allreduce_fusion_algorithm(self, algorithm): """ Set costmodel allreduce fusion algorithm. 
@@ -427,6 +452,7 @@ set_cost_model_context_func_map = { "costmodel_communi_threshold": cost_model_context().set_costmodel_communi_threshold, "costmodel_communi_const": cost_model_context().set_costmodel_communi_const, "costmodel_communi_bias": cost_model_context().set_costmodel_communi_bias, + "multi_subgraphs": cost_model_context().set_multi_subgraphs, "costmodel_allreduce_fusion_algorithm": cost_model_context().set_costmodel_allreduce_fusion_algorithm, "costmodel_allreduce_fusion_times": cost_model_context().set_costmodel_allreduce_fusion_times, "costmodel_allreduce_fusion_tail_percent": cost_model_context().set_costmodel_allreduce_fusion_tail_percent, @@ -447,6 +473,7 @@ get_cost_model_context_func_map = { "costmodel_communi_threshold": cost_model_context().get_costmodel_communi_threshold, "costmodel_communi_const": cost_model_context().get_costmodel_communi_const, "costmodel_communi_bias": cost_model_context().get_costmodel_communi_bias, + "multi_subgraphs": cost_model_context().get_multi_subgraphs, "costmodel_allreduce_fusion_algorithm": cost_model_context().get_costmodel_allreduce_fusion_algorithm, "costmodel_allreduce_fusion_times": cost_model_context().get_costmodel_allreduce_fusion_times, "costmodel_allreduce_fusion_tail_percent": cost_model_context().get_costmodel_allreduce_fusion_tail_percent, @@ -461,6 +488,7 @@ get_cost_model_context_func_map = { @args_type_check(device_memory_capacity=float, costmodel_alpha=float, costmodel_beta=float, costmodel_gamma=float, costmodel_communi_threshold=float, costmodel_communi_const=float, costmodel_communi_bias=float, + multi_subgraphs=bool, costmodel_allreduce_fusion_algorithm=int, costmodel_allreduce_fusion_times=int, costmodel_allreduce_fusion_tail_percent=float, costmodel_allreduce_fusion_tail_time=float, costmodel_allreduce_fusion_allreduce_inherent_time=float, @@ -481,6 +509,7 @@ def set_cost_model_context(**kwargs): costmodel_communi_threshold (float): A parameter used in adjusting communication calculation
for practice. costmodel_communi_const (float): A parameter used in adjusting communication calculation for practice. costmodel_communi_bias (float): A parameter used in adjusting communication calculation for practice. + multi_subgraphs (bool): A parameter used in marking the flag of ANF graph containing multiple subgraphs. costmodel_allreduce_fusion_algorithm (int): The allreduce fusion algorithm. 0: bypass allreduce fusion; 1: only use backward computation time to group allreduce; diff --git a/mindspore/parallel/_utils.py b/mindspore/parallel/_utils.py index 3ce5463edf..cb3a0c0ac7 100644 --- a/mindspore/parallel/_utils.py +++ b/mindspore/parallel/_utils.py @@ -117,6 +117,7 @@ _cast_before_mirror = None _loss_repeated_mean = None _communication_backend = None _has_checkpointed = False +_enable_all_reduce_fusion = None def _checkpoint_auto_parallel_context(): @@ -133,6 +134,7 @@ def _checkpoint_auto_parallel_context(): global _cast_before_mirror global _loss_repeated_mean global _communication_backend + global _enable_all_reduce_fusion _parallel_mode = auto_parallel_context().get_parallel_mode() _device_num = _get_device_num() _global_rank = _get_global_rank() @@ -141,6 +143,7 @@ def _checkpoint_auto_parallel_context(): _cast_before_mirror = auto_parallel_context().get_cast_before_mirror() _loss_repeated_mean = auto_parallel_context().get_loss_repeated_mean() _communication_backend = auto_parallel_context().get_communication_backend() + _enable_all_reduce_fusion = auto_parallel_context().get_enable_all_reduce_fusion() _has_checkpointed = True @@ -154,10 +157,12 @@ def _restore_auto_parallel_context(): global _cast_before_mirror global _loss_repeated_mean global _communication_backend + global _enable_all_reduce_fusion _set_auto_parallel_context(parallel_mode=_parallel_mode, device_num=_device_num, global_rank=_global_rank, parameter_broadcast=_parameter_broadcast, mirror_mean=_mirror_mean, cast_before_mirror=_cast_before_mirror, 
loss_repeated_mean=_loss_repeated_mean) auto_parallel_context().set_communication_backend(_communication_backend) + auto_parallel_context().set_enable_all_reduce_fusion(_enable_all_reduce_fusion) def _reset_checkpoint_auto_parallel_context(): diff --git a/mindspore/parallel/algo_parameter_config.py b/mindspore/parallel/algo_parameter_config.py index 244156da33..5c13c13153 100644 --- a/mindspore/parallel/algo_parameter_config.py +++ b/mindspore/parallel/algo_parameter_config.py @@ -45,14 +45,6 @@ class _AlgoParameterConfig(): if self._config_handle is None: raise ValueError("Config handle is none!!!") - def set_simplify_cal(self, simplify_cal): - self.check_config_handle() - self._config_handle.set_simplify_cal(simplify_cal) - - def get_simplify_cal(self): - self.check_config_handle() - return self._config_handle.get_simplify_cal() - def set_fully_use_devices(self, not_fully): self.check_config_handle() self._config_handle.set_fully_use_devices(not_fully) @@ -118,7 +110,6 @@ def _algo_parameter_config(): set_algo_parameters_config_func_map = { - "simplify_cal": _algo_parameter_config().set_simplify_cal, "fully_use_devices": _algo_parameter_config().set_fully_use_devices, "elementwise_op_strategy_follow": _algo_parameter_config().set_elementwise_op_strategy_follow, "tensor_slice_align_enable": _algo_parameter_config().set_tensor_slice_align_enable, @@ -126,14 +117,13 @@ set_algo_parameters_config_func_map = { get_algo_parameters_config_func_map = { - "simplify_cal": _algo_parameter_config().get_simplify_cal, "fully_use_devices": _algo_parameter_config().get_fully_use_devices, "elementwise_op_strategy_follow": _algo_parameter_config().get_elementwise_op_strategy_follow, "tensor_slice_align_enable": _algo_parameter_config().get_tensor_slice_align_enable, "tensor_slice_align_size": _algo_parameter_config().get_tensor_slice_align_size} -@args_type_check(simplify_cal=bool, tensor_slice_align_enable=bool, tensor_slice_align_size=int, 
+@args_type_check(tensor_slice_align_enable=bool, tensor_slice_align_size=int, fully_use_devices=bool, elementwise_op_strategy_follow=bool) def set_algo_parameters(**kwargs): """ @@ -143,10 +133,10 @@ def set_algo_parameters(**kwargs): Attribute name is needed. Args: - simplify_cal (bool): Whether simplifying calculations in strategy-searching algorithm. Default: True - tensor_slice_align_enable (bool): Whether checking tensor slice shape. Default: False - tensor_slice_align_size (int): The minimum tensor slice shape, the value must be in [1, 1024]. Default: 16 - fully_use_devices (bool): Whether generating strategies that fully use all available devices. Default: True + tensor_slice_align_enable (bool): Whether checking tensor slice shape for MatMul. Default: False + tensor_slice_align_size (int): The minimum tensor slice shape of MatMul, the value must be in [1, 1024]. + Default: 16 + fully_use_devices (bool): Whether ONLY generating strategies that fully use all available devices. Default: True elementwise_op_strategy_follow (bool): Whether the elementwise operator have the same strategies as its subsequent operators. Default: False diff --git a/mindspore/train/amp.py b/mindspore/train/amp.py index 917b4c3359..2e758b0e9d 100644 --- a/mindspore/train/amp.py +++ b/mindspore/train/amp.py @@ -41,6 +41,7 @@ class OutputTo16(nn.Cell): def _do_keep_batchnorm_fp32(network): + """Do keep batchnorm fp32.""" cells = network.name_cells() change = False for name in cells: @@ -68,6 +69,7 @@ _config_level = { def _check_kwargs(key_words): + """Check kwargs.""" for arg in key_words: if arg not in ['cast_model_type', 'keep_batchnorm_fp32', 'loss_scale_manager']: raise ValueError(f"Unsupported arg '{arg}'") @@ -84,6 +86,7 @@ def _check_kwargs(key_words): def _add_loss_network(network, loss_fn, cast_model_type): + """Add loss network.""" class WithLossCell(nn.Cell): "Wrap loss for amp. 
Cast network output back to float32" diff --git a/mindspore/train/callback.py b/mindspore/train/callback.py index b9635acc62..50fe5fcb26 100644 --- a/mindspore/train/callback.py +++ b/mindspore/train/callback.py @@ -150,8 +150,8 @@ class CheckpointConfig: keep_checkpoint_max (int): Maximum step to save checkpoint. Default: 5. keep_checkpoint_per_n_minutes (int): Keep one checkpoint every n minutes. Default: 0. Can't be used with keep_checkpoint_max at the same time. - integrated_save (bool): Whether to intergrated save in automatic model parall scene. Default: True. - Integrated save function is only supported in automatic parall scene, not supported in manual parallel. + integrated_save (bool): Whether to perform integrated save in automatic model parallel scene. Default: True. + Integrated save function is only supported in automatic parallel scene, not supported in manual parallel. Raises: ValueError: If the input_param is None or 0. @@ -365,7 +365,7 @@ class Callback: >>> print(cb_params.cur_step_num) >>> >>> print_cb = Print_info() - >>> model.train(epoch, dataset, callback=print_cb) + >>> model.train(epoch, dataset, callbacks=print_cb) """ def __init__(self): pass @@ -683,22 +683,15 @@ class LossMonitor(Callback): class TimeMonitor(Callback): + """Time Monitor.""" def __init__(self, data_size): super(TimeMonitor, self).__init__() self.data_size = data_size def epoch_begin(self, run_context): self.epoch_time = time.time() - + def epoch_end(self, run_context): epoch_mseconds = (time.time() - self.epoch_time) * 1000 per_step_mseconds = epoch_mseconds / self.data_size print("epoch time: {0}, per step time: {1}".format(epoch_mseconds, per_step_mseconds), flush=True) - - def step_begin(self, run_context): - self.step_time = time.time() - - def step_end(self, run_context): - step_mseconds = (time.time() - self.step_time) * 1000 - print('step time', step_mseconds, flush=True) - diff --git a/mindspore/train/model.py b/mindspore/train/model.py index 3fccb7aa2b..c943252e43
100755 --- a/mindspore/train/model.py +++ b/mindspore/train/model.py @@ -99,12 +99,8 @@ class Model: self._loss_scale_manager_set = False self._keep_bn_fp32 = True self._check_kwargs(kwargs) - if 'keep_batchnorm_fp32' in kwargs: - self._keep_bn_fp32 = kwargs['keep_batchnorm_fp32'] - if 'loss_scale_manager' in kwargs: - self._loss_scale_manager = kwargs['loss_scale_manager'] - self._loss_scale_manager_set = True self._amp_level = amp_level + self._process_amp_args(kwargs) self._parallel_mode = _get_parallel_mode() self._device_number = _get_device_num() self._global_rank = _get_global_rank() @@ -114,10 +110,19 @@ class Model: self._build_eval_network(metrics, eval_network, eval_indexes) self._build_predict_network() + def _process_amp_args(self, kwargs): + if self._amp_level == "O0": + self._keep_bn_fp32 = False + if 'keep_batchnorm_fp32' in kwargs: + self._keep_bn_fp32 = kwargs['keep_batchnorm_fp32'] + if 'loss_scale_manager' in kwargs: + self._loss_scale_manager = kwargs['loss_scale_manager'] + self._loss_scale_manager_set = True + def _check_kwargs(self, kwargs): for arg in kwargs: if arg not in ['loss_scale_manager', 'keep_batchnorm_fp32']: - raise ValueError(f"Unsupport arg '{arg}'") + raise ValueError(f"Unsupport arg '{arg}'") def _build_train_network(self): """Build train network""" @@ -125,17 +130,17 @@ class Model: if self._optimizer: if self._loss_scale_manager_set: network = amp.build_train_network(network, - self._optimizer, - self._loss_fn, - level=self._amp_level, - loss_scale_manager=self._loss_scale_manager, - keep_batchnorm_fp32=self._keep_bn_fp32) + self._optimizer, + self._loss_fn, + level=self._amp_level, + loss_scale_manager=self._loss_scale_manager, + keep_batchnorm_fp32=self._keep_bn_fp32) else: network = amp.build_train_network(network, - self._optimizer, - self._loss_fn, - level=self._amp_level, - keep_batchnorm_fp32=self._keep_bn_fp32) + self._optimizer, + self._loss_fn, + level=self._amp_level, + keep_batchnorm_fp32=self._keep_bn_fp32) 
elif self._loss_fn: network = nn.WithLossCell(network, self._loss_fn) # If need to check if loss_fn is not None, but optimizer is None @@ -157,7 +162,7 @@ class Model: else: if self._loss_fn is None: raise ValueError("loss_fn can not be None.") - self._eval_network = nn.WithEvalCell(self._network, self._loss_fn) + self._eval_network = nn.WithEvalCell(self._network, self._loss_fn, self._amp_level == "O2") self._eval_indexes = [0, 1, 2] def _build_predict_network(self): @@ -268,14 +273,14 @@ class Model: # remove later to deal with loop sink need_wrap = False if not hasattr(train_dataset, '__ME_INITED__') and context.get_context("enable_loop_sink") \ - and not context.get_context("enable_ge"): + and not context.get_context("enable_ge"): need_wrap = True dataset_helper = DatasetHelper(train_dataset) # remove later to deal with loop sink if need_wrap: self._train_network = nn.DataWrapper(self._train_network, *(dataset_helper.types_shapes()), - train_dataset.__ME_INITED__) + train_dataset.__ME_INITED__) cb_params.train_network = self._train_network self._train_network.set_train() @@ -435,7 +440,7 @@ class Model: # remove later to deal with loop sink need_wrap = False if not hasattr(valid_dataset, '__ME_INITED__') and context.get_context("enable_loop_sink") \ - and not context.get_context("enable_ge"): + and not context.get_context("enable_ge"): need_wrap = True valid_dataset.__loop_size__ = 1 @@ -444,7 +449,7 @@ class Model: # remove later to deal with loop sink if need_wrap: self._eval_network = nn.DataWrapper(self._eval_network, *(dataset_helper.types_shapes()), - valid_dataset.__ME_INITED__) + valid_dataset.__ME_INITED__) self._eval_network.set_train(mode=False) self._eval_network.phase = 'eval' diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py index 49cc5318fa..6ab45358eb 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -230,15 +230,6 @@ def load_param_into_net(net, parameter_dict): raise 
TypeError(msg) logger.info("Execute load parameter into net process.") - for name in parameter_dict: - for _, param in net.parameters_and_names(): - if name == param.name and param.layerwise_parallel: - # layerwise parallel parameter data loaded from checkpoint file, - # was a complete(merged) data, need to be splited - new_param = parameter_dict[param.name] - _load_tensor_for_layerwise(new_param, param) - break - param_not_load = [] for _, param in net.parameters_and_names(): if param.name in parameter_dict: @@ -267,16 +258,17 @@ def _load_dismatch_prefix_params(net, parameter_dict, param_not_load): longest_name = param_not_load[0] while prefix_name != longest_name and param_not_load: logger.debug("Count: {} parameters has not been loaded, try to load continue.".format(len(param_not_load))) - longest_name = sorted(param_not_load, key=len, reverse=True)[0] prefix_name = longest_name for net_param_name in param_not_load: for dict_name in parameter_dict: if dict_name.endswith(net_param_name): - tmp_name = dict_name[:-len(net_param_name)] - prefix_name = prefix_name if len(prefix_name) < len(tmp_name) else tmp_name + prefix_name = dict_name[:-len(net_param_name)] + break + if prefix_name != longest_name: + break if prefix_name != longest_name: - logger.info("Remove parameter prefix name: {}, continue to load.".format(prefix_name)) + logger.warning("Remove parameter prefix name: {}, continue to load.".format(prefix_name)) for _, param in net.parameters_and_names(): new_param_name = prefix_name + param.name if param.name in param_not_load and new_param_name in parameter_dict: @@ -368,34 +360,6 @@ def _get_merged_param_data(net, param_name, param_data): return param_data -def _load_tensor_for_layerwise(new_param, old_param): - """ - Replaces parameters with sliced tensors by layerwise parallel strategies. - - Args: - new_param (Parameter): The new layerwise parallel parameter, will be loaded into net. - old_param(Parameter): The current parameter in the net. 
- """ - if not isinstance(new_param.data, Tensor) or not isinstance(old_param.data, Tensor): - logger.error("Failed to combine the net and the parameters.") - msg = ("layerwise parallel parameter should be a Tensor, but got {}.".format(type(new_param.data))) - raise TypeError(msg) - - if old_param.data.shape() == new_param.data.shape(): - return - - from mindspore.parallel._tensor import _load_tensor - from mindspore.communication.management import get_group_size - dev_mat = [get_group_size()] - shape = new_param.data.shape() - for x in range(len(shape)): # dim 0 set 0, others set -1 - if x: - tensor_map.append(-1) - - new_tensor = _load_tensor(new_param.data, dev_mat, tensor_map) - new_param.set_parameter_data(new_tensor) - - def _fill_param_into_net(net, parameter_list): """ Fills parameter_list into net. @@ -450,7 +414,7 @@ def export(net, *inputs, file_name, file_format='GEIR'): _executor.export(net, file_name, file_format) elif file_format == 'ONNX': # file_format is 'ONNX' phase_name = 'export_onnx' - graph_id, _ = _executor.compile(net, *inputs, phase=phase_name) + graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False) onnx_stream = _executor._get_func_graph_proto(graph_id) with open(file_name, 'wb') as f: os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR) diff --git a/mindspore/train/summary/_summary_adapter.py b/mindspore/train/summary/_summary_adapter.py index 7db80de693..9669d0f054 100644 --- a/mindspore/train/summary/_summary_adapter.py +++ b/mindspore/train/summary/_summary_adapter.py @@ -15,6 +15,7 @@ """Generate the summary event which conform to proto format.""" import time import socket +import math from enum import Enum, unique import numpy as np from PIL import Image @@ -292,6 +293,36 @@ def _get_tensor_summary(tag: str, np_value, summary_tensor): return summary_tensor +def _calc_histogram_bins(count): + """ + Calculates experience-based optimal bins number for histogram. + + There should be enough number in each bin. 
So we calc bin numbers according to count. For very small count(1 - + 10), we assign carefully chosen number. For large count, we tried to make sure there are 9-10 numbers in each + bucket on average. Too many bins will slow down performance, so we set max number of bins to 90. + + Args: + count (int): Valid number count for the tensor. + + Returns: + int, number of histogram bins. + """ + number_per_bucket = 10 + max_bins = 90 + + if not count: + return 1 + if count <= 5: + return 2 + if count <= 10: + return 3 + if count <= 880: + # note that math.ceil(881/10) + 1 equals 90 + return int(math.ceil(count / number_per_bucket) + 1) + + return max_bins + + def _fill_histogram_summary(tag: str, np_value: np.array, summary_histogram) -> None: """ Package the histogram summary. @@ -347,7 +378,8 @@ def _fill_histogram_summary(tag: str, np_value: np.array, summary_histogram) -> return - counts, edges = np.histogram(np_value, bins='auto', range=(tensor_min, tensor_max)) + bin_number = _calc_histogram_bins(masked_value.count()) + counts, edges = np.histogram(np_value, bins=bin_number, range=(tensor_min, tensor_max)) for ind, count in enumerate(counts): bucket = summary_histogram.buckets.add() diff --git a/mindspore/train/summary/summary_record.py b/mindspore/train/summary/summary_record.py index 3dbe31f0e4..4c60dce862 100644 --- a/mindspore/train/summary/summary_record.py +++ b/mindspore/train/summary/summary_record.py @@ -158,6 +158,9 @@ class SummaryRecord: package_graph_event(graph_proto).SerializeToString()) self.event_writer.flush() self.has_graph = True + data = _summary_tensor_cache.get("SummaryRecord") + if data is None: + return True data = _summary_tensor_cache.get("SummaryRecord") if data is None: diff --git a/setup.py b/setup.py index 82e6d70fcc..d929d5d707 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ from setuptools import setup, find_packages from setuptools.command.egg_info import egg_info from setuptools.command.build_py import build_py -version = 
'0.1.0' +version = '0.2.0' backend_policy = os.getenv('BACKEND_POLICY') commit_id = os.getenv('COMMIT_ID').replace("\n", "") diff --git a/tests/mindspore_test_framework/apps/test_bert_parts.py b/tests/mindspore_test_framework/apps/test_bert_parts.py index 226d175c3d..dcc679b528 100644 --- a/tests/mindspore_test_framework/apps/test_bert_parts.py +++ b/tests/mindspore_test_framework/apps/test_bert_parts.py @@ -173,9 +173,8 @@ test_sets = [ embedding_size=768, embedding_shape=[1, 128, 768], use_one_hot_embeddings=True, - initializer_range=0.02), 1, 1), { - 'init_param_with': lambda shp: np.ones(shp).astype(np.float32) - }), + initializer_range=0.02), 1, 1), + {'init_param_with': lambda shp: np.ones(shp).astype(np.float32)}), 'desc_inputs': [input_ids], 'desc_bprop': [[128]]}), ('EmbeddingLookup_multi_outputs_init_param', { @@ -183,9 +182,8 @@ test_sets = [ embedding_size=768, embedding_shape=[1, 128, 768], use_one_hot_embeddings=False, - initializer_range=0.02), { - 'init_param_with': lambda shp: np.ones(shp).astype(np.float32) - }), + initializer_range=0.02), + {'init_param_with': lambda shp: np.ones(shp).astype(np.float32)}), 'desc_inputs': [input_ids], 'desc_bprop': [[1, 128, 768], [128]]}), ('EmbeddingLookup_multi_outputs_grad_with_no_sens', { @@ -193,9 +191,8 @@ test_sets = [ embedding_size=768, embedding_shape=[1, 128, 768], use_one_hot_embeddings=False, - initializer_range=0.02), { - 'init_param_with': lambda shp: np.ones(shp).astype(np.float32) - }), + initializer_range=0.02), + {'init_param_with': lambda shp: np.ones(shp).astype(np.float32)}), 'desc_inputs': [input_ids]}), ('GetMaskedLMOutput_grad_with_no_sens', { 'block': GetMaskedLMOutput(BertConfig(batch_size=1)), diff --git a/tests/mindspore_test_framework/components/executor/check_exceptions.py b/tests/mindspore_test_framework/components/executor/check_exceptions.py index fe57a3d287..a4eb1cd8a0 100644 --- a/tests/mindspore_test_framework/components/executor/check_exceptions.py +++ 
b/tests/mindspore_test_framework/components/executor/check_exceptions.py @@ -44,4 +44,4 @@ class CheckExceptionsEC(IExectorComponent): raise Exception(f"Expect {e}, but got {sys.exc_info()[0]}") if error_kws and any(keyword not in str(exec_info.value) for keyword in error_kws): raise ValueError('Error message `{}` does not contain all keywords `{}`'.format( - str(exec_info.value), error_kws)) + str(exec_info.value), error_kws)) diff --git a/tests/mindspore_test_framework/utils/block_util.py b/tests/mindspore_test_framework/utils/block_util.py index 75946c3559..0d59728584 100644 --- a/tests/mindspore_test_framework/utils/block_util.py +++ b/tests/mindspore_test_framework/utils/block_util.py @@ -69,6 +69,7 @@ class IthOutputCell(nn.Cell): return predict def get_output_cell(network, num_input, output_index, training=True): + _ = num_input net = IthOutputCell(network, output_index) set_block_training(net, training) return net diff --git a/tests/perf_test/mindrecord/imagenet/imagenet_to_mindrecord.py b/tests/perf_test/mindrecord/imagenet/imagenet_to_mindrecord.py new file mode 100644 index 0000000000..cc3b6d78b0 --- /dev/null +++ b/tests/perf_test/mindrecord/imagenet/imagenet_to_mindrecord.py @@ -0,0 +1,32 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""use ImageNetToMR tool generate mindrecord""" +import os +from mindspore.mindrecord import ImageNetToMR + +IMAGENET_MAP_FILE = "../../../ut/data/mindrecord/testImageNetDataWhole/labels_map.txt" +IMAGENET_IMAGE_DIR = "../../../ut/data/mindrecord/testImageNetDataWhole/images" +MINDRECORD_FILE = "./imagenet.mindrecord" +PARTITION_NUMBER = 16 + +def imagenet_to_mindrecord(): + imagenet_transformer = ImageNetToMR(IMAGENET_MAP_FILE, + IMAGENET_IMAGE_DIR, + MINDRECORD_FILE, + PARTITION_NUMBER) + imagenet_transformer.transform() + +if __name__ == '__main__': + imagenet_to_mindrecord() diff --git a/tests/perf_test/mindrecord/imagenet/imagenet_to_tfrecord.py b/tests/perf_test/mindrecord/imagenet/imagenet_to_tfrecord.py new file mode 100644 index 0000000000..86d18a7d94 --- /dev/null +++ b/tests/perf_test/mindrecord/imagenet/imagenet_to_tfrecord.py @@ -0,0 +1,113 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""generate tfrecord""" +import collections +import os +import tensorflow as tf + +IMAGENET_MAP_FILE = "../../../ut/data/mindrecord/testImageNetDataWhole/labels_map.txt" +IMAGENET_IMAGE_DIR = "../../../ut/data/mindrecord/testImageNetDataWhole/images" +TFRECORD_FILE = "./imagenet.tfrecord" +PARTITION_NUMBER = 16 + +def get_imagenet_filename_label_pic(map_file, image_dir): + """ + Get data from imagenet. + + Yields: + filename, label, image_bytes + """ + if not os.path.exists(map_file): + raise IOError("map file {} not exists".format(map_file)) + + label_dict = {} + with open(map_file) as fp: + line = fp.readline() + while line: + labels = line.split(" ") + label_dict[labels[1]] = labels[0] + line = fp.readline() + + # get all the dir which are n02087046, n02094114, n02109525 + dir_paths = {} + for item in label_dict: + real_path = os.path.join(image_dir, label_dict[item]) + if not os.path.isdir(real_path): + print("{} dir is not exist".format(real_path)) + continue + dir_paths[item] = real_path + + if not dir_paths: + raise IOError("not valid image dir in {}".format(image_dir)) + + # get the filename, label and image binary as a dict + for label in dir_paths: + for item in os.listdir(dir_paths[label]): + file_name = os.path.join(dir_paths[label], item) + if not item.endswith("JPEG") and not item.endswith("jpg"): + print("{} file is not suffix with JPEG/jpg, skip it.".format(file_name)) + continue + + # get the image data + image_file = open(file_name, "rb") + image_bytes = image_file.read() + image_file.close() + if not image_bytes: + print("The image file: {} is invalid.".format(file_name)) + continue + yield str(file_name), int(label), image_bytes + +def create_int_feature(values): + feature = tf.train.Feature(int64_list=tf.train.Int64List(value=[values])) + return feature + +def create_string_feature(values): + feature =
tf.train.Feature(bytes_list=tf.train.BytesList(value=[bytes(values, encoding='utf-8')])) + return feature + +def create_bytes_feature(values): + feature = tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) + return feature + +def imagenet_to_tfrecord(): + writers = [] + for i in range(PARTITION_NUMBER): + output_file = TFRECORD_FILE + str(i).rjust(2, '0') + writers.append(tf.io.TFRecordWriter(output_file)) + + writer_index = 0 + total_written = 0 + + for file_name, label, image_bytes in get_imagenet_filename_label_pic(IMAGENET_MAP_FILE, + IMAGENET_IMAGE_DIR): + features = collections.OrderedDict() + features["file_name"] = create_string_feature(file_name) + features["label"] = create_int_feature(label) + features["data"] = create_bytes_feature(image_bytes) + + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + + writers[writer_index].write(tf_example.SerializeToString()) + writer_index = (writer_index + 1) % len(writers) + + total_written += 1 + + for writer in writers: + writer.close() + + print("Write {} total examples".format(total_written)) + +if __name__ == '__main__': + imagenet_to_tfrecord() diff --git a/tests/perf_test/mindrecord/imagenet/perf_read_imagenet.py b/tests/perf_test/mindrecord/imagenet/perf_read_imagenet.py new file mode 100644 index 0000000000..fa5baef266 --- /dev/null +++ b/tests/perf_test/mindrecord/imagenet/perf_read_imagenet.py @@ -0,0 +1,106 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""test dataset performance about mindspore.MindDataset, mindspore.TFRecordDataset, tf.data.TFRecordDataset""" +import time +import mindspore.dataset as ds +from mindspore.mindrecord import FileReader + +import tensorflow as tf + +print_step = 5000 + +def print_log(count): + if count % print_step == 0: + print("Read {} rows ...".format(count)) + +def use_filereader(mindrecord): + start = time.time() + columns_list = ["data", "label"] + reader = FileReader(file_name=mindrecord, + num_consumer=4, + columns=columns_list) + num_iter = 0 + for index, item in enumerate(reader.get_next()): + num_iter += 1 + print_log(num_iter) + end = time.time() + print("Read by FileReader - total rows: {}, cost time: {}s".format(num_iter, end - start)) + +def use_minddataset(mindrecord): + start = time.time() + columns_list = ["data", "label"] + data_set = ds.MindDataset(dataset_file=mindrecord, + columns_list=columns_list, + num_parallel_workers=4) + num_iter = 0 + for item in data_set.create_dict_iterator(): + num_iter += 1 + print_log(num_iter) + end = time.time() + print("Read by MindDataset - total rows: {}, cost time: {}s".format(num_iter, end - start)) + +def use_tfrecorddataset(tfrecord): + start = time.time() + columns_list = ["data", "label"] + data_set = ds.TFRecordDataset(dataset_files=tfrecord, + columns_list=columns_list, + num_parallel_workers=4, + shuffle=ds.Shuffle.GLOBAL) + data_set = data_set.shuffle(10000) + num_iter = 0 + for item in data_set.create_dict_iterator(): + num_iter += 1 + print_log(num_iter) + end = time.time() + print("Read by TFRecordDataset - total rows: {}, cost time: {}s".format(num_iter, end - start)) + +def use_tensorflow_tfrecorddataset(tfrecord): + start = time.time() + def _parse_record(example_photo): + features = { + 'file_name': tf.io.FixedLenFeature([], 
tf.string), + 'label': tf.io.FixedLenFeature([1], tf.int64), + 'data': tf.io.FixedLenFeature([], tf.string)} + parsed_features = tf.io.parse_single_example(example_photo, features=features) + return parsed_features + + data_set = tf.data.TFRecordDataset(filenames=tfrecord, + buffer_size=100000, + num_parallel_reads=4) + data_set = data_set.map(_parse_record, num_parallel_calls=4) + num_iter = 0 + for item in data_set.__iter__(): + num_iter += 1 + print_log(num_iter) + end = time.time() + print("Read by TensorFlow TFRecordDataset - total rows: {}, cost time: {}s".format(num_iter, end - start)) + +if __name__ == '__main__': + # use MindDataset + mindrecord = './imagenet.mindrecord00' + use_minddataset(mindrecord) + + # use TFRecordDataset + tfrecord = ['imagenet.tfrecord00', 'imagenet.tfrecord01', 'imagenet.tfrecord02', 'imagenet.tfrecord03', + 'imagenet.tfrecord04', 'imagenet.tfrecord05', 'imagenet.tfrecord06', 'imagenet.tfrecord07', + 'imagenet.tfrecord08', 'imagenet.tfrecord09', 'imagenet.tfrecord10', 'imagenet.tfrecord11', + 'imagenet.tfrecord12', 'imagenet.tfrecord13', 'imagenet.tfrecord14', 'imagenet.tfrecord15'] + use_tfrecorddataset(tfrecord) + + # use TensorFlow TFRecordDataset + use_tensorflow_tfrecorddataset(tfrecord) + + # use FileReader + # use_filereader(mindrecord) diff --git a/tests/perf_test/mindrecord/imagenet/schema.json b/tests/perf_test/mindrecord/imagenet/schema.json new file mode 100644 index 0000000000..b76f9113d7 --- /dev/null +++ b/tests/perf_test/mindrecord/imagenet/schema.json @@ -0,0 +1,18 @@ +{ + "datasetType": "TF", + "numRows": 930059, + "columns": { + "file_name": { + "type": "uint8", + "rank": 0 + }, + "label": { + "type": "int64", + "rank": 0 + }, + "data": { + "type": "uint8", + "rank": 0 + } + } +} diff --git a/tests/st/auto_parallel/onehot_model_parallel.py b/tests/st/auto_parallel/onehot_model_parallel.py index 1f35ac1f80..3c41e2975e 100644 --- a/tests/st/auto_parallel/onehot_model_parallel.py +++ 
b/tests/st/auto_parallel/onehot_model_parallel.py @@ -33,7 +33,6 @@ def setup_module(): global rank_id np.random.seed(0) context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - context.set_context(enable_hccl=True) context.set_context(enable_task_sink=True, device_id=device_id) context.set_context(enable_ir_fusion=True) diff --git a/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py b/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py index 86a8b89521..767094c044 100644 --- a/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py +++ b/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py @@ -46,7 +46,6 @@ def setup_module(): global rank_id np.random.seed(0) context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - context.set_context(enable_hccl=True) context.set_context(enable_task_sink=True, device_id=device_id) context.set_context(enable_ir_fusion=True) diff --git a/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py b/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py index b28ad510e3..41f08f54ee 100644 --- a/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py +++ b/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py @@ -31,7 +31,6 @@ from mindspore.train.callback import Callback from mindspore.parallel import set_algo_parameters context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") -context.set_context(enable_hccl=True) context.set_context(enable_task_sink=True, device_id=int(os.getenv('DEVICE_ID'))) context.set_context(enable_ir_fusion=True) context.set_context(enable_loop_sink=False) diff --git a/tests/st/control/test_multigraph_sink.py b/tests/st/control/test_multigraph_sink.py new file mode 100644 index 0000000000..2b9a1a020a --- /dev/null +++ b/tests/st/control/test_multigraph_sink.py @@ -0,0 +1,205 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with 
the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" test_multigraph_sink """ +import pytest +import numpy as np +import mindspore.nn as nn +import mindspore.context as context +from mindspore.common.tensor import Tensor +from mindspore.common import dtype as mstype +from mindspore.common import ms_function +from mindspore.ops import operations as P + + +def setup_module(module): + context.set_context(mode = context.PYNATIVE_MODE, device_target = "Ascend") + +c1 = Tensor([2], mstype.int32) +c2 = Tensor([14], mstype.int32) +c3 = Tensor([1], mstype.int32) +c4 = Tensor([0], mstype.int32) +c5 = Tensor([14], mstype.int32) + + +@ms_function +def simple_if(x, y, z): + if x < y: + x = x + 1 + else: + x = x + 2 + x = x + 3 + return x + + +@ms_function +def if_by_if(x, y, z): + if x < y: + x = x + 1 + if y > x: + x = x + 2 + x = x + 3 + return x + + +@ms_function +def if_in_if(x, y, z): + out = c4 + if x < y: + z = c4 + c4 + if z < y: + z = z + 2 + out = out + z + x = x + 3 + out = out + x + return out + + +@ms_function +def simple_while(x, y, z): + y = y + 4 + while x < y: + x = x + 1 + x = x + 3 + return x + + +@ms_function +def while_by_while(x, y, z): + while x < y: + x = x + 1 + while z < c5: + z = z + 1 + x = x + 1 + x = x + 1 + return x + + +@ms_function +def while_in_while(x, y, z): + out = c4 + while x < y: + z = c4 + c4 + while z < y: + z = z + 1 + out = out + z + x = x + 1 + out = out + x + return out + + +@ms_function +def while_by_while_in_while(x, y, z): + out = c4 + while x < c2: + y = c4 + c4 + while y < 
c2: + y = y + 1 + out = out + y + z = c4 + c4 + while z < c2: + z = z + 1 + out = out + z + x = x + 1 + out = out + x + return out + + +@ms_function +def while_in_while_in_while(x, y, z): + out = c4 + while x < c2: + y = c4 + c4 + while y < c2: + y = y + 1 + z = c4 + c4 + while z < c2: + z = z + 1 + out = out + z + out = out + y + x = x + 1 + out = out + x + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.env_onecard +def test_simple_if(): + output = simple_if(c1, c2, c3) + expect = Tensor([6], mstype.int32) + assert output == expect + + +def test_if_by_if(): + output = if_by_if(c1, c2, c3) + expect = Tensor([8], mstype.int32) + assert output == expect + + +def test_if_in_if(): + output = if_in_if(c1, c2, c3) + expect = Tensor([7], mstype.int32) + assert output == expect + + +@pytest.mark.level0 +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.env_onecard +def test_simple_while(): + output = simple_while(c1, c2, c3) + expect = Tensor([21], mstype.int32) + assert output == expect + + +@pytest.mark.level0 +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.env_onecard +def test_while_by_while(): + output = while_by_while(c1, c2, c3) + expect = Tensor([28], mstype.int32) + assert output == expect + + +@pytest.mark.level0 +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.env_onecard +def test_while_in_while(): + output = while_in_while(c1, c2, c3) + expect = Tensor([1274], mstype.int32) + assert output == expect + + +@pytest.mark.level0 +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.env_onecard +def test_while_by_while_in_while(): + output = while_by_while_in_while(c1, c2, c3) + expect = Tensor([350], mstype.int32) + assert output == expect + +@pytest.mark.level0 
+@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.env_onecard +def test_while_in_while_in_while(): + output = while_in_while_in_while(c1, c2, c3) + expect = Tensor([2534], mstype.int32) + assert output == expect + diff --git a/tests/st/gnn/aggregator.py b/tests/st/gnn/aggregator.py new file mode 100644 index 0000000000..18f189d979 --- /dev/null +++ b/tests/st/gnn/aggregator.py @@ -0,0 +1,222 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Aggregator.""" +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore._extends import cell_attr_register +from mindspore import Tensor, Parameter +from mindspore.common.initializer import initializer +from mindspore._checkparam import check_int_positive, check_bool +from mindspore.nn.layer.activation import get_activation + + +class GNNFeatureTransform(nn.Cell): + r""" + The GNN feature transform layer for input. + + Applies linear transformation for the input feature. This layer implements the operation as: + + .. 
math:: + \text{outputs} = \text{inputs} * \text{kernel} + \text{bias}, + + where :math:`\text{inputs}` is the input feature flattened to two dimensions (no activation is applied by + this layer), :math:`\text{kernel}` is a weight matrix with the same + data type as the inputs created by the layer, and :math:`\text{bias}` is a bias vector + with the same data type as the inputs created by the layer (only if has_bias is True). + + Args: + in_channels (int): The number of channels in the input space. + out_channels (int): The number of channels in the output space. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as input x. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + + Raises: + ValueError: If weight_init or bias_init shape is incorrect. + + Inputs: + - **input_x** (Tensor) - The input feature tensor. The shape of the tensor is :math:`(B, N, C)`, + where :math:`B` and :math:`N` are the sizes of the first two dimensions (merged before the multiplication) and + :math:`C` is the size of the last dimension, which is multiplied with the weight (`in_channels`). + + Outputs: + Tensor, the shape of the output tensor is :math:`(B, N, M)`, where :math:`M` is `out_channels`. 
+ + Examples: + >>> net = GNNFeatureTransform(3, 4) + >>> input = Tensor(np.random.randint(0, 255, [2, 5, 3]), mindspore.float32) + >>> output = net(input) + >>> # construct() flattens the input to (2 * 5, 3), applies the linear transform and + >>> # reshapes the result back, so the output shape is (2, 5, 4) + """ + @cell_attr_register(attrs=['has_bias', 'activation']) + def __init__(self, + in_channels, + out_channels, + weight_init='normal', + bias_init='zeros', + has_bias=True): + super(GNNFeatureTransform, self).__init__() + self.in_channels = check_int_positive(in_channels) + self.out_channels = check_int_positive(out_channels) + self.has_bias = check_bool(has_bias) + + if isinstance(weight_init, Tensor): + if weight_init.dim() != 2 or weight_init.shape()[0] != out_channels or \ + weight_init.shape()[1] != in_channels: + raise ValueError("weight_init shape error") + + self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight") + + if self.has_bias: + if isinstance(bias_init, Tensor): + if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels: + raise ValueError("bias_init shape error") + + self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias") + + self.matmul = P.MatMul(transpose_b=True) + self.bias_add = P.BiasAdd() + + def construct(self, x): + tensor_shape = F.shape(x) + input_feature = F.reshape(x, (tensor_shape[0] * tensor_shape[1], tensor_shape[2])) + output = self.matmul(input_feature, self.weight) + if self.has_bias: + output = self.bias_add(output, self.bias) + output = F.reshape(output, (tensor_shape[0], tensor_shape[1], self.out_channels)) + return output + + def extend_repr(self): + str_info = 'in_channels={}, out_channels={}, weight={}, has_bias={}' \ + .format(self.in_channels, self.out_channels, self.weight, self.has_bias) + if self.has_bias: + str_info = str_info + ', bias={}'.format(self.bias) + + return str_info + + +class _BaseAggregator(nn.Cell): + """ + Base Aggregator of GNN + + Args: 
+ feature_in_dim (int): Node or edge input feature dim. 
+ feature_out_dim (int): Node or edge output feature dim. + use_fc (bool): Specifies whether a linear transformation is applied before the message is aggregated. Default: True + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as input x. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + dropout_ratio (float): The keep rate of dropout layer (passed to nn.Dropout as keep_prob), greater than 0 and less than or equal to 1. Default: None. + activation (str): Name of the activation function looked up with get_activation, eg. 'relu'. Default: None. + + Examples: + >>> class MyAggregator(_BaseAggregator): + >>> def __init__(self, feature_in_dim, feature_out_dim): + >>> super(MyAggregator, self).__init__(feature_in_dim, feature_out_dim) + >>> self.reduce_sum = P.ReduceSum() + >>> + >>> def construct(self, x): + >>> return self.reduce_sum(x, 1) + """ + def __init__(self, + feature_in_dim, + feature_out_dim, + use_fc=True, + weight_init="normal", + bias_init="zeros", + has_bias=True, + dropout_ratio=None, + activation=None): + super(_BaseAggregator, self).__init__() + self.in_dim = feature_in_dim + self.out_dim = feature_out_dim + self.use_fc = use_fc + if self.use_fc: + self.weight_init = weight_init + self.bias_init = bias_init + self.has_bias = has_bias + self.fc = GNNFeatureTransform(self.in_dim, + self.out_dim, + weight_init=self.weight_init, + bias_init=self.bias_init, + has_bias=self.has_bias) + self.dropout_ratio = dropout_ratio + if self.dropout_ratio is not None: + self.dropout = nn.Dropout(keep_prob=self.dropout_ratio) + self.dropout_flag = self.dropout_ratio is not None + self.activation = get_activation(activation) + self.activation_flag = self.activation is not None + + 
def construct(self, **kward): + """Must be overridden by all subclasses.""" + raise NotImplementedError + + +class MeanAggregator(_BaseAggregator): + """ + Mean Aggregator of GNN: applies the optional linear transform, dropout and activation, then averages over axis 1. + + Args: + feature_in_dim (int): Node or edge input feature dim. + feature_out_dim (int): Node or edge output feature dim. + use_fc (bool): Specifies whether a linear transformation is applied before the message is aggregated. Default: True + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as input x. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + dropout_ratio (float): The keep rate of dropout layer, greater than 0 and less than or equal to 1. Default: None. + activation (str): Activation function applied to the feature before the mean is taken, eg. 'relu'. Default: None. 
+ + Examples: + >>> net = MeanAggregator(32, 64, activation="relu", dropout_ratio=0.5) + >>> input_data = Tensor(np.array(np.random.rand(32, 3, 32), dtype=np.float32)) + >>> output = net(input_data) + """ + def __init__(self, + feature_in_dim, + feature_out_dim, + use_fc=True, + weight_init="normal", + bias_init="zeros", + has_bias=True, + dropout_ratio=None, + activation=None): + super(MeanAggregator, self).__init__( + feature_in_dim, + feature_out_dim, + use_fc=use_fc, + weight_init=weight_init, + bias_init=bias_init, + has_bias=has_bias, + dropout_ratio=dropout_ratio, + activation=activation) + self.reduce_mean = P.ReduceMean(keep_dims=False) + + def construct(self, input_feature): + if self.use_fc: + input_feature = self.fc(input_feature) + if self.dropout_flag: + input_feature = self.dropout(input_feature) + if self.activation_flag: + input_feature = self.activation(input_feature) + output_feature = self.reduce_mean(input_feature, 1) + return output_feature diff --git a/tests/st/gnn/test_gnn_aggregator.py b/tests/st/gnn/test_gnn_aggregator.py new file mode 100644 index 0000000000..bba7c09c31 --- /dev/null +++ b/tests/st/gnn/test_gnn_aggregator.py @@ -0,0 +1,53 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""test gnn aggregator.""" +import numpy as np + +import mindspore.nn as nn +import mindspore.context as context +from mindspore import Tensor +from mindspore.common.api import _executor +import mindspore.ops.composite as C +from aggregator import MeanAggregator + +context.set_context(mode=context.GRAPH_MODE) + + +class MeanAggregatorGrad(nn.Cell): + """Backward of MeanAggregator""" + def __init__(self, network): + super(MeanAggregatorGrad, self).__init__() + self.grad_op = C.grad_all_with_sens + self.network = network + + def construct(self, x, sens): + grad_op = self.grad_op(self.network)(x, sens) + return grad_op + + +def test_MeanAggregator(): + """Compile MeanAggregator forward graph""" + aggregator = MeanAggregator(32, 64, activation="relu", dropout_ratio=0.5) + input_data = Tensor(np.array(np.random.rand(32, 3, 32), dtype=np.float32)) + _executor.compile(aggregator, input_data) + + +def test_MeanAggregator_grad(): + """Compile MeanAggregator backward graph""" + aggregator = MeanAggregator(32, 64, activation="relu", dropout_ratio=0.5) + input_data = Tensor(np.array(np.random.rand(32, 3, 32), dtype=np.float32)) + sens = Tensor(np.ones([32, 64]).astype(np.float32)) + grad_op = MeanAggregatorGrad(aggregator) + _executor.compile(grad_op, input_data, sens) diff --git a/tests/st/mem_reuse/resnet_cifar_memreuse.py b/tests/st/mem_reuse/resnet_cifar_memreuse.py index d6310612b6..2604fe5850 100644 --- a/tests/st/mem_reuse/resnet_cifar_memreuse.py +++ b/tests/st/mem_reuse/resnet_cifar_memreuse.py @@ -122,16 +122,10 @@ class CrossEntropyLoss(nn.Cell): if __name__ == '__main__': - if args_opt.do_eval: - context.set_context(enable_hccl=False) - else: - if args_opt.run_distribute: - context.set_context(enable_hccl=True) - context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL) - 
context.set_auto_parallel_context(all_reduce_fusion_split_indices=[140]) - init() - else: - context.set_context(enable_hccl=False) + if not args_opt.do_eval and args_opt.run_distribute: + context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL) + context.set_auto_parallel_context(all_reduce_fusion_split_indices=[140]) + init() context.set_context(mode=context.GRAPH_MODE) epoch_size = args_opt.epoch_size diff --git a/tests/st/mem_reuse/resnet_cifar_normal.py b/tests/st/mem_reuse/resnet_cifar_normal.py index 2b6741e57a..8e037212d0 100644 --- a/tests/st/mem_reuse/resnet_cifar_normal.py +++ b/tests/st/mem_reuse/resnet_cifar_normal.py @@ -123,16 +123,10 @@ class CrossEntropyLoss(nn.Cell): if __name__ == '__main__': - if args_opt.do_eval: - context.set_context(enable_hccl=False) - else: - if args_opt.run_distribute: - context.set_context(enable_hccl=True) - context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL) - context.set_auto_parallel_context(all_reduce_fusion_split_indices=[140]) - init() - else: - context.set_context(enable_hccl=False) + if not args_opt.do_eval and args_opt.run_distribute: + context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL) + context.set_auto_parallel_context(all_reduce_fusion_split_indices=[140]) + init() context.set_context(mode=context.GRAPH_MODE) epoch_size = args_opt.epoch_size diff --git a/tests/st/nccl/test_nccl_lenet.py b/tests/st/nccl/test_nccl_lenet.py index 3880f1d473..4ed424b6ee 100644 --- a/tests/st/nccl/test_nccl_lenet.py +++ b/tests/st/nccl/test_nccl_lenet.py @@ -21,11 +21,12 @@ from mindspore.nn.optim import Momentum from mindspore.nn import TrainOneStepCell, WithLossCell from mindspore.ops import operations as P from mindspore.communication.management import init, get_rank, get_group_size +from mindspore.common import dtype as mstype 
context.set_context(mode=context.GRAPH_MODE, device_target="GPU") init('nccl') -epoch = 2 +epoch = 5 total = 5000 batch_size = 32 mini_batch = total // batch_size @@ -67,12 +68,20 @@ class LeNet(nn.Cell): return output +def multisteplr(total_steps, gap, base_lr=0.9, gamma=0.1, dtype=mstype.float32): + lr = [] + for step in range(total_steps): + lr_ = base_lr * gamma ** (step//gap) + lr.append(lr_) + return Tensor(np.array(lr), dtype) + + def test_lenet_nccl(): net = LeNet() net.set_train() - learning_rate = 0.01 - momentum = 0.9 + learning_rate = multisteplr(epoch, 2) + momentum = Tensor(np.array([0.9]).astype(np.float32)) mom_optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) net_with_criterion = WithLossCell(net, criterion) diff --git a/tests/st/networks/models/bert/bert_tdt_no_lossscale.py b/tests/st/networks/models/bert/bert_tdt_lossscale.py similarity index 75% rename from tests/st/networks/models/bert/bert_tdt_no_lossscale.py rename to tests/st/networks/models/bert/bert_tdt_lossscale.py index 7d30592044..ec46633657 100644 --- a/tests/st/networks/models/bert/bert_tdt_no_lossscale.py +++ b/tests/st/networks/models/bert/bert_tdt_lossscale.py @@ -18,22 +18,22 @@ import os import pytest import numpy as np -import mindspore.context as context +from numpy import allclose import mindspore.common.dtype as mstype import mindspore.dataset.engine.datasets as de import mindspore.dataset.transforms.c_transforms as C -from mindspore import Tensor +from mindspore import context +from mindspore.common.tensor import Tensor from mindspore.train.model import Model -from mindspore.train.callback import Callback -from mindspore.model_zoo.Bert_NEZHA import BertConfig, BertNetworkWithLoss, BertTrainOneStepCell +from mindspore.train.callback import Callback, LossMonitor +from mindspore.train.loss_scale_manager import DynamicLossScaleManager +from 
mindspore.model_zoo.Bert_NEZHA import BertConfig, BertNetworkWithLoss, BertTrainOneStepWithLossScaleCell from mindspore.nn.optim import Momentum from mindspore import log as logger - _current_dir = os.path.dirname(os.path.realpath(__file__)) DATA_DIR = ["/home/workspace/mindspore_dataset/bert/example/examples.tfrecord"] SCHEMA_DIR = "/home/workspace/mindspore_dataset/bert/example/datasetSchema.json" - def get_config(version='base', batch_size=1): """get config""" if version == 'base': @@ -100,14 +100,13 @@ def get_config(version='base', batch_size=1): bert_config = BertConfig(batch_size=batch_size) return bert_config - def me_de_train_dataset(): """test me de train dataset""" # apply repeat operations repeat_count = 1 ds = de.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["input_ids", "input_mask", "segment_ids", - "next_sentence_labels", "masked_lm_positions", - "masked_lm_ids", "masked_lm_weights"], shuffle=False) + "next_sentence_labels", "masked_lm_positions", + "masked_lm_ids", "masked_lm_weights"], shuffle=False) type_cast_op = C.TypeCast(mstype.int32) ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) @@ -116,29 +115,30 @@ def me_de_train_dataset(): ds = ds.map(input_columns="input_mask", operations=type_cast_op) ds = ds.map(input_columns="input_ids", operations=type_cast_op) # apply batch operations - batch_size = 16 + batch_size = int(os.getenv('BATCH_SIZE', '16')) ds = ds.batch(batch_size, drop_remainder=True) ds = ds.repeat(repeat_count) return ds - def weight_variable(shape): """weight variable""" np.random.seed(1) ones = np.random.uniform(-0.1, 0.1, size=shape).astype(np.float32) return Tensor(ones) - class ModelCallback(Callback): def __init__(self): super(ModelCallback, self).__init__() self.loss_list = [] + self.overflow_list = [] + self.lossscale_list = [] def step_end(self, run_context): cb_params = run_context.original_args() - 
self.loss_list.append(cb_params.net_outputs.asnumpy()[0]) - logger.info("epoch: {}, outputs are {}".format(cb_params.cur_epoch_num, str(cb_params.net_outputs))) - + self.loss_list.append(cb_params.net_outputs[0].asnumpy()[0]) + self.overflow_list.append(cb_params.net_outputs[1]) + self.lossscale_list.append(cb_params.net_outputs[2]) + print("epoch: {}, outputs are: {}".format(cb_params.cur_epoch_num, str(cb_params.net_outputs))) @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @@ -150,16 +150,18 @@ def test_bert_tdt(): context.set_context(enable_task_sink=True) context.set_context(enable_loop_sink=True) context.set_context(enable_mem_reuse=True) - parallel_callback = ModelCallback() ds = me_de_train_dataset() version = os.getenv('VERSION', 'large') batch_size = int(os.getenv('BATCH_SIZE', '16')) config = get_config(version=version, batch_size=batch_size) netwithloss = BertNetworkWithLoss(config, True) optimizer = Momentum(netwithloss.trainable_params(), learning_rate=2e-5, momentum=0.9) - netwithgrads = BertTrainOneStepCell(netwithloss, optimizer=optimizer) + scale_window = 3 + scale_manager = DynamicLossScaleManager(2**32, 2, scale_window) + netwithgrads = BertTrainOneStepWithLossScaleCell(netwithloss, optimizer=optimizer, scale_update_cell=scale_manager.get_update_cell()) netwithgrads.set_train(True) model = Model(netwithgrads) + callback = ModelCallback() params = netwithloss.trainable_params() for param in params: value = param.default_input @@ -178,13 +180,24 @@ def test_bert_tdt(): else: logger.info("***************** BERT param name is 3 {}".format(name)) param.default_input = weight_variable(value.asnumpy().shape) - model.train(ds.get_repeat_count(), ds, callbacks=parallel_callback, dataset_sink_mode=False) - loss_value = np.array(parallel_callback.loss_list) - expect_out = [12.19179, 11.965041, 11.969687, 11.97815, 11.969171, 12.603289, 12.165594, - 12.824818, 12.38842, 12.604046] - logger.info("expected loss value output: 
{}".format(expect_out)) - assert np.allclose(loss_value, expect_out, 0.00001, 0.00001) - + model.train(ds.get_repeat_count(), ds, callbacks=callback, dataset_sink_mode=False) + + # assertion occurs while the loss_scale value is wrong + count = 0 + for i in range(len(callback.overflow_list)): + if callback.overflow_list[i] == Tensor(True, mstype.bool_) and i > 0: + count = 0 + assert callback.lossscale_list[i] == callback.lossscale_list[i - 1] * Tensor(0.5, mstype.float32) + if callback.overflow_list[i] == Tensor(False, mstype.bool_): + count = count + 1 + if count == scale_window: + count = 0 + assert callback.lossscale_list[i] == callback.lossscale_list[i - 1] * Tensor(2.0, mstype.float32) + # assertion occurs while the loss value is wrong + loss_value = np.array(callback.loss_list) + expect_value = [12.1918125, 11.966035, 11.972114, 11.982671, 11.976399, 12.616986, 12.180658, 12.850562, 12.415608, 12.640145] + print("loss value: {}".format(loss_value)) + assert np.allclose(loss_value, expect_value, 0.00001, 0.00001) if __name__ == '__main__': test_bert_tdt() diff --git a/tests/st/networks/test_gpu_resnet.py b/tests/st/networks/test_gpu_resnet.py index a5f450d5e3..a045f97501 100644 --- a/tests/st/networks/test_gpu_resnet.py +++ b/tests/st/networks/test_gpu_resnet.py @@ -32,9 +32,7 @@ from mindspore.nn.optim import Momentum from mindspore.ops import operations as P from mindspore.nn import TrainOneStepCell, WithLossCell from mindspore.nn import Dense -from mindspore.common.initializer import initializer - -context.set_context(mode=context.GRAPH_MODE, device_target="GPU") +from mindspore import amp def random_normal_init(shape, mean=0.0, stddev=0.01, seed=None): @@ -326,6 +324,7 @@ def resnet50(num_classes): @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_trainTensor(num_classes=10, epoch=8, batch_size=1): + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") net = resnet50(num_classes) lr = 0.1 momentum = 0.9 @@ -341,3 +340,26 
@@ def test_trainTensor(num_classes=10, epoch=8, batch_size=1): loss = train_network(data, label) losses.append(loss) assert(losses[-1].asnumpy() < 1) + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_trainTensor_amp(num_classes=10, epoch=18, batch_size=16): + context.set_context(mode=context.GRAPH_MODE, device_target="GPU", enable_mem_reuse=False, + enable_dynamic_memory=False) + net = resnet50(num_classes) + lr = 0.1 + momentum = 0.9 + optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, momentum) + criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + train_network = amp.build_train_network(net, optimizer, criterion, level="O2") + train_network.set_train() + losses = [] + for i in range(0, epoch): + data = Tensor(np.ones([batch_size, 3, 224, 224]).astype(np.float32) * 0.01) + label = Tensor(np.ones([batch_size]).astype(np.int32)) + loss = train_network(data, label) + losses.append(loss) + assert(losses[-1][0].asnumpy() < 1) + assert(losses[-1][1].asnumpy() == False) + assert(losses[-1][2].asnumpy() > 1) diff --git a/tests/st/ops/davinci/test_aicpu_ops/test_expand_dims.py b/tests/st/ops/ascend/test_aicpu_ops/test_expand_dims.py similarity index 100% rename from tests/st/ops/davinci/test_aicpu_ops/test_expand_dims.py rename to tests/st/ops/ascend/test_aicpu_ops/test_expand_dims.py diff --git a/tests/st/ops/davinci/test_aicpu_ops/test_flatten.py b/tests/st/ops/ascend/test_aicpu_ops/test_flatten.py similarity index 100% rename from tests/st/ops/davinci/test_aicpu_ops/test_flatten.py rename to tests/st/ops/ascend/test_aicpu_ops/test_flatten.py diff --git a/tests/st/ops/davinci/test_aicpu_ops/test_is_finite.py b/tests/st/ops/ascend/test_aicpu_ops/test_is_finite.py similarity index 100% rename from tests/st/ops/davinci/test_aicpu_ops/test_is_finite.py rename to tests/st/ops/ascend/test_aicpu_ops/test_is_finite.py diff --git 
a/tests/st/ops/davinci/test_aicpu_ops/test_reshape.py b/tests/st/ops/ascend/test_aicpu_ops/test_reshape.py similarity index 100% rename from tests/st/ops/davinci/test_aicpu_ops/test_reshape.py rename to tests/st/ops/ascend/test_aicpu_ops/test_reshape.py diff --git a/tests/st/ops/davinci/test_aicpu_ops/test_squeeze.py b/tests/st/ops/ascend/test_aicpu_ops/test_squeeze.py similarity index 100% rename from tests/st/ops/davinci/test_aicpu_ops/test_squeeze.py rename to tests/st/ops/ascend/test_aicpu_ops/test_squeeze.py diff --git a/tests/st/ops/davinci/test_tbe_ops/test_relu_v2_grad.py b/tests/st/ops/ascend/test_tbe_ops/test_relu_v2_grad.py similarity index 100% rename from tests/st/ops/davinci/test_tbe_ops/test_relu_v2_grad.py rename to tests/st/ops/ascend/test_tbe_ops/test_relu_v2_grad.py diff --git a/tests/st/ops/gpu/test_assign_add_op.py b/tests/st/ops/gpu/test_assign_add_op.py index f8faf2be64..4c95177fb6 100644 --- a/tests/st/ops/gpu/test_assign_add_op.py +++ b/tests/st/ops/gpu/test_assign_add_op.py @@ -14,19 +14,20 @@ # ============================================================================ import pytest -from mindspore import Tensor +from mindspore import Tensor, Parameter from mindspore.ops import operations as P import mindspore.nn as nn import numpy as np import mindspore.context as context class AssignAdd(nn.Cell): - def __init__( self): + def __init__(self, value): super(AssignAdd, self).__init__() + self.var = Parameter(value, name="var") self.add = P.AssignAdd() - def construct(self, x, y): - res = self.add(x, y) + def construct(self, y): + res = self.add(self.var, y) return res @pytest.mark.level0 @@ -51,19 +52,24 @@ def test_assign_add(): [[54, 57, 60], [63, 66, 69], [72, 75, 78]]]]) - x = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32)) - y = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32)) + x1 = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32)) + y1 = Tensor(np.arange(1 * 3 * 3 * 
3).reshape(1, 3, 3, 3).astype(np.float32)) + + x2 = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32)) + y2 = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32)) context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU') - add = AssignAdd() - output1 = add(x, y) + add = AssignAdd(x1) + output1 = add(y1) assert (output1.asnumpy() == expect1).all() - output2 = add(output1, y) + add = AssignAdd(output1) + output2 = add(y1) assert (output2.asnumpy() == expect2).all() context.set_context(mode=context.GRAPH_MODE, device_target='GPU') - add = AssignAdd() - output1 = add(x, y) + add = AssignAdd(x2) + output1 = add(y2) assert (output1.asnumpy() == expect1).all() - output2 = add(output1, y) + add = AssignAdd(output1) + output2 = add(y2) assert (output2.asnumpy() == expect2).all() diff --git a/tests/st/ops/gpu/test_assign_op.py b/tests/st/ops/gpu/test_assign_op.py index 4cf730d763..f1fb908268 100644 --- a/tests/st/ops/gpu/test_assign_op.py +++ b/tests/st/ops/gpu/test_assign_op.py @@ -14,7 +14,7 @@ # ============================================================================ import pytest -from mindspore import Tensor +from mindspore import Tensor, Parameter from mindspore.ops import operations as P import mindspore.nn as nn import numpy as np @@ -22,12 +22,13 @@ import mindspore.context as context class Net(nn.Cell): - def __init__(self): + def __init__(self, value): super(Net, self).__init__() + self.var = Parameter(value, name="var") self.assign = P.Assign() - def construct(self, var, value): - return self.assign(var, value) + def construct(self, value): + return self.assign(self.var, value) x = np.array([[1.2, 1], [1, 0]]).astype(np.float32) value = np.array([[1, 2], [3, 4.0]]).astype(np.float32) @@ -37,13 +38,13 @@ value = np.array([[1, 2], [3, 4.0]]).astype(np.float32) @pytest.mark.env_onecard def test_assign(): context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - assign = Net() var = Tensor(x) - output = 
assign(var, Tensor(value)) + assign = Net(var) + output = assign(Tensor(value)) error = np.ones(shape=[2, 2]) * 1.0e-6 diff1 = output.asnumpy() - value - diff2 = var.asnumpy() - value + diff2 = assign.var.default_input.asnumpy() - value assert np.all(diff1 < error) assert np.all(-diff1 < error) assert np.all(diff2 < error) diff --git a/tests/st/ops/gpu/test_layer_norm_grad_op.py b/tests/st/ops/gpu/test_layer_norm_grad_op.py new file mode 100644 index 0000000000..0cef113d7c --- /dev/null +++ b/tests/st/ops/gpu/test_layer_norm_grad_op.py @@ -0,0 +1,140 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import pytest +import numpy as np +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore.ops.operations import _grad_ops as G +from mindspore.ops import composite as C +import mindspore.nn as nn +import mindspore.context as context + + +context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + +class LayerNormGradNet(nn.Cell): + def __init__(self, begin_norm_axis, begin_params_axis): + super(LayerNormGradNet, self).__init__() + self.norm = G.LayerNormGrad(begin_norm_axis, begin_params_axis) + + def construct(self, dy, x, var, mean, gamma): + return self.norm(dy, x, var, mean, gamma) + +def LayerNormGradReference(x, dy, gamma, epsilon, begin_norm_axis, begin_params_axis): + begin_norm_axis = begin_norm_axis if begin_norm_axis >=0 else begin_norm_axis + len(x.shape) + begin_params_axis = begin_params_axis if begin_params_axis >=0 else begin_params_axis + len(x.shape) + + norm_axis = [i for i in range(begin_norm_axis, len(x.shape))] + param_axis = [i for i in range(0, begin_params_axis)] + num = 1 + for i in range(begin_norm_axis, len(x.shape)): + num *= x.shape[i] + + mean = np.mean(x, axis=tuple(norm_axis), keepdims=True) + var = np.var(x, axis=tuple(norm_axis), keepdims=True) + + gamma = gamma.reshape((*((1,)*begin_params_axis), *x.shape[begin_params_axis:])) + dg = np.sum(dy * np.power(var + epsilon, -0.5) * (x - mean), axis=tuple(param_axis), keepdims=True) + db = np.sum(dy, axis=tuple(param_axis), keepdims=True) + + sum1 = np.sum((-0.5) * dy * gamma * (x - mean) * np.power(var + epsilon, -1.5), axis=tuple(norm_axis), keepdims=True) + sum2 = np.sum(dy * gamma, axis=tuple(norm_axis), keepdims=True) + sum3 = np.sum(-2.0 * (x - mean), axis=tuple(norm_axis), keepdims=True) + + dx1 = dy * gamma * np.power(var + epsilon, -0.5) + dx2 = sum1 * 2.0 / num * (x - mean) + dx3 = ((-1.0) * np.power(var + epsilon, -0.5) * sum2 + (1.0 / num) * sum1 * 
sum3) * (1.0 / num) + dx = dx1 + dx2 + dx3 + return dx, dg, db, mean, var + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_layernormgrad0(): + begin_norm_axis = 1 + begin_params_axis = 1 + x_np = np.random.randn(4096, 3072).astype(np.float32) + dy_np = np.random.randn(4096, 3072).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + epsilon = 10e-12 + dx_np, dg_np, db_np, mean_np, var_np = LayerNormGradReference(x_np, dy_np, gamma_np, epsilon, begin_norm_axis, begin_params_axis) + + dy_ms = Tensor(dy_np) + x_ms = Tensor(x_np) + var_ms = Tensor(var_np) + mean_ms = Tensor(mean_np) + gamma_ms = Tensor(gamma_np) + + net = LayerNormGradNet(begin_norm_axis, begin_params_axis) + dx_ms, dg_ms, db_ms = net(dy_ms, x_ms, var_ms, mean_ms, gamma_ms) + + assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-6, atol=1e-6) + assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-6, atol=1e-3) + assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-6, atol=1e-3) + + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_layernormgrad1(): + begin_norm_axis = 1 + begin_params_axis = 1 + x_np = np.random.randn(640, 768).astype(np.float32) + dy_np = np.random.randn(640, 768).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + epsilon = 10e-12 + dx_np, dg_np, db_np, mean_np, var_np = LayerNormGradReference(x_np, dy_np, gamma_np, epsilon, begin_norm_axis, begin_params_axis) + + dy_ms = Tensor(dy_np) + x_ms = Tensor(x_np) + var_ms = Tensor(var_np) + mean_ms = Tensor(mean_np) + gamma_ms = Tensor(gamma_np) + + net = LayerNormGradNet(begin_norm_axis, begin_params_axis) + dx_ms, dg_ms, db_ms = net(dy_ms, x_ms, var_ms, mean_ms, gamma_ms) + + assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-6, atol=1e-6) + assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-6, atol=1e-3) + assert np.allclose(db_ms.asnumpy(), 
db_np, rtol=1e-6, atol=1e-3) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_layernormgrad2(): + begin_norm_axis = -1 + begin_params_axis = -1 + x_np = np.random.randn(32, 128, 768).astype(np.float32) + dy_np = np.random.randn(32, 128, 768).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + epsilon = 10e-12 + dx_np, dg_np, db_np, mean_np, var_np = LayerNormGradReference(x_np, dy_np, gamma_np, epsilon, begin_norm_axis, begin_params_axis) + + dy_ms = Tensor(dy_np) + x_ms = Tensor(x_np) + var_ms = Tensor(var_np) + mean_ms = Tensor(mean_np) + gamma_ms = Tensor(gamma_np) + + net = LayerNormGradNet(begin_norm_axis, begin_params_axis) + dx_ms, dg_ms, db_ms = net(dy_ms, x_ms, var_ms, mean_ms, gamma_ms) + + assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-6, atol=1e-6) + assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-6, atol=1e-3) + assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-6, atol=1e-3) \ No newline at end of file diff --git a/tests/st/ops/gpu/test_layer_norm_op.py b/tests/st/ops/gpu/test_layer_norm_op.py new file mode 100644 index 0000000000..a281cd0f5f --- /dev/null +++ b/tests/st/ops/gpu/test_layer_norm_op.py @@ -0,0 +1,134 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import pytest +import numpy as np +from mindspore import Tensor +from mindspore.ops import operations as P +import mindspore.nn as nn +import mindspore.context as context + +context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + + +class LayerNormNet(nn.Cell): + def __init__(self, begin_norm_axis, begin_params_axis): + super(LayerNormNet, self).__init__() + self.norm = P.LayerNorm(begin_norm_axis, begin_params_axis) + + def construct(self, x, gamma, beta): + return self.norm(x, gamma, beta) + +def LayerNormReference(begin_norm_axis, begin_params_axis, x, gamma, beta): + begin_norm_axis = begin_norm_axis if begin_norm_axis >=0 else begin_norm_axis + len(x.shape) + begin_params_axis = begin_params_axis if begin_params_axis >=0 else begin_params_axis + len(x.shape) + + axis = [i for i in range(begin_norm_axis, len(x.shape))] + mean = np.mean(x, axis=tuple(axis), keepdims=True) + var = np.var(x, axis=tuple(axis), keepdims=True) + + gamma = gamma.reshape((*((1,)*begin_params_axis), *x.shape[begin_params_axis:])) + beta = beta.reshape((*((1,)*begin_params_axis), *x.shape[begin_params_axis:])) + y = np.subtract(x, mean) / np.sqrt(var + 1e-12) * gamma + beta + return y, mean, var + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_layernorm0(): + begin_norm_axis = 1 + begin_params_axis = 1 + x_np = np.random.randn(4096, 3072).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + beta_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + y_np, mean_np, var_np = LayerNormReference(begin_norm_axis, begin_params_axis, x_np, gamma_np, beta_np) + + x_ms = Tensor(x_np) + gamma_ms = Tensor(gamma_np) + beta_ms = Tensor(beta_np) + net = LayerNormNet(begin_norm_axis, begin_params_axis) + y_ms, mean_ms, var_ms = net(x_ms, gamma_ms, beta_ms) + + assert 
np.allclose(y_ms.asnumpy(), y_np, atol=1e-6) + assert np.allclose(mean_ms.asnumpy(), mean_np, atol=1e-6) + assert np.allclose(var_ms.asnumpy(), var_np, atol=1e-6) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_layernorm1(): + begin_norm_axis = 1 + begin_params_axis = 1 + x_np = np.random.randn(640, 768).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + beta_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + y_np, mean_np, var_np = LayerNormReference(begin_norm_axis, begin_params_axis, x_np, gamma_np, beta_np) + + x_ms = Tensor(x_np) + gamma_ms = Tensor(gamma_np) + beta_ms = Tensor(beta_np) + net = LayerNormNet(begin_norm_axis, begin_params_axis) + y_ms, mean_ms, var_ms = net(x_ms, gamma_ms, beta_ms) + + + assert np.allclose(y_ms.asnumpy(), y_np, rtol=1e-6, atol=1e-6) + assert np.allclose(mean_ms.asnumpy(), mean_np, rtol=1e-6, atol=1e-6) + assert np.allclose(var_ms.asnumpy(), var_np, rtol=1e-6, atol=1e-6) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_layernorm3d_1(): + begin_norm_axis = -1 + begin_params_axis = -1 + x_np = np.random.randn(32, 128, 768).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + beta_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + y_np, mean_np, var_np = LayerNormReference(begin_norm_axis, begin_params_axis, x_np, gamma_np, beta_np) + + x_ms = Tensor(x_np) + gamma_ms = Tensor(gamma_np) + beta_ms = Tensor(beta_np) + net = LayerNormNet(begin_norm_axis, begin_params_axis) + y_ms, mean_ms, var_ms = net(x_ms, gamma_ms, beta_ms) + + assert np.allclose(y_ms.asnumpy(), y_np, rtol=1e-6, atol=1e-6) + assert np.allclose(mean_ms.asnumpy(), mean_np, rtol=1e-6, atol=1e-6) + assert np.allclose(var_ms.asnumpy(), var_np, rtol=1e-6, atol=1e-6) + + +@pytest.mark.level0 
+@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_layernorm3d_2(): + begin_norm_axis = -1 + begin_params_axis = 1 + x_np = np.random.randn(32, 128, 768).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + beta_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + y_np, mean_np, var_np = LayerNormReference(begin_norm_axis, begin_params_axis, x_np, gamma_np, beta_np) + + x_ms = Tensor(x_np) + gamma_ms = Tensor(gamma_np) + beta_ms = Tensor(beta_np) + net = LayerNormNet(begin_norm_axis, begin_params_axis) + y_ms, mean_ms, var_ms = net(x_ms, gamma_ms, beta_ms) + + assert np.allclose(y_ms.asnumpy(), y_np, rtol=1e-6, atol=1e-6) + assert np.allclose(mean_ms.asnumpy(), mean_np, rtol=1e-6, atol=1e-6) + assert np.allclose(var_ms.asnumpy(), var_np, rtol=1e-6, atol=1e-6) diff --git a/tests/st/tbe_networks/resnet_cifar.py b/tests/st/tbe_networks/resnet_cifar.py index 7bd03f5d81..4709b3ac70 100644 --- a/tests/st/tbe_networks/resnet_cifar.py +++ b/tests/st/tbe_networks/resnet_cifar.py @@ -122,16 +122,10 @@ class CrossEntropyLoss(nn.Cell): if __name__ == '__main__': - if args_opt.do_eval: - context.set_context(enable_hccl=False) - else: - if args_opt.run_distribute: - context.set_context(enable_hccl=True) - context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL) - auto_parallel_context().set_all_reduce_fusion_split_indices([140]) - init() - else: - context.set_context(enable_hccl=False) + if not args_opt.do_eval and args_opt.run_distribute: + context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL) + auto_parallel_context().set_all_reduce_fusion_split_indices([140]) + init() context.set_context(mode=context.GRAPH_MODE) epoch_size = args_opt.epoch_size diff --git a/tests/st/tbe_networks/test_resnet_cifar_1p.py b/tests/st/tbe_networks/test_resnet_cifar_1p.py new file mode 100644 index 
0000000000..058ec3aeec --- /dev/null +++ b/tests/st/tbe_networks/test_resnet_cifar_1p.py @@ -0,0 +1,198 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import pytest +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore.nn.optim.momentum import Momentum +from mindspore.train.model import Model +from mindspore import context +import mindspore.common.dtype as mstype +import os +import numpy as np +import mindspore.ops.functional as F +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback +from mindspore.train.serialization import load_checkpoint, load_param_into_net +import mindspore.dataset as ds +import mindspore.dataset.transforms.c_transforms as C +import mindspore.dataset.transforms.vision.c_transforms as vision +from resnet import resnet50 +import random +import time + +random.seed(1) +np.random.seed(1) +ds.config.set_seed(1) + +data_home = "/home/workspace/mindspore_dataset" + + +def create_dataset(repeat_num=1, training=True, batch_size=32): + data_dir = data_home + "/cifar-10-batches-bin" + if not training: + data_dir = data_home + "/cifar-10-verify-bin" + data_set = ds.Cifar10Dataset(data_dir) + + resize_height = 224 + resize_width = 224 + rescale = 1.0 / 255.0 + shift = 0.0 + + # define map operations + random_crop_op = vision.RandomCrop( + 
(32, 32), (4, 4, 4, 4)) # padding_mode default CONSTANT + random_horizontal_op = vision.RandomHorizontalFlip() + # interpolation default BILINEAR + resize_op = vision.Resize((resize_height, resize_width)) + rescale_op = vision.Rescale(rescale, shift) + normalize_op = vision.Normalize( + (0.4465, 0.4822, 0.4914), (0.2010, 0.1994, 0.2023)) + changeswap_op = vision.HWC2CHW() + type_cast_op = C.TypeCast(mstype.int32) + + c_trans = [] + if training: + c_trans = [random_crop_op, random_horizontal_op] + c_trans += [resize_op, rescale_op, normalize_op, + changeswap_op] + + # apply map operations on images + data_set = data_set.map(input_columns="label", operations=type_cast_op) + data_set = data_set.map(input_columns="image", operations=c_trans) + + # apply shuffle operations + data_set = data_set.shuffle(buffer_size=1000) + + # apply batch operations + data_set = data_set.batch(batch_size=batch_size, drop_remainder=True) + + # apply repeat operations + data_set = data_set.repeat(repeat_num) + + return data_set + + +class CrossEntropyLoss(nn.Cell): + def __init__(self): + super(CrossEntropyLoss, self).__init__() + self.cross_entropy = P.SoftmaxCrossEntropyWithLogits() + self.mean = P.ReduceMean() + self.one_hot = P.OneHot() + self.one = Tensor(1.0, mstype.float32) + self.zero = Tensor(0.0, mstype.float32) + + def construct(self, logits, label): + label = self.one_hot(label, F.shape(logits)[1], self.one, self.zero) + loss = self.cross_entropy(logits, label)[0] + loss = self.mean(loss, (-1,)) + return loss + + +class LossGet(Callback): + def __init__(self, per_print_times=1): + super(LossGet, self).__init__() + if not isinstance(per_print_times, int) or per_print_times < 0: + raise ValueError("print_step must be int and >= 0.") + self._per_print_times = per_print_times + self._loss = 0.0 + + def step_end(self, run_context): + cb_params = run_context.original_args() + loss = cb_params.net_outputs + + if isinstance(loss, (tuple, list)): + if isinstance(loss[0], Tensor) and 
isinstance(loss[0].asnumpy(), np.ndarray): + loss = loss[0] + + if isinstance(loss, Tensor) and isinstance(loss.asnumpy(), np.ndarray): + loss = np.mean(loss.asnumpy()) + + cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 + + if isinstance(loss, float) and (np.isnan(loss) or np.isinf(loss)): + raise ValueError("epoch: {} step: {}. Invalid loss, terminating training." + .format(cb_params.cur_epoch_num, cur_step_in_epoch)) + if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0: + self._loss = loss + print("epoch: %s step: %s, loss is %s" % (cb_params.cur_epoch_num, cur_step_in_epoch, loss)) + + def get_loss(self): + return self._loss + + +def train_process(device_id, epoch_size, num_classes, device_num, batch_size): + os.system("mkdir " + str(device_id)) + os.chdir(str(device_id)) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(enable_task_sink=True, device_id=device_id) + context.set_context(enable_loop_sink=True) + context.set_context(enable_mem_reuse=True) + context.set_context(mode=context.GRAPH_MODE) + net = resnet50(batch_size, num_classes) + loss = CrossEntropyLoss() + opt = Momentum(filter(lambda x: x.requires_grad, + net.get_parameters()), 0.01, 0.9) + + model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) + + dataset = create_dataset(epoch_size, training=True, batch_size=batch_size) + batch_num = dataset.get_dataset_size() + config_ck = CheckpointConfig(save_checkpoint_steps=batch_num, keep_checkpoint_max=1) + ckpoint_cb = ModelCheckpoint(prefix="train_resnet_cifar10_device_id_" + str(device_id), directory="./", + config=config_ck) + loss_cb = LossGet() + model.train(epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb]) + + +def eval(batch_size, num_classes): + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(enable_task_sink=True, device_id=0) + context.set_context(enable_loop_sink=True) + 
context.set_context(enable_mem_reuse=True) + + net = resnet50(batch_size, num_classes) + loss = CrossEntropyLoss() + opt = Momentum(filter(lambda x: x.requires_grad, + net.get_parameters()), 0.01, 0.9) + + model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) + checkpoint_path = "./train_resnet_cifar10_device_id_0-1_1562.ckpt" + param_dict = load_checkpoint(checkpoint_path) + load_param_into_net(net, param_dict) + net.set_train(False) + eval_dataset = create_dataset(1, training=False) + res = model.eval(eval_dataset) + print("result: ", res) + return res + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_resnet_cifar_1p(): + device_num = 1 + epoch_size = 1 + num_classes = 10 + batch_size = 32 + device_id = 0 + train_process(device_id, epoch_size, num_classes, device_num, batch_size) + time.sleep(3) + acc = eval(batch_size, num_classes) + os.chdir("../") + os.system("rm -rf " + str(device_id)) + print("End training...") + assert (acc['acc'] > 0.35) diff --git a/tests/st/tbe_networks/test_resnet_cifar_8p.py b/tests/st/tbe_networks/test_resnet_cifar_8p.py index 69f0a80d12..1e43729252 100644 --- a/tests/st/tbe_networks/test_resnet_cifar_8p.py +++ b/tests/st/tbe_networks/test_resnet_cifar_8p.py @@ -153,7 +153,6 @@ def train_process(q, device_id, epoch_size, num_classes, device_num, batch_size, context.set_context(enable_task_sink=True, device_id=device_id) context.set_context(enable_loop_sink=True) context.set_context(enable_mem_reuse=True) - context.set_context(enable_hccl=enable_hccl) os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = MINDSPORE_HCCL_CONFIG_PATH os.environ['RANK_ID'] = str(device_id) os.environ['RANK_SIZE'] = str(device_num) diff --git a/tests/ut/cpp/dataset/interrupt_test.cc b/tests/ut/cpp/dataset/interrupt_test.cc index 7816346c15..ee2018a050 100644 --- a/tests/ut/cpp/dataset/interrupt_test.cc +++ b/tests/ut/cpp/dataset/interrupt_test.cc @@ -20,7 +20,6 
@@ #include "dataset/util/intrp_service.h" #include "dataset/util/task_manager.h" #include "dataset/util/queue.h" -#include "dataset/util/semaphore.h" using namespace mindspore::dataset; using mindspore::MsLogLevel::INFO; @@ -55,11 +54,12 @@ TEST_F(MindDataTestIntrpService, Test1) { TEST_F(MindDataTestIntrpService, Test2) { MS_LOG(INFO) << "Test Semaphore"; Status rc; - Semaphore sem(0); - sem.Register(&vg_); + WaitPost wp; + rc = wp.Register(&vg_); + EXPECT_TRUE(rc.IsOk()); vg_.CreateAsyncTask("Test1", [&]() -> Status { TaskManager::FindMe()->Post(); - Status rc = sem.P(); + Status rc = wp.Wait(); EXPECT_TRUE(rc.IsInterrupted()); return rc; }); diff --git a/tests/ut/cpp/dataset/random_data_op_test.cc b/tests/ut/cpp/dataset/random_data_op_test.cc new file mode 100644 index 0000000000..f8a7440c03 --- /dev/null +++ b/tests/ut/cpp/dataset/random_data_op_test.cc @@ -0,0 +1,457 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "dataset/core/client.h" +#include "common/common.h" +#include "gtest/gtest.h" +#include +#include +#include +#include "dataset/core/tensor_shape.h" +#include "dataset/engine/datasetops/source/random_data_op.h" +#include "dataset/engine/data_schema.h" + +using namespace mindspore::dataset; +using mindspore::MsLogLevel::INFO; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::LogStream; + +class MindDataTestRandomDataOp : public UT::DatasetOpTesting { + +}; + +// Test info: +// - Simple test with a user-provided schema generated purely from DataSchema C API +// - has an interation loop +// +// Tree: single node tree with RandomDataOp +// +// RandomDataOp +// +TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) { + Status rc; + int32_t rank = 0; // not used + MS_LOG(INFO) << "UT test RandomDataOpBasic1"; + + // Start with an empty execution tree + auto myTree = std::make_shared(); + + // Create a schema using the C api's + std::unique_ptr testSchema = std::make_unique(); + + // RandomDataOp can randomly fill in unknown dimension lengths of a shape. + // Most other ops cannot do that as they are limited by the physical data itself. We're + // more flexible with random data since it is just making stuff up on the fly. 
+ TensorShape c1Shape({TensorShape::kDimUnknown, TensorShape::kDimUnknown, 3}); + ColDescriptor c1("image", + DataType(DataType::DE_INT8), + TensorImpl::kFlexible, + rank, // not used + &c1Shape); + + // Column 2 will just be a scalar label number + TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor + ColDescriptor c2("label", + DataType(DataType::DE_UINT32), + TensorImpl::kFlexible, + rank, + &c2Shape); + + testSchema->AddColumn(c1); + testSchema->AddColumn(c2); + + std::shared_ptr myRandomDataOp; + RandomDataOp::Builder builder; + + rc = builder.SetRowsPerBuffer(2) + .SetNumWorkers(1) + .SetDataSchema(std::move(testSchema)) + .SetTotalRows(25) + .Build(&myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->AssociateNode(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->AssignRoot(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + std::ostringstream ss; + ss << *myRandomDataOp; + MS_LOG(INFO) << "RandomDataOp print: %s" << ss.str(); + + MS_LOG(INFO) << "Launching tree and begin iteration"; + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + + // Start the loop of reading tensors from our pipeline + DatasetIterator dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; + while (!tensorList.empty()) { + // Don't display these rows...too big to show + MS_LOG(INFO) << "Row fetched #: " << rowCount; + + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + rowCount++; + } + ASSERT_EQ(rowCount, 25); +} + +// Test info: +// - Simple test with a randomly generated schema +// - no iteration loop on this one, just create the op +// +// Tree: single node tree with RandomDataOp +// +// RandomDataOp +// +TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) { + Status rc; + MS_LOG(INFO) << "UT test RandomDataOpBasic2"; + + // Start with an empty execution tree + auto myTree = std::make_shared(); + + std::shared_ptr 
myRandomDataOp; + RandomDataOp::Builder builder; + + rc = builder.SetRowsPerBuffer(2) + .SetNumWorkers(1) + .Build(&myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->AssociateNode(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->AssignRoot(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + std::ostringstream ss; + ss << *myRandomDataOp; + MS_LOG(INFO) << "RandomDataOp print: " << ss.str(); +} + +// Test info: +// - json file test with iteration +// +// Tree: single node tree with RandomDataOp +// +// RandomDataOp +// +TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) { + Status rc; + MS_LOG(INFO) << "UT test RandomDataOpBasic3"; + + // Start with an empty execution tree + auto myTree = std::make_shared(); + + std::unique_ptr testSchema = std::make_unique(); + rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema.json", {}); + EXPECT_TRUE(rc.IsOk()); + + std::shared_ptr myRandomDataOp; + RandomDataOp::Builder builder; + + rc = builder.SetRowsPerBuffer(2) + .SetNumWorkers(1) + .SetDataSchema(std::move(testSchema)) + .SetTotalRows(10) + .Build(&myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->AssociateNode(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->AssignRoot(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + std::ostringstream ss; + ss << *myRandomDataOp; + MS_LOG(INFO) << "RandomDataOp print: " << ss.str(); + + MS_LOG(INFO) << "Launching tree and begin iteration"; + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + + // Start the loop of reading tensors from our pipeline + DatasetIterator dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; + while (!tensorList.empty()) { + // Don't display these rows...too big to show + MS_LOG(INFO) << "Row fetched #: " << rowCount; + + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + rowCount++; + } + 
ASSERT_EQ(rowCount, 10); +} + +// Test info: +// - json schema input it's a fairly simple one +// - has an iteration loop +// +// Tree: RepeatOp over RandomDataOp +// +// RepeatOp +// | +// RandomDataOp +// +TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) { + Status rc; + MS_LOG(INFO) << "UT test RandomDataOpBasic4"; + + // Start with an empty execution tree + auto myTree = std::make_shared(); + + std::unique_ptr testSchema = std::make_unique(); + rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {}); + EXPECT_TRUE(rc.IsOk()); + + std::shared_ptr myRandomDataOp; + RandomDataOp::Builder builder; + + rc = builder.SetRowsPerBuffer(2) + .SetNumWorkers(1) + .SetDataSchema(std::move(testSchema)) + .SetTotalRows(10) + .Build(&myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->AssociateNode(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + uint32_t numRepeats = 2; + std::shared_ptr myRepeatOp; + rc = RepeatOp::Builder(numRepeats) + .Build(&myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myRepeatOp->AddChild(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->AssignRoot(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + MS_LOG(INFO) << "Launching tree and begin iteration"; + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + + // Start the loop of reading tensors from our pipeline + DatasetIterator dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; + while (!tensorList.empty()) { + MS_LOG(INFO) << "Row display for row #: " << rowCount; + + // Display the tensor by calling the printer on it + for (int i = 0; i < tensorList.size(); i++) { + std::ostringstream ss; + ss << *tensorList[i] << std::endl; + MS_LOG(INFO) << "Tensor print: " << ss.str(); + } + + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); +
rowCount++; + } + ASSERT_EQ(rowCount, 20); +} + +// Test info: +// - json schema input it's a fairly simple one +// - has an iteration loop +// - same as MindDataTestRandomDataOpBasic4 except that this one will have parallel workers +// +// Tree: RepeatOp over RandomDataOp +// +// RepeatOp +// | +// RandomDataOp +// +TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) { + Status rc; + MS_LOG(INFO) << "UT test RandomDataOpBasic5"; + + // Start with an empty execution tree + auto myTree = std::make_shared(); + + std::unique_ptr testSchema = std::make_unique(); + rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {}); + EXPECT_TRUE(rc.IsOk()); + + std::shared_ptr myRandomDataOp; + RandomDataOp::Builder builder; + + rc = builder.SetRowsPerBuffer(2) + .SetNumWorkers(4) + .SetDataSchema(std::move(testSchema)) + .SetTotalRows(10) + .Build(&myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->AssociateNode(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + uint32_t numRepeats = 3; + std::shared_ptr myRepeatOp; + rc = RepeatOp::Builder(numRepeats) + .Build(&myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myRepeatOp->AddChild(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->AssignRoot(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + MS_LOG(INFO) << "Launching tree and begin iteration"; + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + + // Start the loop of reading tensors from our pipeline + DatasetIterator dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; + while (!tensorList.empty()) { + MS_LOG(INFO) << "Row display for row #: " << rowCount; + + // Display the tensor by calling the printer on it + for (int i = 0; i < tensorList.size(); i++) { + std::ostringstream ss; + ss << *tensorList[i] << std::endl; + MS_LOG(INFO) <<
"Tensor print: " << ss.str(); + } + + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + rowCount++; + } + ASSERT_EQ(rowCount, 30); +} + +// Test info: +// - repeat shuffle random +// +// Tree: RepeatOp over RandomDataOp +// +// RepeatOp +// | +// ShuffleOp +// | +// RandomDataOp +// +TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) { + Status rc; + MS_LOG(INFO) << "UT test RandomDataOpTree1"; + + // Start with an empty execution tree + auto myTree = std::make_shared(); + + std::unique_ptr testSchema = std::make_unique(); + rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {}); + EXPECT_TRUE(rc.IsOk()); + + std::shared_ptr myRandomDataOp; + RandomDataOp::Builder builder; + + rc = builder.SetRowsPerBuffer(2) + .SetNumWorkers(4) + .SetDataSchema(std::move(testSchema)) + .SetTotalRows(10) + .Build(&myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->AssociateNode(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + std::shared_ptr myShuffleOp; + rc = ShuffleOp::Builder() + .SetRowsPerBuffer(2) + .SetShuffleSize(4) + .Build(&myShuffleOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myShuffleOp); + EXPECT_TRUE(rc.IsOk()); + + uint32_t numRepeats = 3; + std::shared_ptr myRepeatOp; + rc = RepeatOp::Builder(numRepeats) + .Build(&myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myRepeatOp->AddChild(myShuffleOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myShuffleOp->AddChild(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->AssignRoot(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + MS_LOG(INFO) << "Launching tree and begin iteration"; + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + + // Start the loop of reading tensors from our pipeline + DatasetIterator dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; +
while (!tensorList.empty()) { + MS_LOG(INFO) << "Row display for row #: " << rowCount; + + // Display the tensor by calling the printer on it + for (int i = 0; i < tensorList.size(); i++) { + std::ostringstream ss; + ss << *tensorList[i] << std::endl; + MS_LOG(INFO) << "Tensor print: " << ss.str(); + } + + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + rowCount++; + } + ASSERT_EQ(rowCount, 30); +} diff --git a/tests/ut/cpp/dataset/skip_op_test.cc b/tests/ut/cpp/dataset/skip_op_test.cc index c2168b24d4..697745512d 100644 --- a/tests/ut/cpp/dataset/skip_op_test.cc +++ b/tests/ut/cpp/dataset/skip_op_test.cc @@ -47,7 +47,7 @@ TEST_F(MindDataTestSkipOp, TestSkipOpFuntions) { ASSERT_TRUE(rc.IsOk()); // SkipOp - std::shared_ptr skip_op = std::make_shared(5); + std::shared_ptr skip_op = std::make_shared(5, 2); rc = my_tree->AssociateNode(skip_op); ASSERT_TRUE(rc.IsOk()); diff --git a/tests/ut/cpp/ir/manager_test.cc b/tests/ut/cpp/ir/manager_test.cc index a7a19a7d24..8816277c49 100644 --- a/tests/ut/cpp/ir/manager_test.cc +++ b/tests/ut/cpp/ir/manager_test.cc @@ -127,12 +127,18 @@ class NestingSpecs { return; } - auto counter_p = dynamic_pointer_cast(results); + auto counter_p = dynamic_pointer_cast>(results); if (counter_p != nullptr) { CheckAnfNodeCounter(counter_p); return; } + auto counter_pair = dynamic_pointer_cast>(results); + if (counter_pair != nullptr) { + CheckCNodeIndexPairCounter(counter_pair); + return; + } + auto nodes = dynamic_pointer_cast(results); if (nodes != nullptr) { CheckNodes(nodes); @@ -226,7 +232,7 @@ class NestingSpecs { // Add CheckNesting function - void CheckAnfNodeCounter(std::shared_ptr results) { + void CheckAnfNodeCounter(std::shared_ptr> results) { std::map> clean_results; for (auto& iter : results->count_nodes_map()) { auto key = iter.first; @@ -252,6 +258,32 @@ class NestingSpecs { ASSERT_EQ(clean_results, expected_); } + void CheckCNodeIndexPairCounter(std::shared_ptr> results) { + std::map> clean_results; + for 
(auto& iter : results->count_nodes_map()) { + auto key = iter.first; + auto value = iter.second; + if (key == nullptr) { + continue; + } + std::string k = Name(key); + + std::set v; + for (auto& node : value) { + auto fg = node.first->first; + if (!Name(fg).empty()) { + v.insert(Name(fg)); + } + } + + if (!v.empty()) { + clean_results[k] = v; + } + } + + ASSERT_EQ(clean_results, expected_); + } + void CheckGraphCounter(std::shared_ptr results) { std::map> clean_results; for (auto& iter : results->count_func_graphs_map()) { @@ -447,9 +479,8 @@ void TestManager::CheckAnalysisSize(std::shared_ptr mng) { ASSERT_EQ(size, mng->free_variables_total().size()); ASSERT_EQ(size, mng->valuenodes().size()); ASSERT_EQ(size, mng->free_variables_direct().size()); - ASSERT_EQ(size, mng->func_graph_valuenodes().size()); + ASSERT_EQ(size, mng->func_graph_cnodes_index().size()); ASSERT_EQ(size, mng->func_graph_parents_direct().size()); - ASSERT_EQ(size, mng->func_graph_users().size()); ASSERT_EQ(size, mng->func_graphs_used().size()); } @@ -508,10 +539,6 @@ TEST_F(TestManager, test_nested_manual) { ASSERT_EQ(1, graphs_used[f].size()); ASSERT_EQ(0, graphs_used[g].size()); - auto graph_users = mng->func_graph_users(); - ASSERT_EQ(0, graph_users[f].size()); - ASSERT_EQ(1, graph_users[g].size()); - auto fv_direct = mng->free_variables_direct(); ASSERT_EQ(0, fv_direct[f].size()); ASSERT_EQ(1, fv_direct[g].size()); @@ -520,9 +547,9 @@ TEST_F(TestManager, test_nested_manual) { ASSERT_EQ(0, fv_total[f].size()); ASSERT_EQ(1, fv_total[g].size()); - auto graph_valuenodes = mng->func_graph_valuenodes(); - ASSERT_EQ(0, graph_valuenodes[f].size()); - ASSERT_EQ(1, graph_valuenodes[g].size()); + auto cnodes = mng->func_graph_cnodes_index(); + ASSERT_EQ(0, cnodes[f].size()); + ASSERT_EQ(1, cnodes[g].size()); } TEST_F(TestManager, test_deep_nested2_manual) { diff --git a/tests/ut/cpp/mindrecord/ut_common.cc b/tests/ut/cpp/mindrecord/ut_common.cc index 76aa5fc503..2d2d69bd54 100644 --- 
a/tests/ut/cpp/mindrecord/ut_common.cc +++ b/tests/ut/cpp/mindrecord/ut_common.cc @@ -16,9 +16,9 @@ #include "ut_common.h" -using mindspore::MsLogLevel::ERROR; -using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::ERROR; namespace mindspore { namespace mindrecord { @@ -33,23 +33,6 @@ void Common::SetUp() {} void Common::TearDown() {} -void Common::LoadData(const std::string &directory, std::vector &json_buffer, const int max_num) { - int count = 0; - string input_path = directory; - ifstream infile(input_path); - if (!infile.is_open()) { - MS_LOG(ERROR) << "can not open the file "; - return; - } - string temp; - while (getline(infile, temp) && count != max_num) { - count++; - json j = json::parse(temp); - json_buffer.push_back(j); - } - infile.close(); -} - #ifdef __cplusplus #if __cplusplus } @@ -70,5 +53,353 @@ const std::string FormatInfo(const std::string &message, uint32_t message_total_ std::string right_padding(static_cast(floor(padding_length / 2.0)), '='); return left_padding + part_message + right_padding; } + +void LoadData(const std::string &directory, std::vector &json_buffer, const int max_num) { + int count = 0; + string input_path = directory; + ifstream infile(input_path); + if (!infile.is_open()) { + MS_LOG(ERROR) << "can not open the file "; + return; + } + string temp; + while (getline(infile, temp) && count != max_num) { + count++; + json j = json::parse(temp); + json_buffer.push_back(j); + } + infile.close(); +} + +void LoadDataFromImageNet(const std::string &directory, std::vector &json_buffer, const int max_num) { + int count = 0; + string input_path = directory; + ifstream infile(input_path); + if (!infile.is_open()) { + MS_LOG(ERROR) << "can not open the file "; + return; + } + string temp; + string filename; + string label; + json j; + while (getline(infile, temp) && count != max_num) { + count++; + std::size_t pos = temp.find(",", 0); 
+ if (pos != std::string::npos) { + j["file_name"] = temp.substr(0, pos); + j["label"] = atoi(common::SafeCStr(temp.substr(pos + 1, temp.length()))); + json_buffer.push_back(j); + } + } + infile.close(); +} + +int Img2DataUint8(const std::vector &img_absolute_path, std::vector> &bin_data) { + for (auto &file : img_absolute_path) { + // read image file + std::ifstream in(common::SafeCStr(file), std::ios::in | std::ios::binary | std::ios::ate); + if (!in) { + MS_LOG(ERROR) << common::SafeCStr(file) << " is not a directory or not exist!"; + return -1; + } + + // get the file size + uint64_t size = in.tellg(); + in.seekg(0, std::ios::beg); + std::vector file_data(size); + in.read(reinterpret_cast(&file_data[0]), size); + in.close(); + bin_data.push_back(file_data); + } + return 0; +} + +int GetAbsoluteFiles(std::string directory, std::vector &files_absolute_path) { + DIR *dir = opendir(common::SafeCStr(directory)); + if (dir == nullptr) { + MS_LOG(ERROR) << common::SafeCStr(directory) << " is not a directory or not exist!"; + return -1; + } + struct dirent *d_ent = nullptr; + char dot[3] = "."; + char dotdot[6] = ".."; + while ((d_ent = readdir(dir)) != nullptr) { + if ((strcmp(d_ent->d_name, dot) != 0) && (strcmp(d_ent->d_name, dotdot) != 0)) { + if (d_ent->d_type == DT_DIR) { + std::string new_directory = directory + std::string("/") + std::string(d_ent->d_name); + if (directory[directory.length() - 1] == '/') { + new_directory = directory + string(d_ent->d_name); + } + if (-1 == GetAbsoluteFiles(new_directory, files_absolute_path)) { + closedir(dir); + return -1; + } + } else { + std::string absolute_path = directory + std::string("/") + std::string(d_ent->d_name); + if (directory[directory.length() - 1] == '/') { + absolute_path = directory + std::string(d_ent->d_name); + } + files_absolute_path.push_back(absolute_path); + } + } + } + closedir(dir); + return 0; +} + +void ShardWriterImageNet() { + MS_LOG(INFO) << common::SafeCStr(FormatInfo("Write imageNet")); + + 
// load binary data + std::vector> bin_data; + std::vector filenames; + if (-1 == mindrecord::GetAbsoluteFiles("./data/mindrecord/testImageNetData/images", filenames)) { + MS_LOG(INFO) << "-- ATTN -- Missed data directory. Skip this case. -----------------"; + return; + } + mindrecord::Img2DataUint8(filenames, bin_data); + + // init shardHeader + ShardHeader header_data; + MS_LOG(INFO) << "Init ShardHeader Already."; + + // create schema + json anno_schema_json = R"({"file_name": {"type": "string"}, "label": {"type": "int32"}})"_json; + std::shared_ptr anno_schema = mindrecord::Schema::Build("annotation", anno_schema_json); + if (anno_schema == nullptr) { + MS_LOG(ERROR) << "Build annotation schema failed"; + return; + } + + // add schema to shardHeader + int anno_schema_id = header_data.AddSchema(anno_schema); + MS_LOG(INFO) << "Init Schema Already."; + + // create index + std::pair index_field1(anno_schema_id, "file_name"); + std::pair index_field2(anno_schema_id, "label"); + std::vector> fields; + fields.push_back(index_field1); + fields.push_back(index_field2); + + // add index to shardHeader + header_data.AddIndexFields(fields); + MS_LOG(INFO) << "Init Index Fields Already."; + // load meta data + std::vector annotations; + LoadDataFromImageNet("./data/mindrecord/testImageNetData/annotation.txt", annotations, 10); + + // add data + std::map> rawdatas; + rawdatas.insert(pair>(anno_schema_id, annotations)); + MS_LOG(INFO) << "Init Images Already."; + + // init file_writer + std::vector file_names; + int file_count = 4; + for (int i = 1; i <= file_count; i++) { + file_names.emplace_back(std::string("./imagenet.shard0") + std::to_string(i)); + MS_LOG(INFO) << "shard name is: " << common::SafeCStr(file_names[i - 1]); + } + + MS_LOG(INFO) << "Init Output Files Already."; + { + ShardWriter fw_init; + fw_init.Open(file_names); + + // set shardHeader + fw_init.SetShardHeader(std::make_shared(header_data)); + + // close file_writer + fw_init.Commit(); + } + std::string 
filename = "./imagenet.shard01"; + { + MS_LOG(INFO) << "=============== images " << bin_data.size() << " ============================"; + mindrecord::ShardWriter fw; + fw.OpenForAppend(filename); + fw.WriteRawData(rawdatas, bin_data); + fw.Commit(); + } + mindrecord::ShardIndexGenerator sg{filename}; + sg.Build(); + sg.WriteToDatabase(); + + MS_LOG(INFO) << "Done create index"; +} + +void ShardWriterImageNetOneSample() { + // load binary data + std::vector> bin_data; + std::vector filenames; + if (-1 == mindrecord::GetAbsoluteFiles("./data/mindrecord/testImageNetData/images", filenames)) { + MS_LOG(INFO) << "-- ATTN -- Missed data directory. Skip this case. -----------------"; + return; + } + mindrecord::Img2DataUint8(filenames, bin_data); + + // init shardHeader + mindrecord::ShardHeader header_data; + MS_LOG(INFO) << "Init ShardHeader Already."; + + // create schema + json anno_schema_json = R"({"file_name": {"type": "string"}, "label": {"type": "int32"}})"_json; + std::shared_ptr anno_schema = mindrecord::Schema::Build("annotation", anno_schema_json); + if (anno_schema == nullptr) { + MS_LOG(ERROR) << "Build annotation schema failed"; + return; + } + + // add schema to shardHeader + int anno_schema_id = header_data.AddSchema(anno_schema); + MS_LOG(INFO) << "Init Schema Already."; + + // create index + std::pair index_field1(anno_schema_id, "file_name"); + std::pair index_field2(anno_schema_id, "label"); + std::vector> fields; + fields.push_back(index_field1); + fields.push_back(index_field2); + + // add index to shardHeader + header_data.AddIndexFields(fields); + MS_LOG(INFO) << "Init Index Fields Already."; + + // load meta data + std::vector annotations; + LoadDataFromImageNet("./data/mindrecord/testImageNetData/annotation.txt", annotations, 1); + + // add data + std::map> rawdatas; + rawdatas.insert(pair>(anno_schema_id, annotations)); + MS_LOG(INFO) << "Init Images Already."; + + // init file_writer + std::vector file_names; + for (int i = 1; i <= 4; i++) { 
+ file_names.emplace_back(std::string("./OneSample.shard0") + std::to_string(i)); + MS_LOG(INFO) << "shard name is: " << common::SafeCStr(file_names[i - 1]); + } + + MS_LOG(INFO) << "Init Output Files Already."; + { + mindrecord::ShardWriter fw_init; + fw_init.Open(file_names); + + // set shardHeader + fw_init.SetShardHeader(std::make_shared(header_data)); + + // close file_writer + fw_init.Commit(); + } + + std::string filename = "./OneSample.shard01"; + { + MS_LOG(INFO) << "=============== images " << bin_data.size() << " ============================"; + mindrecord::ShardWriter fw; + fw.OpenForAppend(filename); + bin_data = std::vector>(bin_data.begin(), bin_data.begin() + 1); + fw.WriteRawData(rawdatas, bin_data); + fw.Commit(); + } + + mindrecord::ShardIndexGenerator sg{filename}; + sg.Build(); + sg.WriteToDatabase(); + MS_LOG(INFO) << "Done create index"; +} + +void ShardWriterImageNetOpenForAppend(string filename) { + for (int i = 1; i <= 4; i++) { + string filename = std::string("./OpenForAppendSample.shard0") + std::to_string(i); + string db_name = std::string("./OpenForAppendSample.shard0") + std::to_string(i) + ".db"; + remove(common::SafeCStr(filename)); + remove(common::SafeCStr(db_name)); + } + + // load binary data + std::vector> bin_data; + std::vector filenames; + if (-1 == mindrecord::GetAbsoluteFiles("./data/mindrecord/testImageNetData/images", filenames)) { + MS_LOG(INFO) << "-- ATTN -- Missed data directory. Skip this case. 
-----------------"; + return; + } + mindrecord::Img2DataUint8(filenames, bin_data); + + // init shardHeader + mindrecord::ShardHeader header_data; + MS_LOG(INFO) << "Init ShardHeader Already."; + + // create schema + json anno_schema_json = R"({"file_name": {"type": "string"}, "label": {"type": "int32"}})"_json; + std::shared_ptr anno_schema = mindrecord::Schema::Build("annotation", anno_schema_json); + if (anno_schema == nullptr) { + MS_LOG(ERROR) << "Build annotation schema failed"; + return; + } + + // add schema to shardHeader + int anno_schema_id = header_data.AddSchema(anno_schema); + MS_LOG(INFO) << "Init Schema Already."; + + // create index + std::pair index_field1(anno_schema_id, "file_name"); + std::pair index_field2(anno_schema_id, "label"); + std::vector> fields; + fields.push_back(index_field1); + fields.push_back(index_field2); + + // add index to shardHeader + header_data.AddIndexFields(fields); + MS_LOG(INFO) << "Init Index Fields Already."; + + // load meta data + std::vector annotations; + LoadDataFromImageNet("./data/mindrecord/testImageNetData/annotation.txt", annotations, 1); + + // add data + std::map> rawdatas; + rawdatas.insert(pair>(anno_schema_id, annotations)); + MS_LOG(INFO) << "Init Images Already."; + + // init file_writer + std::vector file_names; + for (int i = 1; i <= 4; i++) { + file_names.emplace_back(std::string("./OpenForAppendSample.shard0") + std::to_string(i)); + MS_LOG(INFO) << "shard name is: " << common::SafeCStr(file_names[i - 1]); + } + + MS_LOG(INFO) << "Init Output Files Already."; + { + mindrecord::ShardWriter fw_init; + fw_init.Open(file_names); + + // set shardHeader + fw_init.SetShardHeader(std::make_shared(header_data)); + + // close file_writer + fw_init.Commit(); + } + { + MS_LOG(INFO) << "=============== images " << bin_data.size() << " ============================"; + mindrecord::ShardWriter fw; + auto ret = fw.OpenForAppend(filename); + if (ret == FAILED) { + return; + } + + bin_data = 
std::vector>(bin_data.begin(), bin_data.begin() + 1); + fw.WriteRawData(rawdatas, bin_data); + fw.Commit(); + } + + ShardIndexGenerator sg{filename}; + sg.Build(); + sg.WriteToDatabase(); + MS_LOG(INFO) << "Done create index"; +} + + } // namespace mindrecord } // namespace mindspore diff --git a/tests/ut/cpp/mindrecord/ut_common.h b/tests/ut/cpp/mindrecord/ut_common.h index 398c59779b..8b244bf87a 100644 --- a/tests/ut/cpp/mindrecord/ut_common.h +++ b/tests/ut/cpp/mindrecord/ut_common.h @@ -17,6 +17,7 @@ #ifndef TESTS_MINDRECORD_UT_UT_COMMON_H_ #define TESTS_MINDRECORD_UT_UT_COMMON_H_ +#include #include #include #include @@ -25,7 +26,9 @@ #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "mindrecord/include/shard_index.h" - +#include "mindrecord/include/shard_header.h" +#include "mindrecord/include/shard_index_generator.h" +#include "mindrecord/include/shard_writer.h" using json = nlohmann::json; using std::ifstream; using std::pair; @@ -40,11 +43,10 @@ class Common : public testing::Test { std::string install_root; // every TEST_F macro will enter one - void SetUp(); + virtual void SetUp(); - void TearDown(); + virtual void TearDown(); - static void LoadData(const std::string &directory, std::vector &json_buffer, const int max_num); }; } // namespace UT @@ -55,6 +57,21 @@ class Common : public testing::Test { /// /// return the formatted string const std::string FormatInfo(const std::string &message, uint32_t message_total_length = 128); + + +void LoadData(const std::string &directory, std::vector &json_buffer, const int max_num); + +void LoadDataFromImageNet(const std::string &directory, std::vector &json_buffer, const int max_num); + +int Img2DataUint8(const std::vector &img_absolute_path, std::vector> &bin_data); + +int GetAbsoluteFiles(std::string directory, std::vector &files_absolute_path); + +void ShardWriterImageNet(); + +void ShardWriterImageNetOneSample(); + +void ShardWriterImageNetOpenForAppend(string filename); } // namespace mindrecord 
} // namespace mindspore #endif // TESTS_MINDRECORD_UT_UT_COMMON_H_ diff --git a/tests/ut/cpp/mindrecord/ut_shard.cc b/tests/ut/cpp/mindrecord/ut_shard.cc index 88fdb7e167..994ff1b859 100644 --- a/tests/ut/cpp/mindrecord/ut_shard.cc +++ b/tests/ut/cpp/mindrecord/ut_shard.cc @@ -29,7 +29,6 @@ #include "mindrecord/include/shard_statistics.h" #include "securec.h" #include "ut_common.h" -#include "ut_shard_writer_test.h" using mindspore::MsLogLevel::INFO; using mindspore::ExceptionType::NoExceptionType; @@ -43,7 +42,7 @@ class TestShard : public UT::Common { }; TEST_F(TestShard, TestShardSchemaPart) { - TestShardWriterImageNet(); + ShardWriterImageNet(); MS_LOG(INFO) << FormatInfo("Test schema"); @@ -55,6 +54,12 @@ TEST_F(TestShard, TestShardSchemaPart) { ASSERT_TRUE(schema != nullptr); MS_LOG(INFO) << "schema description: " << schema->get_desc() << ", schema: " << common::SafeCStr(schema->GetSchema().dump()); + for (int i = 1; i <= 4; i++) { + string filename = std::string("./imagenet.shard0") + std::to_string(i); + string db_name = std::string("./imagenet.shard0") + std::to_string(i) + ".db"; + remove(common::SafeCStr(filename)); + remove(common::SafeCStr(db_name)); + } } TEST_F(TestShard, TestStatisticPart) { @@ -128,6 +133,5 @@ TEST_F(TestShard, TestShardHeaderPart) { ASSERT_EQ(resFields, fields); } -TEST_F(TestShard, TestShardWriteImage) { MS_LOG(INFO) << FormatInfo("Test writer"); } } // namespace mindrecord } // namespace mindspore diff --git a/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc b/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc index 0c33d33ffd..140fff4166 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc @@ -53,38 +53,6 @@ class TestShardIndexGenerator : public UT::Common { TestShardIndexGenerator() {} }; -/* -TEST_F(TestShardIndexGenerator, GetField) { - MS_LOG(INFO) << FormatInfo("Test ShardIndex: get field"); - - int max_num = 1; - string 
input_path1 = install_root + "/test/testCBGData/data/annotation.data"; - std::vector json_buffer1; // store the image_raw_meta.data - Common::LoadData(input_path1, json_buffer1, max_num); - - MS_LOG(INFO) << "Fetch fields: "; - for (auto &j : json_buffer1) { - auto v_name = ShardIndexGenerator::GetField("anno_tool", j); - auto v_attr_name = ShardIndexGenerator::GetField("entity_instances.attributes.attr_name", j); - auto v_entity_name = ShardIndexGenerator::GetField("entity_instances.entity_name", j); - vector names = {"\"CVAT\""}; - for (unsigned int i = 0; i != names.size(); i++) { - ASSERT_EQ(names[i], v_name[i]); - } - vector attr_names = {"\"脸部评分\"", "\"特征点\"", "\"points_example\"", "\"polyline_example\"", - "\"polyline_example\""}; - for (unsigned int i = 0; i != attr_names.size(); i++) { - ASSERT_EQ(attr_names[i], v_attr_name[i]); - } - vector entity_names = {"\"276点人脸\"", "\"points_example\"", "\"polyline_example\"", - "\"polyline_example\""}; - for (unsigned int i = 0; i != entity_names.size(); i++) { - ASSERT_EQ(entity_names[i], v_entity_name[i]); - } - } -} -*/ - TEST_F(TestShardIndexGenerator, TakeFieldType) { MS_LOG(INFO) << FormatInfo("Test ShardSchema: take field Type"); diff --git a/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc b/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc index bfd49069b2..9c177d7a40 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc @@ -40,6 +40,17 @@ namespace mindrecord { class TestShardOperator : public UT::Common { public: TestShardOperator() {} + + void SetUp() override { ShardWriterImageNet(); } + + void TearDown() override { + for (int i = 1; i <= 4; i++) { + string filename = std::string("./imagenet.shard0") + std::to_string(i); + string db_name = std::string("./imagenet.shard0") + std::to_string(i) + ".db"; + remove(common::SafeCStr(filename)); + remove(common::SafeCStr(db_name)); + } + } }; TEST_F(TestShardOperator, TestShardSampleBasic) { @@ 
-165,7 +176,7 @@ TEST_F(TestShardOperator, TestShardPkSamplerBasic) { auto x = dataset.GetNext(); if (x.empty()) break; std::cout << "index: " << i << ", filename: " << common::SafeCStr((std::get<1>(x[0]))["file_name"]) - << ", label: " << common::SafeCStr((std::get<1>(x[0]))["label"].dump()) << std::endl; + << ", label: " << common::SafeCStr((std::get<1>(x[0]))["label"].dump()) << std::endl; i++; } dataset.Finish(); @@ -191,7 +202,7 @@ TEST_F(TestShardOperator, TestShardPkSamplerNumClass) { if (x.empty()) break; std::cout << "index: " << i << ", filename: " << common::SafeCStr((std::get<1>(x[0]))["file_name"]) - << ", label: " << common::SafeCStr((std::get<1>(x[0]))["label"].dump()) << std::endl; + << ", label: " << common::SafeCStr((std::get<1>(x[0]))["label"].dump()) << std::endl; i++; } dataset.Finish(); diff --git a/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc b/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc index f7ed39a006..e88c2fe3d6 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc @@ -37,6 +37,16 @@ namespace mindrecord { class TestShardReader : public UT::Common { public: TestShardReader() {} + void SetUp() override { ShardWriterImageNet(); } + + void TearDown() override { + for (int i = 1; i <= 4; i++) { + string filename = std::string("./imagenet.shard0") + std::to_string(i); + string db_name = std::string("./imagenet.shard0") + std::to_string(i) + ".db"; + remove(common::SafeCStr(filename)); + remove(common::SafeCStr(db_name)); + } + } }; TEST_F(TestShardReader, TestShardReaderGeneral) { @@ -51,8 +61,8 @@ TEST_F(TestShardReader, TestShardReaderGeneral) { while (true) { auto x = dataset.GetNext(); if (x.empty()) break; - for (auto& j : x) { - for (auto& item : std::get<1>(j).items()) { + for (auto &j : x) { + for (auto &item : std::get<1>(j).items()) { MS_LOG(INFO) << "key: " << item.key() << ", value: " << item.value().dump(); } } @@ -74,8 +84,8 @@ TEST_F(TestShardReader, 
TestShardReaderSample) { while (true) { auto x = dataset.GetNext(); if (x.empty()) break; - for (auto& j : x) { - for (auto& item : std::get<1>(j).items()) { + for (auto &j : x) { + for (auto &item : std::get<1>(j).items()) { MS_LOG(INFO) << "key: " << item.key() << ", value: " << item.value().dump(); } } @@ -99,8 +109,8 @@ TEST_F(TestShardReader, TestShardReaderBlock) { while (true) { auto x = dataset.GetBlockNext(); if (x.empty()) break; - for (auto& j : x) { - for (auto& item : std::get<1>(j).items()) { + for (auto &j : x) { + for (auto &item : std::get<1>(j).items()) { MS_LOG(INFO) << "key: " << item.key() << ", value: " << item.value().dump(); } } @@ -119,8 +129,8 @@ TEST_F(TestShardReader, TestShardReaderEasy) { while (true) { auto x = dataset.GetNext(); if (x.empty()) break; - for (auto& j : x) { - for (auto& item : std::get<1>(j).items()) { + for (auto &j : x) { + for (auto &item : std::get<1>(j).items()) { MS_LOG(INFO) << "key: " << item.key() << ", value: " << item.value().dump(); } } @@ -140,8 +150,8 @@ TEST_F(TestShardReader, TestShardReaderColumnNotInIndex) { while (true) { auto x = dataset.GetNext(); if (x.empty()) break; - for (auto& j : x) { - for (auto& item : std::get<1>(j).items()) { + for (auto &j : x) { + for (auto &item : std::get<1>(j).items()) { MS_LOG(INFO) << "key: " << item.key() << ", value: " << item.value().dump(); } } @@ -169,9 +179,9 @@ TEST_F(TestShardReader, TestShardVersion) { while (true) { auto x = dataset.GetNext(); if (x.empty()) break; - for (auto& j : x) { + for (auto &j : x) { MS_LOG(INFO) << "result size: " << std::get<0>(j).size(); - for (auto& item : std::get<1>(j).items()) { + for (auto &item : std::get<1>(j).items()) { MS_LOG(INFO) << "key: " << common::SafeCStr(item.key()) << ", value: " << common::SafeCStr(item.value().dump()); } } @@ -201,8 +211,8 @@ TEST_F(TestShardReader, TestShardReaderConsumer) { while (true) { auto x = dataset.GetNext(); if (x.empty()) break; - for (auto& j : x) { - for (auto& item : 
std::get<1>(j).items()) { + for (auto &j : x) { + for (auto &item : std::get<1>(j).items()) { MS_LOG(INFO) << "key: " << common::SafeCStr(item.key()) << ", value: " << common::SafeCStr(item.value().dump()); } } diff --git a/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc b/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc index c803f584aa..bf0a35df7d 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc @@ -33,15 +33,25 @@ #include "mindrecord/include/shard_segment.h" #include "ut_common.h" -using mindspore::MsLogLevel::INFO; -using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; namespace mindspore { namespace mindrecord { class TestShardSegment : public UT::Common { public: TestShardSegment() {} + void SetUp() override { ShardWriterImageNet(); } + + void TearDown() override { + for (int i = 1; i <= 4; i++) { + string filename = std::string("./imagenet.shard0") + std::to_string(i); + string db_name = std::string("./imagenet.shard0") + std::to_string(i) + ".db"; + remove(common::SafeCStr(filename)); + remove(common::SafeCStr(db_name)); + } + } }; TEST_F(TestShardSegment, TestShardSegment) { diff --git a/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc b/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc index 18e9214b08..3fa248c2e0 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc @@ -16,7 +16,6 @@ #include #include -#include #include #include #include @@ -30,7 +29,6 @@ #include "mindrecord/include/shard_index_generator.h" #include "securec.h" #include "ut_common.h" -#include "ut_shard_writer_test.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; @@ -44,249 +42,10 @@ class TestShardWriter : public UT::Common { TestShardWriter() {} }; -void LoadDataFromImageNet(const std::string &directory, std::vector 
&json_buffer, const int max_num) { - int count = 0; - string input_path = directory; - ifstream infile(input_path); - if (!infile.is_open()) { - MS_LOG(ERROR) << "can not open the file "; - return; - } - string temp; - string filename; - string label; - json j; - while (getline(infile, temp) && count != max_num) { - count++; - std::size_t pos = temp.find(",", 0); - if (pos != std::string::npos) { - j["file_name"] = temp.substr(0, pos); - j["label"] = atoi(common::SafeCStr(temp.substr(pos + 1, temp.length()))); - json_buffer.push_back(j); - } - } - infile.close(); -} - -int Img2DataUint8(const std::vector &img_absolute_path, std::vector> &bin_data) { - for (auto &file : img_absolute_path) { - // read image file - std::ifstream in(common::SafeCStr(file), std::ios::in | std::ios::binary | std::ios::ate); - if (!in) { - MS_LOG(ERROR) << common::SafeCStr(file) << " is not a directory or not exist!"; - return -1; - } - - // get the file size - uint64_t size = in.tellg(); - in.seekg(0, std::ios::beg); - std::vector file_data(size); - in.read(reinterpret_cast(&file_data[0]), size); - in.close(); - bin_data.push_back(file_data); - } - return 0; -} - -int GetAbsoluteFiles(std::string directory, std::vector &files_absolute_path) { - DIR *dir = opendir(common::SafeCStr(directory)); - if (dir == nullptr) { - MS_LOG(ERROR) << common::SafeCStr(directory) << " is not a directory or not exist!"; - return -1; - } - struct dirent *d_ent = nullptr; - char dot[3] = "."; - char dotdot[6] = ".."; - while ((d_ent = readdir(dir)) != nullptr) { - if ((strcmp(d_ent->d_name, dot) != 0) && (strcmp(d_ent->d_name, dotdot) != 0)) { - if (d_ent->d_type == DT_DIR) { - std::string new_directory = directory + std::string("/") + std::string(d_ent->d_name); - if (directory[directory.length() - 1] == '/') { - new_directory = directory + string(d_ent->d_name); - } - if (-1 == GetAbsoluteFiles(new_directory, files_absolute_path)) { - closedir(dir); - return -1; - } - } else { - std::string absolute_path = 
directory + std::string("/") + std::string(d_ent->d_name); - if (directory[directory.length() - 1] == '/') { - absolute_path = directory + std::string(d_ent->d_name); - } - files_absolute_path.push_back(absolute_path); - } - } - } - closedir(dir); - return 0; -} - -void TestShardWriterImageNet() { - MS_LOG(INFO) << common::SafeCStr(FormatInfo("Write imageNet")); - - // load binary data - std::vector> bin_data; - std::vector filenames; - if (-1 == mindrecord::GetAbsoluteFiles("./data/mindrecord/testImageNetData/images", filenames)) { - MS_LOG(INFO) << "-- ATTN -- Missed data directory. Skip this case. -----------------"; - return; - } - mindrecord::Img2DataUint8(filenames, bin_data); - - // init shardHeader - mindrecord::ShardHeader header_data; - MS_LOG(INFO) << "Init ShardHeader Already."; - - // create schema - json anno_schema_json = R"({"file_name": {"type": "string"}, "label": {"type": "int32"}})"_json; - std::shared_ptr anno_schema = mindrecord::Schema::Build("annotation", anno_schema_json); - if (anno_schema == nullptr) { - MS_LOG(ERROR) << "Build annotation schema failed"; - return; - } - - // add schema to shardHeader - int anno_schema_id = header_data.AddSchema(anno_schema); - MS_LOG(INFO) << "Init Schema Already."; - - // create index - std::pair index_field1(anno_schema_id, "file_name"); - std::pair index_field2(anno_schema_id, "label"); - std::vector> fields; - fields.push_back(index_field1); - fields.push_back(index_field2); - - // add index to shardHeader - header_data.AddIndexFields(fields); - MS_LOG(INFO) << "Init Index Fields Already."; - // load meta data - std::vector annotations; - LoadDataFromImageNet("./data/mindrecord/testImageNetData/annotation.txt", annotations, 10); - - // add data - std::map> rawdatas; - rawdatas.insert(pair>(anno_schema_id, annotations)); - MS_LOG(INFO) << "Init Images Already."; - - // init file_writer - std::vector file_names; - int file_count = 4; - for (int i = 1; i <= file_count; i++) { - 
file_names.emplace_back(std::string("./imagenet.shard0") + std::to_string(i)); - MS_LOG(INFO) << "shard name is: " << common::SafeCStr(file_names[i - 1]); - } - - MS_LOG(INFO) << "Init Output Files Already."; - { - mindrecord::ShardWriter fw_init; - fw_init.Open(file_names); - - // set shardHeader - fw_init.SetShardHeader(std::make_shared(header_data)); - - // close file_writer - fw_init.Commit(); - } - std::string filename = "./imagenet.shard01"; - { - MS_LOG(INFO) << "=============== images " << bin_data.size() << " ============================"; - mindrecord::ShardWriter fw; - fw.OpenForAppend(filename); - fw.WriteRawData(rawdatas, bin_data); - fw.Commit(); - } - mindrecord::ShardIndexGenerator sg{filename}; - sg.Build(); - sg.WriteToDatabase(); - - MS_LOG(INFO) << "Done create index"; -} - -void TestShardWriterImageNetOneSample() { - // load binary data - std::vector> bin_data; - std::vector filenames; - if (-1 == mindrecord::GetAbsoluteFiles("./data/mindrecord/testImageNetData/images", filenames)) { - MS_LOG(INFO) << "-- ATTN -- Missed data directory. Skip this case. 
-----------------"; - return; - } - mindrecord::Img2DataUint8(filenames, bin_data); - - // init shardHeader - mindrecord::ShardHeader header_data; - MS_LOG(INFO) << "Init ShardHeader Already."; - - // create schema - json anno_schema_json = R"({"file_name": {"type": "string"}, "label": {"type": "int32"}})"_json; - std::shared_ptr anno_schema = mindrecord::Schema::Build("annotation", anno_schema_json); - if (anno_schema == nullptr) { - MS_LOG(ERROR) << "Build annotation schema failed"; - return; - } - - // add schema to shardHeader - int anno_schema_id = header_data.AddSchema(anno_schema); - MS_LOG(INFO) << "Init Schema Already."; - - // create index - std::pair index_field1(anno_schema_id, "file_name"); - std::pair index_field2(anno_schema_id, "label"); - std::vector> fields; - fields.push_back(index_field1); - fields.push_back(index_field2); - - // add index to shardHeader - header_data.AddIndexFields(fields); - MS_LOG(INFO) << "Init Index Fields Already."; - - // load meta data - std::vector annotations; - LoadDataFromImageNet("./data/mindrecord/testImageNetData/annotation.txt", annotations, 1); - - // add data - std::map> rawdatas; - rawdatas.insert(pair>(anno_schema_id, annotations)); - MS_LOG(INFO) << "Init Images Already."; - - // init file_writer - std::vector file_names; - for (int i = 1; i <= 4; i++) { - file_names.emplace_back(std::string("./OneSample.shard0") + std::to_string(i)); - MS_LOG(INFO) << "shard name is: " << common::SafeCStr(file_names[i - 1]); - } - - MS_LOG(INFO) << "Init Output Files Already."; - { - mindrecord::ShardWriter fw_init; - fw_init.Open(file_names); - - // set shardHeader - fw_init.SetShardHeader(std::make_shared(header_data)); - - // close file_writer - fw_init.Commit(); - } - - std::string filename = "./OneSample.shard01"; - { - MS_LOG(INFO) << "=============== images " << bin_data.size() << " ============================"; - mindrecord::ShardWriter fw; - fw.OpenForAppend(filename); - bin_data = std::vector>(bin_data.begin(), 
bin_data.begin() + 1); - fw.WriteRawData(rawdatas, bin_data); - fw.Commit(); - } - - mindrecord::ShardIndexGenerator sg{filename}; - sg.Build(); - sg.WriteToDatabase(); - MS_LOG(INFO) << "Done create index"; -} - TEST_F(TestShardWriter, TestShardWriterBench) { MS_LOG(INFO) << common::SafeCStr(FormatInfo("Test write imageNet")); - TestShardWriterImageNet(); + ShardWriterImageNet(); for (int i = 1; i <= 4; i++) { string filename = std::string("./imagenet.shard0") + std::to_string(i); string db_name = std::string("./imagenet.shard0") + std::to_string(i) + ".db"; @@ -297,7 +56,7 @@ TEST_F(TestShardWriter, TestShardWriterBench) { TEST_F(TestShardWriter, TestShardWriterOneSample) { MS_LOG(INFO) << common::SafeCStr(FormatInfo("Test write imageNet int32 of sample less than num of shards")); - TestShardWriterImageNetOneSample(); + ShardWriterImageNetOneSample(); std::string filename = "./OneSample.shard01"; ShardReader dataset; @@ -342,7 +101,7 @@ TEST_F(TestShardWriter, TestShardWriterShiftRawPage) { std::vector image_filenames; // save all files' path within path_dir // read image_raw_meta.data - Common::LoadData(input_path1, json_buffer1, kMaxNum); + LoadData(input_path1, json_buffer1, kMaxNum); MS_LOG(INFO) << "Load Meta Data Already."; // get files' pathes stored in vector image_filenames @@ -375,7 +134,7 @@ TEST_F(TestShardWriter, TestShardWriterShiftRawPage) { MS_LOG(INFO) << "Init Schema Already."; // create/init statistics - Common::LoadData(input_path3, json_buffer4, 2); + LoadData(input_path3, json_buffer4, 2); json static1_json = json_buffer4[0]; json static2_json = json_buffer4[1]; MS_LOG(INFO) << "Initial statistics 1 is: " << common::SafeCStr(static1_json.dump()); @@ -474,7 +233,7 @@ TEST_F(TestShardWriter, TestShardWriterTrial) { std::vector image_filenames; // save all files' path within path_dir // read image_raw_meta.data - Common::LoadData(input_path1, json_buffer1, kMaxNum); + LoadData(input_path1, json_buffer1, kMaxNum); MS_LOG(INFO) << "Load Meta Data 
Already."; // get files' pathes stored in vector image_filenames @@ -508,7 +267,7 @@ TEST_F(TestShardWriter, TestShardWriterTrial) { MS_LOG(INFO) << "Init Schema Already."; // create/init statistics - Common::LoadData(input_path3, json_buffer4, 2); + LoadData(input_path3, json_buffer4, 2); json static1_json = json_buffer4[0]; json static2_json = json_buffer4[1]; MS_LOG(INFO) << "Initial statistics 1 is: " << common::SafeCStr(static1_json.dump()); @@ -613,7 +372,7 @@ TEST_F(TestShardWriter, TestShardWriterTrialNoFields) { std::vector image_filenames; // save all files' path within path_dir // read image_raw_meta.data - Common::LoadData(input_path1, json_buffer1, kMaxNum); + LoadData(input_path1, json_buffer1, kMaxNum); MS_LOG(INFO) << "Load Meta Data Already."; // get files' pathes stored in vector image_filenames @@ -644,7 +403,7 @@ TEST_F(TestShardWriter, TestShardWriterTrialNoFields) { MS_LOG(INFO) << "Init Schema Already."; // create/init statistics - Common::LoadData(input_path3, json_buffer4, 2); + LoadData(input_path3, json_buffer4, 2); json static1_json = json_buffer4[0]; json static2_json = json_buffer4[1]; MS_LOG(INFO) << "Initial statistics 1 is: " << common::SafeCStr(static1_json.dump()); @@ -1357,107 +1116,24 @@ TEST_F(TestShardWriter, TestWriteOpenFileName) { } } -void TestShardWriterImageNetOpenForAppend(string filename) { - for (int i = 1; i <= 4; i++) { - string filename = std::string("./OpenForAppendSample.shard0") + std::to_string(i); - string db_name = std::string("./OpenForAppendSample.shard0") + std::to_string(i) + ".db"; - remove(common::SafeCStr(filename)); - remove(common::SafeCStr(db_name)); - } - - // load binary data - std::vector> bin_data; - std::vector filenames; - if (-1 == mindrecord::GetAbsoluteFiles("./data/mindrecord/testImageNetData/images", filenames)) { - MS_LOG(INFO) << "-- ATTN -- Missed data directory. Skip this case. 
-----------------"; - return; - } - mindrecord::Img2DataUint8(filenames, bin_data); - - // init shardHeader - mindrecord::ShardHeader header_data; - MS_LOG(INFO) << "Init ShardHeader Already."; - - // create schema - json anno_schema_json = R"({"file_name": {"type": "string"}, "label": {"type": "int32"}})"_json; - std::shared_ptr anno_schema = mindrecord::Schema::Build("annotation", anno_schema_json); - if (anno_schema == nullptr) { - MS_LOG(ERROR) << "Build annotation schema failed"; - return; - } - - // add schema to shardHeader - int anno_schema_id = header_data.AddSchema(anno_schema); - MS_LOG(INFO) << "Init Schema Already."; - - // create index - std::pair index_field1(anno_schema_id, "file_name"); - std::pair index_field2(anno_schema_id, "label"); - std::vector> fields; - fields.push_back(index_field1); - fields.push_back(index_field2); - - // add index to shardHeader - header_data.AddIndexFields(fields); - MS_LOG(INFO) << "Init Index Fields Already."; - - // load meta data - std::vector annotations; - LoadDataFromImageNet("./data/mindrecord/testImageNetData/annotation.txt", annotations, 1); - - // add data - std::map> rawdatas; - rawdatas.insert(pair>(anno_schema_id, annotations)); - MS_LOG(INFO) << "Init Images Already."; - - // init file_writer - std::vector file_names; - for (int i = 1; i <= 4; i++) { - file_names.emplace_back(std::string("./OpenForAppendSample.shard0") + std::to_string(i)); - MS_LOG(INFO) << "shard name is: " << common::SafeCStr(file_names[i - 1]); - } - - MS_LOG(INFO) << "Init Output Files Already."; - { - mindrecord::ShardWriter fw_init; - fw_init.Open(file_names); - - // set shardHeader - fw_init.SetShardHeader(std::make_shared(header_data)); - - // close file_writer - fw_init.Commit(); - } - { - MS_LOG(INFO) << "=============== images " << bin_data.size() << " ============================"; - mindrecord::ShardWriter fw; - auto ret = fw.OpenForAppend(filename); - if (ret == FAILED) { - return; - } - - bin_data = 
std::vector>(bin_data.begin(), bin_data.begin() + 1); - fw.WriteRawData(rawdatas, bin_data); - fw.Commit(); - } - - mindrecord::ShardIndexGenerator sg{filename}; - sg.Build(); - sg.WriteToDatabase(); - MS_LOG(INFO) << "Done create index"; -} - TEST_F(TestShardWriter, TestOpenForAppend) { MS_LOG(INFO) << "start ---- TestOpenForAppend\n"; string filename = "./"; - TestShardWriterImageNetOpenForAppend(filename); + ShardWriterImageNetOpenForAppend(filename); string filename1 = "./▒AppendSample.shard01"; - TestShardWriterImageNetOpenForAppend(filename1); + ShardWriterImageNetOpenForAppend(filename1); string filename2 = "./ä\xA9ü"; - TestShardWriterImageNetOpenForAppend(filename2); + ShardWriterImageNetOpenForAppend(filename2); + MS_LOG(INFO) << "end ---- TestOpenForAppend\n"; + for (int i = 1; i <= 4; i++) { + string filename = std::string("./OpenForAppendSample.shard0") + std::to_string(i); + string db_name = std::string("./OpenForAppendSample.shard0") + std::to_string(i) + ".db"; + remove(common::SafeCStr(filename)); + remove(common::SafeCStr(db_name)); + } } } // namespace mindrecord diff --git a/tests/ut/cpp/optimizer/optimizer_test.cc b/tests/ut/cpp/optimizer/optimizer_test.cc index d700225894..ca7c589d47 100644 --- a/tests/ut/cpp/optimizer/optimizer_test.cc +++ b/tests/ut/cpp/optimizer/optimizer_test.cc @@ -57,8 +57,7 @@ TEST_F(TestOptOptimizer, test_step_opt) { true); EXPECT_TRUE(optimizer.get() != nullptr); - abstract::AbstractBasePtrList args; - auto after = optimizer->step(before, args); + auto after = optimizer->step(before); draw::Draw("optimizer_test_expendJ_before.dot", before); draw::Draw("optimizer_test_expendJ_after.dot", after); diff --git a/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc b/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc index 509b00f428..1eb65b468f 100644 --- a/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc @@ -227,19 +227,22 @@ 
TEST_F(TestPartition, test_PartitionNode) { TEST_F(TestPartition, test_PartitionForAllDevices) { std::shared_ptr graph = MakeMatMulData(9); - ASSERT_EQ(PartitionForAllDevices(1024, graph), SUCCESS); + double device_memory = 1024.0 * 1024.0 * 1024.0 * 16.0; + ASSERT_EQ(PartitionForAllDevices(1024, device_memory, graph), SUCCESS); } TEST_F(TestPartition, test_PartitionForAllDevices2) { std::shared_ptr graph = MakeMatMulData(9); - ASSERT_EQ(PartitionForAllDevices(2, graph), SUCCESS); + double device_memory = 1024.0 * 1024.0 * 1024.0 * 16.0; + ASSERT_EQ(PartitionForAllDevices(2, device_memory, graph), SUCCESS); } // Negative case: parition on 0 device TEST_F(TestPartition, test_PartitionForAllDevices0) { std::shared_ptr graph = MakeMatMulData(9); + double device_memory = 1024.0 * 1024.0 * 1024.0 * 16.0; // Throw Exception "Number of devices can't be 0" - EXPECT_ANY_THROW(PartitionForAllDevices(0, graph)); + EXPECT_ANY_THROW(PartitionForAllDevices(0, device_memory, graph)); } TEST_F(TestPartition, test_ApplyStrToTensor) { diff --git a/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc deleted file mode 100644 index 9807344139..0000000000 --- a/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc +++ /dev/null @@ -1,1298 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include - -#include "common/common_test.h" -#include "common/py_func_graph_fetcher.h" - -#include "ir/anf.h" -#include "ir/func_graph_cloner.h" -#include "utils/context/ms_context.h" -#include "debug/draw.h" -#include "debug/anf_ir_dump.h" -#include "operator/ops.h" -#include "utils/utils.h" -#include "kernel/tbe/tbe_kernel_mod.h" -#include "session/kernel_graph.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/pattern_engine.h" -#define private public -#include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" - -namespace mindspore { -namespace opt { -using Primitive = mindspore::Primitive; -using session::KernelGraph; -using KernelGraphPtr = std::shared_ptr; -using KernelBuildInfoBuilder = kernel::KernelBuildInfo::KernelBuildInfoBuilder; -class TestHWBufferFusion : public UT::Common { - public: - TestHWBufferFusion() : getPyFun_("gtest_input.pre_activate.hw_opt_test", true) {} - - public: - UT::PyFuncGraphFetcher getPyFun_; -}; - -static KernelGraphPtr CreateKernelGraphForBufferFusionMultipleIn( - uint32_t after_layers, mindspore::kernel::FusionType fusiontype = mindspore::kernel::CONVLUTION) { - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - - std::vector shp = {1, 3, 3, 4}; - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - - uint32_t layerscount = 1; - CNodePtr ptr_formerlayer; - std::string name = ""; - - // Construct first node - tensor::TensorPtr y_tensor = std::make_shared(kFloat32->type_id(), shp); - y_tensor->set_device_info(device_info); - tensor::TensorPtr z_tensor = std::make_shared(kFloat32->type_id(), shp); - z_tensor->set_device_info(device_info); - - auto y_const = NewValueNode(y_tensor); - auto z_const = NewValueNode(z_tensor); - y_const->set_abstract(y_tensor->ToAbstract()); - z_const->set_abstract(z_tensor->ToAbstract()); - g->MutableInputs()->push_back(y_const); - 
g->MutableInputs()->push_back(z_const); - - auto p_conv = std::make_shared("Conv2D"); - std::vector input_names = {"x", "y"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_conv->set_attr("input_names", input_names_v); - p_conv->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_conv)); - inputs.push_back(y_const); - inputs.push_back(z_const); - name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot"; - - auto kernelptr_first = g->NewCNode(inputs); - kernelptr_first->set_abstract(y_tensor->ToAbstract()); - kernelptr_first->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - - builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(fusiontype); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_first.get()); - ptr_formerlayer = kernelptr_first; - - // configure fusion successor layers - int layer_idx = 0; - while (after_layers--) { - auto p_relu = std::make_shared("ReLU6"); - if (layer_idx == 0) { - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - x_tensor->set_device_info(device_info); - - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_tensor->ToAbstract()); - std::vector input_names = {"x", "y"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - 
inputs.push_back(ptr_formerlayer); - inputs.push_back(x_const); - } else { - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - } - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(y_tensor->ToAbstract()); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - if (layer_idx == 0) { - builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - } else { - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - } - - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get()); - ptr_formerlayer = kernelptr_floor; - layerscount++; - layer_idx++; - } - - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(ptr_formerlayer); - auto ret = g->NewCNode(inputs); - ret->set_abstract(y_tensor->ToAbstract()); - - g->set_return(ret); - - draw::Draw(name, g); - - return g; -} - -static KernelGraphPtr CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter( - uint32_t before_layers, uint32_t after_layers = 3, - mindspore::kernel::FusionType fusiontype = mindspore::kernel::SEGMENT) { - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - - std::vector shp = {1, 3, 3, 4}; - TensorTypePtr tensor_type = 
std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - - uint32_t layerscount = 1; - CNodePtr ptr_formerlayer; - std::string name = ""; - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - auto x_abstract = x_tensor->ToAbstract(); - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_abstract); - g->MutableInputs()->push_back(x_const); - - while (before_layers--) { - auto p_relu = std::make_shared("ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - if (layerscount == 1) { - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(x_const); - } else { - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - } - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(x_abstract); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get()); - ptr_formerlayer = kernelptr_floor; - layerscount++; - } - - // Construct the conv2d node - tensor::TensorPtr y_tensor = std::make_shared(kFloat32->type_id(), shp); - y_tensor->set_device_info(device_info); - auto y_const = NewValueNode(y_tensor); - y_const->set_abstract(y_tensor->ToAbstract()); - - if (fusiontype == kernel::FusionType::CONVLUTION) { - auto p_conv = std::make_shared("Conv2D"); - 
std::vector input_names = {"x", "y"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_conv->set_attr("input_names", input_names_v); - p_conv->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_conv)); - inputs.push_back(y_const); - inputs.push_back(ptr_formerlayer); - name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot"; - } else { - auto p_red_seg = std::make_shared("ReduceOrSegment"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_red_seg->set_attr("input_names", input_names_v); - p_red_seg->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_red_seg)); - inputs.push_back(ptr_formerlayer); - name = "test_regOrSeg_" + std::to_string(layerscount) + "layers_graph.dot"; - } - - auto kernelptr_first = g->NewCNode(inputs); - kernelptr_first->set_abstract(y_tensor->ToAbstract()); - kernelptr_first->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - if (fusiontype == kernel::FusionType::CONVLUTION) { - builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - } else { - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - } - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(fusiontype); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_first.get()); - ptr_formerlayer = kernelptr_first; - - // configure fusion successor layers - while (after_layers--) { - auto p_relu = std::make_shared("ReLU6"); - 
std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(y_tensor->ToAbstract()); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get()); - ptr_formerlayer = kernelptr_floor; - layerscount++; - } - - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(ptr_formerlayer); - auto ret = g->NewCNode(inputs); - ret->set_abstract(y_tensor->ToAbstract()); - g->set_return(ret); - draw::Draw(name, g); - return g; -} - -static KernelGraphPtr CreateKernelGraphForBufferFusionSingleIn( - uint32_t after_layers, mindspore::kernel::FusionType fusiontype = mindspore::kernel::CONVLUTION) { - // build the func_graph manually, eg: - /* CreateKernelGraphForBufferFusionSingleIn(1) - * @mindspore - * def f(x): - * z=conv2d(x, y) - * ret=relu(z) - * return ret - */ - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - - std::vector shp = {1, 3, 3, 4}; - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - - uint32_t layerscount = 1; - CNodePtr ptr_formerlayer; - std::string 
name = ""; - - // Construct first node - tensor::TensorPtr y_tensor = std::make_shared(kFloat32->type_id(), shp); - y_tensor->set_device_info(device_info); - tensor::TensorPtr z_tensor = std::make_shared(kFloat32->type_id(), shp); - z_tensor->set_device_info(device_info); - - auto y_const = NewValueNode(y_tensor); - auto z_const = NewValueNode(z_tensor); - y_const->set_abstract(y_tensor->ToAbstract()); - z_const->set_abstract(z_tensor->ToAbstract()); - g->MutableInputs()->push_back(y_const); - g->MutableInputs()->push_back(z_const); - - if (fusiontype == kernel::FusionType::CONVLUTION) { - auto p_conv = std::make_shared("Conv2D"); - std::vector input_names = {"x", "y"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_conv->set_attr("input_names", input_names_v); - p_conv->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_conv)); - inputs.push_back(y_const); - inputs.push_back(z_const); - name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot"; - } else { - auto p_red_seg = std::make_shared("ReduceOrSegment"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_red_seg->set_attr("input_names", input_names_v); - p_red_seg->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_red_seg)); - inputs.push_back(y_const); - name = "test_regOrSeg_" + std::to_string(layerscount) + "layers_graph.dot"; - } - - auto kernelptr_first = g->NewCNode(inputs); - kernelptr_first->set_abstract(y_tensor->ToAbstract()); - kernelptr_first->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - if (fusiontype == kernel::FusionType::CONVLUTION) { - builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - 
builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - } else { - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - } - - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(fusiontype); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_first.get()); - ptr_formerlayer = kernelptr_first; - - // configure fusion successor layers - while (after_layers--) { - auto p_relu = std::make_shared("ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(y_tensor->ToAbstract()); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get()); - ptr_formerlayer = kernelptr_floor; - layerscount++; - } - - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(ptr_formerlayer); - auto ret = g->NewCNode(inputs); - ret->set_abstract(y_tensor->ToAbstract()); - - g->set_return(ret); - - 
draw::Draw(name, g); - - return g; -} - -static KernelGraphPtr CreateKernelGraphForBufferFusion( - uint32_t targetlayers, bool conv_flag = false, - mindspore::kernel::FusionType fusiontype = mindspore::kernel::CONVLUTION) { - // build the func_graph manually, eg: - /* CreateKernelGraphForBufferFusion(3) - * @mindspore - * def f(x): - * y=relu(x) - * z=relu(y) - * ret=relu(z) - * return ret - */ - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - // x is input tensor. - std::vector shp = {1, 3, 3, 4}; - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - x_tensor->set_device_info(device_info); - - auto x_abstract = x_tensor->ToAbstract(); - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_abstract); - g->MutableInputs()->push_back(x_const); - - uint32_t layerscount = 1; - CNodePtr ptr_formerlayer; - // configure func_graph hiden layers - while (targetlayers--) { - auto p_relu = std::make_shared("ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - if (layerscount == 1) { - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(x_const); - } else { - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - } - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(x_abstract); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - 
builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get()); - ptr_formerlayer = kernelptr_floor; - layerscount++; - } - std::string name = "test_construct_" + std::to_string(layerscount) + "layers_graph.dot"; - if (conv_flag) { - tensor::TensorPtr y_tensor = std::make_shared(kFloat32->type_id(), shp); - y_tensor->set_device_info(device_info); - tensor::TensorPtr z_tensor = std::make_shared(kFloat32->type_id(), shp); - z_tensor->set_device_info(device_info); - auto y_const = NewValueNode(y_tensor); - auto z_const = NewValueNode(y_tensor); - - y_const->set_abstract(y_tensor->ToAbstract()); - z_const->set_abstract(z_tensor->ToAbstract()); - - g->MutableInputs()->push_back(y_const); - - if (fusiontype == kernel::FusionType::CONVLUTION) { - auto p_conv = std::make_shared("Conv2D"); - std::vector input_names = {"x", "y"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_conv->set_attr("input_names", input_names_v); - p_conv->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_conv)); - inputs.push_back(y_const); - inputs.push_back(ptr_formerlayer); - } else { - auto p_conv = std::make_shared("ReduceOrSegment"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_conv->set_attr("input_names", input_names_v); - p_conv->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_conv)); - inputs.push_back(ptr_formerlayer); - } - - auto kernelptr_conv = g->NewCNode(inputs); - kernelptr_conv->set_abstract(x_abstract); - kernelptr_conv->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder 
builder; - if (fusiontype == kernel::FusionType::CONVLUTION) { - builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - } else { - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - } - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(fusiontype); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_conv.get()); - ptr_formerlayer = kernelptr_conv; - name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot"; - } - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(ptr_formerlayer); - auto ret = g->NewCNode(inputs); - ret->set_abstract(x_abstract); - - g->set_return(ret); - - draw::Draw(name, g); - - return g; -} - -CNodePtr CreateKernelGraphBranch(KernelGraphPtr g, CNodePtr inputptr, int layers, - const kernel::FusionType fusiontype = kernel::FusionType::CONVLUTION) { - std::vector shp = {1, 3, 3, 4}; - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - x_tensor->set_device_info(device_info); - auto x_abstract = x_tensor->ToAbstract(); - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_abstract); - - CNodePtr ptr_formerlayer = inputptr; - while (layers--) { - auto p_relu = std::make_shared("ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - std::vector inputs; 
- inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(x_abstract); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get()); - ptr_formerlayer = kernelptr_floor; - } - - tensor::TensorPtr y_tensor = std::make_shared(kFloat32->type_id(), shp); - y_tensor->set_device_info(device_info); - tensor::TensorPtr z_tensor = std::make_shared(kFloat32->type_id(), shp); - z_tensor->set_device_info(device_info); - auto y_const = NewValueNode(y_tensor); - auto z_const = NewValueNode(y_tensor); - - y_const->set_abstract(y_tensor->ToAbstract()); - z_const->set_abstract(z_tensor->ToAbstract()); - - g->MutableInputs()->push_back(y_const); - - auto p_conv = std::make_shared("Conv2D"); - std::vector input_names = {"x", "y"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_conv->set_attr("input_names", input_names_v); - p_conv->set_attr("output_names", output_names_v); - - std::vector inputs; - inputs.clear(); - inputs.push_back(NewValueNode(p_conv)); - inputs.push_back(y_const); - inputs.push_back(ptr_formerlayer); - - auto kernelptr_conv = g->NewCNode(inputs); - kernelptr_conv->set_abstract(x_abstract); - kernelptr_conv->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - 
builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(fusiontype); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_conv.get()); - return kernelptr_conv; -} - -static KernelGraphPtr CreateKernelGraphForMultiUse(uint32_t targetlayer1s, uint32_t targetlayer2s) { - /* @mindspore - * def f(x): - * multi_use=relu(x) - * y=relu(multi_use) - * z=relu(multi_use) - * ret=relu(y, z) - * return ret - */ - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - // x is input tensor. - std::vector shp = {1, 3, 3, 4}; - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - x_tensor->set_device_info(device_info); - - auto x_abstract = x_tensor->ToAbstract(); - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_abstract); - - g->MutableInputs()->push_back(x_const); - - auto p_multi = std::make_shared("MULTI_USE_ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_multi->set_attr("input_names", input_names_v); - p_multi->set_attr("output_names", output_names_v); - inputs.clear(); - inputs.push_back(NewValueNode(p_multi)); - inputs.push_back(x_const); - auto kernelptr_multi = g->NewCNode(inputs); - kernelptr_multi->set_abstract(x_abstract); - kernelptr_multi->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - 
builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_multi.get()); - - CNodePtr outptrbranch1 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer2s); - CNodePtr outptrbranch2 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer1s); - - auto p_relu = std::make_shared("ReLU6"); - input_names = {"x"}; - output_names = {"output"}; - input_names_v = MakeValue(input_names); - output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(outptrbranch1); - inputs.push_back(outptrbranch2); - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(x_abstract); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder1; - builder1.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder1.SetOutputsFormat({kOpFormat_NCHW}); - builder1.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - builder1.SetOutputsDeviceType({kFloat32->type_id()}); - builder1.SetKernelType(KernelType::TBE_KERNEL); - builder1.SetFusionType(kernel::FusionType::ELEMWISE); - builder1.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), kernelptr_floor.get()); - - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(kernelptr_floor); - auto ret = g->NewCNode(inputs); - ret->set_abstract(x_abstract); - - g->set_return(ret); - string name = "multi_use_graph.dot"; - draw::Draw(name, g); - - return g; -} -#ifdef BUFFER_FUSION_MULTI_OUT -static KernelGraphPtr CreateKernelGraphForMultiOutputWithLinearInput( - uint32_t targetlayer1s, uint32_t targetlayer2s, bool use_flag = true, - const kernel::FusionType fusion_type = 
kernel::FusionType::CONVLUTION) { - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - // x is input tensor. - std::vector shp = {1, 3, 3, 4}; - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - x_tensor->set_device_info(device_info); - - auto x_abstract = x_tensor->ToAbstract(); - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_abstract); - g->MutableInputs()->push_back(x_const); - - auto p_relu0 = std::make_shared("ReLU6"); - std::vector input_names0 = {"x"}; - std::vector output_names0 = {"output"}; - ValuePtr input_names_v0 = MakeValue(input_names0); - ValuePtr output_names_v0 = MakeValue(output_names0); - p_relu0->set_attr("input_names", input_names_v0); - p_relu0->set_attr("output_names", output_names_v0); - inputs.clear(); - inputs.push_back(NewValueNode(p_relu0)); - inputs.push_back(x_const); - auto kernelptr_floor0 = g->NewCNode(inputs); - kernelptr_floor0->set_abstract(x_abstract); - kernelptr_floor0->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder0; - builder0.SetInputsFormat({kOpFormat_NCHW}); - builder0.SetOutputsFormat({kOpFormat_NCHW}); - builder0.SetInputsDeviceType({kFloat32->type_id()}); - builder0.SetOutputsDeviceType({kFloat32->type_id()}); - builder0.SetKernelType(KernelType::TBE_KERNEL); - builder0.SetFusionType(kernel::FusionType::ELEMWISE); - builder0.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder0.Build(), kernelptr_floor0.get()); - CNodePtr ptr_formerlayer; - ptr_formerlayer = kernelptr_floor0; - - auto p_multi = std::make_shared("MULTI_USE_ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_multi->set_attr("input_names", input_names_v); - 
p_multi->set_attr("output_names", output_names_v); - inputs.clear(); - inputs.push_back(NewValueNode(p_multi)); - inputs.push_back(ptr_formerlayer); - auto kernelptr_multi = g->NewCNode(inputs); - kernelptr_multi->set_abstract(x_abstract); - kernelptr_multi->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat16->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_multi.get()); - - CNodePtr outptrbranch2 = nullptr; - CNodePtr outptrbranch1 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer2s, fusion_type); - if (use_flag) { - outptrbranch2 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer1s, fusion_type); - } - auto p_relu = std::make_shared("ReLU6"); - input_names = {"x"}; - output_names = {"output"}; - input_names_v = MakeValue(input_names); - output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(outptrbranch1); - if (use_flag) { - inputs.push_back(outptrbranch2); - } - - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(x_abstract); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder1; - if (use_flag) { - builder1.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder1.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - } else { - builder1.SetInputsFormat({kOpFormat_NCHW}); - builder1.SetInputsDeviceType({kFloat32->type_id()}); - } - builder1.SetOutputsFormat({kOpFormat_NCHW}); - 
builder1.SetOutputsDeviceType({kFloat32->type_id()}); - builder1.SetKernelType(KernelType::TBE_KERNEL); - builder1.SetFusionType(kernel::FusionType::ELEMWISE); - builder1.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), kernelptr_floor.get()); - cout << "built two branches done" << endl; - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(kernelptr_floor); - auto ret = g->NewCNode(inputs); - ret->set_abstract(x_abstract); - - g->set_return(ret); - string name = "multi_use_graph.dot"; - draw::Draw(name, g); - - return g; -} - -static KernelGraphPtr CreateKernelGraphForMultiOutput( - uint32_t targetlayer1s, uint32_t targetlayer2s, bool use_flag = true, - const kernel::FusionType fusion_type = kernel::FusionType::CONVLUTION) { - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - // x is input tensor. - std::vector shp = {1, 3, 3, 4}; - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - x_tensor->set_device_info(device_info); - - auto x_abstract = x_tensor->ToAbstract(); - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_abstract); - g->MutableInputs()->push_back(x_const); - - auto p_multi = std::make_shared("MULTI_USE_ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_multi->set_attr("input_names", input_names_v); - p_multi->set_attr("output_names", output_names_v); - inputs.clear(); - inputs.push_back(NewValueNode(p_multi)); - inputs.push_back(x_const); - auto kernelptr_multi = g->NewCNode(inputs); - kernelptr_multi->set_abstract(x_abstract); - kernelptr_multi->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder 
builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat16->type_id(), kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_multi.get()); - - CNodePtr outptrbranch2 = nullptr; - CNodePtr outptrbranch1 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer2s, fusion_type); - if (use_flag) { - outptrbranch2 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer1s, fusion_type); - } - auto p_relu = std::make_shared("ReLU6"); - input_names = {"x"}; - output_names = {"output"}; - input_names_v = MakeValue(input_names); - output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(outptrbranch1); - if (use_flag) { - inputs.push_back(outptrbranch2); - } - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(x_abstract); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder1; - if (use_flag) { - builder1.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder1.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - } else { - builder1.SetInputsFormat({kOpFormat_NCHW}); - builder1.SetInputsDeviceType({kFloat32->type_id()}); - } - builder1.SetOutputsFormat({kOpFormat_NCHW}); - builder1.SetOutputsDeviceType({kFloat32->type_id()}); - builder1.SetKernelType(KernelType::TBE_KERNEL); - builder1.SetFusionType(kernel::FusionType::ELEMWISE); - builder1.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), kernelptr_floor.get()); - - // return res - auto p_return = 
std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(kernelptr_floor); - auto ret = g->NewCNode(inputs); - ret->set_abstract(x_abstract); - - g->set_return(ret); - string name = "multi_use_graph.dot"; - draw::Draw(name, g); - - return g; -} -#endif -TEST_F(TestHWBufferFusion, BufferFusionlayerSingleIn1) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionSingleIn(1); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionlayerSingleIn1.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 8); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionlayerSingleIn1.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 6); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayerSingleIn2) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionSingleIn(2); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionlayerSingleIn2.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 10); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionlayerSingleIn2.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 6); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayerSingleIn3) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionSingleIn(3); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionlayerSingleIn3.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - 
ASSERT_EQ(manager->all_nodes().size(), 12); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionlayerSingleIn3.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 6); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayer1) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(1); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); - buffer_fusion.Run(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayer2) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(2); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 7); - buffer_fusion.Run(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayer4) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(4); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 11); - buffer_fusion.Run(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayer6) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(6); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 15); - 
buffer_fusion.Run(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 7); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayer8) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(8); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 19); - buffer_fusion.Run(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 7); -} - -TEST_F(TestHWBufferFusion, BufferFusionconv1) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(1, true); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), false); -} - -TEST_F(TestHWBufferFusion, BufferFusionconv8) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(8, true); - draw::Draw("before_BufferFusionconv8.dot", graph_ptr); - - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true); - kernel::KernelPackPtr kernel_pack = std::make_shared(); - auto kernel_ptr = std::make_shared(kernel_pack); - std::unordered_map buffer_fusion_infos; - buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos); - std::vector fusion_ids; - for (auto &buffer_fusion_info : buffer_fusion_infos) { - fusion_ids.push_back(buffer_fusion_info.first); - } - std::sort(fusion_ids.begin(), fusion_ids.end()); - for (auto &fusion_id : fusion_ids) { - buffer_fusion.ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_ptr, graph_ptr.get()); - 
} - draw::Draw("after_BufferFusionconv8.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 10); -} - -#ifdef BUFFER_FUSION_MULTI_OUT -TEST_F(TestHWBufferFusion, BufferFusionMultiOutWithLinearInput) { - KernelGraphPtr graph_ptr = CreateKernelGraphForMultiOutputWithLinearInput(1, 1, true, mindspore::kernel::OPAQUE); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 19); - - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true); - kernel::KernelPackPtr kernel_pack = std::make_shared(); - auto kernel_ptr = std::make_shared(kernel_pack); - std::unordered_map buffer_fusion_infos; - buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos); - for (auto &buffer_fusion_info : buffer_fusion_infos) { - EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 3); - EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1); - EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 2); - buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get()); - } - ASSERT_EQ(manager->all_nodes().size(), 21); -} - -TEST_F(TestHWBufferFusion, BufferFusionMultiOut) { - KernelGraphPtr graph_ptr = CreateKernelGraphForMultiOutput(1, 1, true, mindspore::kernel::OPAQUE); - draw::Draw("before_BufferFusionMultiOut.dot", graph_ptr); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 17); - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true); - kernel::KernelPackPtr kernel_pack = std::make_shared(); - auto kernel_ptr = std::make_shared(kernel_pack); - std::unordered_map buffer_fusion_infos; - 
buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos); - for (auto &buffer_fusion_info : buffer_fusion_infos) { - EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 2); - EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1); - EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 2); - buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get()); - } - draw::Draw("after_BufferFusionMultiOut.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 21); -} -#endif - -TEST_F(TestHWBufferFusion, BufferMultiUse) { - KernelGraphPtr graph_ptr = CreateKernelGraphForMultiUse(3, 4); - draw::Draw("before_BufferMultiUse.dot", graph_ptr); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true); - kernel::KernelPackPtr kernel_pack = std::make_shared(); - auto kernel_ptr = std::make_shared(kernel_pack); - std::unordered_map buffer_fusion_infos; - buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos); - std::vector fusion_ids; - for (auto &buffer_fusion_info : buffer_fusion_infos) { - fusion_ids.push_back(buffer_fusion_info.first); - } - std::sort(fusion_ids.begin(), fusion_ids.end()); - for (auto &fusion_id : fusion_ids) { - buffer_fusion.ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_ptr, graph_ptr.get()); - } - draw::Draw("after_BufferMultiUse.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 15); -} - -TEST_F(TestHWBufferFusion, BufferFusionReduce) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(2, true, mindspore::kernel::COMMREDUCE); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - 
manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true); - kernel::KernelPackPtr kernel_pack = std::make_shared(); - auto kernel_ptr = std::make_shared(kernel_pack); - std::unordered_map buffer_fusion_infos; - buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos); - for (auto &buffer_fusion_info : buffer_fusion_infos) { - EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 3); - EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1); - EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 1); - buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get()); - } - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionSegment) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(2, true, mindspore::kernel::SEGMENT); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true); - kernel::KernelPackPtr kernel_pack = std::make_shared(); - auto kernel_ptr = std::make_shared(kernel_pack); - std::unordered_map buffer_fusion_infos; - buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos); - for (auto &buffer_fusion_info : buffer_fusion_infos) { - EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 3); - EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1); - EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 1); - buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get()); - } - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionEltwise1BeforeAnd3After) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(1); - ASSERT_TRUE(nullptr != graph_ptr); - 
draw::Draw("before_BufferFusionEltwiseBeforeAndAfter1.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 13); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionEltwiseBeforeAndAfter1.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionEltwise2BeforeAnd3After) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(2); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionEltwiseBeforeAndAfter2.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 15); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionEltwiseBeforeAndAfter2.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionEltwise3BeforeAnd3After) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(3); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionEltwiseBeforeAndAfter3.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 17); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionEltwiseBeforeAndAfter3.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionMultipleIn) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionMultipleIn(2); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionMultipleIn.dot", 
graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 11); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionMultipleIn.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 7); -} -} // namespace opt -} // namespace mindspore diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/add_memcpy_async_test.cc b/tests/ut/cpp/pre_activate/ascend/enhancer/add_memcpy_async_test.cc similarity index 97% rename from tests/ut/cpp/pre_activate/ascend/ir_fission/add_memcpy_async_test.cc rename to tests/ut/cpp/pre_activate/ascend/enhancer/add_memcpy_async_test.cc index 516bcb89f0..367ab25054 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/add_memcpy_async_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/enhancer/add_memcpy_async_test.cc @@ -22,7 +22,7 @@ #include "utils/utils.h" #include "kernel/kernel_build_info.h" #include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fission/add_memcpy_async.h" +#include "pre_activate/ascend/enhancer/add_memcpy_async.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc index 2616354e4c..56bf0ae4e0 100644 --- a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc +++ b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc @@ -16,6 +16,7 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" #include "session/ascend_session.h" +#include "session/anf_runtime_algorithm.h" #include "pipeline/resource.h" #include "operator/ops.h" #include "ir/manager.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc 
b/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc index 94fa04ef7a..43ddc046b7 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc @@ -17,8 +17,13 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" #include "device/kernel_info.h" -#include "pre_activate/ascend/ir_fission/topk_split.h" +#include "pre_activate/pass/convert_const_input_to_attr.h" #include "debug/anf_ir_dump.h" +#define private public +#define protected public +#include "pre_activate/ascend/ir_fission/topk_split.h" +#undef private +#undef protected namespace mindspore { namespace opt { @@ -30,6 +35,15 @@ class TestHWTopKSplit : public BackendCommon { UT::PyFuncGraphFetcher get_py_fun_; }; +class MockSupportedChecker : public SupportedChecker { + public: + MockSupportedChecker() = default; + ~MockSupportedChecker() override = default; + bool CheckSupported(const AnfNodePtr &anf_node, const kernel::KernelBuildInfoPtr &select_kernel_build_info) override { + return true; + } +}; // namespace opt + TEST_F(TestHWTopKSplit, test_topk_split) { /* * def before(input): @@ -40,19 +54,25 @@ TEST_F(TestHWTopKSplit, test_topk_split) { FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_topk_split", "before"); std::vector shp{4, 4}; auto x_abstract = std::make_shared(kFloat32, shp); - g->parameters()[0]->set_abstract(x_abstract); - auto ret = g->get_return(); - EXPECT_NE(ret, nullptr); - auto tuple_getitem = ret->input(1); - EXPECT_NE(tuple_getitem, nullptr); - auto topk = tuple_getitem->cast()->input(1); - topk->set_abstract(x_abstract); + AbstractBasePtrList args_spec_list{x_abstract}; + auto kernel_graph = GetKernelGraph(g, args_spec_list); auto optimizer = std::make_shared(); auto pm = std::make_shared(); - pm->AddPass(std::make_shared()); + pm->AddPass(std::make_shared()); + auto topk_split = std::make_shared(); + topk_split->supported_checker_ = std::make_shared(); + 
pm->AddPass(topk_split); optimizer->AddPassManager(pm); - FuncGraphPtr new_graph = optimizer->Optimize(g); + FuncGraphPtr new_graph = optimizer->Optimize(kernel_graph); + + auto ret = new_graph->get_return(); + EXPECT_NE(ret, nullptr); + auto make_tuple = ret->input(1); + EXPECT_NE(make_tuple, nullptr); + auto tuple_getitem = make_tuple->cast()->input(1); + EXPECT_NE(tuple_getitem, nullptr); + auto topk = tuple_getitem->cast()->input(1); auto topk_cnode = topk->cast(); EXPECT_EQ(topk_cnode->inputs().size(), 3); EXPECT_TRUE(topk_cnode->input(2)->isa()); diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/transdata_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc similarity index 99% rename from tests/ut/cpp/pre_activate/ascend/ir_fusion/transdata_split_test.cc rename to tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc index 19215d2f1c..b358b002a4 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/transdata_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc @@ -23,7 +23,7 @@ #define private public #define protected public #include "pre_activate/ascend/format_type/insert_trans_op.h" -#include "pre_activate/ascend/ir_fusion/transdata_split.h" +#include "pre_activate/ascend/ir_fission/transdata_split.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc new file mode 100644 index 0000000000..3d13f4a336 --- /dev/null +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h" +#include "common/backend_common_test.h" +#include "common/py_func_graph_fetcher.h" + +namespace mindspore { +namespace opt { +class TestHWFusedBatchNormFusion : public BackendCommon { + public: + TestHWFusedBatchNormFusion() : get_py_fun_("gtest_input.pre_activate.fused_batch_norm_fusion_test", true) {} + ~TestHWFusedBatchNormFusion() override = default; + + UT::PyFuncGraphFetcher get_py_fun_; +}; + +TEST_F(TestHWFusedBatchNormFusion, test_fused_batch_norm_fusion) { + FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_fused_batch_norm_fusion", "before"); + EXPECT_NE(g, nullptr); + std::vector shp_x{32, 64, 112, 112}; + auto x_abstract = std::make_shared(kFloat32, shp_x); + std::vector shp_y{64}; + auto y_abstract = std::make_shared(kFloat32, shp_y); + AbstractBasePtrList args_spec_list{x_abstract}; + for (size_t i = 0; i < 6; ++i) { + args_spec_list.push_back(y_abstract); + } + auto kg = GetKernelGraph(g, args_spec_list); + + auto optimizer = std::make_shared(); + auto pm = std::make_shared(); + pm->AddPass(std::make_shared()); + optimizer->AddPassManager(pm); + FuncGraphPtr new_graph = optimizer->Optimize(kg); + + FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_fused_batch_norm_fusion", "after"); + EXPECT_TRUE(CheckEqualGraph(g_after, new_graph)); +} +} // namespace opt +} // namespace mindspore \ No newline at end of file diff --git a/tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc 
b/tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc index d5f2fa636d..7f3b9d4c9d 100644 --- a/tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc @@ -20,7 +20,7 @@ #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "session/anf_runtime_algorithm.h" -#include "pre_activate/pass/allreduce_fusion.h" +#include "pre_activate/pass/communication_op_fusion.h" #include "pre_activate/common/optimizer.h" #include "device/kernel_info.h" #include "pre_activate/common/pass_manager.h" diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/batch_norm_grad_split.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/batch_norm_grad_split.py new file mode 100644 index 0000000000..dc783f1fbd --- /dev/null +++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/batch_norm_grad_split.py @@ -0,0 +1,61 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +from mindspore.ops import operations as P +from mindspore.ops.operations import _grad_ops as G +from mindspore.ops import Primitive + +batch_norm_grad = G.BatchNormGrad(is_training=True) +bn_training_update_grad = Primitive('BNTrainingUpdateGrad') +bn_training_reduce_grad = Primitive('BNTrainingReduceGrad') +make_tuple = Primitive('make_tuple') +tuple_getitem = Primitive('tuple_getitem') + +class FnDict: + def __init__(self): + self.fnDict = {} + + def __call__(self, fn): + self.fnDict[fn.__name__] = fn + + def __getitem__(self, name): + return self.fnDict[name] + +def test_batch_norm_grad_split(tag): + fns = FnDict() + + @fns + def before(i0, i1, i2, i3, i4, i5): + bn_grad_output = batch_norm_grad(i0, i1, i2, i3, i4, i5) + item0 = tuple_getitem(bn_grad_output, 0) + item1 = tuple_getitem(bn_grad_output, 1) + item2 = tuple_getitem(bn_grad_output, 2) + output = make_tuple(item0, item1, item2) + return output + + @fns + def after(i0, i1, i2, i3, i4, i5): + bn_update_grad_output = bn_training_update_grad(i0, i1, i3, i4) + update_item0 = tuple_getitem(bn_update_grad_output, 0) + update_item1 = tuple_getitem(bn_update_grad_output, 1) + bn_reduce_grad_output = bn_training_reduce_grad(i0, i1, update_item0, update_item1, i2, i3, i4) + output = make_tuple(bn_reduce_grad_output, update_item0, update_item1) + item0 = tuple_getitem(output, 0) + item1 = tuple_getitem(output, 1) + item2 = tuple_getitem(output, 2) + output = make_tuple(item0, item1, item2) + return make_tuple(output) + + return fns[tag] diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/confusion_softmax_grad_rule.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/confusion_softmax_grad_rule.py index 2727ef641d..cd71eb5d0b 100644 --- a/tests/ut/cpp/python_input/gtest_input/pre_activate/confusion_softmax_grad_rule.py +++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/confusion_softmax_grad_rule.py @@ -41,7 
+41,7 @@ def test_confusion_softmax_grad_rule(tag): @fns def before(input0, input1): - res = mul(input0, input1) + res = mul(input1, input0) # input axis will be convert to attr in ConstructKernelGraph step res = reduce_sum(res, axis) res = sub(input0, res) diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/derelu_fusion.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/derelu_fusion.py index 497975542b..767f85332f 100644 --- a/tests/ut/cpp/python_input/gtest_input/pre_activate/derelu_fusion.py +++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/derelu_fusion.py @@ -17,7 +17,7 @@ from mindspore.ops import Primitive relu = P.ReLU() relu_grad = Primitive('ReluGrad') -relu_v2 = Primitive('ReluV2') +relu_v2 = Primitive('ReLUV2') relu_grad_v2 = Primitive('ReluGradV2') make_tuple = Primitive('make_tuple') tuple_getitem = Primitive('tuple_getitem') diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/fused_batch_norm_fusion_test.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/fused_batch_norm_fusion_test.py index 8f4b8b476f..ca93d40443 100644 --- a/tests/ut/cpp/python_input/gtest_input/pre_activate/fused_batch_norm_fusion_test.py +++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/fused_batch_norm_fusion_test.py @@ -24,7 +24,8 @@ make_tuple = Primitive('make_tuple') tuple_getitem = Primitive('tuple_getitem') depend = Primitive('depend') BatchNorm = P.BatchNorm() -FusedBatchNorm = P.FusedBatchNorm() +BNTrainingReduce = Primitive('BNTrainingReduce') +BNTrainingUpdate = Primitive('BNTrainingUpdate') constant0 = Tensor(0.1, mstype.float32) constant1 = Tensor(0.1, mstype.float32) @@ -40,7 +41,7 @@ class FnDict: return self.fnDict[name] -def useless_test_fused_batch_norm_fusion(tag): +def test_fused_batch_norm_fusion(tag): fns = FnDict() @fns @@ -60,9 +61,11 @@ def useless_test_fused_batch_norm_fusion(tag): @fns def after(input0, input1, input2, input3, input4, var0, var1): - fused_batch_norm = FusedBatchNorm(input0, input1, 
input2, var0, var1) - outputs = make_tuple(tuple_getitem(fused_batch_norm, 0), tuple_getitem(fused_batch_norm, 3), - tuple_getitem(fused_batch_norm, 4)) + bn_training_reduce = BNTrainingReduce(input0) + bn_training_update = BNTrainingUpdate(input0, tuple_getitem(bn_training_reduce, 0), + tuple_getitem(bn_training_reduce, 1), input1, input2, var0, var1) + outputs = make_tuple(tuple_getitem(bn_training_update, 0), tuple_getitem(bn_training_update, 3), + tuple_getitem(bn_training_update, 4)) output = tuple_getitem(outputs, 0) return make_tuple(output) diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/topk_split_test.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/topk_split_test.py index 4cdbfa084e..c173419897 100644 --- a/tests/ut/cpp/python_input/gtest_input/pre_activate/topk_split_test.py +++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/topk_split_test.py @@ -35,7 +35,7 @@ def test_topk_split(tag): @fns def before(input): - topk = TopK(input) + topk = TopK(input, 2) output = tuple_getitem(topk, 0) return output diff --git a/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc b/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc index 73de5071cd..43d0dd4b3f 100644 --- a/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc +++ b/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc @@ -25,9 +25,7 @@ StrategyCheckpoint& StrategyCheckpoint::GetInstance() { return instance; } -bool StrategyCheckpoint::CheckPointExit() const { return false; } - -Status StrategyCheckpoint::RemoveCheckPoint() const { return SUCCESS; } +bool StrategyCheckpoint::CheckPointExit(const std::string path) const { return false; } Status StrategyCheckpoint::Load(StrategyMap* strategy_map) { return SUCCESS; } diff --git a/tests/ut/cpp/transform/convert_test.cc b/tests/ut/cpp/transform/convert_test.cc index 277aaa15c3..7d18663f38 100644 --- 
a/tests/ut/cpp/transform/convert_test.cc +++ b/tests/ut/cpp/transform/convert_test.cc @@ -147,13 +147,13 @@ TEST_F(TestConvert, TestReluOps) { } TEST_F(TestConvert, TestConvertBatchNorm) { - PrimitivePtr fused_batch_norm = prim::kPrimFusedBatchNorm; - fused_batch_norm->AddAttr("epsilon", MakeValue(0.001f)); - fused_batch_norm->AddAttr("momentum", MakeValue(0.1f)); + PrimitivePtr batch_norm = prim::kPrimBatchNorm; + batch_norm->AddAttr("epsilon", MakeValue(0.001f)); + batch_norm->AddAttr("momentum", MakeValue(0.1f)); FuncGraphPtr anf_graph = std::make_shared(); std::vector inputs; - inputs.push_back(NewValueNode(fused_batch_norm)); + inputs.push_back(NewValueNode(batch_norm)); for (unsigned int i = 0; i < 5; i++) { inputs.push_back(anf_graph->add_parameter()); } diff --git a/tests/ut/data/dataset/testRandomData/datasetSchema.json b/tests/ut/data/dataset/testRandomData/datasetSchema.json new file mode 100644 index 0000000000..cea11c52b3 --- /dev/null +++ b/tests/ut/data/dataset/testRandomData/datasetSchema.json @@ -0,0 +1,14 @@ +{ + "columns": { + "image": { + "type": "uint8", + "rank": 3, + "shape": [1920,1080,3] + }, + "label": { + "type": "int32", + "rank": 1, + "shape": [1] + } + } +} diff --git a/tests/ut/data/dataset/testRandomData/datasetSchema2.json b/tests/ut/data/dataset/testRandomData/datasetSchema2.json new file mode 100644 index 0000000000..541ad7c071 --- /dev/null +++ b/tests/ut/data/dataset/testRandomData/datasetSchema2.json @@ -0,0 +1,14 @@ +{ + "columns": { + "image": { + "type": "uint8", + "rank": 2, + "shape": [28,28] + }, + "label": { + "type": "uint8", + "rank": 1, + "shape": [1] + } + } +} diff --git a/tests/ut/data/dataset/testTextFileDataset/1.txt b/tests/ut/data/dataset/testTextFileDataset/1.txt index 9d911eacc0..a5ffab4fdc 100644 --- a/tests/ut/data/dataset/testTextFileDataset/1.txt +++ b/tests/ut/data/dataset/testTextFileDataset/1.txt @@ -1,3 +1,4 @@ This is a text file. + Be happy every day. Good luck to everyone. 
diff --git a/tests/ut/python/communication/test_comm.py b/tests/ut/python/communication/test_comm.py index 38fd7199fd..31beb1fe5a 100644 --- a/tests/ut/python/communication/test_comm.py +++ b/tests/ut/python/communication/test_comm.py @@ -14,7 +14,7 @@ """ test Communicate """ import numpy as np -from mindspore.ops.operations.comm_ops import AllReduce, AllGather, _AlltoAll, ReduceOp +from mindspore.ops.operations.comm_ops import AllReduce, AllGather, _AlltoAll, ReduceOp, ReduceScatter from mindspore.ops.operations.comm_ops import Broadcast from mindspore.communication.management import HCCL_WORLD_COMM_GROUP, NCCL_WORLD_COMM_GROUP, GlobalComm, init from mindspore.communication._comm_helper import Backend @@ -55,7 +55,7 @@ class BroadCastNet(nn.Cell): self.broadcast = Broadcast(0) def construct(self, x): - x = self.broadcast((x)) + x, = self.broadcast((x,)) x = self.dense(x) return x @@ -78,6 +78,19 @@ class AllGatherNet(nn.Cell): x = self.allgather(x) return self.relu(x) +class ReduceScatterNet(nn.Cell): + """ReduceScatterNet definition""" + def __init__(self, input_channel, out_channel, op): + super(ReduceScatterNet, self).__init__() + self.dense = Dense(input_channel, out_channel) + self.reducescatter = ReduceScatter(op) + self.relu = ReLU() + + def construct(self, x): + x = self.dense(x) + x = self.reducescatter(x) + return self.relu(x) + class AlltoAllNet(nn.Cell): """AlltoAllNet definition""" def __init__(self, input_channel, out_channel): @@ -126,6 +139,25 @@ def test_allgather(): network = TrainOneStepCell(network, optimizer) _executor.compile(network, input_tensor, label_tensor) +def run_reducescatter(op): + """run_reducescatter""" + context.set_context(mode=context.GRAPH_MODE) + input_tensor = Tensor(np.array([[1.2, 2.1], [2.2, 3.2]], dtype=np.float32)) + label_tensor = Tensor(np.array([[1.2], [2.2]], dtype=np.float32)) + network = ReduceScatterNet(2, 1, op) + loss_fn = nn.SoftmaxCrossEntropyWithLogits() + optimizer = Momentum(filter(lambda x: 
x.requires_grad, network.get_parameters()), + learning_rate=0.1, + momentum=0.9) + network = WithLossCell(network, loss_fn) + network = TrainOneStepCell(network, optimizer) + _executor.compile(network, input_tensor, label_tensor) + +def test_reducescatter(): + """test_reducescatter""" + context.set_context(mode=context.GRAPH_MODE) + run_reducescatter(ReduceOp.SUM) + def test_broadcast(): """test_broadcast""" context.set_context(mode=context.GRAPH_MODE) diff --git a/tests/ut/python/dataset/test_datasets_celeba.py b/tests/ut/python/dataset/test_datasets_celeba.py index 6b8859f433..11c5fcb67a 100644 --- a/tests/ut/python/dataset/test_datasets_celeba.py +++ b/tests/ut/python/dataset/test_datasets_celeba.py @@ -20,7 +20,7 @@ DATA_DIR = "../data/dataset/testCelebAData/" def test_celeba_dataset_label(): - data = ds.CelebADataset(DATA_DIR, decode=True) + data = ds.CelebADataset(DATA_DIR, decode=True, shuffle=False) expect_labels = [ [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1], diff --git a/tests/ut/python/dataset/test_generator.py b/tests/ut/python/dataset/test_generator.py index 4daf952eba..529788fcaa 100644 --- a/tests/ut/python/dataset/test_generator.py +++ b/tests/ut/python/dataset/test_generator.py @@ -580,6 +580,41 @@ def test_num_samples_underflow(): count = count + 1 assert count == 64 + +def type_tester_with_type_check_2c_schema(t, c): + logger.info("Test with Type {}".format(t.__name__)) + + schema = ds.Schema() + schema.add_column("data0", c[0]) + schema.add_column("data1", c[1]) + + # apply dataset operations + data1 = ds.GeneratorDataset((lambda: generator_with_type_2c(t)), schema=schema) + + data1 = data1.batch(4) + + i = 0 + for item in data1.create_dict_iterator(): # each data is a dictionary + golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) + assert np.array_equal(item["data0"], golden) + i = i + 4 + + +def test_schema(): + """ + Test 2 column Generator on different 
data type with type check with schema input + """ + logger.info("Test 2 column Generator on all data types with type check") + + np_types = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, np.float32, + np.float64] + de_types = [mstype.int8, mstype.int16, mstype.int32, mstype.int64, mstype.uint8, mstype.uint16, mstype.uint32, + mstype.uint64, mstype.float32, mstype.float64] + + for i in range(len(np_types)): + type_tester_with_type_check_2c_schema(np_types[i], [de_types[i], de_types[i]]) + + def manual_test_keyborad_interrupt(): """ Test keyborad_interrupt @@ -626,5 +661,6 @@ if __name__ == "__main__": test_sequential_sampler() test_distributed_sampler() test_random_sampler() + test_schema() diff --git a/tests/ut/python/dataset/test_iterator.py b/tests/ut/python/dataset/test_iterator.py index 7c69adf561..58beecbe16 100644 --- a/tests/ut/python/dataset/test_iterator.py +++ b/tests/ut/python/dataset/test_iterator.py @@ -14,7 +14,7 @@ # ============================================================================== import numpy as np import pytest - +import copy import mindspore.dataset as ds from mindspore.dataset.engine.iterators import ITERATORS_LIST, _cleanup @@ -81,3 +81,33 @@ def test_iterator_weak_ref(): assert sum(itr() is not None for itr in ITERATORS_LIST) == 2 _cleanup() + + +class MyDict(dict): + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + self[key] = value + + def __call__(self, t): + return t + + +def test_tree_copy(): + # Testing copying the tree with a pyfunc that cannot be pickled + + data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=COLUMNS) + data1 = data.map(operations=[MyDict()]) + + itr = data1.create_tuple_iterator() + + assert id(data1) != id(itr.dataset) + assert id(data) != id(itr.dataset.input[0]) + assert id(data1.operations[0]) == id(itr.dataset.operations[0]) + + itr.release() + + +if __name__ == '__main__': + test_tree_copy() \ No newline at end of 
file diff --git a/tests/ut/python/dataset/test_minddataset_exception.py b/tests/ut/python/dataset/test_minddataset_exception.py index 70add46b68..e1d54fa7c8 100644 --- a/tests/ut/python/dataset/test_minddataset_exception.py +++ b/tests/ut/python/dataset/test_minddataset_exception.py @@ -82,3 +82,18 @@ def test_minddataset_lack_db(): num_iter += 1 assert num_iter == 0 os.remove(CV_FILE_NAME) + + +def test_cv_minddataset_pk_sample_error_class_column(): + create_cv_mindrecord(1) + columns_list = ["data", "file_name", "label"] + num_readers = 4 + sampler = ds.PKSampler(5, None, True, 'no_exsit_column') + with pytest.raises(Exception, match="MindRecordOp launch failed"): + data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, sampler=sampler) + num_iter = 0 + for item in data_set.create_dict_iterator(): + num_iter += 1 + os.remove(CV_FILE_NAME) + os.remove("{}.db".format(CV_FILE_NAME)) + diff --git a/tests/ut/python/dataset/test_random_dataset.py b/tests/ut/python/dataset/test_random_dataset.py new file mode 100644 index 0000000000..16c43ea971 --- /dev/null +++ b/tests/ut/python/dataset/test_random_dataset.py @@ -0,0 +1,71 @@ +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +import mindspore.common.dtype as mstype +import mindspore.dataset as ds +from mindspore import log as logger +from pathlib import Path + +# just a basic test with parallel random data op +def test_randomdataset_basic1(): + logger.info("Test randomdataset basic") + + schema = ds.Schema() + schema.add_column('image', de_type=mstype.uint8, shape=[2]) + schema.add_column('label', de_type=mstype.uint8, shape=[1]) + + # apply dataset operations + ds1 = ds.RandomDataset(schema=schema, num_samples=50, num_parallel_workers=4) + ds1 = ds1.repeat(4) + + num_iter = 0 + for data in ds1.create_dict_iterator(): # each data is a dictionary + # in this example, each dictionary has keys "image" and "label" + logger.info("{} image: {}".format(num_iter, data["image"])) + logger.info("{} label: {}".format(num_iter, data["label"])) + num_iter += 1 + + logger.info("Number of data in ds1: ", num_iter) + assert(num_iter == 200) + +# Another simple test +def test_randomdataset_basic2(): + logger.info("Test randomdataset basic 2") + + schema = ds.Schema() + schema.add_column('image', de_type=mstype.uint8, shape=[640,480,3]) # 921600 bytes (a bit less than 1 MB per image) + schema.add_column('label', de_type=mstype.uint8, shape=[1]) + + # Make up about 10 samples + ds1 = ds.RandomDataset(schema=schema, num_samples=10, num_parallel_workers=1) + + # cache size allows for about 4 images since each image just a bit less than 1MB, after that we will have to spill + ds1 = ds1.repeat(4) + + num_iter = 0 + for data in ds1.create_dict_iterator(): # each data is a dictionary + # in this example, each dictionary has keys "image" and "label" + #logger.info(data["image"]) + logger.info("printing the label: {}".format(data["label"])) + num_iter += 1 + + logger.info("Number of data in ds1: ", num_iter) + assert(num_iter == 40) + + +if __name__ == '__main__': + test_randomdataset_basic1() + test_randomdataset_basic2() + 
logger.info('test_randomdataset_basic Ended.\n') + diff --git a/tests/ut/python/dataset/test_skip.py b/tests/ut/python/dataset/test_skip.py index 59893f6ded..ccbf40a55b 100644 --- a/tests/ut/python/dataset/test_skip.py +++ b/tests/ut/python/dataset/test_skip.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - import numpy as np import mindspore.dataset.transforms.vision.c_transforms as vision @@ -51,7 +50,7 @@ def generator_md(): def test_generator_skip(): - ds1 = ds.GeneratorDataset(generator_md, ["data"]) + ds1 = ds.GeneratorDataset(generator_md, ["data"], num_parallel_workers=4) # Here ds1 should be [3, 4] ds1 = ds1.skip(3) @@ -60,6 +59,7 @@ def test_generator_skip(): for data in ds1: buf.append(data[0][0]) assert len(buf) == 2 + assert buf == [3, 4] def test_skip_1(): @@ -72,6 +72,7 @@ def test_skip_1(): for data in ds1: buf.append(data[0][0]) assert len(buf) == 0 + assert buf == [] def test_skip_2(): @@ -84,6 +85,7 @@ def test_skip_2(): for data in ds1: buf.append(data[0][0]) assert len(buf) == 5 + assert buf == [0, 1, 2, 3, 4] def test_skip_repeat_1(): @@ -99,6 +101,7 @@ def test_skip_repeat_1(): for data in ds1: buf.append(data[0][0]) assert len(buf) == 7 + assert buf == [3, 4, 0, 1, 2, 3, 4] def test_skip_repeat_2(): @@ -114,6 +117,7 @@ def test_skip_repeat_2(): for data in ds1: buf.append(data[0][0]) assert len(buf) == 4 + assert buf == [3, 4, 3, 4] def test_skip_repeat_3(): @@ -132,6 +136,62 @@ def test_skip_repeat_3(): for data in ds1: buf.append(data[0][0]) assert len(buf) == 6 + assert buf == [3, 4, 3, 4, 3, 4] + +def test_skip_take_1(): + ds1 = ds.GeneratorDataset(generator_md, ["data"]) + + # Here ds1 should be [0, 1, 2, 3] + ds1 = ds1.take(4) + + # Here ds1 should be [2, 3] + ds1 = ds1.skip(2) + + buf = [] + for data in ds1: + buf.append(data[0][0]) + assert len(buf) == 2 + assert buf == [2, 3] + 
+def test_skip_take_2(): + ds1 = ds.GeneratorDataset(generator_md, ["data"]) + + # Here ds1 should be [2, 3, 4] + ds1 = ds1.skip(2) + + # Here ds1 should be [2, 3] + ds1 = ds1.take(2) + + buf = [] + for data in ds1: + buf.append(data[0][0]) + assert len(buf) == 2 + assert buf == [2, 3] + + +def generator_1d(): + for i in range(64): + yield (np.array([i]), ) + +def test_skip_filter_1(): + dataset = ds.GeneratorDataset(generator_1d, ['data']) + dataset = dataset.skip(5) + dataset = dataset.filter(predicate=lambda data: data < 11, num_parallel_workers=4) + + buf = [] + for item in dataset: + buf.append(item[0][0]) + assert buf == [5, 6, 7, 8, 9, 10] + +def test_skip_filter_2(): + dataset = ds.GeneratorDataset(generator_1d, ['data']) + dataset = dataset.filter(predicate=lambda data: data < 11, num_parallel_workers=4) + dataset = dataset.skip(5) + + buf = [] + for item in dataset: + buf.append(item[0][0]) + assert buf == [5, 6, 7, 8, 9, 10] if __name__ == "__main__": @@ -142,3 +202,7 @@ if __name__ == "__main__": test_skip_repeat_1() test_skip_repeat_2() test_skip_repeat_3() + test_skip_take_1() + test_skip_take_2() + test_skip_filter_1() + test_skip_filter_2() diff --git a/tests/ut/python/dataset/test_sync_wait.py b/tests/ut/python/dataset/test_sync_wait.py index 277499d9ae..7e9fade39d 100644 --- a/tests/ut/python/dataset/test_sync_wait.py +++ b/tests/ut/python/dataset/test_sync_wait.py @@ -107,6 +107,7 @@ def test_two_sync(): if count % 2 == 0: dataset.sync_update(condition_name="every 2 batches") + def test_sync_epoch(): """ Test sync wait with epochs: test sync with epochs in dataset pipeline @@ -130,6 +131,34 @@ def test_sync_epoch(): dataset.sync_update(condition_name="policy", data=data) +def test_multiple_iterators(): + """ + Test sync wait with multiple iterators: will start multiple + """ + logger.info("test_sync_epoch") + batch_size = 30 + dataset = ds.GeneratorDataset(gen, column_names=["input"]) + + aug = Augment(0) + dataset = 
dataset.sync_wait(condition_name="policy", callback=aug.update) + dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) + dataset = dataset.batch(batch_size, drop_remainder=True) + # 2nd dataset + dataset2 = ds.GeneratorDataset(gen, column_names=["input"]) + + aug = Augment(0) + dataset2 = dataset2.sync_wait(condition_name="policy", callback=aug.update) + dataset2 = dataset2.map(input_columns=["input"], operations=[aug.preprocess]) + dataset2 = dataset2.batch(batch_size, drop_remainder=True) + + for item1, item2 in zip(dataset.create_dict_iterator(), dataset2.create_dict_iterator()): + assert (item1["input"][0] == item2["input"][0]) + data1 = {"loss": item1["input"][0]} + data2 = {"loss": item2["input"][0]} + dataset.sync_update(condition_name="policy", data=data1) + dataset2.sync_update(condition_name="policy", data=data2) + + def test_sync_exception_01(): """ Test sync: with shuffle in sync mode @@ -179,4 +208,5 @@ if __name__ == "__main__": test_two_sync() test_sync_exception_01() test_sync_exception_02() - test_sync_epoch() \ No newline at end of file + test_sync_epoch() + test_multiple_iterators() diff --git a/tests/ut/python/dataset/test_take.py b/tests/ut/python/dataset/test_take.py index ed71f67e26..64efc7a785 100644 --- a/tests/ut/python/dataset/test_take.py +++ b/tests/ut/python/dataset/test_take.py @@ -30,6 +30,12 @@ def generator_10(): yield np.array([i]), +def filter_func_ge(data): + if data > 3: + return False + return True + + def test_take_01(): """ Test take: origin there are 3 row, and take 1 row, in this case: will not meet eoe and eof @@ -297,6 +303,44 @@ def test_take_16(): assert sum([1 for _ in data1]) == 5 +def test_take_17(): + """ + Test take: take first, then do fiter operation + """ + logger.info("test_take_17") + data1 = ds.GeneratorDataset(generator_10, ["data"]) + + data1 = data1.take(8) + data1 = data1.filter(predicate=filter_func_ge, num_parallel_workers=4) + + # Here i refers to index, d refers to data element 
+ for i, d in enumerate(data1): + assert i == d[0][0] + + assert sum([1 for _ in data1]) == 4 + + +def test_take_18(): + """ + Test take: take first, then do fiter, skip, batch and repeat operation + """ + logger.info("test_take_18") + data1 = ds.GeneratorDataset(generator_10, ["data"]) + + data1 = data1.take(8) + data1 = data1.filter(predicate=filter_func_ge, num_parallel_workers=4) + data1 = data1.skip(2) + + data1 = data1.batch(2) + data1 = data1.repeat(2) + + # Here i refers to index, d refers to data element + for i, d in enumerate(data1): + assert 2 == d[0][0] + + assert sum([1 for _ in data1]) == 2 + + if __name__ == '__main__': test_take_01() test_take_02() @@ -314,4 +358,6 @@ if __name__ == '__main__': test_take_14() test_take_15() test_take_16() + test_take_17() + test_take_18() logger.info('== test take operation finished ==') \ No newline at end of file diff --git a/tests/ut/python/dataset/test_uniform_augment.py b/tests/ut/python/dataset/test_uniform_augment.py index ce0490336e..98c22fb3cb 100644 --- a/tests/ut/python/dataset/test_uniform_augment.py +++ b/tests/ut/python/dataset/test_uniform_augment.py @@ -18,6 +18,7 @@ import matplotlib.pyplot as plt from mindspore import log as logger import mindspore.dataset.engine as de import mindspore.dataset.transforms.vision.py_transforms as F +import mindspore.dataset.transforms.vision.c_transforms as C DATA_DIR = "../data/dataset/testImageNetData/train/" @@ -101,7 +102,129 @@ def test_uniform_augment(plot=False, num_ops=2): if plot: visualize(images_original, images_ua) +def test_cpp_uniform_augment(plot=False, num_ops=2): + """ + Test UniformAugment + """ + logger.info("Test CPP UniformAugment") + + # Original Images + ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) + + transforms_original = [C.Decode(), C.Resize(size=[224, 224]), + F.ToTensor()] + + ds_original = ds.map(input_columns="image", + operations=transforms_original) + + ds_original = ds_original.batch(512) + + for idx, 
(image,label) in enumerate(ds_original): + if idx == 0: + images_original = np.transpose(image, (0, 2, 3, 1)) + else: + images_original = np.append(images_original, + np.transpose(image, (0, 2, 3, 1)), + axis=0) + + + # UniformAugment Images + ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) + transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]), + C.RandomHorizontalFlip(), + C.RandomVerticalFlip(), + C.RandomColorAdjust(), + C.RandomRotation(degrees=45)] + + uni_aug = C.UniformAugment(operations=transforms_ua, num_ops=num_ops) + + transforms_all = [C.Decode(), C.Resize(size=[224, 224]), + uni_aug, + F.ToTensor()] + + ds_ua = ds.map(input_columns="image", + operations=transforms_all, num_parallel_workers=1) + + ds_ua = ds_ua.batch(512) + + for idx, (image,label) in enumerate(ds_ua): + if idx == 0: + images_ua = np.transpose(image, (0, 2, 3, 1)) + else: + images_ua = np.append(images_ua, + np.transpose(image, (0, 2, 3, 1)), + axis=0) + if plot: + visualize(images_original, images_ua) + + num_samples = images_original.shape[0] + mse = np.zeros(num_samples) + for i in range(num_samples): + mse[i] = np.mean((images_ua[i] - images_original[i]) ** 2) + logger.info("MSE= {}".format(str(np.mean(mse)))) + +def test_cpp_uniform_augment_exception_pyops(num_ops=2): + """ + Test UniformAugment invalid op in operations + """ + logger.info("Test CPP UniformAugment invalid OP exception") + + transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]), + C.RandomHorizontalFlip(), + C.RandomVerticalFlip(), + C.RandomColorAdjust(), + C.RandomRotation(degrees=45), + F.Invert()] + + try: + uni_aug = C.UniformAugment(operations=transforms_ua, num_ops=num_ops) + + except BaseException as e: + logger.info("Got an exception in DE: {}".format(str(e))) + assert "operations" in str(e) + +def test_cpp_uniform_augment_exception_large_numops(num_ops=6): + """ + Test UniformAugment invalid large number of ops + """ + logger.info("Test CPP 
UniformAugment invalid large num_ops exception") + + transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]), + C.RandomHorizontalFlip(), + C.RandomVerticalFlip(), + C.RandomColorAdjust(), + C.RandomRotation(degrees=45)] + + try: + uni_aug = C.UniformAugment(operations=transforms_ua, num_ops=num_ops) + + except BaseException as e: + logger.info("Got an exception in DE: {}".format(str(e))) + assert "num_ops" in str(e) + +def test_cpp_uniform_augment_exception_nonpositive_numops(num_ops=0): + """ + Test UniformAugment invalid non-positive number of ops + """ + logger.info("Test CPP UniformAugment invalid non-positive num_ops exception") + + transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]), + C.RandomHorizontalFlip(), + C.RandomVerticalFlip(), + C.RandomColorAdjust(), + C.RandomRotation(degrees=45)] + + try: + uni_aug = C.UniformAugment(operations=transforms_ua, num_ops=num_ops) + + except BaseException as e: + logger.info("Got an exception in DE: {}".format(str(e))) + assert "num_ops" in str(e) if __name__ == "__main__": test_uniform_augment(num_ops=1) - + test_cpp_uniform_augment(num_ops=1) + test_cpp_uniform_augment_exception_pyops(num_ops=1) + test_cpp_uniform_augment_exception_large_numops(num_ops=6) + test_cpp_uniform_augment_exception_nonpositive_numops(num_ops=0) + diff --git a/tests/ut/python/model/test_bert_cell.py b/tests/ut/python/model/test_bert_cell.py index fdaaac397b..2cb642c75f 100644 --- a/tests/ut/python/model/test_bert_cell.py +++ b/tests/ut/python/model/test_bert_cell.py @@ -317,7 +317,7 @@ test_case_cell_ops = [ initializer_range=0.02, dropout_prob=0.1), 'desc_inputs': [[1, 768], [1, 768]], - 'desc_bprop': [[1, 128, 768]]}), # maybe not right + 'desc_bprop': [[1, 768]]}), ('BertTransformer_2', { 'block': bert_trans(), 'desc_inputs': [[1, 128, 768], [1, 128, 128]]}), @@ -331,7 +331,7 @@ test_case_cell_ops = [ 'desc_inputs': [Tensor(np.random.rand(128).astype(np.int32)), 
Tensor(np.random.rand(128).astype(np.int32)), [128]], 'desc_bprop': [[1, 128, 768], [1, 128, 768], [1, 128, 768]], - 'num_output': 3}), # maybe not right + 'num_output': 3}), ('BertModel_1', { 'block': BertModel(config=BertConfig(batch_size=1, @@ -342,7 +342,7 @@ test_case_cell_ops = [ 'desc_inputs': [Tensor(np.random.rand(128).astype(np.int32)), Tensor(np.random.rand(128).astype(np.int32)), [128]], 'desc_bprop': [[1, 128, 768], [1, 128, 768], [1, 128, 768]], - 'num_output': 3}), # maybe not right + 'num_output': 3}), ('BertModel_2', { 'block': BertModel(config=BertConfig(batch_size=1, @@ -354,7 +354,7 @@ test_case_cell_ops = [ 'desc_inputs': [Tensor(np.random.rand(128).astype(np.int32)), Tensor(np.random.rand(128).astype(np.int32)), [128]], 'desc_bprop': [[1, 128, 768], [1, 128, 768], [1, 128, 768]], - 'num_output': 3}), # maybe not right + 'num_output': 3}), ('BertPretrainingLoss', { 'block': BertPretrainingLoss(config=BertConfig(batch_size=1)), diff --git a/tests/ut/python/model/test_mix_precision.py b/tests/ut/python/model/test_mix_precision.py index 0a8b185e8c..0c762f42b9 100644 --- a/tests/ut/python/model/test_mix_precision.py +++ b/tests/ut/python/model/test_mix_precision.py @@ -175,7 +175,7 @@ class GetParamGrad(nn.Cell): def test_grad_conv_prelu(): shapes = [[64, 64, 112, 112]] - outshape = [[64, 64, 56, 56]] + outshape = [[64, 64, 112, 112]] net = IRBlockZ(inplanes=64, planes=64).add_flags_recursive(fp16=True) inputs = [convert(shp, dtype=np.float16) for shp in shapes] sens_shape = outshape[0] diff --git a/tests/ut/python/nn/optim/test_rmsprop.py b/tests/ut/python/nn/optim/test_rmsprop.py new file mode 100644 index 0000000000..647f1e8d45 --- /dev/null +++ b/tests/ut/python/nn/optim/test_rmsprop.py @@ -0,0 +1,62 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" test adam """ +import numpy as np +import pytest +import mindspore.nn as nn +from mindspore.common.api import _executor +from mindspore import Tensor, Parameter +from mindspore.nn import TrainOneStepCell, WithLossCell +from mindspore.ops import operations as P +from mindspore.nn.optim import RMSProp + + +class Net(nn.Cell): + """ Net definition """ + def __init__(self): + super(Net, self).__init__() + self.weight = Parameter(Tensor(np.ones([64, 10]).astype(np.float32)), name="weight") + self.bias = Parameter(Tensor(np.ones([10]).astype((np.float32))), name="bias") + self.matmul = P.MatMul() + self.biasAdd = P.BiasAdd() + + def construct(self, x): + x = self.biasAdd(self.matmul(x, self.weight), self.bias) + return x + + +def test_rmsprop_compile(): + """ test_adamw_compile """ + inputs = Tensor(np.ones([1, 64]).astype(np.float32)) + label = Tensor(np.zeros([1, 10]).astype(np.float32)) + net = Net() + net.set_train() + + loss = nn.SoftmaxCrossEntropyWithLogits() + optimizer = RMSProp(net.trainable_params(), learning_rate=0.1) + + net_with_loss = WithLossCell(net, loss) + train_network = TrainOneStepCell(net_with_loss, optimizer) + _executor.compile(train_network, inputs, label) + + +def test_rmsprop_e(): + net = Net() + with pytest.raises(ValueError): + RMSProp(net.get_parameters(), momentum=-0.1, learning_rate=0.1) + + with pytest.raises(TypeError): + RMSProp(net.get_parameters(), momentum=1, learning_rate=0.1) + diff --git a/tests/ut/python/nn/test_image_gradients.py 
b/tests/ut/python/nn/test_image_gradients.py index a2b9495443..e268ceb9d9 100644 --- a/tests/ut/python/nn/test_image_gradients.py +++ b/tests/ut/python/nn/test_image_gradients.py @@ -14,6 +14,7 @@ # ============================================================================ """ test image gradients """ import numpy as np +import pytest import mindspore.nn as nn import mindspore.context as context import mindspore.common.dtype as mstype @@ -47,3 +48,10 @@ def test_compile_multi_channel(): [[[10,20],[30,40]], [[50,60],[70,80]]]]), dtype=dtype) net = Net() _executor.compile(net, image) + +def test_invalid_5d_input(): + dtype = mstype.float32 + image = Tensor(np.random.random([4, 1, 16, 16, 1]), dtype=dtype) + net = Net() + with pytest.raises(ValueError): + _executor.compile(net, image) \ No newline at end of file diff --git a/tests/ut/python/nn/test_psnr.py b/tests/ut/python/nn/test_psnr.py index 32e7b570aa..c07d246810 100644 --- a/tests/ut/python/nn/test_psnr.py +++ b/tests/ut/python/nn/test_psnr.py @@ -18,10 +18,12 @@ test psnr import numpy as np import pytest import mindspore.nn as nn +from mindspore.common import dtype as mstype from mindspore.common.api import _executor from mindspore import Tensor + class PSNRNet(nn.Cell): def __init__(self, max_val=1.0): super(PSNRNet, self).__init__() @@ -59,3 +61,38 @@ def test_psnr_max_val_zero(): max_val = 0 with pytest.raises(ValueError): net = PSNRNet(max_val) + +def test_psnr_different_shape(): + shape_1 = (8, 3, 16, 16) + shape_2 = (8, 3, 8, 8) + img1 = Tensor(np.random.random(shape_1)) + img2 = Tensor(np.random.random(shape_2)) + net = PSNRNet() + with pytest.raises(ValueError): + _executor.compile(net, img1, img2) + +def test_psnr_different_dtype(): + dtype_1 = mstype.float32 + dtype_2 = mstype.float16 + img1 = Tensor(np.random.random((8, 3, 16, 16)), dtype=dtype_1) + img2 = Tensor(np.random.random((8, 3, 16, 16)), dtype=dtype_2) + net = PSNRNet() + with pytest.raises(TypeError): + _executor.compile(net, img1, img2) 
+ +def test_psnr_invalid_5d_input(): + shape_1 = (8, 3, 16, 16) + shape_2 = (8, 3, 8, 8) + invalid_shape = (8, 3, 16, 16, 1) + img1 = Tensor(np.random.random(shape_1)) + invalid_img1 = Tensor(np.random.random(invalid_shape)) + img2 = Tensor(np.random.random(shape_2)) + invalid_img2 = Tensor(np.random.random(invalid_shape)) + + net = PSNRNet() + with pytest.raises(ValueError): + _executor.compile(net, invalid_img1, img2) + with pytest.raises(ValueError): + _executor.compile(net, img1, invalid_img2) + with pytest.raises(ValueError): + _executor.compile(net, invalid_img1, invalid_img2) diff --git a/tests/ut/python/nn/test_ssim.py b/tests/ut/python/nn/test_ssim.py index 77d065b100..7389c2dbda 100644 --- a/tests/ut/python/nn/test_ssim.py +++ b/tests/ut/python/nn/test_ssim.py @@ -18,6 +18,7 @@ test ssim import numpy as np import pytest import mindspore.nn as nn +import mindspore.common.dtype as mstype from mindspore.common.api import _executor from mindspore import Tensor @@ -93,3 +94,38 @@ def test_ssim_k1_k2_wrong_value(): net = SSIMNet(k2=0.0) with pytest.raises(ValueError): net = SSIMNet(k2=-1.0) + +def test_ssim_different_shape(): + shape_1 = (8, 3, 16, 16) + shape_2 = (8, 3, 8, 8) + img1 = Tensor(np.random.random(shape_1)) + img2 = Tensor(np.random.random(shape_2)) + net = SSIMNet() + with pytest.raises(ValueError): + _executor.compile(net, img1, img2) + +def test_ssim_different_dtype(): + dtype_1 = mstype.float32 + dtype_2 = mstype.float16 + img1 = Tensor(np.random.random((8, 3, 16, 16)), dtype=dtype_1) + img2 = Tensor(np.random.random((8, 3, 16, 16)), dtype=dtype_2) + net = SSIMNet() + with pytest.raises(TypeError): + _executor.compile(net, img1, img2) + +def test_ssim_invalid_5d_input(): + shape_1 = (8, 3, 16, 16) + shape_2 = (8, 3, 8, 8) + invalid_shape = (8, 3, 16, 16, 1) + img1 = Tensor(np.random.random(shape_1)) + invalid_img1 = Tensor(np.random.random(invalid_shape)) + img2 = Tensor(np.random.random(shape_2)) + invalid_img2 = 
Tensor(np.random.random(invalid_shape)) + + net = SSIMNet() + with pytest.raises(ValueError): + _executor.compile(net, invalid_img1, img2) + with pytest.raises(ValueError): + _executor.compile(net, img1, invalid_img2) + with pytest.raises(ValueError): + _executor.compile(net, invalid_img1, invalid_img2) diff --git a/tests/ut/python/ops/test_array_ops.py b/tests/ut/python/ops/test_array_ops.py index 01e7e32d50..61b8d48fea 100644 --- a/tests/ut/python/ops/test_array_ops.py +++ b/tests/ut/python/ops/test_array_ops.py @@ -14,16 +14,15 @@ # ============================================================================ """ test array ops """ import functools +import pytest import numpy as np import mindspore as ms from mindspore import Tensor from mindspore.nn import Cell from mindspore.ops import operations as P -from mindspore.ops import functional as F -from mindspore.ops import composite as C from mindspore.ops import prim_attr_register +from mindspore.common import dtype as mstype from mindspore.ops.primitive import Primitive, PrimitiveWithInfer -from mindspore.common.dtype import get_py_obj_dtype from mindspore._c_expression import signature_dtype as sig_dtype from mindspore._c_expression import signature_rw as sig_rw from mindspore._c_expression import signature_kind as sig_kind @@ -96,6 +95,17 @@ def test_select(): expect = np.array([[1, 8, 9], [10, 5, 6]]) assert np.all(output.asnumpy() == expect) +def test_argmin_invalid_output_type(): + P.Argmin(-1, mstype.int64) + P.Argmin(-1, mstype.int32) + with pytest.raises(TypeError): + P.Argmin(-1, mstype.float32) + with pytest.raises(TypeError): + P.Argmin(-1, mstype.float64) + with pytest.raises(TypeError): + P.Argmin(-1, mstype.uint8) + with pytest.raises(TypeError): + P.Argmin(-1, mstype.bool_) class CustomOP(PrimitiveWithInfer): __mindspore_signature__ = (sig_dtype.T, sig_dtype.T, sig_dtype.T1, diff --git a/tests/ut/python/ops/test_array_ops_check.py b/tests/ut/python/ops/test_array_ops_check.py new file mode 100755 
index 0000000000..f7b77bbb5b --- /dev/null +++ b/tests/ut/python/ops/test_array_ops_check.py @@ -0,0 +1,159 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" test ops """ +import functools +import numpy as np +from mindspore import ops +from mindspore.ops import functional as F +from mindspore.ops import operations as P +from mindspore.ops.operations import _grad_ops as G +import mindspore.ops.composite as C +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common import dtype as mstype +from mindspore.common.parameter import Parameter +from ..ut_filter import non_graph_engine +from mindspore.common.api import _executor + +from ....mindspore_test_framework.mindspore_test import mindspore_test +from ....mindspore_test_framework.pipeline.forward.compile_forward\ + import (pipeline_for_compile_forward_ge_graph_for_case_by_case_config, + pipeline_for_compile_forward_ge_graph_for_case_by_case_config_exception) +from ....mindspore_test_framework.pipeline.gradient.compile_gradient\ + import pipeline_for_compile_grad_ge_graph_for_case_by_case_config + + +class ExpandDimsNet(nn.Cell): + def __init__(self, axis): + super(ExpandDimsNet, self).__init__() + self.axis = axis + self.op = P.ExpandDims() + + def construct(self, x): + return self.op(x, self.axis) + + +class IsInstanceNet(nn.Cell): + def __init__(self, inst): + 
super(IsInstanceNet, self).__init__() + self.inst = inst + self.op = P.IsInstance() + + def construct(self, t): + return self.op(self.inst, t) + + +class ReshapeNet(nn.Cell): + def __init__(self, shape): + super(ReshapeNet, self).__init__() + self.shape = shape + self.op = P.Reshape() + + def construct(self, x): + return self.op(x, self.shape) + + +raise_set = [ + # input is scala, not Tensor + ('ExpandDims0', { + 'block': (P.ExpandDims(), {'exception': TypeError, 'error_keywords': ['ExpandDims']}), + 'desc_inputs': [5.0, 1], + 'skip': ['backward']}), + # axis is as a parameter + ('ExpandDims1', { + 'block': (P.ExpandDims(), {'exception': TypeError, 'error_keywords': ['ExpandDims']}), + 'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32)), 1], + 'skip': ['backward']}), + # axis as an attribute, but less then lower limit + ('ExpandDims2', { + 'block': (ExpandDimsNet(-4), {'exception': ValueError, 'error_keywords': ['ExpandDims']}), + 'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32))], + 'skip': ['backward']}), + # axis as an attribute, but greater then upper limit + ('ExpandDims3', { + 'block': (ExpandDimsNet(3), {'exception': ValueError, 'error_keywords': ['ExpandDims']}), + 'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32))], + 'skip': ['backward']}), + + # input is scala, not Tensor + ('DType0', { + 'block': (P.DType(), {'exception': TypeError, 'error_keywords': ['DType']}), + 'desc_inputs': [5.0], + 'skip': ['backward']}), + + # input x scala, not Tensor + ('SameTypeShape0', { + 'block': (P.SameTypeShape(), {'exception': TypeError, 'error_keywords': ['SameTypeShape']}), + 'desc_inputs': [5.0, Tensor(np.ones([3, 4]).astype(np.float32))], + 'skip': ['backward']}), + # input y scala, not Tensor + ('SameTypeShape1', { + 'block': (P.SameTypeShape(), {'exception': TypeError, 'error_keywords': ['SameTypeShape']}), + 'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32)), 5.0], + 'skip': ['backward']}), + # type of x and y not match + 
('SameTypeShape2', { + 'block': (P.SameTypeShape(), {'exception': TypeError, 'error_keywords': ['SameTypeShape']}), + 'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32)), Tensor(np.ones([3, 4]).astype(np.int32))], + 'skip': ['backward']}), + # shape of x and y not match + ('SameTypeShape3', { + 'block': (P.SameTypeShape(), {'exception': ValueError, 'error_keywords': ['SameTypeShape']}), + 'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32)), Tensor(np.ones([3, 3]).astype(np.float32))], + 'skip': ['backward']}), + + # sub_type is None + ('IsSubClass0', { + 'block': (P.IsSubClass(), {'exception': TypeError, 'error_keywords': ['IsSubClass']}), + 'desc_inputs': [None, mstype.number], + 'skip': ['backward']}), + # type_ is None + ('IsSubClass1', { + 'block': (P.IsSubClass(), {'exception': TypeError, 'error_keywords': ['IsSubClass']}), + 'desc_inputs': [mstype.number, None], + 'skip': ['backward']}), + + # inst is var + ('IsInstance0', { + 'block': (P.IsInstance(), {'exception': ValueError, 'error_keywords': ['IsInstance']}), + 'desc_inputs': [5.0, mstype.number], + 'skip': ['backward']}), + # t is not mstype.Type + ('IsInstance1', { + 'block': (IsInstanceNet(5.0), {'exception': TypeError, 'error_keywords': ['IsInstance']}), + 'desc_inputs': [None], + 'skip': ['backward']}), + + # input x is scalar, not Tensor + ('Reshape0', { + 'block': (P.Reshape(), {'exception': TypeError, 'error_keywords': ['Reshape']}), + 'desc_inputs': [5.0, (1, 2)], + 'skip': ['backward']}), + # input shape is var + ('Reshape1', { + 'block': (P.Reshape(), {'exception': TypeError, 'error_keywords': ['Reshape']}), + 'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32)), (2, 3, 2)], + 'skip': ['backward']}), + # element of shape is not int + ('Reshape3', { + 'block': (ReshapeNet((2, 3.0, 2)), {'exception': TypeError, 'error_keywords': ['Reshape']}), + 'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32))], + 'skip': ['backward']}), +] + + 
+@mindspore_test(pipeline_for_compile_forward_ge_graph_for_case_by_case_config_exception) +def test_check_exception(): + return raise_set diff --git a/tests/ut/python/ops/test_math_ops.py b/tests/ut/python/ops/test_math_ops.py index b866c7c556..7ada847aac 100755 --- a/tests/ut/python/ops/test_math_ops.py +++ b/tests/ut/python/ops/test_math_ops.py @@ -17,6 +17,7 @@ import functools import numpy as np import mindspore as ms import mindspore.nn as nn +from mindspore.common.api import _executor from mindspore.common import dtype as mstype from mindspore.ops import prim_attr_register, PrimitiveWithInfer from mindspore import Tensor @@ -340,6 +341,15 @@ class SignNet(nn.Cell): def construct(self, x): return self.sign(x) +class AssignAdd(nn.Cell): + def __init__(self): + super().__init__() + self.op = P.AssignAdd() + self.inputdata = Parameter(initializer(1, [1], ms.float32), name="global_step") + + def construct(self, input_): + self.inputdata = input_ + return self.op(self.inputdata, input_) test_case_math_ops = [ ('MatMulGrad', { @@ -412,6 +422,9 @@ raise_set = [ ('StridedSlice_4_Error', { 'block': (lambda x: P.StridedSlice(new_axis_mask="1.1"), {'exception': TypeError}), 'desc_inputs': [0]}), + ('AssignAdd_Error', { + 'block': (P.AssignAdd(), {'exception': TypeError}), + 'desc_inputs': [[1]]}), ] diff --git a/tests/ut/python/ops/test_momentum.py b/tests/ut/python/ops/test_momentum.py index 3334f1670a..f25e4faf2d 100644 --- a/tests/ut/python/ops/test_momentum.py +++ b/tests/ut/python/ops/test_momentum.py @@ -38,8 +38,7 @@ def tensor_run_opt(opt, iters, learning_rate, momentum, gradient, variable, moment): """ tensor_run_opt """ success = True - new_weight = opt(gradient, moment, variable, - learning_rate, momentum) + new_weight = opt(variable, moment, learning_rate, gradient, momentum)[0] success = F.depend(success, F.assign(variable, new_weight)) return success diff --git a/tests/ut/python/ops/test_nn_ops.py b/tests/ut/python/ops/test_nn_ops.py index 
ab6f31095d..5038ee28a0 100644 --- a/tests/ut/python/ops/test_nn_ops.py +++ b/tests/ut/python/ops/test_nn_ops.py @@ -446,12 +446,6 @@ test_cases = [ 'desc_inputs': [[128, 32, 32, 64]], 'desc_bprop': [[128, 32, 32, 64]], }), - ('ApplyMomentum', { - 'block': P.ApplyMomentum(), - 'desc_inputs': [[2], [128, 32, 32, 64], [128, 32, 32, 64], [128, 32, 32, 64], [128, 32, 32, 64]], - 'desc_bprop': [[128, 32, 32, 64]], - 'skip': ['backward'] - }), ('ScalarSummary', { 'block': ScalarSummaryNet(), 'desc_inputs': [2.2], @@ -515,6 +509,12 @@ test_cases = [ ] test_cases_for_verify_exception = [ + ('ApplyMomentum_Error', { + 'block': (P.ApplyMomentum(), {'exception': TypeError}), + 'desc_inputs': [[2], [128, 32, 32, 64], [128, 32, 32, 64], [128, 32, 32, 64], [128, 32, 32, 64]], + 'desc_bprop': [[128, 32, 32, 64]], + 'skip': ['backward'] + }), ('Conv2d_ValueError_1', { 'block': (lambda _: P.Conv2D(3, 4, mode=-2.0), {'exception': TypeError}), 'desc_inputs': [0], diff --git a/tests/ut/python/ops/test_nn_ops_check.py b/tests/ut/python/ops/test_nn_ops_check.py index c2a751aa0c..4060bb2e15 100755 --- a/tests/ut/python/ops/test_nn_ops_check.py +++ b/tests/ut/python/ops/test_nn_ops_check.py @@ -123,7 +123,7 @@ raise_set = [ 'skip': ['backward']}), # input is Tensor(int32) ('Elu1', { - 'block': (P.Elu(alpha=0.9), {'exception': TypeError, 'error_keywords': ['Elu']}), + 'block': (P.Elu(), {'exception': TypeError, 'error_keywords': ['Elu']}), 'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.int32))], 'skip': ['backward']}), diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py index 1c0f5eb5fe..9d7e8c898a 100755 --- a/tests/ut/python/ops/test_ops.py +++ b/tests/ut/python/ops/test_ops.py @@ -265,8 +265,8 @@ test_case_math_ops = [ 'desc_bprop': [[2, 3]]}), ('Acosh', { 'block': P.Acosh(), - 'desc_inputs': [Tensor(np.random.rand(4).astype(np.float16))], - 'skip': ['backward']}), + 'desc_inputs': [[3, 4, 5]], + 'desc_bprop': [[3, 4, 5]]}), ('Sin', { 'block': P.Sin(), 
'desc_inputs': [[2, 3]], @@ -351,9 +351,8 @@ test_case_math_ops = [ 'skip': ['backward']}), ('FloorMod', { 'block': P.FloorMod(), - 'desc_inputs': [Tensor(np.random.rand(4).astype(np.float16)), - Tensor(np.random.rand(4).astype(np.float16))], - 'skip': ['backward']}), + 'desc_inputs': [[3, 4, 5], [2, 3, 4, 5]], + 'desc_bprop': [[2, 3, 4, 5]]}), ('identity', { 'block': ops.functional.identity, 'desc_inputs': [[2, 2]], @@ -372,7 +371,7 @@ test_case_math_ops = [ 'desc_bprop': [[3]]}), ('TruncatedNormal', { 'block': P.TruncatedNormal(), - 'desc_const': [Tensor(np.array([1, 2, 3]))], + 'desc_const': [(1, 2, 3)], 'desc_inputs': [], 'skip': ['backward'], 'add_fake_input': True}), @@ -585,7 +584,7 @@ test_case_nn_ops = [ ('ReLUV2', { 'block': P.ReLUV2(), 'desc_inputs': [[1, 3, 4, 4]], - 'desc_bprop': [[1, 3, 4, 4], [1, 3, 4, 4]]}), + 'desc_bprop': [[1, 3, 4, 4], ([1, 1, 4, 4, 2], {'dtype': np.uint8})]}), ('ReLUGrad', { 'block': G.ReluGrad(), 'desc_inputs': [[1, 3, 4, 4], [1, 3, 4, 4]], @@ -626,7 +625,7 @@ test_case_nn_ops = [ ('MaxPoolWithArgmax', { 'block': P.MaxPoolWithArgmax(ksize=2, strides=2), 'desc_inputs': [[128, 32, 32, 64]], - 'desc_bprop': [[128, 32, 8, 16], [128, 32, 8, 16]]}), + 'desc_bprop': [[128, 32, 16, 32], ([128, 32, 4, 33], {'dtype': np.uint16})]}), ('SoftmaxCrossEntropyWithLogits', { 'block': P.SoftmaxCrossEntropyWithLogits(), 'desc_inputs': [[1, 10], [1, 10]], @@ -639,7 +638,7 @@ test_case_nn_ops = [ ('LogSoftmax', { 'block': P.LogSoftmax(), 'desc_inputs': [[64, 2]], - 'desc_bprop': [[160, 30522]]}), + 'desc_bprop': [[64, 2]]}), ('LogSoftmaxGrad', { 'block': G.LogSoftmaxGrad(), 'desc_inputs': [[16, 1234], [16, 1234]], @@ -648,7 +647,7 @@ test_case_nn_ops = [ ('LayerNorm', { 'block': P.LayerNorm(), 'desc_inputs': [[2, 16], [16], [16]], - 'desc_bprop': [[2, 16], [2, 16], [2, 16]]}), + 'desc_bprop': [[2, 16], [2, 1], [2, 1]]}), ('LayerNormGrad', { 'block': G.LayerNormGrad(), 'desc_inputs': [[2, 16], [2, 16], [2, 16], [2, 16], [16]], @@ -674,12 +673,6 @@ 
test_case_nn_ops = [ 'desc_inputs': [[128, 64, 32, 32], [128, 64, 32, 32], [64], [64], [64]], 'desc_bprop': [[128, 64, 32, 32], [64], [64], [64], [64]], 'skip': ['backward']}), - ('ApplyMomentum', { - 'block': P.ApplyMomentum(), - 'desc_inputs': [[128, 32, 32, 64], [128, 32, 32, 64], - [32, 32, 64], [32, 32, 64], [32, 32, 64]], - 'desc_bprop': [[128, 32, 32, 64]], - 'skip': ['backward']}), ('TopK', { 'block': P.TopK(), 'desc_const': [5], @@ -793,12 +786,12 @@ test_case_nn_ops = [ 'desc_bprop': [[5, 5]]}), ('DepthwiseConv2dNative_1', { 'block': P.DepthwiseConv2dNative(3, (3, 3), pad_mode="pad", pad=1, stride=2), - 'desc_inputs': [[10, 32, 32, 32], [3, 32, 3, 3]], - 'desc_bprop': [[10, 30, 16, 16]]}), + 'desc_inputs': [[10, 32, 32, 32], [1, 32, 3, 3]], + 'desc_bprop': [[10, 32, 16, 16]]}), ('DepthwiseConv2dNative_2', { 'block': P.DepthwiseConv2dNative(1, (3, 3), pad_mode="same", pad=0, stride=1), 'desc_inputs': [[2592, 2048, 4, 4], [1, 2048, 3, 3]], - 'desc_bprop': [[2592, 2048, 2, 2]]}), + 'desc_bprop': [[2592, 2048, 4, 4]]}), ('SigmoidCrossEntropyWithLogits', { 'block': P.SigmoidCrossEntropyWithLogits(), 'desc_inputs': [[128, 10], [128, 10]], @@ -845,7 +838,7 @@ test_case_nn_ops = [ 'block': P.OneHot(), 'desc_const': [3, Tensor(1.0, mstype.float32), Tensor(0.0, mstype.float32)], 'desc_inputs': [Tensor(np.array([64]).astype(np.int32))], - 'desc_bprop': [[64, 2]]}), + 'desc_bprop': [[1, 3]]}), ('ReduceProd_0', { 'block': P.ReduceProd(), 'desc_const': [0], @@ -950,7 +943,7 @@ test_case_array_ops = [ 'block': P.Cast(), 'desc_const': [mstype.int32], 'desc_inputs': [[2, 3, 4, 5]], - 'desc_bprop': [Tensor(np.ones((2, 3, 3, 5)).astype(np.int32))]}), + 'desc_bprop': [Tensor(np.ones((2, 3, 4, 5)).astype(np.int32))]}), ('ExpandDims', { 'block': P.ExpandDims(), 'desc_const': [0], @@ -1002,12 +995,12 @@ test_case_array_ops = [ 'desc_inputs': [ (Tensor(np.array([[0, 1], [2, 1]]).astype(np.int32)), Tensor(np.array([[0, 1], [2, 1]]).astype(np.int32)))], - 'desc_bprop': [[4, 2]]}), 
+ 'desc_bprop': [([4, 2], {'dtype': np.int32})]}), ('ConcatV2_1', { 'block': P.Concat(axis=2), 'desc_inputs': [(Tensor(np.array([[[0, 1, 2]], [[2, 1, 2]]]).astype(np.int32)), Tensor(np.array([[[0, 1]], [[2, 1]]]).astype(np.int32)))], - 'desc_bprop': [[2, 1, 5]]}), + 'desc_bprop': [([2, 1, 5], {'dtype': np.int32})]}), ('ConcatV2_2', { 'block': NetForConcat(), 'desc_inputs': [[2, 2]], @@ -1040,11 +1033,6 @@ test_case_array_ops = [ 'desc_bprop':[[3, 2, 3, 3]], }), ('Pack_2', { - 'block': NetForPackInput(P.Pack()), - 'desc_inputs':[[2, 2]], - 'desc_bprop':[[2, 2, 2]], - }), - ('Pack_3', { 'block': NetForPackInput(P.Pack()), 'desc_inputs':[[128, 128], [128, 128]], 'desc_bprop':[[2, 128, 128]], @@ -1059,16 +1047,26 @@ test_case_array_ops = [ 'desc_inputs':[Tensor(np.array([[1, 1, 1]], np.float32))], 'desc_bprop':[[1], [1], [1]], }), - ('Diag', { + ('Diag_1', { 'block': P.Diag(), 'desc_inputs': [[4]], 'desc_bprop': [[4, 4]], }), - ('DiagPart', { + ('Diag_2', { + 'block': P.Diag(), + 'desc_inputs': [[4, 4]], + 'desc_bprop': [[4, 4, 4, 4]], + }), + ('DiagPart_1', { 'block': P.DiagPart(), 'desc_inputs': [[4, 4]], 'desc_bprop': [[4]], }), + ('DiagPart_2', { + 'block': P.DiagPart(), + 'desc_inputs': [[4, 4, 4, 4]], + 'desc_bprop': [[4, 4]], + }), ('SpaceToBatch_1', { 'block': P.SpaceToBatch(2, [[0, 0], [0, 0]]), 'desc_inputs': [[1, 3, 2, 2]], @@ -1077,7 +1075,7 @@ test_case_array_ops = [ ('SpaceToBatch_2', { 'block': P.SpaceToBatch(2, [[1, 1], [0, 4]]), 'desc_inputs': [[1, 3, 2, 2]], - 'desc_bprop': [[4, 3, 2, 4]], + 'desc_bprop': [[4, 3, 2, 3]], }), ('BatchToSpace_1', { 'block': P.BatchToSpace(2, [[0, 0], [0, 0]]), @@ -1113,18 +1111,12 @@ test_case_other_ops = [ 'desc_inputs': (Tensor(np.ones((1, 3, 6, 6), np.float32)), Tensor(np.ones((2, 4), np.int32))), 'desc_bprop': [[2]]}), - ('ScatterNdUpdate', { - 'block': P.ScatterNdUpdate(), - 'desc_inputs': (Tensor(np.ones((2, 3), np.float32)), - Tensor(np.ones((2, 2), np.int32)), - Tensor(np.ones((2,), np.float32))), - 'desc_bprop': 
[[2, 3]]}), ('ScatterNd', { 'block': P.ScatterNd(), 'desc_const': [(3, 3)], 'desc_inputs': (Tensor(np.ones((2, 2), np.int32)), Tensor(np.ones((2,), np.int32))), - 'desc_bprop': [[3, 3]]}), + 'desc_bprop': [([3, 3], {'dtype': np.int32})]}), ('SmoothL1Loss', { 'block': P.SmoothL1Loss(), 'desc_inputs': [[256, 4], [256, 4]], @@ -1178,7 +1170,7 @@ import mindspore.context as context @non_graph_engine @mindspore_test(pipeline_for_compile_forward_ge_graph_for_case_by_case_config) def test_exec(): - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, save_graphs=True) return test_exec_case @@ -1207,6 +1199,21 @@ raise_set = [ 'block': (NetForFlatten0D(), {'exception': ValueError}), 'desc_inputs': [Tensor(np.array(0).astype(np.int32))], 'desc_bprop': [Tensor(np.array(0).astype(np.int32))]}), + ('ScatterNdUpdate', { + 'block': (P.ScatterNdUpdate(), {'exception': TypeError}), + 'desc_inputs': (Tensor(np.ones((2, 3), np.float32)), + Tensor(np.ones((2, 2), np.int32)), + Tensor(np.ones((2,), np.float32))), + 'desc_bprop': [[2, 3]]}), + ('Pack', { + 'block': (NetForPackInput(P.Pack()), {'exception': ValueError}), + 'desc_inputs':[[2, 2]], + 'desc_bprop':[[1, 2, 2]]}), + ('PReLU', { + 'block': (P.PReLU(), {'exception': ValueError}), + 'desc_inputs':[[2], [1]], + 'desc_bprop':[[1]]}), + ] diff --git a/tests/ut/python/ops/test_tensor_slice.py b/tests/ut/python/ops/test_tensor_slice.py index ddd1fb46a1..32c4025368 100644 --- a/tests/ut/python/ops/test_tensor_slice.py +++ b/tests/ut/python/ops/test_tensor_slice.py @@ -52,8 +52,9 @@ class NetWorkSliceEllipsis(Cell): def construct(self, tensor): ret0 = tensor[0:4:2, ..., 1] + self.tensor_ret0 ret1 = tensor[...] 
+ self.tensor_ret1 - ret2 = tensor[True] + self.tensor_ret2 - return ret0, ret1, ret2 + ret2 = tensor[None] + self.tensor_ret2 + ret3 = tensor[True] + self.tensor_ret2 + return ret0, ret1, ret2, ret3 class NetWorkReduceDimension(Cell): @@ -94,13 +95,203 @@ class NetWorkReduceToScalar(Cell): return ret +class TensorAssignWithSliceError1(Cell): + def __init__(self): + super(TensorAssignWithSliceError1, self).__init__() + + def construct(self, a, b): + a[1:3:-1,::] = b + return a + + +class TensorAssignWithSliceError2(Cell): + def __init__(self): + super(TensorAssignWithSliceError2, self).__init__() + + def construct(self, a, b): + a[1:3:-1] = b + return a + + +class TensorAssignWithSlice2(Cell): + def __init__(self): + super(TensorAssignWithSlice2, self).__init__() + + def construct(self, a, b, ck): + a[1:5] = b + a[3:4] = 5 + a[-1:1:-1] = b + a[-1:3:-1] = 5 + a[::] = b + a[::] = 9 + z = a + ck + return z + + +class TensorAssignWithSlice(Cell): + def __init__(self): + super(TensorAssignWithSlice, self).__init__() + self.c = 2 + + def construct(self, a, b, ck): + a[1:3,::] = b + a[2:3:,3:] = b + a[::] = b + a[::] = self.c + a[::,::] = b + a[::,::] = self.c + a[2:3:,0:, 4:1:-1] = b + a[2:3:,0:, 4:1:-1] = self.c + z = a + ck + return z + + +def test_tensor_assign(): + context.set_context(mode=context.GRAPH_MODE, save_graphs=True) + net = TensorAssignWithSlice() + net2= TensorAssignWithSlice2() + net_e1 = TensorAssignWithSliceError1() + net_e2 = TensorAssignWithSliceError2() + a = np.arange(60).reshape(3,4,5) + ck = np.arange(60).reshape(3,4,5) + b = Tensor([1], dtype=mstype.float32) + Ta = Tensor(a, dtype=mstype.float32) + Tck = Tensor(ck, dtype=mstype.float32) + Ta4d = Tensor(a.reshape(1,3,4,5), dtype=mstype.float32) + Ta4d_ck = Tensor(ck.reshape(1,3,4,5), dtype=mstype.float32) + Tb= Tensor([1,3], dtype=mstype.float32) + Tc= Tensor([], dtype=mstype.float32) + t = Tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=mstype.float32) + tck = Tensor([1, 2, 3, 4, 5, 6, 7, 8], 
dtype=mstype.float32) + net(Ta, b, Tck) + net2(t, b, tck) + # Error for A[Slice] = Number + # 1. A[Slice] = Number, Slice error + with pytest.raises(IndexError): + net_e2(t, 2) + + # Error for A[Slice] = U, U is a Tensor + # 1. A[Slice] = U, u.size is error + with pytest.raises(ValueError): + net2(t, Tb, tck) + # 2. A[Slice] = U, U is empty + with pytest.raises(ValueError): + net2(t, Tc, tck) + # 3. A[Slice] = U, U.size error + with pytest.raises(ValueError): + net2(t, Tb, tck) + + # Error for A[Tuple(Slice...)] = Tensor + # 1. A[Tuple(Slice...)] = U, U is empty + with pytest.raises(ValueError): + net(Ta, Tc, Tck) + # 2. A[Tuple(Slice...)] = U, U.size error + with pytest.raises(ValueError): + net(Ta, Tb, Tck) + # 3. A[Tuple(Slice...)] = U, Slice error + with pytest.raises(IndexError): + net_e1(Ta, b) + + # Error for A[Tuple(Slice...)] = Number + # 1. A[Tuple(Slice...)] = Number, Slice error + with pytest.raises(IndexError): + net_e1(Ta, 2) + + net = TensorAssignWithInteger() + # Error for A[Number] = scalar/Tensor + # 1. A[Number] = U, U is a Tensor, u.size not match + with pytest.raises(ValueError): + net(Ta, Tb, Tck) + with pytest.raises(ValueError): + net(Ta, Tc, Tck) + # 2. A[Number] = U, the number index error + with pytest.raises(IndexError): + net(Ta4d, b, Ta4d_ck) + + # Error for A[(n,m)] = scalar/Tensor + # 1. A[(n,m)] = U, U is a tensor. u.size not match + net = TensorAssignWithTupleInteger() + with pytest.raises(ValueError): + net(Ta, Tc, Tck) + with pytest.raises(ValueError): + net(Ta, Tb, Tck) + # 2. A[(n,m)] = U, the number index error + with pytest.raises(IndexError): + net(Ta4d, b, Ta4d_ck) + + #Error for A[...] = U or A[1:, ...] = u + #1. A[...] = scalar/tensor + net = TensorAssignWithEllipsis() + net(Ta, Ta4d) + with pytest.raises(ValueError): + net(Ta, Tc) + with pytest.raises(ValueError): + net(Ta, Tb) + #2. A[::, 1:, ...] 
= scalar/tensor + net = TensorAssignWithTupleEllipsis() + net(Ta, b) + with pytest.raises(ValueError): + net(Ta, Tc) + with pytest.raises(ValueError): + net(Ta, Tb) + + +class TensorAssignWithTupleEllipsis2(Cell): + def __init__(self): + super(TensorAssignWithTupleEllipsis2, self).__init__() + def construct(self, a, b): + a[1:, ..., ::] = b + return a + + +class TensorAssignWithTupleEllipsis(Cell): + def __init__(self): + super(TensorAssignWithTupleEllipsis, self).__init__() + def construct(self, a, b): + a[:2, ...] = 1 + a[1:, ...] = b + return a + + +class TensorAssignWithEllipsis(Cell): + def __init__(self): + super(TensorAssignWithEllipsis, self).__init__() + def construct(self, a, b): + a[...] = 1 + a[...] = b + return a + + +class TensorAssignWithInteger(Cell): + def __init__(self): + super(TensorAssignWithInteger, self).__init__() + + def construct(self, a, b, ck): + a[1] = 1 + a[0] = b + z = a + ck + return z + +class TensorAssignWithTupleInteger(Cell): + def __init__(self): + super(TensorAssignWithTupleInteger, self).__init__() + + def construct(self, a, b, ck): + a[(1)] = 1 + a[(1)] = b + a[(1,1)] = b + a[(1,1)] = 1 + z = a + ck + return z + class TensorAssignWithBoolTensorIndex(Cell): def __init__(self): super(TensorAssignWithBoolTensorIndex, self).__init__() - self.t = Tensor(np.arange(6).reshape([2, 3]), dtype=mstype.float64) + self.t = Tensor(np.arange(60).reshape([3,4,5]), dtype = mstype.float32) + self.u_scalar = 5 - def construct(self, a, b, c, u_tensor, _scalar): - a[c] = u_scalar + def construct(self, a, b, c, u_tensor): + a[c] = self.u_scalar a[b] = u_tensor z = a + self.t return z @@ -118,14 +309,16 @@ class TensorAssignWithBoolTensorIndexError(Cell): class TensorAssignWithBoolTensorIndex2(Cell): def __init__(self): super(TensorAssignWithBoolTensorIndex2, self).__init__() - self.t = Tensor(np.arange(6).reshape([2, 3]), dtype=mstype.float64) + self.t = Tensor(np.arange(6).reshape([2, 3]), dtype=mstype.float32) + self.t = 
Tensor(np.arange(60).reshape([3,4,5]), dtype = mstype.float32) + self.u_scalar = 5 - def construct(self, a, u_tensor, _scalar): + def construct(self, a, u_tensor): a[a > 8] = u_tensor - a[a >= 6] = u_scalar - a[a < 3] = u_scalar + a[a >= 6] = self.u_scalar + a[a < 3] = self.u_scalar a[a <= 5] = u_tensor - a[a == 5] = u_scalar + a[a == 5] = self.u_scalar z = a + self.t return z @@ -139,36 +332,41 @@ class TensorAssignWithBoolTensorIndex2Error(Cell): return a -a = np.random.uniform(1, 10, [2, 3]) +a = np.arange(60).reshape(3, 4, 5) +ck = np.arange(60).reshape(3, 4, 5) +a4 = np.arange(60).reshape(3, 2, 2, 5) b = a > 5 c = a < 3 -Ta = Tensor(a) +Ta = Tensor(a, dtype=mstype.float32) +Tck = Tensor(ck, dtype=mstype.float32) +Ta4 = Tensor(a4, dtype=mstype.float32) Tb = Tensor(b) Tc = Tensor(c) Td = Tensor([True, True]) -u_tensor = Tensor([1]) -u_tensor_error = Tensor([1, 2]) +u_tensor = Tensor([1], dtype=mstype.float32) +u_tensor_error = Tensor([1, 2], dtype=mstype.float32) +t_1d = Tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=mstype.float32) +tck_1d = Tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=mstype.float32) u_scalar = 5 - def test_tensor_assign_bool_index(): net1 = TensorAssignWithBoolTensorIndex() net2 = TensorAssignWithBoolTensorIndex2() - - net1(Ta, Tb, Tc, u_tensor, u_scalar) - with pytest.raises(ValueError): - net1(Ta, Td, Tc, u_tensor, u_scalar) - with pytest.raises(ValueError): - net1(Ta, u_tensor, Tc, u_tensor, u_scalar) + net1(Ta, Tb, Tc, u_tensor) + net1(Ta, Tb, Tc, u_tensor) with pytest.raises(ValueError): - net1(Ta, Tb, Td, u_tensor, u_scalar) + net1(Ta, Td, Tc, u_tensor) + with pytest.raises(TypeError): + net1(Ta, u_tensor, Tc, u_tensor) with pytest.raises(ValueError): - net1(Ta, Tb, Ta, u_tensor, u_scalar) + net1(Ta, Tb, Td, u_tensor) + with pytest.raises(TypeError): + net1(Ta, Tb, Ta, u_tensor) with pytest.raises(ValueError): - net1(Ta, Tb, Tc, u_tensor_error, u_scalar) + net1(Ta, Tb, Tc, u_tensor_error) # net1(Ta, u_tensor, Tc, u_tensor_error, u_scalar) with 
pytest.raises(ValueError): - net2(Ta, u_tensor_error, u_scalar) + net2(Ta, u_tensor_error) net3 = TensorAssignWithBoolTensorIndexError() with pytest.raises(AttributeError): net3(Ta, Tb, Tc, u_tensor) @@ -180,15 +378,42 @@ def test_tensor_assign_bool_index(): with pytest.raises(AttributeError): net4(Ta, u_scalar) - test_cases = [ + ('TensorAssignWithTupleEllipsis2', { + 'block': TensorAssignWithTupleEllipsis2(), + 'desc_inputs': [Ta4, u_tensor], + }), + ('TensorAssignWithTupleEllipsis', { + 'block': TensorAssignWithTupleEllipsis(), + 'desc_inputs': [Ta, u_tensor], + }), + ('TensorAssignWithEllipsis', { + 'block': TensorAssignWithEllipsis(), + 'desc_inputs': [Ta, u_tensor], + }), + ('TensorAssignWithTupleInteger', { + 'block': TensorAssignWithTupleInteger(), + 'desc_inputs': [Ta, u_tensor, Tck], + }), + ('TensorAssignWithInteger', { + 'block': TensorAssignWithInteger(), + 'desc_inputs': [Ta, u_tensor, Tck], + }), + ('TensorAssignWithSlice', { + 'block': TensorAssignWithSlice(), + 'desc_inputs': [Ta, u_tensor, Tck], + }), + ('TensorAssignWithSlice2', { + 'block': TensorAssignWithSlice2(), + 'desc_inputs': [t_1d, u_tensor, tck_1d], + }), ('TensorAssignWithBoolTensorIndex', { 'block': TensorAssignWithBoolTensorIndex(), - 'desc_inputs': [Ta, Tb, Tc, u_tensor, u_scalar], + 'desc_inputs': [Ta, Tb, Tc, u_tensor], }), ('TensorAssignWithBoolTensorIndex2', { 'block': TensorAssignWithBoolTensorIndex2(), - 'desc_inputs': [Ta, u_tensor, u_scalar], + 'desc_inputs': [Ta, u_tensor], }), ('SlicePositive', { 'block': NetWorkSlicePositive(), @@ -206,7 +431,7 @@ test_cases = [ 'block': NetWorkReduceToScalar(), 'desc_inputs': [Tensor(np.ones([6, 8, 10], np.int32))], }), - ('NetWorkSliceEllipsis', { + ('TensorSliceEllipsis', { 'block': NetWorkSliceEllipsis(), 'desc_inputs': [Tensor(np.ones([6, 7, 8, 9], np.int32))], }), @@ -233,7 +458,7 @@ def test_tensor_slice_reduce_out_of_bounds_neg(): net = NetWork() with pytest.raises(ValueError) as ex: net(input_tensor) - assert "The `begin[0]` 
should be an int and must greater or equal to -6, but got -7" in str(ex.value) + assert "For 'StridedSlice' the `begin[0]` should be an int and must greater or equal to -6, but got `-7`" in str(ex.value) def test_tensor_slice_reduce_out_of_bounds_positive(): @@ -250,4 +475,4 @@ def test_tensor_slice_reduce_out_of_bounds_positive(): net = NetWork() with pytest.raises(ValueError) as ex: net(input_tensor) - assert "The `begin[0]` should be an int and must less than 6, but got 6" in str(ex.value) + assert "For 'StridedSlice' the `begin[0]` should be an int and must less than 6, but got `6`" in str(ex.value) diff --git a/tests/ut/python/parallel/__init__.py b/tests/ut/python/parallel/__init__.py index c08f8e247b..b26962bc3a 100644 --- a/tests/ut/python/parallel/__init__.py +++ b/tests/ut/python/parallel/__init__.py @@ -13,10 +13,12 @@ # limitations under the License. import mindspore.context as context +from mindspore.parallel._auto_parallel_context import auto_parallel_context from mindspore.parallel._utils import _reset_op_id def setup_module(module): + auto_parallel_context().set_enable_all_reduce_fusion(enable_all_reduce_fusion=True) context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False) _reset_op_id() diff --git a/tests/ut/python/parallel/test_allreduce_fusion.py b/tests/ut/python/parallel/test_allreduce_fusion.py index fcbee10587..b8bf9ccc0f 100644 --- a/tests/ut/python/parallel/test_allreduce_fusion.py +++ b/tests/ut/python/parallel/test_allreduce_fusion.py @@ -23,7 +23,7 @@ from tests.dataset_mock import MindData from mindspore import context from mindspore.common.api import _executor from mindspore.parallel import _cost_model_context as cost_model_context - +from mindspore.parallel._auto_parallel_context import auto_parallel_context class Dataset(MindData): @@ -105,6 +105,7 @@ def train_common(net): epoch_size = 2 device_num=4 context.reset_auto_parallel_context() + 
auto_parallel_context().set_enable_all_reduce_fusion(enable_all_reduce_fusion=True) context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=device_num, parameter_broadcast=False) context.set_context(mode=context.GRAPH_MODE) diff --git a/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py b/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py new file mode 100644 index 0000000000..5e8c89de25 --- /dev/null +++ b/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py @@ -0,0 +1,101 @@ +import numpy as np +from mindspore import context +import mindspore as ms +import mindspore.nn as nn +from mindspore.nn.optim import Adam, FTRL +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore import Tensor, Parameter, ParameterTuple +from mindspore.ops import composite as C +from mindspore.parallel import _cost_model_context as cost_model_context +from mindspore.common.api import _executor +from mindspore.parallel import set_algo_parameters, get_algo_parameters, reset_algo_parameters +from mindspore.parallel._utils import _reset_op_id as reset_op_id + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.mul = P.Mul() + self.relu = P.ReLU() + self.wd = Parameter(Tensor(np.ones([8, 8, 8, 8]).astype(np.float32)), name="wide") + self.wt = Parameter(Tensor(np.ones([8, 8, 8, 8]).astype(np.float32)), name="l") + def construct(self, x): + out = self.mul(x, self.wd) + out = self.mul(out, self.wt) + out = self.relu(out) + return out + +class NetWithLoss(nn.Cell): + def __init__(self, network): + super(NetWithLoss, self).__init__() + self.sum = P.ReduceSum() + self.mean = P.ReduceMean() + self.net = network + + def construct(self, x): + predict = self.net(x) + loss1 = self.sum(predict, -1) + loss2 = self.mean(predict, -1) + return loss1, loss2 + +class IthOutputCell(nn.Cell): + def __init__(self, network, output_index): + super(IthOutputCell, 
self).__init__() + self.network = network + self.output_index = output_index + + def construct(self, x): + predict = self.network(x)[self.output_index] + return predict + +class TrainStepWarp(nn.Cell): + def __init__(self, network, sens=1000.0): + super(TrainStepWarp, self).__init__() + self.network = network + self.network.set_train() + self.trainable_params = network.trainable_params() + weights_w = [] + weights_d = [] + for params in self.trainable_params: + weights_w.append(params) + weights_d.append(params) + self.weights_w = ParameterTuple(weights_w) + self.weights_d = ParameterTuple(weights_d) + self.optimizer_w = FTRL(learning_rate=1e-2, params=self.weights_w, l1=1e-8, + l2=1e-8, initial_accum=1.0) + self.optimizer_d = Adam(self.weights_d, learning_rate=3.5e-4, eps=1e-8, + loss_scale=sens) + self.hyper_map = C.HyperMap() + self.grad_w = C.GradOperation('grad_w', get_by_list=True, sens_param=True) + self.grad_d = C.GradOperation('grad_d', get_by_list=True, sens_param=True) + self.sens = sens + self.loss_net_w = IthOutputCell(network, output_index=0) + self.loss_net_d = IthOutputCell(network, output_index=1) + + def construct(self, x): + weights_w = self.weights_w + weights_d = self.weights_d + loss_w, loss_d = self.network(x) + sens_w = P.Fill()(P.DType()(loss_w), P.Shape()(loss_w), self.sens) + sens_d = P.Fill()(P.DType()(loss_d), P.Shape()(loss_d), self.sens) + grads_w = self.grad_w(self.loss_net_w, weights_w)(x, sens_w) + grads_d = self.grad_d(self.loss_net_d, weights_d)(x, sens_d) + return F.depend(loss_w, self.optimizer_w(grads_w)), F.depend(loss_d, self.optimizer_d(grads_d)) + +def test_double_subgraphs(): + cost_model_context.set_cost_model_context(multi_subgraphs=True) + context.set_context(save_graphs=True) + context.set_auto_parallel_context(device_num=8, global_rank=0) + net = TrainStepWarp(NetWithLoss(Net())) + context.set_auto_parallel_context(parallel_mode="auto_parallel") + + x = Tensor(np.ones([8, 8, 8, 8]), dtype=ms.float32) + reset_op_id() 
+ _executor.compile(net, x, phase='train') + strategies = _executor._get_strategy(net) + expected_strategies = {'Default/network-NetWithLoss/ReduceMean-op0': [[8, 1, 1, 1]], + 'Default/network-NetWithLoss/net-Net/ReLU-op1': [[8, 1, 1, 1]], + 'Default/network-NetWithLoss/net-Net/Mul-op2': [[8, 1, 1, 1], [8, 1, 1, 1]], + 'Default/network-NetWithLoss/net-Net/Mul-op3': [[8, 1, 1, 1], [8, 1, 1, 1]], + 'Default/network-NetWithLoss/ReduceSum-op4': [[8, 1, 1, 1]]} + assert strategies == expected_strategies diff --git a/tests/ut/python/parallel/test_auto_parallel_resnet.py b/tests/ut/python/parallel/test_auto_parallel_resnet.py index ae7bd952d9..1088ad736d 100644 --- a/tests/ut/python/parallel/test_auto_parallel_resnet.py +++ b/tests/ut/python/parallel/test_auto_parallel_resnet.py @@ -19,6 +19,7 @@ from mindspore import Tensor from mindspore.ops import operations as P from mindspore.nn.optim.momentum import Momentum from mindspore.common.initializer import TruncatedNormal +from mindspore.communication.management import init from mindspore.train.model import Model, ParallelMode from mindspore import context import os @@ -31,10 +32,10 @@ from mindspore.parallel import set_algo_parameters from mindspore.parallel import _cost_model_context as cost_model_context context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") -context.set_context(enable_hccl=True) context.set_context(enable_task_sink=True, device_id= 0) context.set_context(enable_ir_fusion=True) context.set_context(enable_loop_sink=False) +init() def weight_variable(shape, factor=0.1): return TruncatedNormal(0.02) @@ -273,7 +274,7 @@ class DatasetLenet(): return 1 -def train_32k_8p(epoch_size=3, batch_size=32, num_classes=32768): +def test_train_32k_8p(epoch_size=3, batch_size=32, num_classes=32768): dev_num = 8 context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, device_num=dev_num) set_algo_parameters(elementwise_op_strategy_follow=True) @@ -303,12 +304,12 @@ def 
train_32k_8p(epoch_size=3, batch_size=32, num_classes=32768): return allreduce_fusion_dict -def test_train_32k_8p_fusion1(epoch_size=3, batch_size=32, num_classes=32768): #1048576 #131072 #32768 #8192 +def train_32k_8p_fusion1(epoch_size=3, batch_size=32, num_classes=32768): #1048576 #131072 #32768 #8192 cost_model_context.set_cost_model_context(costmodel_gamma=0.001, costmodel_beta=400.0) cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1) cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2) cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.5) - allreduce_fusion_dict = train_32k_8p(epoch_size, batch_size, num_classes) + allreduce_fusion_dict = test_train_32k_8p(epoch_size, batch_size, num_classes) expect_dict = {'end_point.bias': 2, 'end_point.weight': 2, 'layer4.2.bn3.beta': 2, @@ -475,13 +476,13 @@ def test_train_32k_8p_fusion1(epoch_size=3, batch_size=32, num_classes=32768): # cost_model_context.reset_cost_model_context() -def test_train_32k_8p_fusion2(epoch_size=3, batch_size=32, num_classes=32768): #1048576 #131072 #32768 #8192 +def train_32k_8p_fusion2(epoch_size=3, batch_size=32, num_classes=32768): #1048576 #131072 #32768 #8192 cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=2) cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_time=0.1) cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_inherent_time=0.05) cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_bandwidth=0.000001) cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_computation_time_parameter=0.0000015) - allreduce_fusion_dict = train_32k_8p(epoch_size, batch_size, num_classes) + allreduce_fusion_dict = test_train_32k_8p(epoch_size, batch_size, num_classes) expect_dict = {'end_point.bias': 2, 'end_point.weight': 2, 'layer4.2.bn3.beta': 2, diff --git 
a/tests/ut/python/parallel/test_auto_parallel_two_bn.py b/tests/ut/python/parallel/test_auto_parallel_two_bn.py new file mode 100644 index 0000000000..8eb6074f9f --- /dev/null +++ b/tests/ut/python/parallel/test_auto_parallel_two_bn.py @@ -0,0 +1,70 @@ +import numpy as np +from mindspore import context +import mindspore as ms +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore import Tensor +from mindspore.common.api import _executor +from tests.ut.python.ops.test_math_ops import VirtualLoss +from mindspore.parallel import set_algo_parameters +from mindspore.parallel._utils import _reset_op_id as reset_op_id +import re + +class NetWithLoss(nn.Cell): + def __init__(self, network): + super(NetWithLoss, self).__init__() + self.loss = VirtualLoss() + self.network = network + + def construct(self, x): + predict = self.network(x) + return self.loss(predict) + +class Blockcell(nn.Cell): + def __init__(self): + super(Blockcell, self).__init__() + self.bn = nn.BatchNorm2d(64, momentum=0.9) + + def construct(self, x): + out = self.bn(x) + return out + +def getBlock(): + return Blockcell() + +def test_two_bn(): + class Net(nn.Cell): + def __init__(self): + super().__init__() + self.block1 = getBlock() + self.block2 = getBlock() + self.relu = P.ReLU() + self.add = P.TensorAdd() + self.bias = Tensor(np.ones([64, 64]), dtype=ms.float32) + + def construct(self, x): + out = self.block1(x) + out = self.relu(out) + out = self.add(out, self.bias) + out = self.block2(out) + return out + + net = NetWithLoss(Net()) + x = Tensor(np.ones([64, 64]), dtype=ms.float32) + context.set_context(save_graphs=True) + context.set_auto_parallel_context(device_num=8, global_rank=0) + context.set_auto_parallel_context(parallel_mode="auto_parallel") + set_algo_parameters(elementwise_op_strategy_follow=True) + reset_op_id() + + _executor.compile(net, x, phase='train') + strategies = _executor._get_strategy(net) + assert len(strategies) == 4 + + for (k, v) in 
strategies.items(): + if re.search('BatchNorm-op', k) is not None: + assert v == [[8, 1], [1], [1], [1], [1]] + elif re.search('TensorAdd-op', k) is not None: + assert v == [[8, 1], [8, 1]] + elif re.search('ReLU-op', k) is not None: + assert v == [[8, 1]] diff --git a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py index 848c8025cb..2e2ddd8f32 100644 --- a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py +++ b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py @@ -97,13 +97,8 @@ def test_two_matmul(): assert costmodel_communi_bias == 1024.0 - set_algo_parameters(simplify_cal=True, - tensor_slice_align_enable=False, - tensor_slice_align_size=32, - fully_use_devices=False, - elementwise_op_strategy_follow=False) - para_simplify_cal = get_algo_parameters("simplify_cal") - assert para_simplify_cal == True + set_algo_parameters(tensor_slice_align_enable=False, tensor_slice_align_size=32, + fully_use_devices=False, elementwise_op_strategy_follow=False) para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable") assert para_slice_align_enable == False para_slice_align_size = get_algo_parameters("tensor_slice_align_size") @@ -114,8 +109,6 @@ def test_two_matmul(): assert elementwise_op_strategy_follow == False reset_algo_parameters() - para_simplify_cal = get_algo_parameters("simplify_cal") - assert para_simplify_cal == True para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable") assert para_slice_align_enable == False para_slice_align_size = get_algo_parameters("tensor_slice_align_size") diff --git a/tests/ut/python/parallel/test_bool_grad.py b/tests/ut/python/parallel/test_bool_grad.py index f3cdfc8030..491707103b 100644 --- a/tests/ut/python/parallel/test_bool_grad.py +++ b/tests/ut/python/parallel/test_bool_grad.py @@ -52,7 +52,7 @@ class CommonNet(nn.Cell): def __init__(self): super(CommonNet, self).__init__() self.weight = 
Parameter(Tensor(np.ones([256, 64]), dtype=ms.float32), name="mul_weight") - self.logicalnot = P.LogicalNot().set_strategy(((4,1),)) + self.logicalnot = P.LogicalNot().set_strategy(((4,2),)) self.equal = P.Equal().set_strategy(((4,2),(4,2))) def construct(self, x, label): @@ -78,4 +78,5 @@ def common_net(): def test_bool_grad(): - common_net() \ No newline at end of file + common_net() + diff --git a/tests/ut/python/parallel/test_forward_graph.py b/tests/ut/python/parallel/test_forward_graph.py new file mode 100644 index 0000000000..76cd5b4178 --- /dev/null +++ b/tests/ut/python/parallel/test_forward_graph.py @@ -0,0 +1,82 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import numpy as np +import mindspore as ms +from mindspore import context, Tensor, Parameter +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.common.api import _executor + + +class Net(Cell): + def __init__(self, mul_weight, strategy1=None, strategy2=None): + super().__init__() + self.mul = P.Mul().set_strategy(strategy1) + self.neg = P.Neg().set_strategy(strategy2) + self.mul_weight = Parameter(mul_weight, "w1") + + def construct(self, x, b): + out = self.mul(x, self.mul_weight) + out = self.neg(out) + return out, b + + +_x = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) +_w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) +_b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32) + + +def compile(net): + _executor.compile(net, _x, _b) + context.reset_auto_parallel_context() + + +def test_forward_graph_data_parallel(): + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) + strategy1 = ((16, 1, 1), (16, 1, 1)) + strategy2 = ((16, 1, 1), ) + net = Net(_w1, strategy1, strategy2) + compile(net) + + +def test_forward_graph_model_parallel(): + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) + strategy1 = ((1, 1, 16), (1, 1, 16)) + strategy2 = ((1, 1, 16), ) + net = Net(_w1, strategy1, strategy2) + compile(net) + + +def test_forward_graph_hybrid_parallel(): + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) + strategy1 = ((2, 2, 4), (2, 2, 4)) + strategy2 = ((2, 2, 4), ) + net = Net(_w1, strategy1, strategy2) + compile(net) + + +def test_forward_graph_auto_parallel(): + context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) + net = Net(_w1) + compile(net) + + +def test_forward_graph_repeat_calc(): + 
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) + strategy1 = ((2, 2, 4), (2, 2, 4)) + strategy2 = ((1, 2, 2), ) + net = Net(_w1, strategy1, strategy2) + compile(net) + diff --git a/tests/ut/python/parallel/test_layer_norm.py b/tests/ut/python/parallel/test_layer_norm.py index c65ee5fc8e..50f49dcc77 100644 --- a/tests/ut/python/parallel/test_layer_norm.py +++ b/tests/ut/python/parallel/test_layer_norm.py @@ -11,8 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +# ============================================================================ import numpy as np +import pytest import mindspore as ms from mindspore import context, Tensor, Parameter from mindspore.nn import Cell, TrainOneStepCell, Momentum @@ -24,7 +25,7 @@ from mindspore.common.initializer import initializer class Net(Cell): def __init__(self, mul_weight, strategy1=None, strategy2=None, strategy3=None): super().__init__() - self.begin_norm_axis = -1 + self.begin_norm_axis = 2 self.begin_params_axis = 1 self.mul = P.Mul().set_strategy(strategy1) self.layer_norm = P.LayerNorm(self.begin_norm_axis, self.begin_params_axis).set_strategy(strategy2) @@ -64,18 +65,18 @@ def test_layer_norm_data_parallel(): def test_layer_norm_model_parallel(): context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) - strategy1 = ((1, 1, 16, 1), (1, 1, 16, 1)) - strategy2 = ((1, 1, 16, 1), (1, 16, 1), (1, 16, 1)) - strategy3 = ((1, 1, 16, 1), (1, 1, 16, 1)) + strategy1 = ((1, 16, 1, 1), (1, 16, 1, 1)) + strategy2 = ((1, 16, 1, 1), (16, 1, 1), (16, 1, 1)) + strategy3 = ((1, 16, 1, 1), (1, 16, 1, 1)) net = Net(_w, strategy1, strategy2, strategy3) compile(net) def test_layer_norm_hybrid_parallel(): context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) - 
strategy1 = ((2, 2, 4, 1), (2, 2, 4, 1)) - strategy2 = ((2, 2, 4, 1), (2, 4, 1), (2, 4, 1)) - strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) + strategy1 = ((2, 8, 1, 1), (2, 8, 1, 1)) + strategy2 = ((2, 8, 1, 1), (8, 1, 1), (8, 1, 1)) + strategy3 = ((2, 8, 1, 1), (2, 8, 1, 1)) net = Net(_w, strategy1, strategy2, strategy3) compile(net) @@ -89,8 +90,17 @@ def test_layer_norm_auto_parallel(): def test_layer_norm_repeat_calc(): context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) strategy1 = ((2, 2, 4, 1), (2, 2, 4, 1)) - strategy2 = ((1, 2, 2, 1), (2, 2, 1), (2, 2, 1)) + strategy2 = ((2, 2, 1, 1), (2, 1, 1), (2, 1, 1)) strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) net = Net(_w, strategy1, strategy2, strategy3) compile(net) + +def test_layer_norm_wrong_strategy(): + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) + strategy1 = ((2, 2, 4, 1), (2, 2, 4, 1)) + strategy2 = ((1, 2, 1, 2), (2, 1, 2), (2, 1, 2)) + strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1)) + net = Net(_w, strategy1, strategy2, strategy3) + with pytest.raises(RuntimeError): + compile(net) diff --git a/tests/ut/python/parallel/test_prelu.py b/tests/ut/python/parallel/test_prelu.py index d3ad1cc710..5638c9cdbd 100755 --- a/tests/ut/python/parallel/test_prelu.py +++ b/tests/ut/python/parallel/test_prelu.py @@ -166,3 +166,21 @@ def test_prelu_parallel_success4(): w = Tensor(np.random.rand(16),dtype=ms.float32) net = GradWrap(NetWithLoss(Net(strategy))) _executor.compile(net, x, w) + +def test_prelu_parallel_success5(): + class Net(nn.Cell): + def __init__(self, strategy): + super().__init__() + self.prelu = P.PReLU().set_strategy(strategy) + def construct(self, x, y): + out = self.prelu(x, y) + return out + context.reset_auto_parallel_context() + context.set_auto_parallel_context(device_num=64, global_rank=0) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") + strategy = ((2, 4, 4, 2), (1, )) + x = 
Tensor(np.random.rand(4, 16, 32, 64),dtype=ms.float32) + w = Tensor(np.random.rand(1),dtype=ms.float32) + net = GradWrap(NetWithLoss(Net(strategy))) + _executor.compile(net, x, w) + diff --git a/tests/ut/python/parallel/test_reshape_parameter.py b/tests/ut/python/parallel/test_reshape_parameter.py new file mode 100644 index 0000000000..be35e8f43a --- /dev/null +++ b/tests/ut/python/parallel/test_reshape_parameter.py @@ -0,0 +1,75 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import mindspore as ms +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.ops import composite as C +from mindspore import Tensor +from mindspore import context +from mindspore.common.api import _executor +from tests.ut.python.ops.test_math_ops import VirtualLoss +import numpy as np + + +class NetWithLoss(nn.Cell): + def __init__(self, network): + super(NetWithLoss, self).__init__() + self.loss = VirtualLoss() + self.network = network + + def construct(self, x, y): + predict = self.network(x, y) + return self.loss(predict) + + +class GradWrap(nn.Cell): + def __init__(self, network): + super(GradWrap, self).__init__() + self.network = network + + def construct(self, x, y): + return C.grad_all(self.network)(x, y) + + +class Net(nn.Cell): + def __init__(self, strategy): + super().__init__() + self.reshape = P.Reshape() + self.mul = P.Mul().set_strategy(strategy) + self.relu = P.ReLU() + + def construct(self, x, y): + out = self.reshape(x, (10000, 36, 1)) + out = self.mul(out, y) + out = self.relu(out) + return out + + +def test_reshape_parameter_data_parallel(): + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") + strategy = ((8, 1, 1), (8, 1, 1)) + net = GradWrap(NetWithLoss(Net(strategy))) + x = Tensor(np.ones([10000, 36]), dtype=ms.float32) + y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32) + _executor.compile(net, x, y) + + +def test_reshape_parameter_model_parallel(): + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") + strategy = ((4, 2, 1), (4, 2, 1)) + net = GradWrap(NetWithLoss(Net(strategy))) + x = Tensor(np.ones([10000, 36]), dtype=ms.float32) + y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32) + _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_strategy_checkpoint.py b/tests/ut/python/parallel/test_strategy_checkpoint.py index 09f4a54cbf..d95b13f435 100644 --- 
a/tests/ut/python/parallel/test_strategy_checkpoint.py +++ b/tests/ut/python/parallel/test_strategy_checkpoint.py @@ -14,10 +14,10 @@ import numpy as np from mindspore import context -from mindspore.context import set_auto_parallel_context +from mindspore.context import set_auto_parallel_context, reset_auto_parallel_context import mindspore.nn as nn from mindspore.ops import operations as P -from mindspore import Tensor +from mindspore import Tensor, Parameter from tests.ut.python.ops.test_math_ops import VirtualLoss import mindspore as ms from mindspore.common.api import _executor @@ -25,17 +25,15 @@ from mindspore.ops import composite as C # model_parallel test -# export PARALLEL_CHECKPOINT_ON=on -# export PARALLEL_TRAIN_TIMES=4 -def test_six_matmul(): +def test_six_matmul_save(): class NetWithLoss(nn.Cell): def __init__(self, network): super(NetWithLoss, self).__init__() self.loss = VirtualLoss() self.network = network - def construct(self, x1, x2, x3, x4, x5, x6, x7): - predict = self.network(x1, x2, x3, x4, x5, x6, x7) + def construct(self, x1, x6): + predict = self.network(x1, x6) return self.loss(predict) @@ -44,8 +42,8 @@ def test_six_matmul(): super(GradWrap, self).__init__() self.network = network - def construct(self, x1, x2, x3, x4, x5, x6, x7): - return C.grad_all(self.network)(x1, x2, x3, x4, x5, x6, x7) + def construct(self, x1, x6): + return C.grad_all(self.network)(x1, x6) class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6): @@ -56,45 +54,48 @@ def test_six_matmul(): self.matmul4 = P.MatMul().set_strategy(strategy4) self.matmul5 = P.MatMul().set_strategy(strategy5) self.matmul6 = P.MatMul().set_strategy(strategy6) - - def construct(self, x1, x2, x3, x4, x5, x6, x7): - out = self.matmul1(x1, x2) - out = self.matmul2(out, x3) - out = self.matmul3(out, x4) - out = self.matmul4(out, x5) - out = self.matmul5(out, x6) - out = self.matmul6(out, x7) + self.weight1 = Parameter(Tensor(np.ones([32, 64]), 
dtype=ms.float32), name="weight1") + self.weight2 = Parameter(Tensor(np.ones([64, 64]), dtype=ms.float32), name="weight2") + self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3") + self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4") + self.weight5 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight5") + self.weight6 = Parameter(Tensor(np.ones([32, 128]), dtype=ms.float32), name="weight6") + + def construct(self, x1, x6): + out = self.matmul1(x1, self.weight1) + out = self.matmul2(out, self.weight2) + out = self.matmul3(out, self.weight3) + out = self.matmul4(out, self.weight4) + out = self.matmul5(out, self.weight5) + out = out + self.weight6 + out = self.matmul6(out, x6) return out - set_auto_parallel_context(device_num=512, global_rank=0) + reset_auto_parallel_context() + set_auto_parallel_context(device_num=8, global_rank=0, strategy_ckpt_save_file="./strategy_stage1.ckpt") strategy1 = ((8, 1), (1, 1)) strategy2 = ((1, 8), (8, 1)) strategy3 = ((2, 2), (2, 2)) - strategy4 = ((4, 2), (2, 4)) - strategy5 = ((2, 4), (4, 2)) - strategy6 = ((4, 4), (4, 4)) + strategy4 = ((1, 1), (1, 8)) + strategy5 = ((4, 2), (2, 1)) + strategy6 = ((4, 1), (1, 2)) net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3, strategy4, strategy5, strategy6))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - x1 = Tensor(np.ones([128, 32]), dtype=ms.float32) - x2 = Tensor(np.ones([32, 64]), dtype=ms.float32) - x3 = Tensor(np.ones([64, 64]), dtype=ms.float32) - x4 = Tensor(np.ones([64, 128]), dtype=ms.float32) - x5 = Tensor(np.ones([128, 64]), dtype=ms.float32) - x6 = Tensor(np.ones([64, 32]), dtype=ms.float32) - x7 = Tensor(np.ones([32, 32]), dtype=ms.float32) - _executor.compile(net, x1, x2, x3, x4, x5, x6, x7) + x1 = Tensor(np.ones([32, 32]), dtype=ms.float32) + x6 = Tensor(np.ones([128, 32]), dtype=ms.float32) + _executor.compile(net, x1, x6) -# remove matmul2 -def 
test_six_matmul_repeated1(): +# remove matmul2, add matmul7 +def test_six_matmul_load(): class NetWithLoss(nn.Cell): def __init__(self, network): super(NetWithLoss, self).__init__() self.loss = VirtualLoss() self.network = network - def construct(self, x1, x2, x4, x5, x6, x7): - predict = self.network(x1, x2, x4, x5, x6, x7) + def construct(self, x1, x6, x7): + predict = self.network(x1, x6, x7) return self.loss(predict) @@ -103,53 +104,60 @@ def test_six_matmul_repeated1(): super(GradWrap, self).__init__() self.network = network - def construct(self, x1, x2, x4, x5, x6, x7): - return C.grad_all(self.network)(x1, x2, x4, x5, x6, x7) + def construct(self, x1, x6, x7): + return C.grad_all(self.network)(x1, x6, x7) class Net(nn.Cell): - def __init__(self, strategy1, strategy3, strategy4, strategy5, strategy6): + def __init__(self, strategy1, strategy3, strategy4, strategy5, strategy6, strategy7): super().__init__() self.matmul1 = P.MatMul().set_strategy(strategy1) self.matmul3 = P.MatMul().set_strategy(strategy3) self.matmul4 = P.MatMul().set_strategy(strategy4) self.matmul5 = P.MatMul().set_strategy(strategy5) self.matmul6 = P.MatMul().set_strategy(strategy6) - - def construct(self, x1, x2, x4, x5, x6, x7): - out = self.matmul1(x1, x2) - out = self.matmul3(out, x4) - out = self.matmul4(out, x5) - out = self.matmul5(out, x6) - out = self.matmul6(out, x7) + self.matmul7 = P.MatMul().set_strategy(strategy7) + self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1") + self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3") + self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4") + self.weight5 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight5") + self.weight6 = Parameter(Tensor(np.ones([32, 128]), dtype=ms.float32), name="weight6") + + def construct(self, x1, x6, x7): + out = self.matmul1(x1, self.weight1) + out = self.matmul3(out, self.weight3) + out = 
self.matmul4(out, self.weight4) + out = self.matmul5(out, self.weight5) + out = out + self.weight6 + out = self.matmul6(out, x6) + out = self.matmul7(out, x7) return out - set_auto_parallel_context(device_num=512, global_rank=0) + reset_auto_parallel_context() + set_auto_parallel_context(device_num=8, global_rank=0, strategy_ckpt_load_file="./strategy_stage1.ckpt") strategy1 = ((8, 1), (1, 1)) strategy3 = ((8, 1), (1, 1)) strategy4 = ((8, 1), (1, 1)) strategy5 = ((8, 1), (1, 1)) strategy6 = ((8, 1), (1, 1)) - net = GradWrap(NetWithLoss(Net(strategy1, strategy3, strategy4, strategy5, strategy6))) + strategy7 = ((8, 1), (1, 1)) + net = GradWrap(NetWithLoss(Net(strategy1, strategy3, strategy4, strategy5, strategy6, strategy7))) context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - x1 = Tensor(np.ones([128, 32]), dtype=ms.float32) - x2 = Tensor(np.ones([32, 64]), dtype=ms.float32) - x4 = Tensor(np.ones([64, 128]), dtype=ms.float32) - x5 = Tensor(np.ones([128, 64]), dtype=ms.float32) - x6 = Tensor(np.ones([64, 32]), dtype=ms.float32) + x1 = Tensor(np.ones([32, 32]), dtype=ms.float32) + x6 = Tensor(np.ones([128, 32]), dtype=ms.float32) x7 = Tensor(np.ones([32, 32]), dtype=ms.float32) - _executor.compile(net, x1, x2, x4, x5, x6, x7) + _executor.compile(net, x1, x6, x7) -# add matmul7 -def test_six_matmul_repeated2(): +# model_parallel test +def test_six_matmul_save_auto(): class NetWithLoss(nn.Cell): def __init__(self, network): super(NetWithLoss, self).__init__() self.loss = VirtualLoss() self.network = network - def construct(self, x1, x2, x4, x5, x6, x7, x8): - predict = self.network(x1, x2, x4, x5, x6, x7, x8) + def construct(self, x1, x6): + predict = self.network(x1, x6) return self.loss(predict) @@ -158,60 +166,54 @@ def test_six_matmul_repeated2(): super(GradWrap, self).__init__() self.network = network - def construct(self, x1, x2, x4, x5, x6, x7, x8): - return C.grad_all(self.network)(x1, x2, x4, x5, x6, x7, x8) + def construct(self, x1, x6): + 
return C.grad_all(self.network)(x1, x6) class Net(nn.Cell): - def __init__(self, strategy1, strategy3, strategy4, strategy5, strategy6, strategy7): + def __init__(self): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) - self.matmul3 = P.MatMul().set_strategy(strategy3) - self.matmul4 = P.MatMul().set_strategy(strategy4) - self.matmul5 = P.MatMul().set_strategy(strategy5) - self.matmul6 = P.MatMul().set_strategy(strategy6) - self.matmul7 = P.MatMul().set_strategy(strategy7) - - def construct(self, x1, x2, x4, x5, x6, x7, x8): - out = self.matmul1(x1, x2) - out = self.matmul3(out, x4) - out = self.matmul4(out, x5) - out = self.matmul5(out, x6) - out = self.matmul6(out, x7) - out = self.matmul7(out, x8) + self.matmul1 = P.MatMul() + self.matmul2 = P.MatMul() + self.matmul3 = P.MatMul() + self.matmul4 = P.MatMul() + self.matmul5 = P.MatMul() + self.matmul6 = P.MatMul() + self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1") + self.weight2 = Parameter(Tensor(np.ones([64, 64]), dtype=ms.float32), name="weight2") + self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3") + self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4") + self.weight5 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight5") + self.weight6 = Parameter(Tensor(np.ones([32, 128]), dtype=ms.float32), name="weight6") + + def construct(self, x1, x6): + out = self.matmul1(x1, self.weight1) + out = self.matmul2(out, self.weight2) + out = self.matmul3(out, self.weight3) + out = self.matmul4(out, self.weight4) + out = self.matmul5(out, self.weight5) + out = out + self.weight6 + out = self.matmul6(out, x6) return out - set_auto_parallel_context(device_num=512, global_rank=0) - strategy1 = ((8, 1), (1, 1)) - strategy3 = ((8, 1), (1, 1)) - strategy4 = ((8, 1), (1, 1)) - strategy5 = ((8, 1), (1, 1)) - strategy6 = ((8, 1), (1, 1)) - strategy7 = ((8, 1), (1, 1)) - net = 
GradWrap(NetWithLoss(Net(strategy1, strategy3, strategy4, strategy5, strategy6, strategy7))) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - - x1 = Tensor(np.ones([128, 32]), dtype=ms.float32) - x2 = Tensor(np.ones([32, 64]), dtype=ms.float32) - x4 = Tensor(np.ones([64, 128]), dtype=ms.float32) - x5 = Tensor(np.ones([128, 64]), dtype=ms.float32) - x6 = Tensor(np.ones([64, 32]), dtype=ms.float32) - x7 = Tensor(np.ones([32, 32]), dtype=ms.float32) - x8 = Tensor(np.ones([32, 128]), dtype=ms.float32) - _executor.compile(net, x1, x2, x4, x5, x6, x7, x8) + reset_auto_parallel_context() + set_auto_parallel_context(device_num=8, global_rank=0, strategy_ckpt_save_file="./strategy_stage1_auto.ckpt") + net = GradWrap(NetWithLoss(Net())) + context.set_auto_parallel_context(parallel_mode="auto_parallel") + x1 = Tensor(np.ones([32, 32]), dtype=ms.float32) + x6 = Tensor(np.ones([128, 32]), dtype=ms.float32) + _executor.compile(net, x1, x6) -# add scope2 -def test_six_matmul_repeated3(): +# remove matmul2, add matmul7 +def test_six_matmul_load_auto(): class NetWithLoss(nn.Cell): - def __init__(self, network1, network2): + def __init__(self, network): super(NetWithLoss, self).__init__() self.loss = VirtualLoss() - self.network = network1 - self.network2 = network2 + self.network = network - def construct(self, x1, x2, x4, x5, x6, x7, x8, x9, x10): - predict = self.network(x1, x2, x4, x5, x6, x7, x8) - predict = self.network2(predict, x9, x10) + def construct(self, x1, x6, x7): + predict = self.network(x1, x6, x7) return self.loss(predict) @@ -220,62 +222,44 @@ def test_six_matmul_repeated3(): super(GradWrap, self).__init__() self.network = network - def construct(self, x1, x2, x4, x5, x6, x7, x8, x9, x10): - return C.grad_all(self.network)(x1, x2, x4, x5, x6, x7, x8, x9, x10) + def construct(self, x1, x6, x7): + return C.grad_all(self.network)(x1, x6, x7) class Net(nn.Cell): - def __init__(self, strategy1, strategy3, strategy4, strategy5, strategy6, 
strategy7): + def __init__(self, strategy1, strategy3, strategy4, strategy5): super().__init__() self.matmul1 = P.MatMul().set_strategy(strategy1) self.matmul3 = P.MatMul().set_strategy(strategy3) self.matmul4 = P.MatMul().set_strategy(strategy4) self.matmul5 = P.MatMul().set_strategy(strategy5) - self.matmul6 = P.MatMul().set_strategy(strategy6) - self.matmul7 = P.MatMul().set_strategy(strategy7) - - def construct(self, x1, x2, x4, x5, x6, x7, x8): - out = self.matmul1(x1, x2) - out = self.matmul3(out, x4) - out = self.matmul4(out, x5) - out = self.matmul5(out, x6) - out = self.matmul6(out, x7) - out = self.matmul7(out, x8) - return out - - class Net1(nn.Cell): - def __init__(self, strategy1, strategy2): - super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) - self.matmul2 = P.MatMul().set_strategy(strategy2) - - def construct(self, x1, x2, x3): - out = self.matmul1(x1, x2) - out = self.matmul2(out, x3) + self.matmul6 = P.MatMul() + self.matmul7 = P.MatMul() + self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1") + self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3") + self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4") + self.weight5 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight5") + self.weight6 = Parameter(Tensor(np.ones([32, 128]), dtype=ms.float32), name="weight6") + + def construct(self, x1, x6, x7): + out = self.matmul1(x1, self.weight1) + out = self.matmul3(out, self.weight3) + out = self.matmul4(out, self.weight4) + out = self.matmul5(out, self.weight5) + out = out + self.weight6 + out = self.matmul6(out, x6) + out = self.matmul7(out, x7) return out + reset_auto_parallel_context() + set_auto_parallel_context(device_num=8, global_rank=0, strategy_ckpt_load_file="./strategy_stage1_auto.ckpt") + strategy1 = ((2, 2), (2, 2)) + strategy3 = ((2, 2), (2, 2)) + strategy4 = ((2, 2), (2, 2)) + strategy5 = ((2, 2), (2, 2)) + 
net = GradWrap(NetWithLoss(Net(strategy1, strategy3, strategy4, strategy5))) + context.set_auto_parallel_context(parallel_mode="auto_parallel") - set_auto_parallel_context(device_num=512, global_rank=0) - strategy1 = ((8, 1), (1, 1)) - strategy3 = ((8, 1), (1, 1)) - strategy4 = ((8, 1), (1, 1)) - strategy5 = ((8, 1), (1, 1)) - strategy6 = ((8, 1), (1, 1)) - strategy7 = ((8, 1), (1, 1)) - strategy8 = ((8, 1), (1, 1)) - strategy9 = ((8, 1), (1, 1)) - net1 = Net(strategy1, strategy3, strategy4, strategy5, strategy6, strategy7) - net2 = Net1(strategy8, strategy9) - net = GradWrap(NetWithLoss(net1, net2)) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - - x1 = Tensor(np.ones([128, 32]), dtype=ms.float32) - x2 = Tensor(np.ones([32, 64]), dtype=ms.float32) - x4 = Tensor(np.ones([64, 128]), dtype=ms.float32) - x5 = Tensor(np.ones([128, 64]), dtype=ms.float32) - x6 = Tensor(np.ones([64, 32]), dtype=ms.float32) + x1 = Tensor(np.ones([32, 32]), dtype=ms.float32) + x6 = Tensor(np.ones([128, 32]), dtype=ms.float32) x7 = Tensor(np.ones([32, 32]), dtype=ms.float32) - x8 = Tensor(np.ones([32, 128]), dtype=ms.float32) - x9 = Tensor(np.ones([128, 64]), dtype=ms.float32) - x10 = Tensor(np.ones([64, 64]), dtype=ms.float32) - _executor.compile(net, x1, x2, x4, x5, x6, x7, x8, x9, x10) - + _executor.compile(net, x1, x6, x7) \ No newline at end of file diff --git a/tests/ut/python/pynative_mode/ops/test_multitype.py b/tests/ut/python/pynative_mode/ops/test_multitype.py index 0073041b96..58fd31256d 100644 --- a/tests/ut/python/pynative_mode/ops/test_multitype.py +++ b/tests/ut/python/pynative_mode/ops/test_multitype.py @@ -16,6 +16,7 @@ import numpy as np from mindspore.common.api import ms_function +from mindspore.common.parameter import Parameter from mindspore.ops import Primitive from mindspore.ops import composite as C from mindspore.ops import operations as P @@ -24,6 +25,7 @@ from ...ut_filter import non_graph_engine tensor_add = P.TensorAdd() +op_add = 
P.AddN() scala_add = Primitive('scalar_add') add = C.MultitypeFuncGraph('add') @@ -50,5 +52,14 @@ def test_multitype_tensor(): mainf(tensor1, tensor2) +@non_graph_engine +def test_multitype_tuple(): + tensor1 = Tensor(np.array([[1.2, 2.1], [2.2, 3.2]]).astype('float32')) + params1 = Parameter(tensor1, name="params1") + tensor2 = Tensor(np.array([[1.2, 2.1], [2.2, 3.2]]).astype('float32')) + output = op_add((params1, tensor2)) + assert output == Tensor(np.array([[2.4, 4.2], [4.4, 6.4]]).astype('float32')) + + def test_multitype_scalar(): mainf(1, 2) diff --git a/tests/ut/python/pynative_mode/test_cell_bprop.py b/tests/ut/python/pynative_mode/test_cell_bprop.py index c69b80412e..bd9f46d21d 100644 --- a/tests/ut/python/pynative_mode/test_cell_bprop.py +++ b/tests/ut/python/pynative_mode/test_cell_bprop.py @@ -229,12 +229,6 @@ class TwoInputBprop(nn.Cell): def bprop(self, x, y, out, dout): return 5 * x, 8 * y -class TwoInput(nn.Cell): - def __init__(self): - super().__init__() - self.op = P.Mul() - def construct(self, x, y): - return self.op(x, y) class TwoInputWithParameter(nn.Cell): def __init__(self): @@ -301,8 +295,37 @@ class MulAddWithWrongOutputNum(nn.Cell): def construct(self, x, y): return 2 * x + y def bprop(self, x, y, out, dout): - return 2 * dout, 2 * y, out + return 2 * dout, def test_grad_mul_add_with_wrong_output_num(): mul_add = MulAddWithWrongOutputNum() - C.grad_all(mul_add)(1, 2) + with pytest.raises(TypeError): + C.grad_all(mul_add)(1, 2) + +class MulAddWithWrongOutputType(nn.Cell): + def __init__(self): + super(MulAddWithWrongOutputType, self).__init__() + def construct(self, x, y): + return 2 * x + y + def bprop(self, x, y, out, dout): + return 2 * dout, 2 + +def test_grad_mul_add_with_wrong_output_type(): + mul_add = MulAddWithWrongOutputType() + with pytest.raises(TypeError): + C.grad_all(mul_add)(1, Tensor(np.ones([2, 2]))) + + +class MulAddWithWrongOutputShape(nn.Cell): + def __init__(self): + super(MulAddWithWrongOutputShape, 
self).__init__() + self.ones = Tensor(np.ones([2,])) + def construct(self, x, y): + return 2 * x + y + def bprop(self, x, y, out, dout): + return 2, self.ones + +def test_grad_mul_add_with_wrong_output_shape(): + mul_add = MulAddWithWrongOutputShape() + with pytest.raises(TypeError): + C.grad_all(mul_add)(1, Tensor(np.ones([2, 2]))) diff --git a/tests/ut/python/pynative_mode/test_framstruct.py b/tests/ut/python/pynative_mode/test_framstruct.py index eb3b76765a..7e504c405f 100644 --- a/tests/ut/python/pynative_mode/test_framstruct.py +++ b/tests/ut/python/pynative_mode/test_framstruct.py @@ -32,6 +32,8 @@ from ....mindspore_test_framework.utils.check_gradient import ( OperationGradChecker, check_gradient, ScalarGradChecker) from ....mindspore_test_framework.utils.bprop_util import bprop import mindspore.context as context +from mindspore.ops._grad.grad_base import bprop_getters +from mindspore.ops.primitive import prim_attr_register, PrimitiveWithInfer def setup_module(module): @@ -721,3 +723,94 @@ def test_grad_if_defer_inline(): inp = Tensor(np.ones([128, 96]).astype(np.float32)) grads = C.grad_all(network)(inp) assert grads == (Tensor(np.full([128, 96], 0.6, dtype=np.float32)),) + +def test_bprop_with_wrong_output_num(): + class BpropWithWrongOutputNum(PrimitiveWithInfer): + @prim_attr_register + def __init__(self): + super(BpropWithWrongOutputNum, self).__init__('BpropWithWrongOutputNum') + + def __call__(self, x, y): + return x + + def infer_shape(self, x_shape, yshape): + return x_shape + + def infer_dtype(self, x_type, y_type): + return x_type + + @bprop_getters.register(BpropWithWrongOutputNum) + def get_bprop_with_wrong_output_num(self): + """Generate bprop for BpropWithWrongOutputNum""" + def bprop(x, y, out, dout): + return (dout,) + return bprop + + class BpropWithWrongOutputNumCell(nn.Cell): + def __init__(self): + super(BpropWithWrongOutputNumCell, self).__init__() + def construct(self, x, y): + return BpropWithWrongOutputNum()(x, y) + with 
pytest.raises(TypeError): + C.grad_all(BpropWithWrongOutputNumCell())(1, 2) + +def test_bprop_with_wrong_output_type(): + class BpropWithWrongOutputType(PrimitiveWithInfer): + @prim_attr_register + def __init__(self): + super(BpropWithWrongOutputType, self).__init__('BpropWithWrongOutputType') + + def __call__(self, x): + return x + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_type): + return x_type + + @bprop_getters.register(BpropWithWrongOutputType) + def get_bprop_with_wrong_output_type(self): + """Generate bprop for BpropWithWrongOutputType""" + def bprop(x, out, dout): + return (1,) + return bprop + + class BpropWithWrongOutputTypeCell(nn.Cell): + def __init__(self): + super(BpropWithWrongOutputTypeCell, self).__init__() + def construct(self, x): + return BpropWithWrongOutputType()(x) + with pytest.raises(TypeError): + C.grad_all(BpropWithWrongOutputTypeCell())(Tensor(np.ones([64, 10]).astype(np.int32))) + +def test_bprop_with_wrong_output_shape(): + class BpropWithWrongOutputShape(PrimitiveWithInfer): + @prim_attr_register + def __init__(self): + super(BpropWithWrongOutputShape, self).__init__('BpropWithWrongOutputShape') + + def __call__(self, x): + return x + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_type): + return x_type + + @bprop_getters.register(BpropWithWrongOutputShape) + def get_bprop_with_wrong_output_shape(self): + """Generate bprop for BpropWithWrongOutputShape""" + ones = Tensor(np.ones([2,]).astype(np.int32)) + def bprop(x, out, dout): + return (ones,) + return bprop + + class BpropWithWrongOutputShapeCell(nn.Cell): + def __init__(self): + super(BpropWithWrongOutputShapeCell, self).__init__() + def construct(self, x): + return BpropWithWrongOutputShape()(x) + with pytest.raises(TypeError): + C.grad_all(BpropWithWrongOutputShapeCell())(Tensor(np.ones([64, 10]).astype(np.int32))) diff --git a/tests/ut/python/pynative_mode/test_insert_grad_of.py 
b/tests/ut/python/pynative_mode/test_insert_grad_of.py index a11c5fa2b1..0527365a98 100644 --- a/tests/ut/python/pynative_mode/test_insert_grad_of.py +++ b/tests/ut/python/pynative_mode/test_insert_grad_of.py @@ -79,7 +79,7 @@ def test_InsertGradientOf_2(): summary = P.ScalarSummary() def debug_gradient(dx): """ debug_gradient """ - dx = summary("dx: ", dx) + summary("dx: ", dx) return dx debug = P.InsertGradientOf(debug_gradient) diff --git a/tests/ut/python/pynative_mode/test_multigraph_sink.py b/tests/ut/python/pynative_mode/test_multigraph_sink.py new file mode 100644 index 0000000000..0c69c7c2c1 --- /dev/null +++ b/tests/ut/python/pynative_mode/test_multigraph_sink.py @@ -0,0 +1,119 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" test_multigraph_sink """ +import pytest +import numpy as np +import mindspore.nn as nn +import mindspore.context as context +from mindspore.common.tensor import Tensor +from mindspore.common import dtype as mstype +from mindspore.common import ms_function +from mindspore.ops import operations as P + + +def setup_module(module): + context.set_context(mode = context.PYNATIVE_MODE, save_graphs = True, device_target = "Ascend") + context.set_context(enable_task_sink = True, device_id = 0) + + +c1 = Tensor([2], mstype.int32) +c2 = Tensor([14], mstype.int32) +c3 = Tensor([1], mstype.int32) +c4 = Tensor([0], mstype.int32) +c5 = Tensor([14], mstype.int32) + + +@ms_function +def simple_if(x, y, z): + if x < y: + x = x + 1 + else: + x = x + 2 + x = x + 3 + return x + + +@ms_function +def if_by_if(x, y, z): + if x < y: + x = x + 1 + if y > x: + x = x + 2 + x = x + 3 + return x + + +@ms_function +def if_in_if(x, y, z): + out = c4 + if x < y: + z = c4 + c4 + if z < y: + z = z + 2 + out = out + z + x = x + 3 + out = out + x + return out + + +@ms_function +def simple_while(x, y, z): + y = y + 4 + while x < y: + x = x + 1 + x = x + 3 + return x + + +@ms_function +def while_by_while(x, y, z): + while x < y: + x = x + 1 + while z < c5: + z = z + 1 + x = x + 1 + x = x + 1 + return x + + +def test_simple_if(): + output = simple_if(c1, c2, c3) + expect = Tensor([6], mstype.int32) + assert output == expect + + +def test_if_by_if(): + output = if_by_if(c1, c2, c3) + expect = Tensor([8], mstype.int32) + assert output == expect + + +def test_if_in_if(): + output = if_in_if(c1, c2, c3) + expect = Tensor([7], mstype.int32) + assert output == expect + + +def test_simple_while(): + output = simple_while(c1, c2, c3) + expect = Tensor([21], mstype.int32) + assert output == expect + + +def test_while_by_while(): + output = while_by_while(c1, c2, c3) + expect = Tensor([28], mstype.int32) + assert output == expect 
+ diff --git a/tests/ut/python/train/summary/test_histogram_summary.py b/tests/ut/python/train/summary/test_histogram_summary.py index 50204cd757..53c62990b1 100644 --- a/tests/ut/python/train/summary/test_histogram_summary.py +++ b/tests/ut/python/train/summary/test_histogram_summary.py @@ -22,6 +22,7 @@ import numpy as np from mindspore.common.tensor import Tensor from mindspore.train.summary.summary_record import SummaryRecord, _cache_summary_tensor_data +from mindspore.train.summary._summary_adapter import _calc_histogram_bins from .summary_reader import SummaryReader CUR_DIR = os.getcwd() @@ -139,7 +140,7 @@ def test_histogram_summary_same_value(): event = reader.read_event() LOG.debug(event) - assert len(event.summary.value[0].histogram.buckets) == 1 + assert len(event.summary.value[0].histogram.buckets) == _calc_histogram_bins(dim1 * dim2) def test_histogram_summary_high_dims(): diff --git a/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py b/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py index 23c85d398c..ab1eb88d96 100644 --- a/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py +++ b/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py @@ -22,6 +22,7 @@ import os import logging import random import numpy as np +import pytest from mindspore.train.summary.summary_record import SummaryRecord from mindspore.common.tensor import Tensor import mindspore.nn as nn @@ -180,7 +181,8 @@ def test_summary_use_invalid_tag_None(): def test_summary_use_invalid_tag_Bool(): log.debug("begin test_summary_use_invalid_tag_Bool") net = SummaryDemoTag(True, True, True) - run_case(net) + with pytest.raises(TypeError): + run_case(net) log.debug("finished test_summary_use_invalid_tag_Bool") @@ -196,7 +198,8 @@ def test_summary_use_invalid_tag_null(): def test_summary_use_invalid_tag_Int(): log.debug("begin test_summary_use_invalid_tag_Int") net = SummaryDemoTag(1, 2, 3) - run_case(net) + with 
pytest.raises(TypeError): + run_case(net) log.debug("finished test_summary_use_invalid_tag_Int") diff --git a/tests/ut/python/utils/test_serialize.py b/tests/ut/python/utils/test_serialize.py index cc6f346b77..59a4b93833 100644 --- a/tests/ut/python/utils/test_serialize.py +++ b/tests/ut/python/utils/test_serialize.py @@ -362,6 +362,31 @@ def test_lenet5_onnx_export(): net = LeNet5() export(net, input, file_name='lenet5.onnx', file_format='ONNX') +class DefinedNet(nn.Cell): + """simple Net definition with maxpoolwithargmax.""" + def __init__(self, num_classes=10): + super(DefinedNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=0, weight_init="zeros") + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU() + self.maxpool = P.MaxPoolWithArgmax(padding="same", ksize=2, strides=2) + self.flatten = nn.Flatten() + self.fc = nn.Dense(int(56*56*64), num_classes) + + def construct(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x, argmax = self.maxpool(x) + x = self.flatten(x) + x = self.fc(x) + return x + +def test_net_onnx_maxpoolwithargmax_export(): + input = Tensor(np.ones([1, 3, 224, 224]).astype(np.float32) * 0.01) + net = DefinedNet() + export(net, input, file_name='definedNet.onnx', file_format='ONNX') + @run_on_onnxruntime def test_lenet5_onnx_load_run(): diff --git a/tests/vm_impl/math_ops_vm_impl.py b/tests/vm_impl/math_ops_vm_impl.py index 01df0b824e..e42ba92d5e 100644 --- a/tests/vm_impl/math_ops_vm_impl.py +++ b/tests/vm_impl/math_ops_vm_impl.py @@ -172,7 +172,7 @@ def vm_impl_equal(self): x = x.asnumpy() y = y.asnumpy() out = vm.equal(x, y) - return Tensor(out) + return Tensor(np.array(out)) return vm_impl @@ -183,7 +183,7 @@ def vm_impl_not_equal(self): x = x.asnumpy() y = y.asnumpy() out = vm.not_equal(x, y) - return Tensor(out) + return Tensor(np.array(out)) return vm_impl @@ -194,7 +194,7 @@ def vm_impl_greater(self): x = x.asnumpy() y = y.asnumpy() out = vm.greater(x, y) - return Tensor(out) + 
return Tensor(np.array(out)) return vm_impl @vm_impl_getters.register(P.Maximum) @@ -219,17 +219,17 @@ def vm_impl_minimum(self): return vm_impl @vm_impl_getters.register(P.Less) -def vm_impl_greater(self): +def vm_impl_less(self): """Generate vm_impl function for Less""" def vm_impl(x, y): x = x.asnumpy() y = y.asnumpy() out = vm.less(x, y) - return Tensor(out) + return Tensor(np.array(out)) return vm_impl @vm_impl_getters.register(P.ScalarCast) -def vm_impl_greater(self): +def vm_impl_scalar_cast(self): """Generate vm_impl function for ScalarCast""" def vm_impl(x, t): np_type = dtype_to_nptype(t) diff --git a/tests/vm_impl/vm_me.py b/tests/vm_impl/vm_me.py index da7fc1ecbe..82b0324fb5 100644 --- a/tests/vm_impl/vm_me.py +++ b/tests/vm_impl/vm_me.py @@ -16,7 +16,7 @@ import numpy as np from mindspore._checkparam import Rel -from mindspore._checkparam import ParamValidator as validator +from mindspore._checkparam import Validator as validator def avg_pooling(x, pool_h, pool_w, stride): @@ -32,7 +32,7 @@ def avg_pooling(x, pool_h, pool_w, stride): Returns: numpy.ndarray, an output array after applying average pooling on input array. 
""" - validator.check_integer("stride", stride, 0, Rel.GT) + validator.check_integer("stride", stride, 0, Rel.GT, None) num, channel, height, width = x.shape out_h = (height - pool_h)//stride + 1 out_w = (width - pool_w)//stride + 1 @@ -217,7 +217,7 @@ def conv2d(x, weight, bias=None, stride=1, pad=0, dilation=1, groups=1, padding_mode='zeros'): """Convolution 2D.""" # pylint: disable=unused-argument - validator.check_type('stride', stride, (int, tuple)) + validator.check_value_type('stride', stride, (int, tuple), None) if isinstance(stride, int): stride = (stride, stride) elif len(stride) == 4: @@ -229,7 +229,7 @@ def conv2d(x, weight, bias=None, stride=1, pad=0, f"a tuple of two positive int numbers, but got {stride}") stride_h = stride[0] stride_w = stride[1] - validator.check_type('dilation', dilation, (int, tuple)) + validator.check_value_type('dilation', dilation, (int, tuple), None) if isinstance(dilation, int): dilation = (dilation, dilation) elif len(dilation) == 4: @@ -384,7 +384,7 @@ def matmul(x, w, b=None): def max_pooling(x, pool_h, pool_w, stride): """Max pooling.""" - validator.check_integer("stride", stride, 0, Rel.GT) + validator.check_integer("stride", stride, 0, Rel.GT, None) num, channel, height, width = x.shape out_h = (height - pool_h)//stride + 1 out_w = (width - pool_w)//stride + 1 @@ -427,7 +427,7 @@ def max_pool_grad_with_argmax(x, dout, arg_max, pool_h, pool_w, stride): def max_pool_with_argmax(x, pool_h, pool_w, stride): """Max pooling with argmax.""" - validator.check_integer("stride", stride, 0, Rel.GT) + validator.check_integer("stride", stride, 0, Rel.GT, None) num, channel, height, width = x.shape out_h = (height - pool_h)//stride + 1 out_w = (width - pool_w)//stride + 1