Merge pull request !93 from changzherui/syn-code430tags/v0.3.0-alpha
| @@ -1,5 +1,10 @@ | |||
| cmake_minimum_required(VERSION 3.14) | |||
| project (MindSpore) | |||
| if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0) | |||
| message(FATAL_ERROR "GCC vesion ${CMAKE_CXX_COMPILER_VERSION} must not be less than 7.3.0") | |||
| endif () | |||
| include(${CMAKE_SOURCE_DIR}/cmake/options.cmake) | |||
| set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/modules/") | |||
| @@ -18,7 +23,16 @@ set(PYBIND11_CPP_STANDARD -std=c++17) | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPTION_CXX_FLAGS}") | |||
| find_package(Threads) | |||
| find_package(Patch) | |||
| if (DEFINED ENV{MS_PATCH_PATH}) | |||
| find_program(Patch_EXECUTABLE patch PATHS $ENV{MS_PATCH_PATH}) | |||
| set(Patch_FOUND ${Patch_EXECUTABLE}) | |||
| else () | |||
| find_package(Patch) | |||
| endif () | |||
| if (NOT Patch_FOUND) | |||
| message(FATAL_ERROR "Patch not found, please set env variable MS_PATCH_PATH, " | |||
| "usually locate in GIT_PATH/usr/bin in windows") | |||
| endif () | |||
| message(PATCH_EXECUTABLE = ${Patch_EXECUTABLE}) | |||
| include(${CMAKE_SOURCE_DIR}/cmake/mind_expression.cmake) | |||
| @@ -1,7 +1,7 @@ | |||
|  | |||
| ============================================================ | |||
| - [What is MindSpore?](#what-is-mindspore) | |||
| - [What Is MindSpore?](#what-is-mindspore) | |||
| - [Automatic Differentiation](#automatic-differentiation) | |||
| - [Automatic Parallel](#automatic-parallel) | |||
| - [Installation](#installation) | |||
| @@ -29,7 +29,7 @@ enrichment of the AI software/hardware application ecosystem. | |||
| <img src="docs/MindSpore-architecture.png" alt="MindSpore Architecture" width="600"/> | |||
| For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.1.0-alpha/architecture.html). | |||
| For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.2.0-alpha/architecture.html). | |||
| ### Automatic Differentiation | |||
| @@ -76,13 +76,36 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex | |||
| 1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package. | |||
| ``` | |||
| pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl | |||
| pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl | |||
| ``` | |||
| 2. Run the following command to verify the install. | |||
| ```python | |||
| import numpy as np | |||
| import mindspore.context as context | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="CPU") | |||
| class Mul(nn.Cell): | |||
| def __init__(self): | |||
| super(Mul, self).__init__() | |||
| self.mul = P.Mul() | |||
| def construct(self, x, y): | |||
| return self.mul(x, y) | |||
| x = Tensor(np.array([1.0, 2.0, 3.0]).astype(np.float32)) | |||
| y = Tensor(np.array([4.0, 5.0, 6.0]).astype(np.float32)) | |||
| mul = Mul() | |||
| print(mul(x, y)) | |||
| ``` | |||
| python -c 'import mindspore' | |||
| ``` | |||
| [ 4. 10. 18.] | |||
| ``` | |||
| ### From Source | |||
| @@ -96,20 +119,22 @@ currently the containerized build options are supported as follows: | |||
| | Hardware Platform | Docker Image Repository | Tag | Description | | |||
| | :---------------- | :---------------------- | :-- | :---------- | | |||
| | CPU | `mindspore/mindspore-cpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` CPU release. | | |||
| | CPU | `mindspore/mindspore-cpu` | `x.y.z` | Production environment with pre-installed MindSpore `x.y.z` CPU release. | | |||
| | | | `devel` | Development environment provided to build MindSpore (with `CPU` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. | | |||
| | | | `runtime` | Runtime environment provided to install MindSpore binary package with `CPU` backend. | | |||
| | GPU | `mindspore/mindspore-gpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` GPU release. | | |||
| | GPU | `mindspore/mindspore-gpu` | `x.y.z` | Production environment with pre-installed MindSpore `x.y.z` GPU release. | | |||
| | | | `devel` | Development environment provided to build MindSpore (with `GPU CUDA10.1` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. | | |||
| | | | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU` backend. | | |||
| | | | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU CUDA10.1` backend. | | |||
| | Ascend | <center>—</center> | <center>—</center> | Coming soon. | | |||
| > **NOTICE:** For GPU `devel` docker image, it's NOT suggested to directly install the whl package after building from the source, instead we strongly RECOMMEND you transfer and install the whl package inside GPU `runtime` docker image. | |||
| * CPU | |||
| For `CPU` backend, you can directly pull and run the image using the below command: | |||
| For `CPU` backend, you can directly pull and run the latest stable image using the below command: | |||
| ``` | |||
| docker pull mindspore/mindspore-cpu:0.1.0-alpha | |||
| docker run -it mindspore/mindspore-cpu:0.1.0-alpha python -c 'import mindspore' | |||
| docker pull mindspore/mindspore-cpu:0.2.0-alpha | |||
| docker run -it mindspore/mindspore-cpu:0.2.0-alpha /bin/bash | |||
| ``` | |||
| * GPU | |||
| @@ -124,20 +149,21 @@ currently the containerized build options are supported as follows: | |||
| sudo systemctl restart docker | |||
| ``` | |||
| Then you can pull and run the image using the below command: | |||
| Then you can pull and run the latest stable image using the below command: | |||
| ``` | |||
| docker pull mindspore/mindspore-gpu:0.1.0-alpha | |||
| docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.1.0-alpha /bin/bash | |||
| docker pull mindspore/mindspore-gpu:0.2.0-alpha | |||
| docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.2.0-alpha /bin/bash | |||
| ``` | |||
| To test if the docker image works, please execute the python code below and check the output: | |||
| ```python | |||
| import numpy as np | |||
| import mindspore.context as context | |||
| from mindspore import Tensor | |||
| from mindspore.ops import functional as F | |||
| import mindspore.context as context | |||
| context.set_context(device_target="GPU") | |||
| x = Tensor(np.ones([1,3,3,4]).astype(np.float32)) | |||
| y = Tensor(np.ones([1,3,3,4]).astype(np.float32)) | |||
| print(F.tensor_add(x, y)) | |||
| @@ -157,11 +183,11 @@ currently the containerized build options are supported as follows: | |||
| ``` | |||
| If you want to learn more about the building process of MindSpore docker images, | |||
| please check out `docker` folder for the details. | |||
| please check out [docker](docker/README.md) repo for the details. | |||
| ## Quickstart | |||
| See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.1.0-alpha/quick_start/quick_start.html) | |||
| See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.2.0-alpha/quick_start/quick_start.html) | |||
| to implement the image classification. | |||
| ## Docs | |||
| @@ -1,3 +1,75 @@ | |||
| # Release 0.2.0-alpha | |||
| ## Major Features and Improvements | |||
| ### Ascend 910 Training and Inference Framework | |||
| * New models | |||
| * MobileNetV2: Inverted Residuals and Linear Bottlenecks. | |||
| * ResNet101: Deep Residual Learning for Image Recognition. | |||
| * Frontend and User Interface | |||
| * Support for all python comparison operators. | |||
| * Support for math operators **,//,%. Support for other python operators like and/or/not/is/is not/ in/ not in. | |||
| * Support for the gradients of function with variable arguments. | |||
| * Support for tensor indexing assignment for certain indexing type. | |||
| * Support for dynamic learning rate. | |||
| * User interfaces change log | |||
| * DepthwiseConv2dNative, DepthwiseConv2dNativeBackpropFilter, DepthwiseConv2dNativeBackpropInput([!424](https://gitee.com/mindspore/mindspore/pulls/424)) | |||
| * ReLU6, ReLU6Grad([!224](https://gitee.com/mindspore/mindspore/pulls/224)) | |||
| * GeneratorDataset([!183](https://gitee.com/mindspore/mindspore/pulls/183)) | |||
| * VOCDataset([!477](https://gitee.com/mindspore/mindspore/pulls/477)) | |||
| * MindDataset, PKSampler([!514](https://gitee.com/mindspore/mindspore/pulls/514)) | |||
| * map([!506](https://gitee.com/mindspore/mindspore/pulls/506)) | |||
| * Conv([!226](https://gitee.com/mindspore/mindspore/pulls/226)) | |||
| * Adam([!253](https://gitee.com/mindspore/mindspore/pulls/253)) | |||
| * _set_fusion_strategy_by_idx, _set_fusion_strategy_by_size([!189](https://gitee.com/mindspore/mindspore/pulls/189)) | |||
| * CheckpointConfig([!122](https://gitee.com/mindspore/mindspore/pulls/122)) | |||
| * Constant([!54](https://gitee.com/mindspore/mindspore/pulls/54)) | |||
| * Executor and Performance Optimization | |||
| * Support parallel execution of data prefetching and forward/backward computing. | |||
| * Support parallel execution of gradient aggregation and forward/backward computing in distributed training scenarios. | |||
| * Support operator fusion optimization. | |||
| * Optimize compilation process and improve the performance. | |||
| * Data processing, augmentation, and save format | |||
| * Support multi-process of GeneratorDataset/PyFunc for high performance | |||
| * Support variable batchsize | |||
| * Support new Dataset operators, such as filter,skip,take,TextLineDataset | |||
| ### Other Hardware Support | |||
| * GPU platform | |||
| * Use dynamic memory pool by default on GPU. | |||
| * Support parallel execution of computation and communication. | |||
| * Support continuous address allocation by memory pool. | |||
| * CPU platform | |||
| * Support for windows 10 OS. | |||
| ## Bugfixes | |||
| * Models | |||
| * Fix mixed precision bug for VGG16 model ([!629](https://gitee.com/mindspore/mindspore/pulls/629)). | |||
| * Python API | |||
| * Fix ControlDepend operator bugs on CPU and GPU ([!396](https://gitee.com/mindspore/mindspore/pulls/396)). | |||
| * Fix ArgMinWithValue operator bugs ([!338](https://gitee.com/mindspore/mindspore/pulls/338)). | |||
| * Fix Dense operator bugs on PyNative mode ([!276](https://gitee.com/mindspore/mindspore/pulls/276)). | |||
| * Fix MatMul operator bugs on PyNative mode ([!288](https://gitee.com/mindspore/mindspore/pulls/288)). | |||
| * Executor | |||
| * Fix operator selection bugs and make it general ([!300](https://gitee.com/mindspore/mindspore/pulls/300)). | |||
| * Fix memory reuse bug for GetNext op ([!291](https://gitee.com/mindspore/mindspore/pulls/291)). | |||
| * GPU platform | |||
| * Fix memory allocation in multi-graph scenarios ([!444](https://gitee.com/mindspore/mindspore/pulls/444)). | |||
| * Fix bias_add_grad under fp16 precision ([!598](https://gitee.com/mindspore/mindspore/pulls/598)). | |||
| * Fix support for fp16 kernels on nvidia 1080Ti([!571](https://gitee.com/mindspore/mindspore/pulls/571)). | |||
| * Fix parsing of tuple type parameters ([!316](https://gitee.com/mindspore/mindspore/pulls/316)). | |||
| * Data processing | |||
| * Fix TypeErrors about can't pickle mindspore._c_dataengine.DEPipeline objects([!434](https://gitee.com/mindspore/mindspore/pulls/434)). | |||
| * Add TFRecord file verification([!406](https://gitee.com/mindspore/mindspore/pulls/406)). | |||
| ## Contributors | |||
| Thanks goes to these wonderful people: | |||
| Alexey_Shevlyakov, Cathy, Chong, Hoai, Jonathan, Junhan, JunhanHu, Peilin, SanjayChan, StrawNoBerry, VectorSL, Wei, WeibiaoYu, Xiaoda, Yanjun, YuJianfeng, ZPaC, Zhang, ZhangQinghua, ZiruiWu, amongo, anthonyaje, anzhengqi, biffex, caifubi, candanzg, caojian05, casgj, cathwong, ch-l, chang, changzherui, chenfei, chengang, chenhaozhe, chenjianping, chentingting, chenzomi, chujinjin, dengwentao, dinghao, fanglei, fary86, flywind, gaojing, geekun, gengdongjie, ghzl, gong, gongchen, gukecai, guohongzilong, guozhijian, gziyan, h.farahat, hesham, huangdongrun, huanghui, jiangzhiwen, jinyaohui, jjfeing, jojobugfree, jonathan_yan, jonyguo, jzw, kingfo, kisnwang, laiyongqiang, leonwanghui, lianliguang, lichen, lichenever, limingqi107, liubuyu, liuxiao, liyong, liyong126, lizhenyu, lupengcheng, lvliang, maoweiyong, ms_yan, mxm, ougongchang, panfengfeng, panyifeng, pengyanjun, penn, qianlong, seatea, simson, suteng, thlinh, vlne-v1, wangchengke, wanghua, wangnan39, wangqiuliang, wenchunjiang, wenkai, wukesong, xiefangqi, xulei, yanghaitao, yanghaoran, yangjie159, yangzhenzhang, yankai10, yanzhenxiang2020, yao_yf, yoonlee666, zhangbuxue, zhangz0911gm, zhangzheng, zhaojichen, zhaoting, zhaozhenlong, zhongligeng, zhoufeng, zhousiyi, zjun, zyli2020, yuhuijun, limingqi107, lizhenyu, chenweifeng. | |||
| Contributions of any kind are welcome! | |||
| # Release 0.1.0-alpha | |||
| ## Main Features | |||
| @@ -14,27 +14,27 @@ | |||
| @rem ============================================================================ | |||
| @echo off | |||
| @title mindspore_build | |||
| SET BASEPATH=%CD% | |||
| IF NOT EXIST %BASEPATH%/build ( | |||
| md "build" | |||
| ) | |||
| cd %BASEPATH%/build | |||
| SET BUILD_PATH=%CD% | |||
| IF NOT EXIST %BUILD_PATH%/mindspore ( | |||
| md "mindspore" | |||
| ) | |||
| cd %CD%/mindspore | |||
| cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_CPU=ON -DENABLE_MINDDATA=ON -DUSE_GLOG=ON -G "CodeBlocks - MinGW Makefiles" ../.. | |||
| IF NOT %errorlevel% == 0 ( | |||
| echo "cmake fail." | |||
| goto run_fail | |||
| ) | |||
| IF "%1%" == "" ( | |||
| cmake --build . --target package -- -j6 | |||
| ) ELSE ( | |||
| @@ -433,9 +433,9 @@ build_predict() | |||
| cd "${BASEPATH}/predict/output/" | |||
| if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then | |||
| tar -cf MSPredict-0.1.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed | |||
| tar -cf MSPredict-0.2.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed | |||
| elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then | |||
| tar -cf MSPredict-0.1.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed | |||
| tar -cf MSPredict-0.2.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed | |||
| fi | |||
| echo "success to build predict project!" | |||
| } | |||
| @@ -4,7 +4,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Windows") | |||
| mindspore_add_pkg(onednn | |||
| VER 1.1.1 | |||
| LIBS dnnl mkldnn | |||
| HEAD_ONLY ./ | |||
| HEAD_ONLY ./include | |||
| RELEASE on | |||
| URL https://github.com/oneapi-src/oneDNN/releases/download/v1.1.1/dnnl_win_1.1.1_cpu_vcomp.zip | |||
| MD5 ecaab9ed549643067699c80e5cea1c23) | |||
| @@ -38,17 +38,17 @@ function(ms_protobuf_generate c_var h_var) | |||
| get_filename_component(file_dir ${abs_file} PATH) | |||
| file(RELATIVE_PATH rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${file_dir}) | |||
| list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc") | |||
| list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h") | |||
| list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc") | |||
| list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h") | |||
| add_custom_command( | |||
| OUTPUT "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc" | |||
| "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h" | |||
| OUTPUT "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc" | |||
| "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h" | |||
| WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} | |||
| COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/${rel_path}" | |||
| COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} | |||
| COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto" | |||
| COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto ${abs_file} | |||
| DEPENDS protobuf::protoc ${abs_file} | |||
| COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) | |||
| COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM) | |||
| endforeach() | |||
| set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) | |||
| @@ -71,40 +71,38 @@ function(ms_protobuf_generate_py c_var h_var py_var) | |||
| get_filename_component(abs_file ${file} ABSOLUTE) | |||
| get_filename_component(file_name ${file} NAME_WE) | |||
| get_filename_component(file_dir ${abs_file} PATH) | |||
| file(RELATIVE_PATH rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${file_dir}) | |||
| list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc") | |||
| list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h") | |||
| list(APPEND ${py_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py") | |||
| list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc") | |||
| list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h") | |||
| list(APPEND ${py_var} "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py") | |||
| if (WIN32) | |||
| add_custom_command( | |||
| OUTPUT "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc" | |||
| "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h" | |||
| "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" | |||
| OUTPUT "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc" | |||
| "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h" | |||
| "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" | |||
| WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} | |||
| COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/${rel_path}" | |||
| COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} | |||
| COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} | |||
| COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} | |||
| COMMAND perl -pi.bak -e "s/import (.+_pb2.*)/from . import \\1/" "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" | |||
| COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/" | |||
| COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto" | |||
| COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto ${abs_file} | |||
| COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/proto ${abs_file} | |||
| COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/proto ${abs_file} | |||
| COMMAND perl -pi.bak -e "s/import (.+_pb2.*)/from . import \\1/" "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" | |||
| COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/" | |||
| DEPENDS protobuf::protoc ${abs_file} | |||
| COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) | |||
| else() | |||
| add_custom_command( | |||
| OUTPUT "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc" | |||
| "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h" | |||
| "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" | |||
| OUTPUT "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc" | |||
| "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h" | |||
| "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" | |||
| WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} | |||
| COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/${rel_path}" | |||
| COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} | |||
| COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} | |||
| COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file} | |||
| COMMAND perl -pi -e "s/import (.+_pb2.*)/from . import \\1/" "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" | |||
| COMMAND cp "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/" | |||
| COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto" | |||
| COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto ${abs_file} | |||
| COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/proto ${abs_file} | |||
| COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/proto ${abs_file} | |||
| COMMAND perl -pi -e "s/import (.+_pb2.*)/from . import \\1/" "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" | |||
| COMMAND cp "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/" | |||
| DEPENDS protobuf::protoc ${abs_file} | |||
| COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) | |||
| COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM) | |||
| endif() | |||
| endforeach() | |||
| set_source_files_properties(${${c_var}} ${${h_var}} ${${py_var}} PROPERTIES GENERATED TRUE) | |||
| @@ -153,10 +153,14 @@ endif () | |||
| if (CMAKE_SYSTEM_NAME MATCHES "Windows") | |||
| get_filename_component(CXX_DIR ${CMAKE_CXX_COMPILER} PATH) | |||
| file(GLOB CXX_LIB_LIST ${CXX_DIR}/*.dll) | |||
| string(REPLACE "\\" "/" SystemRoot $ENV{SystemRoot}) | |||
| file(GLOB VC_LIB_LIST ${SystemRoot}/System32/msvcp140.dll ${SystemRoot}/System32/vcomp140.dll) | |||
| file(GLOB JPEG_LIB_LIST ${jpeg_turbo_LIBPATH}/*.dll) | |||
| file(GLOB SQLITE_LIB_LIST ${sqlite_LIBPATH}/*.dll) | |||
| install( | |||
| FILES ${CXX_LIB_LIST} ${JPEG_LIB_LIST} ${SQLITE_LIB_LIST} | |||
| FILES ${CXX_LIB_LIST} ${JPEG_LIB_LIST} ${SQLITE_LIB_LIST} ${VC_LIB_LIST} | |||
| DESTINATION ${INSTALL_LIB_DIR} | |||
| COMPONENT mindspore | |||
| ) | |||
| @@ -4,14 +4,13 @@ This folder hosts all the `Dockerfile` to build MindSpore container images with | |||
| ### MindSpore docker build command | |||
| * CPU | |||
| | Hardware Platform | Version | Build Command | | |||
| | :---------------- | :------ | :------------ | | |||
| | CPU | `x.y.z` | cd mindspore-cpu/x.y.z && docker build . -t mindspore/mindspore-cpu:x.y.z | | |||
| | | `devel` | cd mindspore-cpu/devel && docker build . -t mindspore/mindspore-cpu:devel | | |||
| | | `runtime` | cd mindspore-cpu/runtime && docker build . -t mindspore/mindspore-cpu:runtime | | |||
| | GPU | `x.y.z` | cd mindspore-gpu/x.y.z && docker build . -t mindspore/mindspore-gpu:x.y.z | | |||
| | | `devel` | cd mindspore-gpu/devel && docker build . -t mindspore/mindspore-gpu:devel | | |||
| | | `runtime` | cd mindspore-gpu/runtime && docker build . -t mindspore/mindspore-gpu:runtime | | |||
| ``` | |||
| cd mindspore-cpu/0.1.0-alpha && docker build . -t mindspore/mindspore-cpu:0.1.0-alpha | |||
| ``` | |||
| * GPU | |||
| ``` | |||
| cd mindspore-gpu/0.1.0-alpha && docker build . -t mindspore/mindspore-gpu:0.1.0-alpha | |||
| ``` | |||
| > **NOTICE:** The `x.y.z` version shown above should be replaced with the real version number. | |||
| @@ -0,0 +1,67 @@ | |||
| FROM ubuntu:18.04 | |||
| MAINTAINER leonwanghui <leon.wanghui@huawei.com> | |||
| # Set env | |||
| ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5 | |||
| ENV PATH /usr/local/bin:$PATH | |||
| # Install base tools | |||
| RUN apt update \ | |||
| && DEBIAN_FRONTEND=noninteractive apt install -y \ | |||
| vim \ | |||
| wget \ | |||
| curl \ | |||
| xz-utils \ | |||
| net-tools \ | |||
| openssh-client \ | |||
| git \ | |||
| ntpdate \ | |||
| tzdata \ | |||
| tcl \ | |||
| sudo \ | |||
| bash-completion | |||
| # Install compile tools | |||
| RUN DEBIAN_FRONTEND=noninteractive apt install -y \ | |||
| gcc \ | |||
| g++ \ | |||
| zlibc \ | |||
| make \ | |||
| libgmp-dev \ | |||
| patch \ | |||
| autoconf \ | |||
| libtool \ | |||
| automake \ | |||
| flex | |||
| # Set bash | |||
| RUN echo "dash dash/sh boolean false" | debconf-set-selections | |||
| RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash | |||
| # Install python (v3.7.5) | |||
| RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ | |||
| libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \ | |||
| && cd /tmp \ | |||
| && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \ | |||
| && tar -xvf v3.7.5.tar.gz \ | |||
| && cd /tmp/cpython-3.7.5 \ | |||
| && mkdir -p ${PYTHON_ROOT_PATH} \ | |||
| && ./configure --prefix=${PYTHON_ROOT_PATH} \ | |||
| && make -j4 \ | |||
| && make install -j4 \ | |||
| && rm -f /usr/local/bin/python \ | |||
| && rm -f /usr/local/bin/pip \ | |||
| && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ | |||
| && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ | |||
| && rm -rf /tmp/cpython-3.7.5 \ | |||
| && rm -f /tmp/v3.7.5.tar.gz | |||
| # Set pip source | |||
| RUN mkdir -pv /root/.pip \ | |||
| && echo "[global]" > /root/.pip/pip.conf \ | |||
| && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \ | |||
| && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf | |||
| # Install MindSpore cpu whl package | |||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl | |||
| @@ -0,0 +1,83 @@ | |||
| FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 | |||
| MAINTAINER leonwanghui <leon.wanghui@huawei.com> | |||
| # Set env | |||
| ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5 | |||
| ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5 | |||
| ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH | |||
| ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH | |||
| # Install base tools | |||
| RUN apt update \ | |||
| && DEBIAN_FRONTEND=noninteractive apt install -y \ | |||
| vim \ | |||
| wget \ | |||
| curl \ | |||
| xz-utils \ | |||
| net-tools \ | |||
| openssh-client \ | |||
| git \ | |||
| ntpdate \ | |||
| tzdata \ | |||
| tcl \ | |||
| sudo \ | |||
| bash-completion | |||
| # Install compile tools | |||
| RUN DEBIAN_FRONTEND=noninteractive apt install -y \ | |||
| gcc \ | |||
| g++ \ | |||
| zlibc \ | |||
| make \ | |||
| libgmp-dev \ | |||
| patch \ | |||
| autoconf \ | |||
| libtool \ | |||
| automake \ | |||
| flex \ | |||
| libnccl2=2.4.8-1+cuda10.1 \ | |||
| libnccl-dev=2.4.8-1+cuda10.1 | |||
| # Set bash | |||
| RUN echo "dash dash/sh boolean false" | debconf-set-selections | |||
| RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash | |||
| # Install python (v3.7.5) | |||
| RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ | |||
| libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \ | |||
| && cd /tmp \ | |||
| && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \ | |||
| && tar -xvf v3.7.5.tar.gz \ | |||
| && cd /tmp/cpython-3.7.5 \ | |||
| && mkdir -p ${PYTHON_ROOT_PATH} \ | |||
| && ./configure --prefix=${PYTHON_ROOT_PATH} \ | |||
| && make -j4 \ | |||
| && make install -j4 \ | |||
| && rm -f /usr/local/bin/python \ | |||
| && rm -f /usr/local/bin/pip \ | |||
| && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ | |||
| && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ | |||
| && rm -rf /tmp/cpython-3.7.5 \ | |||
| && rm -f /tmp/v3.7.5.tar.gz | |||
| # Set pip source | |||
| RUN mkdir -pv /root/.pip \ | |||
| && echo "[global]" > /root/.pip/pip.conf \ | |||
| && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \ | |||
| && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf | |||
| # Install openmpi (v3.1.5) | |||
| RUN cd /tmp \ | |||
| && wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \ | |||
| && tar -xvf openmpi-3.1.5.tar.gz \ | |||
| && cd /tmp/openmpi-3.1.5 \ | |||
| && mkdir -p ${OMPI_ROOT_PATH} \ | |||
| && ./configure --prefix=${OMPI_ROOT_PATH} \ | |||
| && make -j4 \ | |||
| && make install -j4 \ | |||
| && rm -rf /tmp/openmpi-3.1.5 \ | |||
| && rm -f /tmp/openmpi-3.1.5.tar.gz | |||
| # Install MindSpore cuda-10.1 whl package | |||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore_gpu-0.2.0-cp37-cp37m-linux_x86_64.whl | |||
| @@ -1,57 +0,0 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| network config setting, will be used in train.py | |||
| """ | |||
| from easydict import EasyDict as edict | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore.model_zoo.Bert_NEZHA import BertConfig | |||
| bert_train_cfg = edict({ | |||
| 'epoch_size': 10, | |||
| 'num_warmup_steps': 0, | |||
| 'start_learning_rate': 1e-4, | |||
| 'end_learning_rate': 0.0, | |||
| 'decay_steps': 1000, | |||
| 'power': 10.0, | |||
| 'save_checkpoint_steps': 2000, | |||
| 'keep_checkpoint_max': 10, | |||
| 'checkpoint_prefix': "checkpoint_bert", | |||
| # please add your own dataset path | |||
| 'DATA_DIR': "/your/path/examples.tfrecord", | |||
| # please add your own dataset schema path | |||
| 'SCHEMA_DIR': "/your/path/datasetSchema.json" | |||
| }) | |||
| bert_net_cfg = BertConfig( | |||
| batch_size=16, | |||
| seq_length=128, | |||
| vocab_size=21136, | |||
| hidden_size=1024, | |||
| num_hidden_layers=24, | |||
| num_attention_heads=16, | |||
| intermediate_size=4096, | |||
| hidden_act="gelu", | |||
| hidden_dropout_prob=0.0, | |||
| attention_probs_dropout_prob=0.0, | |||
| max_position_embeddings=512, | |||
| type_vocab_size=2, | |||
| initializer_range=0.02, | |||
| use_relative_positions=True, | |||
| input_mask_from_dataset=True, | |||
| token_type_ids_from_dataset=True, | |||
| dtype=mstype.float32, | |||
| compute_type=mstype.float16, | |||
| ) | |||
| @@ -1,96 +0,0 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| NEZHA (NEural contextualiZed representation for CHinese lAnguage understanding) is the Chinese pretrained language | |||
| model currently based on BERT developed by Huawei. | |||
| 1. Prepare data | |||
| Following the data preparation as in BERT, run command as below to get dataset for training: | |||
| python ./create_pretraining_data.py \ | |||
| --input_file=./sample_text.txt \ | |||
| --output_file=./examples.tfrecord \ | |||
| --vocab_file=./your/path/vocab.txt \ | |||
| --do_lower_case=True \ | |||
| --max_seq_length=128 \ | |||
| --max_predictions_per_seq=20 \ | |||
| --masked_lm_prob=0.15 \ | |||
| --random_seed=12345 \ | |||
| --dupe_factor=5 | |||
| 2. Pretrain | |||
| First, prepare the distributed training environment, then adjust configurations in config.py, finally run train.py. | |||
| """ | |||
| import os | |||
| import numpy as np | |||
| from config import bert_train_cfg, bert_net_cfg | |||
| import mindspore.dataset.engine.datasets as de | |||
| import mindspore.dataset.transforms.c_transforms as C | |||
| from mindspore import context | |||
| from mindspore.common.tensor import Tensor | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor | |||
| from mindspore.model_zoo.Bert_NEZHA import BertNetworkWithLoss, BertTrainOneStepCell | |||
| from mindspore.nn.optim import Lamb | |||
| _current_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| def create_train_dataset(batch_size): | |||
| """create train dataset""" | |||
| # apply repeat operations | |||
| repeat_count = bert_train_cfg.epoch_size | |||
| ds = de.TFRecordDataset([bert_train_cfg.DATA_DIR], bert_train_cfg.SCHEMA_DIR, | |||
| columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels", | |||
| "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"]) | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) | |||
| ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| ds = ds.repeat(repeat_count) | |||
| return ds | |||
| def weight_variable(shape): | |||
| """weight variable""" | |||
| np.random.seed(1) | |||
| ones = np.random.uniform(-0.1, 0.1, size=shape).astype(np.float32) | |||
| return Tensor(ones) | |||
| def train_bert(): | |||
| """train bert""" | |||
| context.set_context(mode=context.GRAPH_MODE) | |||
| context.set_context(device_target="Ascend") | |||
| context.set_context(enable_task_sink=True) | |||
| context.set_context(enable_loop_sink=True) | |||
| context.set_context(enable_mem_reuse=True) | |||
| ds = create_train_dataset(bert_net_cfg.batch_size) | |||
| netwithloss = BertNetworkWithLoss(bert_net_cfg, True) | |||
| optimizer = Lamb(netwithloss.trainable_params(), decay_steps=bert_train_cfg.decay_steps, | |||
| start_learning_rate=bert_train_cfg.start_learning_rate, | |||
| end_learning_rate=bert_train_cfg.end_learning_rate, power=bert_train_cfg.power, | |||
| warmup_steps=bert_train_cfg.num_warmup_steps, decay_filter=lambda x: False) | |||
| netwithgrads = BertTrainOneStepCell(netwithloss, optimizer=optimizer) | |||
| netwithgrads.set_train(True) | |||
| model = Model(netwithgrads) | |||
| config_ck = CheckpointConfig(save_checkpoint_steps=bert_train_cfg.save_checkpoint_steps, | |||
| keep_checkpoint_max=bert_train_cfg.keep_checkpoint_max) | |||
| ckpoint_cb = ModelCheckpoint(prefix=bert_train_cfg.checkpoint_prefix, config=config_ck) | |||
| model.train(ds.get_repeat_count(), ds, callbacks=[LossMonitor(), ckpoint_cb], dataset_sink_mode=False) | |||
| if __name__ == '__main__': | |||
| train_bert() | |||
| @@ -25,7 +25,7 @@ This is the simple tutorial for training AlexNet in MindSpore. | |||
| python train.py --data_path cifar-10-batches-bin | |||
| ``` | |||
| You can get loss with each step similar to this: | |||
| You will get the loss value of each step as following: | |||
| ```bash | |||
| epoch: 1 step: 1, loss is 2.2791853 | |||
| @@ -36,17 +36,16 @@ epoch: 1 step: 1538, loss is 1.0221305 | |||
| ... | |||
| ``` | |||
| Then, test AlexNet according to network model | |||
| Then, evaluate AlexNet according to network model | |||
| ```python | |||
| # test AlexNet, 1 epoch training accuracy is up to 51.1%; 10 epoch training accuracy is up to 81.2% | |||
| # evaluate AlexNet, 1 epoch training accuracy is up to 51.1%; 10 epoch training accuracy is up to 81.2% | |||
| python eval.py --data_path cifar-10-verify-bin --mode test --ckpt_path checkpoint_alexnet-1_1562.ckpt | |||
| ``` | |||
| ## Note | |||
| There are some optional arguments: | |||
| Here are some optional parameters: | |||
| ```bash | |||
| -h, --help show this help message and exit | |||
| --device_target {Ascend,GPU} | |||
| device where the code will be implemented (default: Ascend) | |||
| --data_path DATA_PATH | |||
| @@ -0,0 +1,149 @@ | |||
| # BERT Example | |||
| ## Description | |||
| This example implements pre-training, fine-tuning and evaluation of [BERT-base](https://github.com/google-research/bert)(the base version of BERT model) and [BERT-NEZHA](https://github.com/huawei-noah/Pretrained-Language-Model)(a Chinese pretrained language model developed by Huawei, which introduced an improvement of Functional Relative Positional Encoding as an effective positional encoding scheme). | |||
| ## Requirements | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the zhwiki dataset from <https://dumps.wikimedia.org/zhwiki> for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wikiextractor). | |||
| Convert the dataset to TFRecord format and move the files to a specified path. | |||
| - Download the CLUE dataset from <https://www.cluebenchmarks.com> for fine-tuning and evaluation. | |||
| > Notes: | |||
| If you are running a fine-tuning or evaluation task, prepare the corresponding checkpoint file. | |||
| ## Running the Example | |||
| ### Pre-Training | |||
| - Set options in `config.py`, including lossscale, optimizer and network. Click [here](https://www.mindspore.cn/tutorial/zh-CN/master/use/data_preparation/loading_the_datasets.html#tfrecord) for more information about dataset and the json schema file. | |||
| - Run `run_standalone_pretrain.sh` for non-distributed pre-training of BERT-base and BERT-NEZHA model. | |||
| ``` bash | |||
| sh run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_PATH | |||
| ``` | |||
| - Run `run_distribute_pretrain.sh` for distributed pre-training of BERT-base and BERT-NEZHA model. | |||
| ``` bash | |||
| sh run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_HCCL_CONFIG_PATH MINDSPORE_PATH | |||
| ``` | |||
| ### Fine-Tuning | |||
| - Set options in `finetune_config.py`. Make sure the 'data_file', 'schema_file' and 'ckpt_file' are set to your own path, set the 'pre_training_ckpt' to save the checkpoint files generated. | |||
| - Run `finetune.py` for fine-tuning of BERT-base and BERT-NEZHA model. | |||
| ```bash | |||
| python finetune.py --backend=ms | |||
| ``` | |||
| ### Evaluation | |||
| - Set options in `evaluation_config.py`. Make sure the 'data_file', 'schema_file' and 'finetune_ckpt' are set to your own path. | |||
| - Run `evaluation.py` for evaluation of BERT-base and BERT-NEZHA model. | |||
| ```bash | |||
| python evaluation.py --backend=ms | |||
| ``` | |||
| ## Usage | |||
| ### Pre-Training | |||
| ``` | |||
| usage: run_pretrain.py [--distribute DISTRIBUTE] [--epoch_size N] [--device_num N] [--device_id N] | |||
| [--enable_task_sink ENABLE_TASK_SINK] [--enable_loop_sink ENABLE_LOOP_SINK] | |||
| [--enable_mem_reuse ENABLE_MEM_REUSE] [--enable_save_ckpt ENABLE_SAVE_CKPT] | |||
| [--enable_lossscale ENABLE_LOSSSCALE] [--do_shuffle DO_SHUFFLE] | |||
| [--enable_data_sink ENABLE_DATA_SINK] [--data_sink_steps N] [--checkpoint_path CHECKPOINT_PATH] | |||
| [--save_checkpoint_steps N] [--save_checkpoint_num N] | |||
| [--data_dir DATA_DIR] [--schema_dir SCHEMA_DIR] | |||
| options: | |||
| --distribute pre_training by several devices: "true"(training by more than 1 device) | "false", default is "false" | |||
| --epoch_size epoch size: N, default is 1 | |||
| --device_num number of used devices: N, default is 1 | |||
| --device_id device id: N, default is 0 | |||
| --enable_task_sink enable task sink: "true" | "false", default is "true" | |||
| --enable_loop_sink enable loop sink: "true" | "false", default is "true" | |||
| --enable_mem_reuse enable memory reuse: "true" | "false", default is "true" | |||
| --enable_save_ckpt enable save checkpoint: "true" | "false", default is "true" | |||
| --enable_lossscale enable lossscale: "true" | "false", default is "true" | |||
| --do_shuffle enable shuffle: "true" | "false", default is "true" | |||
| --enable_data_sink enable data sink: "true" | "false", default is "true" | |||
| --data_sink_steps set data sink steps: N, default is 1 | |||
| --checkpoint_path path to save checkpoint files: PATH, default is "" | |||
| --save_checkpoint_steps steps for saving checkpoint files: N, default is 1000 | |||
| --save_checkpoint_num number for saving checkpoint files: N, default is 1 | |||
| --data_dir path to dataset directory: PATH, default is "" | |||
| --schema_dir path to schema.json file, PATH, default is "" | |||
| ``` | |||
| ## Options and Parameters | |||
| It contains parameters of the BERT model and options for training, which are set in the files `config.py`, `finetune_config.py` and `evaluation_config.py` respectively. | |||
| ### Options: | |||
| ``` | |||
| Pre-Training: | |||
| bert_network version of BERT model: base | nezha, default is base | |||
| loss_scale_value initial value of loss scale: N, default is 2^32 | |||
| scale_factor factor used to update loss scale: N, default is 2 | |||
| scale_window steps for one update of loss scale: N, default is 1000 | |||
| optimizer optimizer used in the network: AdamWeightDecayDynamicLR | Lamb | Momentum, default is "Lamb" | |||
| Fine-Tuning: | |||
| task task type: NER | XNLI | LCQMC | SENTI | |||
| data_file dataset file to load: PATH, default is "/your/path/cn-wiki-128" | |||
| schema_file dataset schema file to load: PATH, default is "/your/path/datasetSchema.json" | |||
| epoch_num repeat counts of training: N, default is 40 | |||
| ckpt_prefix prefix used to save checkpoint files: PREFIX, default is "bert" | |||
| ckpt_dir path to save checkpoint files: PATH, default is None | |||
| pre_training_ckpt checkpoint file to load: PATH, default is "/your/path/pre_training.ckpt" | |||
| optimizer optimizer used in the network: AdamWeightDecayDynamicLR | Lamb | Momentum, default is "Lamb" | |||
| Evaluation: | |||
| task task type: NER | XNLI | LCQMC | SENTI | |||
| data_file dataset file to load: PATH, default is "/your/path/evaluation.tfrecord" | |||
| schema_file dataset schema file to load: PATH, default is "/your/path/schema.json" | |||
| finetune_ckpt checkpoint file to load: PATH, default is "/your/path/your.ckpt" | |||
| ``` | |||
| ### Parameters: | |||
| ``` | |||
| Parameters for dataset and network (Pre-Training/Fine-Tuning/Evaluation): | |||
| batch_size batch size of input dataset: N, default is 16 | |||
| seq_length length of input sequence: N, default is 128 | |||
| vocab_size size of each embedding vector: N, default is 21136 | |||
| hidden_size size of bert encoder layers: N, default is 768 | |||
| num_hidden_layers number of hidden layers: N, default is 12 | |||
| num_attention_heads number of attention heads: N, default is 12 | |||
| intermediate_size size of intermediate layer: N, default is 3072 | |||
| hidden_act activation function used: ACTIVATION, default is "gelu" | |||
| hidden_dropout_prob dropout probability for BertOutput: Q, default is 0.1 | |||
| attention_probs_dropout_prob dropout probability for BertAttention: Q, default is 0.1 | |||
| max_position_embeddings maximum length of sequences: N, default is 512 | |||
| type_vocab_size size of token type vocab: N, default is 16 | |||
| initializer_range initialization value of TruncatedNormal: Q, default is 0.02 | |||
| use_relative_positions use relative positions or not: True | False, default is False | |||
| input_mask_from_dataset use the input mask loaded from dataset or not: True | False, default is True | |||
| token_type_ids_from_dataset use the token type ids loaded from dataset or not: True | False, default is True | |||
| dtype data type of input: mstype.float16 | mstype.float32, default is mstype.float32 | |||
| compute_type compute type in BertTransformer: mstype.float16 | mstype.float32, default is mstype.float16 | |||
| Parameters for optimizer: | |||
| AdamWeightDecayDynamicLR: | |||
| decay_steps steps of the learning rate decay: N, default is 12276*3 | |||
| learning_rate value of learning rate: Q, default is 1e-5 | |||
| end_learning_rate value of end learning rate: Q, default is 0.0 | |||
| power power: Q, default is 10.0 | |||
| warmup_steps steps of the learning rate warm up: N, default is 2100 | |||
| weight_decay weight decay: Q, default is 1e-5 | |||
| eps term added to the denominator to improve numerical stability: Q, default is 1e-6 | |||
| Lamb: | |||
| decay_steps steps of the learning rate decay: N, default is 12276*3 | |||
| learning_rate value of learning rate: Q, default is 1e-5 | |||
| end_learning_rate value of end learning rate: Q, default is 0.0 | |||
| power power: Q, default is 5.0 | |||
| warmup_steps steps of the learning rate warm up: N, default is 2100 | |||
| weight_decay weight decay: Q, default is 1e-5 | |||
| decay_filter function to determine whether to apply weight decay on parameters: FUNCTION, default is lambda x: False | |||
| Momentum: | |||
| learning_rate value of learning rate: Q, default is 2e-5 | |||
| momentum momentum for the moving average: Q, default is 0.9 | |||
| ``` | |||
| @@ -0,0 +1,95 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| network config setting, will be used in dataset.py, run_pretrain.py | |||
| """ | |||
| from easydict import EasyDict as edict | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore.model_zoo.Bert_NEZHA import BertConfig | |||
| cfg = edict({ | |||
| 'bert_network': 'base', | |||
| 'loss_scale_value': 2**32, | |||
| 'scale_factor': 2, | |||
| 'scale_window': 1000, | |||
| 'optimizer': 'Lamb', | |||
| 'AdamWeightDecayDynamicLR': edict({ | |||
| 'learning_rate': 3e-5, | |||
| 'end_learning_rate': 1e-7, | |||
| 'power': 5.0, | |||
| 'weight_decay': 1e-5, | |||
| 'eps': 1e-6, | |||
| }), | |||
| 'Lamb': edict({ | |||
| 'start_learning_rate': 3e-5, | |||
| 'end_learning_rate': 1e-7, | |||
| 'power': 10.0, | |||
| 'warmup_steps': 10000, | |||
| 'weight_decay': 0.01, | |||
| 'eps': 1e-6, | |||
| }), | |||
| 'Momentum': edict({ | |||
| 'learning_rate': 2e-5, | |||
| 'momentum': 0.9, | |||
| }), | |||
| }) | |||
| ''' | |||
| Including two kinds of network: \ | |||
| base: Google BERT-base(the base version of BERT model). | |||
| large: BERT-NEZHA(a Chinese pretrained language model developed by Huawei, which introduced an improvement of \ | |||
| Functional Relative Positional Encoding as an effective positional encoding scheme). | |||
| ''' | |||
| if cfg.bert_network == 'base': | |||
| bert_net_cfg = BertConfig( | |||
| batch_size=32, | |||
| seq_length=128, | |||
| vocab_size=21128, | |||
| hidden_size=768, | |||
| num_hidden_layers=12, | |||
| num_attention_heads=12, | |||
| intermediate_size=3072, | |||
| hidden_act="gelu", | |||
| hidden_dropout_prob=0.1, | |||
| attention_probs_dropout_prob=0.1, | |||
| max_position_embeddings=512, | |||
| type_vocab_size=2, | |||
| initializer_range=0.02, | |||
| use_relative_positions=False, | |||
| input_mask_from_dataset=True, | |||
| token_type_ids_from_dataset=True, | |||
| dtype=mstype.float32, | |||
| compute_type=mstype.float16, | |||
| ) | |||
| if cfg.bert_network == 'nezha': | |||
| bert_net_cfg = BertConfig( | |||
| batch_size=32, | |||
| seq_length=128, | |||
| vocab_size=21128, | |||
| hidden_size=1024, | |||
| num_hidden_layers=24, | |||
| num_attention_heads=16, | |||
| intermediate_size=4096, | |||
| hidden_act="gelu", | |||
| hidden_dropout_prob=0.1, | |||
| attention_probs_dropout_prob=0.1, | |||
| max_position_embeddings=512, | |||
| type_vocab_size=2, | |||
| initializer_range=0.02, | |||
| use_relative_positions=True, | |||
| input_mask_from_dataset=True, | |||
| token_type_ids_from_dataset=True, | |||
| dtype=mstype.float32, | |||
| compute_type=mstype.float16, | |||
| ) | |||
| @@ -0,0 +1,58 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Data operations, will be used in run_pretrain.py | |||
| """ | |||
| import os | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.dataset.engine.datasets as de | |||
| import mindspore.dataset.transforms.c_transforms as C | |||
| from mindspore import log as logger | |||
| from config import bert_net_cfg | |||
| def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", enable_data_sink="true", | |||
| data_sink_steps=1, data_dir=None, schema_dir=None): | |||
| """create train dataset""" | |||
| # apply repeat operations | |||
| repeat_count = epoch_size | |||
| files = os.listdir(data_dir) | |||
| data_files = [] | |||
| for file_name in files: | |||
| data_files.append(os.path.join(data_dir, file_name)) | |||
| ds = de.TFRecordDataset(data_files, schema_dir, | |||
| columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels", | |||
| "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"], | |||
| shuffle=(do_shuffle == "true"), num_shards=device_num, shard_id=rank, | |||
| shard_equal_rows=True) | |||
| ori_dataset_size = ds.get_dataset_size() | |||
| new_size = ori_dataset_size | |||
| if enable_data_sink == "true": | |||
| new_size = data_sink_steps * bert_net_cfg.batch_size | |||
| ds.set_dataset_size(new_size) | |||
| repeat_count = int(repeat_count * ori_dataset_size // ds.get_dataset_size()) | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) | |||
| ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| # apply batch operations | |||
| ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True) | |||
| ds = ds.repeat(repeat_count) | |||
| logger.info("data size: {}".format(ds.get_dataset_size())) | |||
| logger.info("repeatcount: {}".format(ds.get_repeat_count())) | |||
| return ds | |||
| @@ -0,0 +1,64 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| echo "==============================================================================================================" | |||
| echo "Please run the scipt as: " | |||
| echo "sh run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_HCCL_CONFIG_PATH" | |||
| echo "for example: sh run_distribute_pretrain.sh 8 40 /path/zh-wiki/ /path/Schema.json /path/hccl.json" | |||
| echo "It is better to use absolute path." | |||
| echo "==============================================================================================================" | |||
| EPOCH_SIZE=$2 | |||
| DATA_DIR=$3 | |||
| SCHEMA_DIR=$4 | |||
| export MINDSPORE_HCCL_CONFIG_PATH=$5 | |||
| export RANK_SIZE=$1 | |||
| for((i=0;i<RANK_SIZE;i++)) | |||
| do | |||
| export DEVICE_ID=$i | |||
| start=`expr $i \* 12` | |||
| end=`expr $start \+ 11` | |||
| cmdopt=$start"-"$end | |||
| rm -rf LOG$i | |||
| mkdir ./LOG$i | |||
| cp *.py ./LOG$i | |||
| cd ./LOG$i || exit | |||
| export RANK_ID=$i | |||
| echo "start training for rank $i, device $DEVICE_ID" | |||
| env > env.log | |||
| taskset -c $cmdopt python ../run_pretrain.py \ | |||
| --distribute="true" \ | |||
| --epoch_size=$EPOCH_SIZE \ | |||
| --device_id=$DEVICE_ID \ | |||
| --device_num=$RANK_SIZE \ | |||
| --enable_task_sink="true" \ | |||
| --enable_loop_sink="true" \ | |||
| --enable_mem_reuse="true" \ | |||
| --enable_save_ckpt="true" \ | |||
| --enable_lossscale="true" \ | |||
| --do_shuffle="true" \ | |||
| --enable_data_sink="true" \ | |||
| --data_sink_steps=1 \ | |||
| --checkpoint_path="" \ | |||
| --save_checkpoint_steps=1000 \ | |||
| --save_checkpoint_num=1 \ | |||
| --data_dir=$DATA_DIR \ | |||
| --schema_dir=$SCHEMA_DIR > log.txt 2>&1 & | |||
| cd ../ | |||
| done | |||
| @@ -0,0 +1,144 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| #################pre_train bert example on zh-wiki######################## | |||
| python run_pretrain.py | |||
| """ | |||
| import os | |||
| import argparse | |||
| import mindspore.communication.management as D | |||
| from mindspore import context | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.parallel_utils import ParallelMode | |||
| from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell | |||
| from mindspore.train.callback import Callback, ModelCheckpoint, CheckpointConfig | |||
| from mindspore.model_zoo.Bert_NEZHA import BertNetworkWithLoss, BertTrainOneStepCell, BertTrainOneStepWithLossScaleCell | |||
| from mindspore.nn.optim import Lamb, Momentum, AdamWeightDecayDynamicLR | |||
| from dataset import create_bert_dataset | |||
| from config import cfg, bert_net_cfg | |||
| _current_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| class LossCallBack(Callback): | |||
| """ | |||
| Monitor the loss in training. | |||
| If the loss is NAN or INF, terminate training. | |||
| Note: | |||
| if per_print_times is 0 do not print loss. | |||
| Args: | |||
| per_print_times (int): Print loss every times. Default: 1. | |||
| """ | |||
| def __init__(self, per_print_times=1): | |||
| super(LossCallBack, self).__init__() | |||
| if not isinstance(per_print_times, int) or per_print_times < 0: | |||
| raise ValueError("print_step must be int and >= 0") | |||
| self._per_print_times = per_print_times | |||
| def step_end(self, run_context): | |||
| cb_params = run_context.original_args() | |||
| with open("./loss.log", "a+") as f: | |||
| f.write("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num, | |||
| str(cb_params.net_outputs))) | |||
| f.write('\n') | |||
| def run_pretrain(): | |||
| """pre-train bert_clue""" | |||
| parser = argparse.ArgumentParser(description='bert pre_training') | |||
| parser.add_argument("--distribute", type=str, default="false", help="Run distribute, default is false.") | |||
| parser.add_argument("--epoch_size", type=int, default="1", help="Epoch size, default is 1.") | |||
| parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") | |||
| parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") | |||
| parser.add_argument("--enable_task_sink", type=str, default="true", help="Enable task sink, default is true.") | |||
| parser.add_argument("--enable_loop_sink", type=str, default="true", help="Enable loop sink, default is true.") | |||
| parser.add_argument("--enable_mem_reuse", type=str, default="true", help="Enable mem reuse, default is true.") | |||
| parser.add_argument("--enable_save_ckpt", type=str, default="true", help="Enable save checkpoint, default is true.") | |||
| parser.add_argument("--enable_lossscale", type=str, default="true", help="Use lossscale or not, default is not.") | |||
| parser.add_argument("--do_shuffle", type=str, default="true", help="Enable shuffle for dataset, default is true.") | |||
| parser.add_argument("--enable_data_sink", type=str, default="true", help="Enable data sink, default is true.") | |||
| parser.add_argument("--data_sink_steps", type=int, default="1", help="Sink steps for each epoch, default is 1.") | |||
| parser.add_argument("--checkpoint_path", type=str, default="", help="Checkpoint file path") | |||
| parser.add_argument("--save_checkpoint_steps", type=int, default=1000, help="Save checkpoint steps, " | |||
| "default is 1000.") | |||
| parser.add_argument("--save_checkpoint_num", type=int, default=1, help="Save checkpoint numbers, default is 1.") | |||
| parser.add_argument("--data_dir", type=str, default="", help="Data path, it is better to use absolute path") | |||
| parser.add_argument("--schema_dir", type=str, default="", help="Schema path, it is better to use absolute path") | |||
| args_opt = parser.parse_args() | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) | |||
| context.set_context(enable_task_sink=(args_opt.enable_task_sink == "true"), | |||
| enable_loop_sink=(args_opt.enable_loop_sink == "true"), | |||
| enable_mem_reuse=(args_opt.enable_mem_reuse == "true")) | |||
| context.set_context(reserve_class_name_in_scope=False) | |||
| if args_opt.distribute == "true": | |||
| device_num = args_opt.device_num | |||
| context.reset_auto_parallel_context() | |||
| context.set_context(enable_hccl=True) | |||
| context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, | |||
| device_num=device_num) | |||
| D.init() | |||
| rank = args_opt.device_id % device_num | |||
| else: | |||
| context.set_context(enable_hccl=False) | |||
| rank = 0 | |||
| device_num = 1 | |||
| ds = create_bert_dataset(args_opt.epoch_size, device_num, rank, args_opt.do_shuffle, args_opt.enable_data_sink, | |||
| args_opt.data_sink_steps, args_opt.data_dir, args_opt.schema_dir) | |||
| netwithloss = BertNetworkWithLoss(bert_net_cfg, True) | |||
| if cfg.optimizer == 'Lamb': | |||
| optimizer = Lamb(netwithloss.trainable_params(), decay_steps=ds.get_dataset_size() * ds.get_repeat_count(), | |||
| start_learning_rate=cfg.Lamb.start_learning_rate, end_learning_rate=cfg.Lamb.end_learning_rate, | |||
| power=cfg.Lamb.power, warmup_steps=cfg.Lamb.warmup_steps, weight_decay=cfg.Lamb.weight_decay, | |||
| eps=cfg.Lamb.eps, decay_filter=cfg.Lamb.decay_filter) | |||
| elif cfg.optimizer == 'Momentum': | |||
| optimizer = Momentum(netwithloss.trainable_params(), learning_rate=cfg.Momentum.learning_rate, | |||
| momentum=cfg.Momentum.momentum) | |||
| elif cfg.optimizer == 'AdamWeightDecayDynamicLR': | |||
| optimizer = AdamWeightDecayDynamicLR(netwithloss.trainable_params(), | |||
| decay_steps=ds.get_dataset_size() * ds.get_repeat_count(), | |||
| learning_rate=cfg.AdamWeightDecayDynamicLR.learning_rate, | |||
| end_learning_rate=cfg.AdamWeightDecayDynamicLR.end_learning_rate, | |||
| power=cfg.AdamWeightDecayDynamicLR.power, | |||
| weight_decay=cfg.AdamWeightDecayDynamicLR.weight_decay, | |||
| eps=cfg.AdamWeightDecayDynamicLR.eps) | |||
| else: | |||
| raise ValueError("Don't support optimizer {}, only support [Lamb, Momentum, AdamWeightDecayDynamicLR]". | |||
| format(cfg.optimizer)) | |||
| callback = [LossCallBack()] | |||
| if args_opt.enable_save_ckpt == "true": | |||
| config_ck = CheckpointConfig(save_checkpoint_steps=args_opt.save_checkpoint_steps, | |||
| keep_checkpoint_max=args_opt.save_checkpoint_num) | |||
| ckpoint_cb = ModelCheckpoint(prefix='checkpoint_bert', config=config_ck) | |||
| callback.append(ckpoint_cb) | |||
| if args_opt.checkpoint_path: | |||
| param_dict = load_checkpoint(args_opt.checkpoint_path) | |||
| load_param_into_net(netwithloss, param_dict) | |||
| if args_opt.enable_lossscale == "true": | |||
| update_cell = DynamicLossScaleUpdateCell(loss_scale_value=cfg.loss_scale_value, | |||
| scale_factor=cfg.scale_factor, | |||
| scale_window=cfg.scale_window) | |||
| netwithgrads = BertTrainOneStepWithLossScaleCell(netwithloss, optimizer=optimizer, | |||
| scale_update_cell=update_cell) | |||
| else: | |||
| netwithgrads = BertTrainOneStepCell(netwithloss, optimizer=optimizer) | |||
| model = Model(netwithgrads) | |||
| model.train(ds.get_repeat_count(), ds, callbacks=callback, dataset_sink_mode=(args_opt.enable_data_sink == "true")) | |||
| if __name__ == '__main__': | |||
| run_pretrain() | |||
| @@ -0,0 +1,44 @@ | |||
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Launch single-device BERT pre-training in the background.
# stdout/stderr are redirected to log.txt in the current directory.
echo "=============================================================================================================="
echo "Please run the script as: "
echo "sh run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR"
echo "for example: sh run_standalone_pretrain.sh 0 40 /path/zh-wiki/ /path/Schema.json"
echo "=============================================================================================================="

DEVICE_ID=$1
EPOCH_SIZE=$2
DATA_DIR=$3
SCHEMA_DIR=$4

# Paths are quoted so dataset directories containing spaces still work.
python run_pretrain.py  \
    --distribute="false" \
    --epoch_size=$EPOCH_SIZE \
    --device_id=$DEVICE_ID \
    --enable_task_sink="true" \
    --enable_loop_sink="true" \
    --enable_mem_reuse="true" \
    --enable_save_ckpt="true" \
    --enable_lossscale="true" \
    --do_shuffle="true" \
    --enable_data_sink="true" \
    --data_sink_steps=1 \
    --checkpoint_path="" \
    --save_checkpoint_steps=1000 \
    --save_checkpoint_num=1 \
    --data_dir="$DATA_DIR" \
    --schema_dir="$SCHEMA_DIR" > log.txt 2>&1 &
| @@ -1,46 +0,0 @@ | |||
| # MindRecord generating guidelines | |||
| <!-- TOC --> | |||
| - [MindRecord generating guidelines](#mindrecord-generating-guidelines) | |||
| - [Create work space](#create-work-space) | |||
| - [Implement data generator](#implement-data-generator) | |||
| - [Run data generator](#run-data-generator) | |||
| <!-- /TOC --> | |||
| ## Create work space | |||
| Assume the dataset name is 'xyz' | |||
| * Create work space from template | |||
| ```shell | |||
| cd ${your_mindspore_home}/example/convert_to_mindrecord | |||
| cp -r template xyz | |||
| ``` | |||
| ## Implement data generator | |||
| Edit dictionary data generator | |||
| * Edit file | |||
| ```shell | |||
| cd ${your_mindspore_home}/example/convert_to_mindrecord | |||
| vi xyz/mr_api.py | |||
| ``` | |||
| Two API, 'mindrecord_task_number' and 'mindrecord_dict_data', must be implemented | |||
| - 'mindrecord_task_number()' returns number of tasks. Return 1 if data row is generated serially. Return N if generator can be split into N parallel-run tasks. | |||
| - 'mindrecord_dict_data(task_id)' yields dictionary data row by row. 'task_id' is 0..N-1, if N is return value of mindrecord_task_number() | |||
| Tricky for parallel run | |||
| - For imagenet, one directory can be a task. | |||
| - For TFRecord with multiple files, each file can be a task. | |||
| - For TFRecord with 1 file only, it could also be split into N tasks. Task_id=K means: data row is picked only if (count % N == K) | |||
| ## Run data generator | |||
| * run python script | |||
| ```shell | |||
| cd ${your_mindspore_home}/example/convert_to_mindrecord | |||
| python writer.py --mindrecord_script imagenet [...] | |||
| ``` | |||
| @@ -0,0 +1,95 @@ | |||
| # Guideline to Efficiently Generating MindRecord | |||
| <!-- TOC --> | |||
| - [What does the example do](#what-does-the-example-do) | |||
| - [Example test for ImageNet](#example-test-for-imagenet) | |||
| - [How to use the example for other dataset](#how-to-use-the-example-for-other-dataset) | |||
| - [Create work space](#create-work-space) | |||
| - [Implement data generator](#implement-data-generator) | |||
| - [Run data generator](#run-data-generator) | |||
| <!-- /TOC --> | |||
| ## What does the example do | |||
| This example provides an efficient way to generate MindRecord. Users only need to define the parallel granularity of training data reading and the data reading function of a single task. That is, they can efficiently convert the user's training data into MindRecord. | |||
| 1. run_template.sh: entry script, users need to modify parameters according to their own training data. | |||
| 2. writer.py: main script, called by run_template.sh, it mainly reads user training data in parallel and generates MindRecord. | |||
3. template/mr_api.py: users define their own parallel granularity of training data reading and the single-task reading function through the template. | |||
| ## Example test for ImageNet | |||
| 1. Download and prepare the ImageNet dataset as required. | |||
| > [ImageNet dataset download address](http://image-net.org/download) | |||
| Store the downloaded ImageNet dataset in a folder. The folder contains all images and a mapping file that records labels of the images. | |||
| In the mapping file, there are three columns, which are separated by spaces. They indicate image classes, label IDs, and label names. The following is an example of the mapping file: | |||
| ``` | |||
| n02119789 1 pen | |||
n02100735 2 notebook | |||
| n02110185 3 mouse | |||
| n02096294 4 orange | |||
| ``` | |||
| 2. Edit run_imagenet.sh and modify the parameters | |||
| ``` | |||
| --mindrecord_file: output MindRecord file. | |||
| --mindrecord_partitions: the partitions for MindRecord. | |||
| --label_file: ImageNet label map file. | |||
--image_dir: ImageNet directory which contains the class sub-directories. | |||
| ``` | |||
| 3. Run the bash script | |||
| ```bash | |||
| bash run_imagenet.sh | |||
| ``` | |||
| 4. Performance result | |||
| | Training Data | General API | Current Example | Env | | |||
| | ---- | ---- | ---- | ---- | | |||
| |ImageNet(140G)| 2h40m | 50m | CPU: Intel Xeon Gold 6130 x 64, Memory: 256G, Storage: HDD | | |||
| ## How to use the example for other dataset | |||
| ### Create work space | |||
| Assume the dataset name is 'xyz' | |||
| * Create work space from template | |||
| ```shell | |||
| cd ${your_mindspore_home}/example/cv_to_mindrecord/ImageNet_Similar_Perf | |||
| cp -r template xyz | |||
| ``` | |||
| ### Implement data generator | |||
| Edit dictionary data generator. | |||
| * Edit file | |||
| ```shell | |||
| cd ${your_mindspore_home}/example/cv_to_mindrecord/ImageNet_Similar_Perf | |||
| vi xyz/mr_api.py | |||
| ``` | |||
| Two API, 'mindrecord_task_number' and 'mindrecord_dict_data', must be implemented. | |||
| - 'mindrecord_task_number()' returns number of tasks. Return 1 if data row is generated serially. Return N if generator can be split into N parallel-run tasks. | |||
| - 'mindrecord_dict_data(task_id)' yields dictionary data row by row. 'task_id' is 0..N-1, if N is return value of mindrecord_task_number() | |||
| Tricky for parallel run. | |||
| - For ImageNet, one directory can be a task. | |||
| - For TFRecord with multiple files, each file can be a task. | |||
| - For TFRecord with 1 file only, it could also be split into N tasks. Task_id=K means: data row is picked only if (count % N == K) | |||
| ### Run data generator | |||
| * run python script | |||
| ```shell | |||
| cd ${your_mindspore_home}/example/cv_to_mindrecord/ImageNet_Similar_Perf | |||
| python writer.py --mindrecord_script xyz [...] | |||
| ``` | |||
| > You can put this command in script **run_xyz.sh** for easy execution | |||
| @@ -118,5 +118,8 @@ def mindrecord_dict_data(task_id): | |||
| image_file = open(file_name, "rb") | |||
| image_bytes = image_file.read() | |||
| image_file.close() | |||
| if not image_bytes: | |||
| print("The image file: {} is invalid.".format(file_name)) | |||
| continue | |||
| data["data"] = image_bytes | |||
| yield data | |||
| @@ -19,8 +19,8 @@ This is the simple and basic tutorial for constructing a network in MindSpore. | |||
| │ t10k-labels.idx1-ubyte | |||
| │ | |||
| └─train | |||
| train-images.idx3-ubyte | |||
| train-labels.idx1-ubyte | |||
| train-images.idx3-ubyte | |||
| train-labels.idx1-ubyte | |||
| ``` | |||
| ## Running the example | |||
| @@ -30,7 +30,7 @@ This is the simple and basic tutorial for constructing a network in MindSpore. | |||
| python train.py --data_path MNIST_Data | |||
| ``` | |||
| You can get loss with each step similar to this: | |||
You will get the loss value of each step as follows: | |||
| ```bash | |||
| epoch: 1 step: 1, loss is 2.3040335 | |||
| @@ -41,17 +41,16 @@ epoch: 1 step: 1741, loss is 0.05018193 | |||
| ... | |||
| ``` | |||
| Then, test LeNet according to network model | |||
| Then, evaluate LeNet according to network model | |||
| ```python | |||
| # test LeNet, after 1 epoch training, the accuracy is up to 96.5% | |||
| # evaluate LeNet, after 1 epoch training, the accuracy is up to 96.5% | |||
| python eval.py --data_path MNIST_Data --mode test --ckpt_path checkpoint_lenet-1_1875.ckpt | |||
| ``` | |||
| ## Note | |||
| There are some optional arguments: | |||
| Here are some optional parameters: | |||
| ```bash | |||
| -h, --help show this help message and exit | |||
| --device_target {Ascend,GPU,CPU} | |||
| device where the code will be implemented (default: Ascend) | |||
| --data_path DATA_PATH | |||
| @@ -0,0 +1,101 @@ | |||
| # MobileNetV2 Example | |||
| ## Description | |||
| This is an example of training MobileNetV2 with ImageNet2012 dataset in MindSpore. | |||
| ## Requirements | |||
| * Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| * Download the dataset [ImageNet2012](http://www.image-net.org/). | |||
| > Unzip the ImageNet2012 dataset to any path you want and the folder structure should be as follows: | |||
| > ``` | |||
| > . | |||
| > ├── train # train dataset | |||
| > └── val # infer dataset | |||
| > ``` | |||
| ## Example structure | |||
| ``` shell | |||
| . | |||
| ├── config.py # parameter configuration | |||
| ├── dataset.py # data preprocessing | |||
| ├── eval.py # infer script | |||
| ├── launch.py # launcher for distributed training | |||
| ├── lr_generator.py # generate learning rate for each step | |||
| ├── run_infer.sh # launch infering | |||
| ├── run_train.sh # launch training | |||
| └── train.py # train script | |||
| ``` | |||
| ## Parameter configuration | |||
| Parameters for both training and inference can be set in 'config.py'. | |||
| ``` | |||
| "num_classes": 1000, # dataset class num | |||
| "image_height": 224, # image height | |||
| "image_width": 224, # image width | |||
| "batch_size": 256, # training or infering batch size | |||
| "epoch_size": 200, # total training epochs, including warmup_epochs | |||
| "warmup_epochs": 4, # warmup epochs | |||
| "lr": 0.4, # base learning rate | |||
| "momentum": 0.9, # momentum | |||
| "weight_decay": 4e-5, # weight decay | |||
| "loss_scale": 1024, # loss scale | |||
| "save_checkpoint": True, # whether save checkpoint | |||
| "save_checkpoint_epochs": 1, # the epoch interval between two checkpoints | |||
| "keep_checkpoint_max": 200, # only keep the last keep_checkpoint_max checkpoint | |||
| "save_checkpoint_path": "./checkpoint" # path to save checkpoint | |||
| ``` | |||
| ## Running the example | |||
| ### Train | |||
| #### Usage | |||
Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] | |||
| #### Launch | |||
| ``` | |||
| # training example | |||
| sh run_train.sh 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet | |||
| ``` | |||
| #### Result | |||
Training results will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log` as follows. | |||
| ``` | |||
| epoch: [ 0/200], step:[ 624/ 625], loss:[5.258/5.258], time:[140412.236], lr:[0.100] | |||
| epoch time: 140522.500, per step time: 224.836, avg loss: 5.258 | |||
| epoch: [ 1/200], step:[ 624/ 625], loss:[3.917/3.917], time:[138221.250], lr:[0.200] | |||
| epoch time: 138331.250, per step time: 221.330, avg loss: 3.917 | |||
| ``` | |||
| ### Infer | |||
| #### Usage | |||
| Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH] | |||
| #### Launch | |||
| ``` | |||
| # infer example | |||
| sh run_infer.sh ~/imagenet ~/train/mobilenet-200_625.ckpt | |||
| ``` | |||
| > checkpoint can be produced in training process. | |||
| #### Result | |||
Inference results will be stored in the example path; you can find results like the following in `val.log`. | |||
| ``` | |||
| result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt | |||
| ``` | |||
| @@ -0,0 +1,36 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| network config setting, will be used in train.py and eval.py | |||
| """ | |||
| from easydict import EasyDict as ed | |||
# Network/training hyper-parameters consumed by train.py and eval.py.
config = ed({
    "num_classes": 1000,                     # dataset class count
    "image_height": 224,                     # input image height
    "image_width": 224,                      # input image width
    "batch_size": 256,                       # training or inference batch size
    "epoch_size": 200,                       # total training epochs, including warmup_epochs
    "warmup_epochs": 4,                      # warmup epochs
    "lr": 0.4,                               # base learning rate
    "momentum": 0.9,                         # momentum
    "weight_decay": 4e-5,                    # weight decay
    "label_smooth": 0.1,                     # label-smoothing factor for the loss
    "loss_scale": 1024,                      # fixed loss scale
    "save_checkpoint": True,                 # whether to save checkpoints
    "save_checkpoint_epochs": 1,             # epoch interval between two checkpoints
    "keep_checkpoint_max": 200,              # only keep the last keep_checkpoint_max checkpoints
    "save_checkpoint_path": "./checkpoint",  # path to save checkpoints
})
| @@ -0,0 +1,84 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| create train or eval dataset. | |||
| """ | |||
| import os | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.dataset.engine as de | |||
| import mindspore.dataset.transforms.vision.c_transforms as C | |||
| import mindspore.dataset.transforms.c_transforms as C2 | |||
| from config import config | |||
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval ImageNet dataset pipeline.

    Args:
        dataset_path (string): the path of the dataset.
        do_train (bool): whether the dataset is used for train or eval.
        repeat_num (int): the repeat times of the dataset. Default: 1
        batch_size (int): the batch size of the dataset. Default: 32

    Returns:
        dataset
    """
    # Default to a single, non-sharded reader when the launcher env vars are
    # absent; the original int(os.getenv(...)) raised TypeError on None.
    rank_size = int(os.getenv("RANK_SIZE", "1"))
    rank_id = int(os.getenv("RANK_ID", "0"))
    if rank_size == 1:
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=16, shuffle=True)
    else:
        # Shard the dataset across devices for data-parallel training.
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=16, shuffle=True,
                                     num_shards=rank_size, shard_id=rank_id)

    resize_height = config.image_height
    resize_width = config.image_width
    rescale = 1.0 / 255.0
    shift = 0.0
    buffer_size = 1000

    # define map operations
    decode_op = C.Decode()
    resize_crop_op = C.RandomResizedCrop(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    resize_op = C.Resize((256, 256))
    center_crop = C.CenterCrop(resize_width)
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    change_swap_op = C.HWC2CHW()

    if do_train:
        # Random crop/flip augmentation for training.
        trans = [decode_op, resize_crop_op, horizontal_flip_op, rescale_op, normalize_op, change_swap_op]
    else:
        # Deterministic resize + center crop for evaluation.
        trans = [decode_op, resize_op, center_crop, rescale_op, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans)
    ds = ds.map(input_columns="label", operations=type_cast_op)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=buffer_size)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
| @@ -0,0 +1,54 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| eval. | |||
| """ | |||
| import os | |||
| import argparse | |||
| from dataset import create_dataset | |||
| from config import config | |||
| from mindspore import context | |||
| from mindspore.model_zoo.mobilenet import mobilenet_v2 | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits | |||
parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()

# Default to device 0 when DEVICE_ID is not exported (e.g. a manual run
# outside run_infer.sh); the original int(os.getenv(...)) raised on None.
device_id = int(os.getenv('DEVICE_ID', '0'))

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False)
context.set_context(enable_task_sink=True)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)

if __name__ == '__main__':
    # Mean-reduced softmax cross-entropy over sparse (index) labels.
    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
    net = mobilenet_v2()

    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size)

    # Restore trained weights before evaluation, if a checkpoint is given.
    if args_opt.checkpoint_path:
        param_dict = load_checkpoint(args_opt.checkpoint_path)
        load_param_into_net(net, param_dict)
    net.set_train(False)

    model = Model(net, loss_fn=loss, metrics={'acc'})
    res = model.eval(dataset)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)
| @@ -0,0 +1,143 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """launch train script""" | |||
| import os | |||
| import sys | |||
| import json | |||
| from argparse import ArgumentParser | |||
def parse_args():
    """
    Parse command-line arguments for the distributed-training launcher.

    Returns:
        argparse.Namespace: the known launcher options; any unrecognized
        arguments are collected into ``args.training_script_args`` and
        forwarded verbatim to the training script.

    Examples:
        >>> parse_args()
    """
    # Typo fix: "utilty" -> "utility" in the user-facing description.
    parser = ArgumentParser(description="mindspore distributed training launch "
                                        "helper utility that will spawn up "
                                        "multiple distributed processes")
    parser.add_argument("--nproc_per_node", type=int, default=1,
                        help="The number of processes to launch on each node, "
                             "for D training, this is recommended to be set "
                             "to the number of D in your system so that "
                             "each process can be bound to a single D.")
    parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7",
                        help="will use the visible devices sequentially")
    parser.add_argument("--server_id", type=str, default="",
                        help="server ip")
    parser.add_argument("--training_script", type=str,
                        help="The full path to the single D training "
                             "program/script to be launched in parallel, "
                             "followed by all the arguments for the "
                             "training script")

    # Everything not recognized above belongs to the training program.
    args, unknown = parser.parse_known_args()
    args.training_script_args = unknown
    return args
def main():
    """
    Build an HCCL rank table from /etc/hccn.conf and spawn one training
    process per device.

    Side effects: writes rank_table_*.json into the current working
    directory, recreates per-rank device directories, and launches the
    training script in the background via os.system.
    """
    print("start", __file__)
    args = parse_args()
    print(args)
    visible_devices = args.visible_devices.split(',')
    # The training script must exist and there must be at least as many
    # visible devices as requested processes.
    assert os.path.isfile(args.training_script)
    assert len(visible_devices) >= args.nproc_per_node
    print('visible_devices:{}'.format(visible_devices))
    if not args.server_id:
        print('pleaser input server ip!!!')
        exit(0)
    print('server_id:{}'.format(args.server_id))

    # construct hccn_table: map device id -> device ip from the
    # "address_<id>=<ip>" lines of /etc/hccn.conf.
    # NOTE(review): the file handle is never closed; presumably acceptable
    # for a short-lived launcher process.
    hccn_configs = open('/etc/hccn.conf', 'r').readlines()
    device_ips = {}
    for hccn_item in hccn_configs:
        hccn_item = hccn_item.strip()
        if hccn_item.startswith('address_'):
            device_id, device_ip = hccn_item.split('=')
            device_id = device_id.split('_')[1]
            device_ips[device_id] = device_ip
            print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
    hccn_table = {}
    hccn_table['board_id'] = '0x0000'
    hccn_table['chip_info'] = '910'
    hccn_table['deploy_mode'] = 'lab'
    hccn_table['group_count'] = '1'
    hccn_table['group_list'] = []
    instance_list = []
    usable_dev = ''
    # One instance entry per spawned process, bound to one device each.
    for instance_id in range(args.nproc_per_node):
        instance = {}
        instance['devices'] = []
        device_id = visible_devices[instance_id]
        device_ip = device_ips[device_id]
        usable_dev += str(device_id)
        instance['devices'].append({
            'device_id': device_id,
            'device_ip': device_ip,
        })
        instance['rank_id'] = str(instance_id)
        instance['server_id'] = args.server_id
        instance_list.append(instance)
    hccn_table['group_list'].append({
        'device_num': str(args.nproc_per_node),
        'server_num': '1',
        'group_name': '',
        'instance_count': str(args.nproc_per_node),
        'instance_list': instance_list,
    })
    hccn_table['para_plane_nic_location'] = 'device'
    hccn_table['para_plane_nic_name'] = []
    for instance_id in range(args.nproc_per_node):
        eth_id = visible_devices[instance_id]
        hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id))
    hccn_table['para_plane_nic_num'] = str(args.nproc_per_node)
    hccn_table['status'] = 'completed'

    # save hccn_table to file (os.getcwd() always exists, so the mkdir
    # branch is effectively dead, but kept as-is).
    table_path = os.getcwd()
    if not os.path.exists(table_path):
        os.mkdir(table_path)
    table_fn = os.path.join(table_path,
                            'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id))
    with open(table_fn, 'w') as table_fp:
        json.dump(hccn_table, table_fp, indent=4)
    sys.stdout.flush()

    # spawn the processes: each rank gets its own env, a fresh device<rank>
    # working directory, and a background shell command writing log<rank>.log.
    for rank_id in range(0, args.nproc_per_node):
        device_id = visible_devices[rank_id]
        device_dir = os.path.join(os.getcwd(), 'device{}'.format(rank_id))
        rank_process = 'export RANK_SIZE={} && export RANK_ID={} && export DEVICE_ID={} && '.format(args.nproc_per_node,
                                                                                                    rank_id, device_id)
        if args.nproc_per_node > 1:
            # Multi-device runs additionally need the HCCL rank table.
            rank_process += 'export MINDSPORE_HCCL_CONFIG_PATH={} && '.format(table_fn)
            rank_process += 'export RANK_TABLE_FILE={} && '.format(table_fn)
        rank_process += 'rm -rf {dir} && mkdir {dir} && cd {dir} && python {script} '.format(dir=device_dir,
                                                                                             script=args.training_script
                                                                                             )
        rank_process += ' '.join(args.training_script_args) + ' > log{}.log 2>&1 &'.format(rank_id)
        os.system(rank_process)


if __name__ == "__main__":
    main()
| @@ -0,0 +1,54 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """learning rate generator""" | |||
| import math | |||
| import numpy as np | |||
def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    Generate the per-step learning-rate schedule: a linear warmup from
    lr_init to lr_max, followed by a half-cosine decay down to lr_end,
    clamped at 0.

    Args:
        global_step(int): offset into the schedule; the returned array
            starts at this step
        lr_init(float): learning rate at the first warmup step
        lr_end(float): learning rate at the end of the cosine decay
        lr_max(float): peak learning rate reached right after warmup
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epochs of training
        steps_per_epoch(int): steps of one epoch

    Returns:
        np.array, float32 learning-rate array of length
        total_steps - global_step
    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs

    def _lr_at(step):
        # Per-step rate: linear ramp during warmup, cosine decay after.
        if step < warmup_steps:
            value = lr_init + (lr_max - lr_init) * step / warmup_steps
        else:
            angle = math.pi * (step - warmup_steps) / (total_steps - warmup_steps)
            value = lr_end + (lr_max - lr_end) * (1. + math.cos(angle)) / 2.
        # Never let the schedule go negative.
        return max(value, 0.0)

    schedule = np.array([_lr_at(step) for step in range(total_steps)]).astype(np.float32)
    return schedule[global_step:]
| @@ -0,0 +1,33 @@ | |||
#!/usr/bin/env bash
# Run MobileNetV2 inference on a single device (device 0).
# Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]
if [ $# != 2 ]
then
    echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]"
    exit 1
fi

# Positional arguments are quoted so paths containing spaces work.
if [ ! -d "$1" ]
then
    echo "error: DATASET_PATH=$1 is not a directory"
    exit 1
fi

if [ ! -f "$2" ]
then
    echo "error: CHECKPOINT_PATH=$2 is not a file"
    exit 1
fi

BASEPATH=$(cd "$(dirname "$0")" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1

# Recreate a clean ./eval working directory for this run.
if [ -d "eval" ];
then
    rm -rf ./eval
fi
mkdir ./eval
cd ./eval || exit
python ${BASEPATH}/eval.py \
    --checkpoint_path="$2" \
    --dataset_path="$1" &> infer.log &  # dataset val folder path
| @@ -0,0 +1,33 @@ | |||
#!/usr/bin/env bash
# Launch distributed MobileNetV2 training through launch.py.
# Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
if [ $# != 4 ]
then
    echo "Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]"
    exit 1
fi

# BUG FIX: the original used '&&' here, which can never be true (a value
# cannot be both < 1 and > 8), so the range check silently never fired.
if [ $1 -lt 1 ] || [ $1 -gt 8 ]
then
    echo "error: DEVICE_NUM=$1 is not in (1-8)"
    exit 1
fi

if [ ! -d "$4" ]
then
    echo "error: DATASET_PATH=$4 is not a directory"
    exit 1
fi

BASEPATH=$(cd "$(dirname "$0")" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH

# Recreate a clean ./train working directory for this run.
if [ -d "train" ];
then
    rm -rf ./train
fi
mkdir ./train
cd ./train || exit
python ${BASEPATH}/launch.py \
    --nproc_per_node=$1 \
    --visible_devices=$3 \
    --server_id=$2 \
    --training_script=${BASEPATH}/train.py \
    --dataset_path="$4" &> train.log &  # dataset train folder
| @@ -0,0 +1,186 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """train_imagenet.""" | |||
| import os | |||
| import time | |||
| import argparse | |||
| import random | |||
| import numpy as np | |||
| from dataset import create_dataset | |||
| from lr_generator import get_lr | |||
| from config import config | |||
| from mindspore import context | |||
| from mindspore import Tensor | |||
| from mindspore import nn | |||
| from mindspore.model_zoo.mobilenet import mobilenet_v2 | |||
| from mindspore.parallel._auto_parallel_context import auto_parallel_context | |||
| from mindspore.nn.optim.momentum import Momentum | |||
| from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits | |||
| from mindspore.nn.loss.loss import _Loss | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops import functional as F | |||
| from mindspore.common import dtype as mstype | |||
| from mindspore.train.model import Model, ParallelMode | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback | |||
| from mindspore.train.loss_scale_manager import FixedLossScaleManager | |||
| import mindspore.dataset.engine as de | |||
| from mindspore.communication.management import init | |||
# Fix all RNG seeds for reproducible runs.
random.seed(1)
np.random.seed(1)
de.config.set_seed(1)

parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()

# Default to single device 0 when the launcher env vars are absent
# (e.g. a direct run outside launch.py); the original int(os.getenv(...))
# raised TypeError on None.
device_id = int(os.getenv('DEVICE_ID', '0'))
rank_id = int(os.getenv('RANK_ID', '0'))
rank_size = int(os.getenv('RANK_SIZE', '1'))
run_distribute = rank_size > 1

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False)
context.set_context(enable_task_sink=True)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)
class CrossEntropyWithLabelSmooth(_Loss):
    """
    Softmax cross-entropy loss with label smoothing.

    Args:
        smooth_factor (float): smooth factor, default=0.
        num_classes (int): num classes

    Returns:
        None.

    Examples:
        >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000)
    """

    def __init__(self, smooth_factor=0., num_classes=1000):
        super(CrossEntropyWithLabelSmooth, self).__init__()
        # Smoothed one-hot targets: the true class gets 1 - smooth_factor,
        # every other class shares smooth_factor evenly.
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
        self.onehot = P.OneHot()
        self.cast = P.Cast()
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)

    def construct(self, logit, label):
        # Build smoothed targets sized from the logit's class dimension,
        # then average the per-sample cross-entropy over the batch.
        class_count = F.shape(logit)[1]
        smoothed_label = self.onehot(self.cast(label, mstype.int32),
                                     class_count, self.on_value, self.off_value)
        per_sample_loss = self.ce(logit, smoothed_label)
        return self.mean(per_sample_loss, 0)
class Monitor(Callback):
    """
    Monitor loss and time during training.

    Args:
        lr_init (numpy array): the full per-step learning-rate schedule,
            indexed by global step number (one entry per training step).

    Returns:
        None

    Examples:
        >>> Monitor(100,lr_init=Tensor([0.05]*100).asnumpy())
    """

    def __init__(self, lr_init=None):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        self.lr_init_len = len(lr_init)

    def epoch_begin(self, run_context):
        # Reset the per-epoch loss history and start the epoch timer.
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        # Report wall-clock time for the epoch, per-step average, and mean loss.
        cb_params = run_context.original_args()
        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds,
                                                                                     per_step_mseconds,
                                                                                     np.mean(self.losses)
                                                                                     ), flush=True)

    def step_begin(self, run_context):
        self.step_time = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        # net_outputs may be a Tensor or a (loss, ...) tuple/list of Tensors;
        # reduce it to a scalar before logging.
        step_loss = cb_params.net_outputs
        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())
        self.losses.append(step_loss)
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num
        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.3f}]".format(
            cb_params.cur_epoch_num - 1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss,
            np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]), flush=True)
if __name__ == '__main__':
    if run_distribute:
        # Data-parallel training: one replica per device, gradients mirrored
        # across ranks; all-reduce ops are fused at split index 140.
        context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
                                          parameter_broadcast=True, mirror_mean=True)
        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
        init()
    epoch_size = config.epoch_size
    # Build MobileNetV2; run the backbone in fp16, but keep Dense layers in
    # fp32 for numerical stability.
    net = mobilenet_v2(num_classes=config.num_classes)
    net.add_flags_recursive(fp16=True)
    for _, cell in net.cells_and_names():
        if isinstance(cell, nn.Dense):
            cell.add_flags_recursive(fp32=True)
    # Pick the loss: label-smoothed cross entropy when smoothing is configured.
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth, num_classes=config.num_classes)
    else:
        loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
    print("train args: ", args_opt, "\ncfg: ", config,
          "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size))
    # NOTE(review): config/create_dataset/get_lr come from earlier in this file.
    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
                             repeat_num=epoch_size, batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()
    # Fixed loss scaling for fp16 training; overflowing steps still update weights.
    loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
    lr = Tensor(get_lr(global_step=0, lr_init=0, lr_end=0, lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size))
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                   config.weight_decay, config.loss_scale)
    model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale)
    cb = None
    # Only rank 0 logs progress and (optionally) writes checkpoints.
    if rank_id == 0:
        cb = [Monitor(lr_init=lr.asnumpy())]
        if config.save_checkpoint:
            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * step_size,
                                         keep_checkpoint_max=config.keep_checkpoint_max)
            ckpt_cb = ModelCheckpoint(prefix="mobilenet", directory=config.save_checkpoint_path, config=config_ck)
            cb += [ckpt_cb]
    model.train(epoch_size, dataset, callbacks=cb)
| @@ -0,0 +1,82 @@ | |||
| # Guideline to Convert Training Data CLUERNER2020 to MindRecord For Bert Fine Tuning | |||
| <!-- TOC --> | |||
| - [What does the example do](#what-does-the-example-do) | |||
| - [How to use the example to process CLUERNER2020](#how-to-use-the-example-to-process-cluerner2020) | |||
| - [Download CLUERNER2020 and unzip](#download-cluerner2020-and-unzip) | |||
| - [Generate MindRecord](#generate-mindrecord) | |||
| - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord) | |||
| <!-- /TOC --> | |||
| ## What does the example do | |||
This example generates a MindRecord file from the [CLUENER2020](https://www.cluebenchmarks.com/introduce.html) training data, which is then used in the BERT fine-tuning process.
| 1. run.sh: generate MindRecord entry script | |||
| - data_processor_seq.py: the script from [CLUEbenchmark/CLUENER2020](https://github.com/CLUEbenchmark/CLUENER2020/tree/master/tf_version), we just change the part of the generated tfrecord to MindRecord. | |||
| - label2id.json: the file from [CLUEbenchmark/CLUENER2020](https://github.com/CLUEbenchmark/CLUENER2020/tree/master/tf_version). | |||
| - tokenization.py: the script from [CLUEbenchmark/CLUENER2020](https://github.com/CLUEbenchmark/CLUENER2020/tree/master/tf_version). | |||
| - vocab.txt: the file from [CLUEbenchmark/CLUENER2020](https://github.com/CLUEbenchmark/CLUENER2020/tree/master/tf_version). | |||
| 2. run_read.py: create MindDataset by MindRecord entry script. | |||
| - create_dataset.py: use MindDataset to read MindRecord to generate dataset. | |||
| 3. data: the output directory for MindRecord. | |||
| 4. cluener_public: the CLUENER2020 training data. | |||
| ## How to use the example to process CLUERNER2020 | |||
| Download CLUERNER2020, convert it to MindRecord, use MindDataset to read MindRecord. | |||
| ### Download CLUERNER2020 and unzip | |||
| 1. Download the training data zip. | |||
| > [CLUERNER2020 dataset download address](https://www.cluebenchmarks.com/introduce.html) **-> 任务介绍 -> CLUENER 细粒度命名实体识别 -> cluener下载链接** | |||
2. Unzip the training data into the directory example/nlp_to_mindrecord/CLUERNER2020/cluener_public.
| ``` | |||
| unzip -d {your-mindspore}/example/nlp_to_mindrecord/CLUERNER2020/cluener_public cluener_public.zip | |||
| ``` | |||
| ### Generate MindRecord | |||
| 1. Run the run.sh script. | |||
| ```bash | |||
| bash run.sh | |||
| ``` | |||
| 2. Output like this: | |||
| ``` | |||
| ... | |||
| [INFO] ME(17603:139620983514944,MainProcess):2020-04-28-16:56:12.498.235 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['data/train.mindrecord'], and the list of index files are: ['data/train.mindrecord.db'] | |||
| ... | |||
| [INFO] ME(17603,python):2020-04-28-16:56:13.400.175 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. | |||
| [INFO] ME(17603,python):2020-04-28-16:56:13.400.863 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. | |||
| [INFO] ME(17603,python):2020-04-28-16:56:13.401.534 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. | |||
| [INFO] ME(17603,python):2020-04-28-16:56:13.402.179 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. | |||
| [INFO] ME(17603,python):2020-04-28-16:56:13.402.702 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. | |||
| ... | |||
| [INFO] ME(17603:139620983514944,MainProcess):2020-04-28-16:56:13.431.208 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['data/dev.mindrecord'], and the list of index files are: ['data/dev.mindrecord.db'] | |||
| ``` | |||
| ### Create MindDataset By MindRecord | |||
| 1. Run the run_read.sh script. | |||
| ```bash | |||
| bash run_read.sh | |||
| ``` | |||
| 2. Output like this: | |||
| ``` | |||
| ... | |||
| example 1340: input_ids: [ 101 3173 1290 4852 7676 3949 122 3299 123 126 3189 4510 8020 6381 5442 7357 2590 3636 8021 7676 3949 4294 1166 6121 3124 1277 6121 3124 7270 2135 3295 5789 3326 123 126 3189 1355 6134 1093 1325 3173 2399 6590 6791 8024 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] | |||
| example 1340: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] | |||
| example 1340: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] | |||
| example 1340: label_ids: [ 0 18 19 20 2 4 0 0 0 0 0 0 0 34 36 26 27 28 0 34 35 35 35 35 35 35 35 35 35 36 26 27 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] | |||
| example 1341: input_ids: [ 101 1728 711 4293 3868 1168 2190 2150 3791 934 3633 3428 4638 6237 7025 8024 3297 1400 5310 3362 6206 5023 5401 1744 3297 7770 3791 7368 976 1139 1104 2137 511 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] | |||
| example 1341: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] | |||
| example 1341: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] | |||
| example 1341: label_ids: [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 19 19 19 19 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] | |||
| ... | |||
| ``` | |||
| @@ -0,0 +1,36 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """create MindDataset by MindRecord""" | |||
| import mindspore.dataset as ds | |||
def create_dataset(data_file):
    """Create a MindDataset from `data_file` and print every record it holds.

    Args:
        data_file (str): path to the MindRecord file to read.
    """
    num_readers = 4  # number of parallel reader workers
    data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True)
    index = 0
    for item in data_set.create_dict_iterator():
        # Dump the four BERT feature columns of each record.
        print("example {}: input_ids: {}".format(index, item['input_ids']))
        print("example {}: input_mask: {}".format(index, item['input_mask']))
        print("example {}: segment_ids: {}".format(index, item['segment_ids']))
        print("example {}: label_ids: {}".format(index, item['label_ids']))
        index += 1
        if index % 1000 == 0:
            print("read rows: {}".format(index))
    print("total rows: {}".format(index))
if __name__ == '__main__':
    # Read back both MindRecord files produced by data_processor_seq.py.
    create_dataset('data/train.mindrecord')
    create_dataset('data/dev.mindrecord')
| @@ -0,0 +1 @@ | |||
| ## output dir | |||
| @@ -0,0 +1,162 @@ | |||
| #!/usr/bin/python | |||
| # coding:utf8 | |||
| """ | |||
| @author: Cong Yu | |||
| @time: 2019-12-07 17:03 | |||
| """ | |||
| import json | |||
| import tokenization | |||
| import collections | |||
| import numpy as np | |||
| from mindspore.mindrecord import FileWriter | |||
| # pylint: skip-file | |||
| def _truncate_seq_pair(tokens_a, tokens_b, max_length): | |||
| """Truncates a sequence pair in place to the maximum length.""" | |||
| # This is a simple heuristic which will always truncate the longer sequence | |||
| # one token at a time. This makes more sense than truncating an equal percent | |||
| # of tokens from each, since if one sequence is very short then each token | |||
| # that's truncated likely contains more information than a longer sequence. | |||
| while True: | |||
| total_length = len(tokens_a) + len(tokens_b) | |||
| if total_length <= max_length: | |||
| break | |||
| if len(tokens_a) > len(tokens_b): | |||
| tokens_a.pop() | |||
| else: | |||
| tokens_b.pop() | |||
def process_one_example(tokenizer, label2id, text, label, max_seq_len=128):
    """Convert one (text, per-character label) pair into BERT input features.

    Args:
        tokenizer: object exposing tokenize() and convert_tokens_to_ids().
        label2id (dict): mapping from label string to integer id.
        text: sequence of characters (the example text).
        label: sequence of label strings aligned 1:1 with ``text``.
        max_seq_len (int): fixed output length; truncated/zero-padded to fit.

    Returns:
        tuple: (input_ids, input_mask, segment_ids, label_ids), each a list
        of length ``max_seq_len``.
    """
    # textlist = text.split(' ')
    # labellist = label.split(' ')
    textlist = list(text)
    labellist = list(label)
    tokens = []
    labels = []
    for i, word in enumerate(textlist):
        # A single character may tokenize into several word pieces.
        token = tokenizer.tokenize(word)
        tokens.extend(token)
        label_1 = labellist[i]
        for m in range(len(token)):
            if m == 0:
                labels.append(label_1)
            else:
                # NOTE(review): extra word pieces inherit labels[0] — the label
                # of the FIRST character of the whole sequence, not label_1.
                # Looks like a bug inherited from the upstream script; confirm
                # the intended label before changing.
                print("some unknown token...")
                labels.append(labels[0])
    # Reserve two slots for the [CLS]/[SEP] markers appended below.
    if len(tokens) >= max_seq_len - 1:
        tokens = tokens[0:(max_seq_len - 2)]
        labels = labels[0:(max_seq_len - 2)]
    ntokens = []
    segment_ids = []
    label_ids = []
    ntokens.append("[CLS]")  # sentence-start marker
    segment_ids.append(0)
    # [CLS]/[SEP] could carry their own labels, but they never change and barely
    # participate in training, so both are mapped to 0 here.
    label_ids.append(0)  # label2id["[CLS]"]
    for i, token in enumerate(tokens):
        ntokens.append(token)
        segment_ids.append(0)
        label_ids.append(label2id[labels[i]])
    ntokens.append("[SEP]")
    segment_ids.append(0)
    # append("O") or append("[SEP]") not sure!
    label_ids.append(0)  # label2id["[SEP]"]
    input_ids = tokenizer.convert_tokens_to_ids(ntokens)
    input_mask = [1] * len(input_ids)
    # Right-pad every feature list out to max_seq_len with zeros.
    while len(input_ids) < max_seq_len:
        input_ids.append(0)
        input_mask.append(0)
        segment_ids.append(0)
        label_ids.append(0)
        ntokens.append("**NULL**")
    assert len(input_ids) == max_seq_len
    assert len(input_mask) == max_seq_len
    assert len(segment_ids) == max_seq_len
    assert len(label_ids) == max_seq_len
    feature = (input_ids, input_mask, segment_ids, label_ids)
    return feature
def prepare_mindrecord_data(tokenizer, max_seq_len, label2id, path, out_path):
    """Convert a CLUENER2020 json-lines file into a MindRecord file.

    Each input line holds {"text": ..., "label": {type: {mention: [[s, e], ...]}}}.
    Entity spans are expanded into per-character S_/B_/M_/E_ tags, encoded by
    ``process_one_example``, and written through ``FileWriter``.

    Args:
        tokenizer: tokenizer passed through to process_one_example.
        max_seq_len (int): fixed feature length.
        label2id (dict): label-string -> id mapping.
        path (str): input json-lines file.
        out_path (str): output MindRecord file path.
    """
    writer = FileWriter(out_path)
    data_schema = {"input_ids": {"type": "int64", "shape": [-1]},
                   "input_mask": {"type": "int64", "shape": [-1]},
                   "segment_ids": {"type": "int64", "shape": [-1]},
                   "label_ids": {"type": "int64", "shape": [-1]}}
    writer.add_schema(data_schema, "CLUENER2020 schema")
    example_count = 0
    # Use a context manager (the original never closed the file handle) and an
    # explicit encoding so the Chinese text decodes the same on every platform.
    with open(path, encoding="utf-8") as reader:
        for line in reader:
            if not line.strip():
                continue
            example = json.loads(line.strip())
            labels = ["O"] * len(example["text"])
            # Expand each annotated span into single/begin/middle/end tags.
            for entity_type, mentions in example["label"].items():
                for spans in mentions.values():
                    for span in spans:
                        s = span[0]
                        e = span[1] + 1  # spans are inclusive; make `e` exclusive
                        if e - s == 1:
                            labels[s] = "S_" + entity_type
                        else:
                            labels[s] = "B_" + entity_type
                            for i in range(s + 1, e - 1):
                                labels[i] = "M_" + entity_type
                            labels[e - 1] = "E_" + entity_type
            feature = process_one_example(tokenizer, label2id, list(example["text"]), labels,
                                          max_seq_len=max_seq_len)
            features = collections.OrderedDict()
            features["input_ids"] = np.asarray(feature[0])
            features["input_mask"] = np.asarray(feature[1])
            features["segment_ids"] = np.asarray(feature[2])
            features["label_ids"] = np.asarray(feature[3])
            # Echo the first few examples so the conversion can be sanity-checked.
            if example_count < 5:
                print("*** Example ***")
                print(example["text"])
                print(example["label"])
                print("input_ids: %s" % " ".join([str(x) for x in feature[0]]))
                print("input_mask: %s" % " ".join([str(x) for x in feature[1]]))
                print("segment_ids: %s" % " ".join([str(x) for x in feature[2]]))
                print("label: %s " % " ".join([str(x) for x in feature[3]]))
            writer.write_raw_data([features])
            example_count += 1
            if example_count % 3000 == 0:
                print(example_count)
    print("total example:", example_count)
    writer.commit()
if __name__ == "__main__":
    # Build the vocabulary-based tokenizer and the label mapping, then convert
    # both CLUENER2020 splits (train/dev) into MindRecord files under data/.
    vocab_file = "./vocab.txt"
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file)
    label2id = json.loads(open("label2id.json").read())
    max_seq_len = 64
    prepare_mindrecord_data(tokenizer, max_seq_len, label2id, path="cluener_public/train.json",
                            out_path="data/train.mindrecord")
    prepare_mindrecord_data(tokenizer, max_seq_len, label2id, path="cluener_public/dev.json",
                            out_path="data/dev.mindrecord")
| @@ -0,0 +1,43 @@ | |||
| { | |||
| "O": 0, | |||
| "S_address": 1, | |||
| "B_address": 2, | |||
| "M_address": 3, | |||
| "E_address": 4, | |||
| "S_book": 5, | |||
| "B_book": 6, | |||
| "M_book": 7, | |||
| "E_book": 8, | |||
| "S_company": 9, | |||
| "B_company": 10, | |||
| "M_company": 11, | |||
| "E_company": 12, | |||
| "S_game": 13, | |||
| "B_game": 14, | |||
| "M_game": 15, | |||
| "E_game": 16, | |||
| "S_government": 17, | |||
| "B_government": 18, | |||
| "M_government": 19, | |||
| "E_government": 20, | |||
| "S_movie": 21, | |||
| "B_movie": 22, | |||
| "M_movie": 23, | |||
| "E_movie": 24, | |||
| "S_name": 25, | |||
| "B_name": 26, | |||
| "M_name": 27, | |||
| "E_name": 28, | |||
| "S_organization": 29, | |||
| "B_organization": 30, | |||
| "M_organization": 31, | |||
| "E_organization": 32, | |||
| "S_position": 33, | |||
| "B_position": 34, | |||
| "M_position": 35, | |||
| "E_position": 36, | |||
| "S_scene": 37, | |||
| "B_scene": 38, | |||
| "M_scene": 39, | |||
| "E_scene": 40 | |||
| } | |||
| @@ -0,0 +1,20 @@ | |||
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Remove any MindRecord output from a previous run. -f keeps the script from
# printing errors on the first run, when no previous output exists yet.
rm -f data/train.mindrecord*
rm -f data/dev.mindrecord*

# Convert the CLUENER2020 train/dev json files into MindRecord under data/.
python data_processor_seq.py
| @@ -0,0 +1,17 @@ | |||
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Read back the MindRecord files produced by run.sh and print their records.
python create_dataset.py
| @@ -0,0 +1,388 @@ | |||
| """Tokenization classes.""" | |||
| from __future__ import absolute_import | |||
| from __future__ import division | |||
| from __future__ import print_function | |||
import collections
import io
import re
import unicodedata

import six
| # pylint: skip-file | |||
def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
    """Checks whether the casing config is consistent with the checkpoint name."""
    # Casing is user-supplied and never stored alongside the checkpoint, so the
    # only available check is a heuristic on the checkpoint's directory name.
    if not init_checkpoint:
        return

    m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
    if m is None:
        return
    model_name = m.group(1)

    lower_models = [
        "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12",
        "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12"
    ]
    cased_models = [
        "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16",
        "multi_cased_L-12_H-768_A-12"
    ]

    # (actual flag, model casing, flag the user should have passed)
    if model_name in lower_models and not do_lower_case:
        mismatch = ("False", "lowercased", "True")
    elif model_name in cased_models and do_lower_case:
        mismatch = ("True", "cased", "False")
    else:
        mismatch = None

    if mismatch is not None:
        actual_flag, case_name, opposite_flag = mismatch
        raise ValueError(
            "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. "
            "However, `%s` seems to be a %s model, so you "
            "should pass in `--do_lower_case=%s` so that the fine-tuning matches "
            "how the model was pre-training. If this error is wrong, please "
            "just comment out this check." % (actual_flag, init_checkpoint,
                                              model_name, case_name, opposite_flag))
def convert_to_unicode(text):
    """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
    if six.PY3:
        # Python 3: str is already unicode; bytes are decoded leniently.
        if isinstance(text, str):
            return text
        if isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
        raise ValueError("Unsupported string type: %s" % (type(text)))
    if six.PY2:
        # Python 2: str is a byte string; unicode passes through unchanged.
        if isinstance(text, str):
            return text.decode("utf-8", "ignore")
        if isinstance(text, unicode):
            return text
        raise ValueError("Unsupported string type: %s" % (type(text)))
    raise ValueError("Not running on Python2 or Python 3?")
def printable_text(text):
    """Returns text encoded in a way suitable for print or `tf.logging`."""
    # Both Python 2 and 3 want `str` here, but in one case that is a unicode
    # string and in the other a byte string.
    if six.PY3:
        if isinstance(text, str):
            return text
        if isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
        raise ValueError("Unsupported string type: %s" % (type(text)))
    if six.PY2:
        if isinstance(text, str):
            return text
        if isinstance(text, unicode):
            return text.encode("utf-8")
        raise ValueError("Unsupported string type: %s" % (type(text)))
    raise ValueError("Not running on Python2 or Python 3?")
def load_vocab(vocab_file):
    """Loads a vocabulary file into a dictionary.

    Each line of the file holds one token; tokens map to consecutive integer
    ids in file order.

    Args:
        vocab_file (str): path to the vocabulary text file.

    Returns:
        collections.OrderedDict: token -> id, in file order.
    """
    vocab = collections.OrderedDict()
    # io.open with an explicit utf-8 encoding: the BERT vocab contains
    # non-ASCII tokens and the platform default codec may not decode them.
    # The context manager also fixes the original's leaked file handle.
    with io.open(vocab_file, "r", encoding="utf-8") as reader:
        index = 0
        while True:
            token = convert_to_unicode(reader.readline())
            if not token:
                break
            vocab[token.strip()] = index
            index += 1
    return vocab
def convert_by_vocab(vocab, items):
    """Converts a sequence of [tokens|ids] using the vocab.

    Items missing from `vocab` are mapped to the '[UNK]' entry.
    """
    return [vocab[item] if item in vocab else vocab['[UNK]'] for item in items]
def convert_tokens_to_ids(vocab, tokens):
    # Thin wrapper: map token strings to ids via the shared vocab lookup.
    return convert_by_vocab(vocab, tokens)
def convert_ids_to_tokens(inv_vocab, ids):
    # Thin wrapper: map ids back to token strings via the inverse vocab.
    return convert_by_vocab(inv_vocab, ids)
def whitespace_tokenize(text):
    """Runs basic whitespace cleaning and splitting on a piece of text."""
    stripped = text.strip()
    # str.split() with no argument collapses runs of whitespace.
    return stripped.split() if stripped else []
class FullTokenizer(object):
    """Runs end-to-end tokenization: basic split, then word-piece split."""

    def __init__(self, vocab_file, do_lower_case=True):
        self.vocab = load_vocab(vocab_file)
        self.inv_vocab = {v: k for k, v in self.vocab.items()}
        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)

    def tokenize(self, text):
        # Run the basic tokenizer first, then break each token into word pieces.
        pieces = []
        for basic_token in self.basic_tokenizer.tokenize(text):
            pieces.extend(self.wordpiece_tokenizer.tokenize(basic_token))
        return pieces

    def convert_tokens_to_ids(self, tokens):
        return convert_by_vocab(self.vocab, tokens)

    def convert_ids_to_tokens(self, ids):
        return convert_by_vocab(self.inv_vocab, ids)
class BasicTokenizer(object):
    """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""

    def __init__(self, do_lower_case=True):
        """Constructs a BasicTokenizer.

        Args:
            do_lower_case: Whether to lower case the input.
        """
        self.do_lower_case = do_lower_case

    def tokenize(self, text):
        """Tokenizes a piece of text.

        Returns:
            A list of tokens split on whitespace and punctuation, with CJK
            characters isolated as single-character tokens.
        """
        text = convert_to_unicode(text)
        text = self._clean_text(text)
        # This was added on November 1st, 2018 for the multilingual and Chinese
        # models. This is also applied to the English models now, but it doesn't
        # matter since the English models were not trained on any Chinese data
        # and generally don't have any Chinese data in them (there are Chinese
        # characters in the vocabulary because Wikipedia does have some Chinese
        # words in the English Wikipedia.).
        text = self._tokenize_chinese_chars(text)
        orig_tokens = whitespace_tokenize(text)
        split_tokens = []
        for token in orig_tokens:
            # Lower-casing and accent stripping only apply in uncased mode.
            if self.do_lower_case:
                token = token.lower()
                token = self._run_strip_accents(token)
            split_tokens.extend(self._run_split_on_punc(token))
        output_tokens = whitespace_tokenize(" ".join(split_tokens))
        return output_tokens

    def _run_strip_accents(self, text):
        """Strips accents from a piece of text."""
        # NFD decomposition splits base characters from combining marks
        # (category "Mn"), which are then dropped.
        text = unicodedata.normalize("NFD", text)
        output = []
        for char in text:
            cat = unicodedata.category(char)
            if cat == "Mn":
                continue
            output.append(char)
        return "".join(output)

    def _run_split_on_punc(self, text):
        """Splits punctuation on a piece of text."""
        # Walk the characters, emitting each punctuation character as its own
        # token and grouping runs of non-punctuation characters together.
        chars = list(text)
        i = 0
        start_new_word = True
        output = []
        while i < len(chars):
            char = chars[i]
            if _is_punctuation(char):
                output.append([char])
                start_new_word = True
            else:
                if start_new_word:
                    output.append([])
                start_new_word = False
                output[-1].append(char)
            i += 1
        return ["".join(x) for x in output]

    def _tokenize_chinese_chars(self, text):
        """Adds whitespace around any CJK character."""
        output = []
        for char in text:
            cp = ord(char)
            if self._is_chinese_char(cp):
                output.append(" ")
                output.append(char)
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)

    def _is_chinese_char(self, cp):
        """Checks whether CP is the codepoint of a CJK character."""
        # This defines a "chinese character" as anything in the CJK Unicode block:
        #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        #
        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
        # despite its name. The modern Korean Hangul alphabet is a different block,
        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
        # space-separated words, so they are not treated specially and handled
        # like the all of the other languages.
        if ((cp >= 0x4E00 and cp <= 0x9FFF) or  #
                (cp >= 0x3400 and cp <= 0x4DBF) or  #
                (cp >= 0x20000 and cp <= 0x2A6DF) or  #
                (cp >= 0x2A700 and cp <= 0x2B73F) or  #
                (cp >= 0x2B740 and cp <= 0x2B81F) or  #
                (cp >= 0x2B820 and cp <= 0x2CEAF) or
                (cp >= 0xF900 and cp <= 0xFAFF) or  #
                (cp >= 0x2F800 and cp <= 0x2FA1F)):  #
            return True
        return False

    def _clean_text(self, text):
        """Performs invalid character removal and whitespace cleanup on text."""
        output = []
        for char in text:
            cp = ord(char)
            # Drop NUL, the Unicode replacement character, and control chars.
            if cp == 0 or cp == 0xfffd or _is_control(char):
                continue
            if _is_whitespace(char):
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)
class WordpieceTokenizer(object):
    """Runs WordPiece tokenziation."""

    def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200):
        self.vocab = vocab  # token -> id mapping; only membership is tested here
        self.unk_token = unk_token  # emitted for words that cannot be split
        self.max_input_chars_per_word = max_input_chars_per_word  # guard against pathological words

    def tokenize(self, text):
        """Tokenizes a piece of text into its word pieces.

        This uses a greedy longest-match-first algorithm to perform tokenization
        using the given vocabulary.

        For example:
            input = "unaffable"
            output = ["un", "##aff", "##able"]

        Args:
            text: A single token or whitespace separated tokens. This should have
                already been passed through `BasicTokenizer.

        Returns:
            A list of wordpiece tokens.
        """
        text = convert_to_unicode(text)
        output_tokens = []
        for token in whitespace_tokenize(text):
            chars = list(token)
            if len(chars) > self.max_input_chars_per_word:
                output_tokens.append(self.unk_token)
                continue
            is_bad = False
            start = 0
            sub_tokens = []
            while start < len(chars):
                # Greedily find the longest substring starting at `start` that
                # is in the vocab; non-initial pieces carry the "##" prefix.
                end = len(chars)
                cur_substr = None
                while start < end:
                    substr = "".join(chars[start:end])
                    if start > 0:
                        substr = "##" + substr
                    if substr in self.vocab:
                        cur_substr = substr
                        break
                    end -= 1
                if cur_substr is None:
                    # No prefix matched at all: the whole word becomes unk_token.
                    is_bad = True
                    break
                sub_tokens.append(cur_substr)
                start = end
            if is_bad:
                output_tokens.append(self.unk_token)
            else:
                output_tokens.extend(sub_tokens)
        return output_tokens
| def _is_whitespace(char): | |||
| """Checks whether `chars` is a whitespace character.""" | |||
| # \t, \n, and \r are technically contorl characters but we treat them | |||
| # as whitespace since they are generally considered as such. | |||
| if char == " " or char == "\t" or char == "\n" or char == "\r": | |||
| return True | |||
| cat = unicodedata.category(char) | |||
| if cat == "Zs": | |||
| return True | |||
| return False | |||
| def _is_control(char): | |||
| """Checks whether `chars` is a control character.""" | |||
| # These are technically control characters but we count them as whitespace | |||
| # characters. | |||
| if char == "\t" or char == "\n" or char == "\r": | |||
| return False | |||
| cat = unicodedata.category(char) | |||
| if cat.startswith("C"): | |||
| return True | |||
| return False | |||
| def _is_punctuation(char): | |||
| """Checks whether `chars` is a punctuation character.""" | |||
| cp = ord(char) | |||
| # We treat all non-letter/number ASCII as punctuation. | |||
| # Characters such as "^", "$", and "`" are not in the Unicode | |||
| # Punctuation class but we treat them as punctuation anyways, for | |||
| # consistency. | |||
| if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or | |||
| (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): | |||
| return True | |||
| cat = unicodedata.category(char) | |||
| if cat.startswith("P"): | |||
| return True | |||
| return False | |||
| @@ -0,0 +1,107 @@ | |||
| # Guideline to Convert Training Data zhwiki to MindRecord For Bert Pre Training | |||
| <!-- TOC --> | |||
| - [What does the example do](#what-does-the-example-do) | |||
| - [Run simple test](#run-simple-test) | |||
| - [How to use the example to process zhwiki](#how-to-use-the-example-to-process-zhwiki) | |||
| - [Download zhwiki training data](#download-zhwiki-training-data) | |||
| - [Extract the zhwiki](#extract-the-zhwiki) | |||
| - [Generate MindRecord](#generate-mindrecord) | |||
| - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord) | |||
| <!-- /TOC --> | |||
| ## What does the example do | |||
| This example is based on the [zhwiki](https://dumps.wikimedia.org/zhwiki) training data. It generates MindRecord files that are then used for BERT network training. | |||
| 1. run.sh: generate MindRecord entry script. | |||
| - create_pretraining_data.py: the script from [google-research/bert](https://github.com/google-research/bert), we just change the part of the generated tfrecord to MindRecord. | |||
| - tokenization.py: the script from [google-research/bert](https://github.com/google-research/bert). | |||
| - vocab.txt: the file from [huawei-noah/Pretrained-Language-Model](https://github.com/huawei-noah/Pretrained-Language-Model/tree/master/NEZHA-TensorFlow/nezha). | |||
| - sample_text.txt: the file from [google-research/bert](https://github.com/google-research/bert). | |||
| 2. run_read.py: create MindDataset by MindRecord entry script. | |||
| - create_dataset.py: use MindDataset to read MindRecord to generate dataset. | |||
| ## Run simple test | |||
| Follow the step: | |||
| ```bash | |||
| bash run.sh # generate zhwiki.mindrecord* by sample_text.txt | |||
| bash run_read.sh # use MindDataset to read zhwiki.mindrecord* | |||
| ``` | |||
| ## How to use the example to process zhwiki | |||
| Download the zhwiki training data, extract it, convert it to MindRecord, and use MindDataset to read the MindRecord files. | |||
| ### Download zhwiki training data | |||
| > [zhwiki dataset download address](https://dumps.wikimedia.org/zhwiki) **-> 20200401 -> zhwiki-20200401-pages-articles-multistream.xml.bz2** | |||
| ### Extract the zhwiki | |||
| 1. Download [wikiextractor](https://github.com/attardi/wikiextractor) script. | |||
| 2. Extract the zhwiki. | |||
| ```python | |||
| python WikiExtractor.py -o {output_path}/extract {input_path}/zhwiki-20200401-pages-articles-multistream.xml.bz2 | |||
| ``` | |||
| 3. Generate like this: | |||
| ``` | |||
| $ ls {output_path}/extract | |||
| AA AB AC AD AE AF AG AH AI AJ AK AL AM AN | |||
| ``` | |||
| ### Generate MindRecord | |||
| 1. Modify the parameters in run.sh: --input_file, --output_file, --partition_number. | |||
| ``` | |||
| --input_file: Input raw text file (or comma-separated list of files). | |||
| --output_file: Output MindRecord file. | |||
| --partition_number: The MindRecord file will be split into the given number of partitions. | |||
| ``` | |||
| 2. Run the run.sh script. | |||
| ``` | |||
| bash run.sh | |||
| ``` | |||
| > Caution: This process is slow, please wait patiently. Running it on a server is recommended. | |||
| 3. The output looks like this: | |||
| ``` | |||
| ... | |||
| [INFO] ME(23485,python):2020-04-28-17:16:40.670.744 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. | |||
| [INFO] ME(23485,python):2020-04-28-17:16:40.671.227 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. | |||
| [INFO] ME(23485,python):2020-04-28-17:16:40.671.660 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. | |||
| [INFO] ME(23485,python):2020-04-28-17:16:40.672.037 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. | |||
| [INFO] ME(23485,python):2020-04-28-17:16:40.672.453 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. | |||
| [INFO] ME(23485,python):2020-04-28-17:16:40.672.833 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully. | |||
| ... | |||
| [INFO] ME(23485:140354285963072,MainProcess):2020-04-28-17:16:40.718.039 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['zhwiki.mindrecord0', 'zhwiki.mindrecord1', 'zhwiki.mindrecord2', 'zhwiki.mindrecord3'], and the list of index files are: ['zhwiki.mindrecord0.db', 'zhwiki.mindrecord1.db', 'zhwiki.mindrecord2.db', 'zhwiki.mindrecord3.db'] | |||
| ... | |||
| ``` | |||
| ### Create MindDataset By MindRecord | |||
| 1. Run the run_read.sh script. | |||
| ```bash | |||
| bash run_read.sh | |||
| ``` | |||
| 2. The output looks like this: | |||
| ``` | |||
| ... | |||
| example 74: input_ids: [ 101 8168 118 12847 8783 9977 15908 117 8256 9245 11643 8168 8847 8588 11575 8154 8228 143 8384 8376 9197 10241 103 10564 11421 8199 12268 112 161 8228 11541 9586 8436 8174 8363 9864 9702 103 103 119 103 9947 10564 103 8436 8806 11479 103 8912 119 103 103 103 12209 8303 103 8757 8824 117 8256 103 8619 8168 11541 102 11684 8196 103 8228 8847 11523 117 9059 9064 12410 8358 8181 10764 117 11167 11706 9920 148 8332 11390 8936 8205 10951 11997 103 8154 117 103 8670 10467 112 161 10951 13139 12413 117 10288 143 10425 8205 152 10795 8472 8196 103 161 12126 9172 13129 12106 8217 8174 12244 8205 143 103 8461 8277 10628 160 8221 119 102] | |||
| example 74: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] | |||
| example 74: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] | |||
| example 74: masked_lm_positions: [ 6 22 37 38 40 43 47 50 51 52 55 60 67 76 89 92 98 109 120 0] | |||
| example 74: masked_lm_ids: [ 8118 8165 8329 8890 8554 8458 119 8850 8565 10392 8174 11467 10291 8181 8549 12718 13139 112 158 0] | |||
| example 74: masked_lm_weights: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.] | |||
| example 74: next_sentence_labels: [0] | |||
| ... | |||
| ``` | |||
| @@ -0,0 +1,43 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """create MindDataset by MindRecord""" | |||
| import argparse | |||
| import mindspore.dataset as ds | |||
def create_dataset(data_file):
    """Read a MindRecord file with MindDataset and print every row's fields."""
    num_readers = 4
    data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True)
    row_count = 0
    for item in data_set.create_dict_iterator():
        for field in ("input_ids", "input_mask", "segment_ids",
                      "masked_lm_positions", "masked_lm_ids",
                      "masked_lm_weights", "next_sentence_labels"):
            print("example {}: {}: {}".format(row_count, field, item[field]))
        row_count += 1
        # Progress marker for long reads.
        if row_count % 1000 == 0:
            print("read rows: {}".format(row_count))
    print("total rows: {}".format(row_count))
if __name__ == '__main__':
    # Command-line entry point: read one MindRecord file and dump its rows.
    parser = argparse.ArgumentParser()
    # Fixed typo in the help text ("mindreord" -> "MindRecord").
    parser.add_argument("--input_file", type=str, required=True, help='Input MindRecord file')
    args = parser.parse_args()
    create_dataset(args.input_file)
| @@ -0,0 +1,428 @@ | |||
| # coding=utf-8 | |||
| # Copyright 2018 The Google AI Language Team Authors. | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| """Create masked LM/next sentence masked_lm MindRecord files for BERT.""" | |||
| from __future__ import absolute_import | |||
| from __future__ import division | |||
| from __future__ import print_function | |||
| import argparse | |||
| import collections | |||
| import logging | |||
| import random | |||
| import tokenization | |||
| import numpy as np | |||
| from mindspore.mindrecord import FileWriter | |||
| # pylint: skip-file | |||
# Configure the root logger once at import time: timestamped, INFO-level
# messages so the per-example and progress logs below are visible.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO)
class TrainingInstance(object):
    """A single training instance (sentence pair)."""

    def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm_labels,
                 is_random_next):
        # tokens: full wordpiece sequence including [CLS]/[SEP] markers.
        # segment_ids: 0 for sentence A, 1 for sentence B.
        # is_random_next: True when sentence B was sampled from another document.
        self.tokens = tokens
        self.segment_ids = segment_ids
        self.is_random_next = is_random_next
        self.masked_lm_positions = masked_lm_positions
        self.masked_lm_labels = masked_lm_labels

    def __str__(self):
        printable_tokens = [tokenization.printable_text(x) for x in self.tokens]
        printable_labels = [tokenization.printable_text(x) for x in self.masked_lm_labels]
        lines = [
            "tokens: %s" % " ".join(printable_tokens),
            "segment_ids: %s" % " ".join(str(x) for x in self.segment_ids),
            "is_random_next: %s" % self.is_random_next,
            "masked_lm_positions: %s" % " ".join(str(x) for x in self.masked_lm_positions),
            "masked_lm_labels: %s" % " ".join(printable_labels),
            "",
            "",
        ]
        return "\n".join(lines)

    def __repr__(self):
        return self.__str__()
def write_instance_to_example_files(instances, tokenizer, max_seq_length,
                                    max_predictions_per_seq, output_file, partition_number):
    """Create MindRecord files from `TrainingInstance`s."""
    writer = FileWriter(output_file, int(partition_number))
    data_schema = {"input_ids": {"type": "int64", "shape": [-1]},
                   "input_mask": {"type": "int64", "shape": [-1]},
                   "segment_ids": {"type": "int64", "shape": [-1]},
                   "masked_lm_positions": {"type": "int64", "shape": [-1]},
                   "masked_lm_ids": {"type": "int64", "shape": [-1]},
                   "masked_lm_weights": {"type": "float64", "shape": [-1]},
                   "next_sentence_labels": {"type": "int64", "shape": [-1]},
                   }
    writer.add_schema(data_schema, "zhwiki schema")
    total_written = 0
    for inst_index, instance in enumerate(instances):
        input_ids = tokenizer.convert_tokens_to_ids(instance.tokens)
        input_mask = [1] * len(input_ids)
        segment_ids = list(instance.segment_ids)
        assert len(input_ids) <= max_seq_length
        # Zero-pad ids/mask/segments up to the fixed sequence length.
        pad = max_seq_length - len(input_ids)
        input_ids.extend([0] * pad)
        input_mask.extend([0] * pad)
        segment_ids.extend([0] * pad)
        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length
        # Masked-LM targets, padded up to max_predictions_per_seq; a weight
        # of 0.0 marks padding positions.
        masked_lm_positions = list(instance.masked_lm_positions)
        masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels)
        masked_lm_weights = [1.0] * len(masked_lm_ids)
        lm_pad = max_predictions_per_seq - len(masked_lm_positions)
        masked_lm_positions.extend([0] * lm_pad)
        masked_lm_ids.extend([0] * lm_pad)
        masked_lm_weights.extend([0.0] * lm_pad)
        next_sentence_label = 1 if instance.is_random_next else 0
        features = collections.OrderedDict()
        for name, values in (("input_ids", input_ids),
                             ("input_mask", input_mask),
                             ("segment_ids", segment_ids),
                             ("masked_lm_positions", masked_lm_positions),
                             ("masked_lm_ids", masked_lm_ids),
                             ("masked_lm_weights", masked_lm_weights),
                             ("next_sentence_labels", [next_sentence_label])):
            features[name] = np.asarray(values)
        total_written += 1
        # Log the first 20 examples for manual inspection.
        if inst_index < 20:
            logging.info("*** Example ***")
            logging.info("tokens: %s" % " ".join(
                [tokenization.printable_text(x) for x in instance.tokens]))
            for feature_name, feature in features.items():
                logging.info("%s: %s" % (feature_name,
                                         " ".join(str(x) for x in feature)))
        writer.write_raw_data([features])
    writer.commit()
    logging.info("Wrote %d total instances", total_written)
def create_training_instances(input_files, tokenizer, max_seq_length,
                              dupe_factor, short_seq_prob, masked_lm_prob,
                              max_predictions_per_seq, rng, do_whole_word_mask):
    """Create `TrainingInstance`s from raw text.

    Args:
        input_files: list of paths to plain-text input files.
        tokenizer: tokenizer exposing `tokenize()` and a `vocab` mapping.
        max_seq_length: hard upper bound on tokens per instance.
        dupe_factor: number of passes over the data (different masks per pass).
        short_seq_prob: probability of targeting a shorter-than-max sequence.
        masked_lm_prob: fraction of tokens to mask for the LM objective.
        max_predictions_per_seq: cap on masked positions per instance.
        rng: `random.Random` instance driving all sampling.
        do_whole_word_mask: if True, mask whole words rather than wordpieces.

    Returns:
        A shuffled list of `TrainingInstance`s.
    """
    all_documents = [[]]
    # Input file format:
    # (1) One sentence per line. These should ideally be actual sentences, not
    # entire paragraphs or arbitrary spans of text. (Because we use the
    # sentence boundaries for the "next sentence prediction" task).
    # (2) Blank lines between documents. Document boundaries are needed so
    # that the "next sentence prediction" task doesn't span between documents.
    for input_file in input_files:
        # Read as UTF-8 explicitly: relying on the platform default encoding
        # (e.g. cp1252 on Windows) would corrupt the zhwiki text.
        with open(input_file, "r", encoding="utf-8") as reader:
            while True:
                line = tokenization.convert_to_unicode(reader.readline())
                if not line:
                    break
                line = line.strip()
                # Empty lines are used as document delimiters
                if not line:
                    all_documents.append([])
                tokens = tokenizer.tokenize(line)
                if tokens:
                    all_documents[-1].append(tokens)
    # Remove empty documents
    all_documents = [x for x in all_documents if x]
    rng.shuffle(all_documents)
    vocab_words = list(tokenizer.vocab.keys())
    instances = []
    for _ in range(dupe_factor):
        for document_index in range(len(all_documents)):
            instances.extend(
                create_instances_from_document(
                    all_documents, document_index, max_seq_length, short_seq_prob,
                    masked_lm_prob, max_predictions_per_seq, vocab_words, rng, do_whole_word_mask))
    rng.shuffle(instances)
    return instances
def create_instances_from_document(
        all_documents, document_index, max_seq_length, short_seq_prob,
        masked_lm_prob, max_predictions_per_seq, vocab_words, rng, do_whole_word_mask):
    """Creates `TrainingInstance`s for a single document.

    Greedily accumulates sentences into a chunk, then splits each chunk into
    an A/B sentence pair: B is either the remainder of the chunk ("actual
    next") or sampled from a random other document ("random next", 50% of the
    time), and applies masked-LM masking to the combined sequence.

    Args:
        all_documents: list of documents; each document is a list of
            sentences, each sentence a list of wordpiece tokens.
        document_index: index into `all_documents` of the document to process.
        max_seq_length: hard upper bound on tokens per instance, including
            the [CLS] and two [SEP] markers.
        short_seq_prob: probability of targeting a shorter-than-max sequence.
        masked_lm_prob: fraction of tokens to mask for the LM objective.
        max_predictions_per_seq: cap on masked positions per instance.
        rng: `random.Random` instance driving all sampling. NOTE(review):
            the exact order of rng calls below determines reproducibility
            for a fixed seed; do not reorder statements.
        vocab_words: list of vocabulary words (for random token replacement).
        do_whole_word_mask: if True, mask whole words rather than wordpieces.

    Returns:
        A list of `TrainingInstance`s built from this document.
    """
    document = all_documents[document_index]
    # Account for [CLS], [SEP], [SEP]
    max_num_tokens = max_seq_length - 3
    # We *usually* want to fill up the entire sequence since we are padding
    # to `max_seq_length` anyways, so short sequences are generally wasted
    # computation. However, we *sometimes*
    # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter
    # sequences to minimize the mismatch between pre-training and fine-tuning.
    # The `target_seq_length` is just a rough target however, whereas
    # `max_seq_length` is a hard limit.
    target_seq_length = max_num_tokens
    if rng.random() < short_seq_prob:
        target_seq_length = rng.randint(2, max_num_tokens)
    # We DON'T just concatenate all of the tokens from a document into a long
    # sequence and choose an arbitrary split point because this would make the
    # next sentence prediction task too easy. Instead, we split the input into
    # segments "A" and "B" based on the actual "sentences" provided by the user
    # input.
    instances = []
    current_chunk = []
    current_length = 0
    i = 0
    while i < len(document):
        segment = document[i]
        current_chunk.append(segment)
        current_length += len(segment)
        # Flush the chunk at end-of-document or once the target length is hit.
        if i == len(document) - 1 or current_length >= target_seq_length:
            if current_chunk:
                # `a_end` is how many segments from `current_chunk` go into the `A`
                # (first) sentence.
                a_end = 1
                if len(current_chunk) >= 2:
                    a_end = rng.randint(1, len(current_chunk) - 1)
                tokens_a = []
                for j in range(a_end):
                    tokens_a.extend(current_chunk[j])
                tokens_b = []
                # Random next
                is_random_next = False
                if len(current_chunk) == 1 or rng.random() < 0.5:
                    is_random_next = True
                    target_b_length = target_seq_length - len(tokens_a)
                    # This should rarely go for more than one iteration for large
                    # corpora. However, just to be careful, we try to make sure that
                    # the random document is not the same as the document
                    # we're processing.
                    for _ in range(10):
                        random_document_index = rng.randint(0, len(all_documents) - 1)
                        if random_document_index != document_index:
                            break
                    random_document = all_documents[random_document_index]
                    random_start = rng.randint(0, len(random_document) - 1)
                    for j in range(random_start, len(random_document)):
                        tokens_b.extend(random_document[j])
                        if len(tokens_b) >= target_b_length:
                            break
                    # We didn't actually use these segments so we "put them back" so
                    # they don't go to waste.
                    num_unused_segments = len(current_chunk) - a_end
                    i -= num_unused_segments
                # Actual next
                else:
                    is_random_next = False
                    for j in range(a_end, len(current_chunk)):
                        tokens_b.extend(current_chunk[j])
                truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng)
                assert len(tokens_a) >= 1
                assert len(tokens_b) >= 1
                tokens = []
                segment_ids = []
                # Assemble: [CLS] A... [SEP] B... [SEP], segment 0 for A, 1 for B.
                tokens.append("[CLS]")
                segment_ids.append(0)
                for token in tokens_a:
                    tokens.append(token)
                    segment_ids.append(0)
                tokens.append("[SEP]")
                segment_ids.append(0)
                for token in tokens_b:
                    tokens.append(token)
                    segment_ids.append(1)
                tokens.append("[SEP]")
                segment_ids.append(1)
                (tokens, masked_lm_positions,
                 masked_lm_labels) = create_masked_lm_predictions(
                     tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng, do_whole_word_mask)
                instance = TrainingInstance(
                    tokens=tokens,
                    segment_ids=segment_ids,
                    is_random_next=is_random_next,
                    masked_lm_positions=masked_lm_positions,
                    masked_lm_labels=masked_lm_labels)
                instances.append(instance)
            current_chunk = []
            current_length = 0
        i += 1
    return instances
# A single masked-LM prediction: `index` is the position in the token
# sequence, `label` is the original (unmasked) token at that position.
MaskedLmInstance = collections.namedtuple("MaskedLmInstance",
                                          ["index", "label"])
def create_masked_lm_predictions(tokens, masked_lm_prob,
                                 max_predictions_per_seq, vocab_words, rng, do_whole_word_mask):
    """Creates the predictions for the masked LM objective.

    Args:
        tokens: full wordpiece sequence including [CLS]/[SEP] markers.
        masked_lm_prob: fraction of tokens to mask.
        max_predictions_per_seq: cap on masked positions per sequence.
        vocab_words: list of vocabulary words used for random replacement.
        rng: `random.Random` instance. NOTE(review): the order of rng calls
            determines reproducibility for a fixed seed; do not reorder.
        do_whole_word_mask: if True, mask all wordpieces of a word together.

    Returns:
        A tuple (output_tokens, masked_lm_positions, masked_lm_labels) where
        `output_tokens` is the masked sequence, `masked_lm_positions` the
        sorted masked indices, and `masked_lm_labels` the original tokens at
        those indices.
    """
    cand_indexes = []
    for (i, token) in enumerate(tokens):
        if token == "[CLS]" or token == "[SEP]":
            continue
        # Whole Word Masking means that if we mask all of the wordpieces
        # corresponding to an original word. When a word has been split into
        # WordPieces, the first token does not have any marker and any subsequence
        # tokens are prefixed with ##. So whenever we see the ## token, we
        # append it to the previous set of word indexes.
        #
        # Note that Whole Word Masking does *not* change the training code
        # at all -- we still predict each WordPiece independently, softmaxed
        # over the entire vocabulary.
        if (do_whole_word_mask and len(cand_indexes) >= 1 and
                token.startswith("##")):
            cand_indexes[-1].append(i)
        else:
            cand_indexes.append([i])
    rng.shuffle(cand_indexes)
    output_tokens = list(tokens)
    # At least one prediction, at most max_predictions_per_seq.
    num_to_predict = min(max_predictions_per_seq,
                         max(1, int(round(len(tokens) * masked_lm_prob))))
    masked_lms = []
    covered_indexes = set()
    for index_set in cand_indexes:
        if len(masked_lms) >= num_to_predict:
            break
        # If adding a whole-word mask would exceed the maximum number of
        # predictions, then just skip this candidate.
        if len(masked_lms) + len(index_set) > num_to_predict:
            continue
        is_any_index_covered = False
        for index in index_set:
            if index in covered_indexes:
                is_any_index_covered = True
                break
        if is_any_index_covered:
            continue
        for index in index_set:
            covered_indexes.add(index)
            masked_token = None
            # 80% of the time, replace with [MASK]
            if rng.random() < 0.8:
                masked_token = "[MASK]"
            else:
                # 10% of the time, keep original
                if rng.random() < 0.5:
                    masked_token = tokens[index]
                # 10% of the time, replace with random word
                else:
                    masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)]
            output_tokens[index] = masked_token
            masked_lms.append(MaskedLmInstance(index=index, label=tokens[index]))
    assert len(masked_lms) <= num_to_predict
    # Restore positional order after the earlier shuffle.
    masked_lms = sorted(masked_lms, key=lambda x: x.index)
    masked_lm_positions = []
    masked_lm_labels = []
    for p in masked_lms:
        masked_lm_positions.append(p.index)
        masked_lm_labels.append(p.label)
    return (output_tokens, masked_lm_positions, masked_lm_labels)
def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng):
    """Truncates a pair of sequences, in place, to a maximum total length."""
    while len(tokens_a) + len(tokens_b) > max_num_tokens:
        # Always shrink the longer of the two sequences; remove from the
        # front or back at random to add more randomness and avoid biases.
        longer = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b
        assert len(longer) >= 1
        if rng.random() < 0.5:
            del longer[0]
        else:
            longer.pop()
def main():
    """Parse command-line arguments, tokenize the input files, and write the
    generated BERT pre-training instances to MindRecord."""
    def str2bool(value):
        # Bug fix: `type=bool` treats ANY non-empty string (including the
        # string "False") as True. Parse the common boolean spellings
        # explicitly so `--do_lower_case=False` actually disables lowercasing.
        lowered = value.lower()
        if lowered in ("true", "t", "yes", "1"):
            return True
        if lowered in ("false", "f", "no", "0"):
            return False
        raise argparse.ArgumentTypeError("Boolean value expected, got %r" % value)

    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str, required=True, help='Input raw text file (or comma-separated list of files).')
    parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file.')
    parser.add_argument("--partition_number", type=int, default=1, help='The MindRecord file will be split into the number of partition.')
    parser.add_argument("--vocab_file", type=str, required=True, help='The vocabulary file than the BERT model was trained on.')
    parser.add_argument("--do_lower_case", type=str2bool, default=False, help='Whether to lower case the input text. Should be True for uncased models and False for cased models.')
    parser.add_argument("--do_whole_word_mask", type=str2bool, default=False, help='Whether to use whole word masking rather than per-WordPiece masking.')
    parser.add_argument("--max_seq_length", type=int, default=128, help='Maximum sequence length.')
    parser.add_argument("--max_predictions_per_seq", type=int, default=20, help='Maximum number of masked LM predictions per sequence.')
    parser.add_argument("--random_seed", type=int, default=12345, help='Random seed for data generation.')
    parser.add_argument("--dupe_factor", type=int, default=10, help='Number of times to duplicate the input data (with different masks).')
    parser.add_argument("--masked_lm_prob", type=float, default=0.15, help='Masked LM probability.')
    parser.add_argument("--short_seq_prob", type=float, default=0.1, help='Probability of creating sequences which are shorter than the maximum length.')
    args = parser.parse_args()

    tokenizer = tokenization.FullTokenizer(
        vocab_file=args.vocab_file, do_lower_case=args.do_lower_case)

    input_files = args.input_file.split(",")
    logging.info("*** Reading from input files ***")
    for input_file in input_files:
        logging.info(" %s", input_file)

    # A single seeded rng drives all sampling, so runs are reproducible.
    rng = random.Random(args.random_seed)
    instances = create_training_instances(
        input_files, tokenizer, args.max_seq_length, args.dupe_factor,
        args.short_seq_prob, args.masked_lm_prob, args.max_predictions_per_seq,
        rng, args.do_whole_word_mask)

    write_instance_to_example_files(instances, tokenizer, args.max_seq_length,
                                    args.max_predictions_per_seq, args.output_file, args.partition_number)


if __name__ == "__main__":
    main()
| @@ -0,0 +1,29 @@ | |||
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Regenerate the zhwiki MindRecord files from sample_text.txt.
# Use `rm -f` so the first run (when no zhwiki.mindrecord* files exist yet)
# does not print an error and exit with a non-zero status.
rm -f zhwiki.mindrecord*

python create_pretraining_data.py \
    --input_file=./sample_text.txt \
    --output_file=zhwiki.mindrecord \
    --partition_number=4 \
    --vocab_file=./vocab.txt \
    --do_lower_case=True \
    --max_seq_length=128 \
    --max_predictions_per_seq=20 \
    --masked_lm_prob=0.15 \
    --random_seed=12345 \
    --dupe_factor=5
| @@ -0,0 +1,17 @@ | |||
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Read back the MindRecord file produced by run.sh and print its rows.
python create_dataset.py --input_file=zhwiki.mindrecord0
| @@ -0,0 +1,33 @@ | |||
| This text is included to make sure Unicode is handled properly: 力加勝北区ᴵᴺᵀᵃছজটডণত | |||
| Text should be one-sentence-per-line, with empty lines between documents. | |||
| This sample text is public domain and was randomly selected from Project Guttenberg. | |||
| The rain had only ceased with the gray streaks of morning at Blazing Star, and the settlement awoke to a moral sense of cleanliness, and the finding of forgotten knives, tin cups, and smaller camp utensils, where the heavy showers had washed away the debris and dust heaps before the cabin doors. | |||
| Indeed, it was recorded in Blazing Star that a fortunate early riser had once picked up on the highway a solid chunk of gold quartz which the rain had freed from its incumbering soil, and washed into immediate and glittering popularity. | |||
| Possibly this may have been the reason why early risers in that locality, during the rainy season, adopted a thoughtful habit of body, and seldom lifted their eyes to the rifted or india-ink washed skies above them. | |||
| "Cass" Beard had risen early that morning, but not with a view to discovery. | |||
| A leak in his cabin roof,--quite consistent with his careless, improvident habits,--had roused him at 4 A. M., with a flooded "bunk" and wet blankets. | |||
| The chips from his wood pile refused to kindle a fire to dry his bed-clothes, and he had recourse to a more provident neighbor's to supply the deficiency. | |||
| This was nearly opposite. | |||
| Mr. Cassius crossed the highway, and stopped suddenly. | |||
| Something glittered in the nearest red pool before him. | |||
| Gold, surely! | |||
| But, wonderful to relate, not an irregular, shapeless fragment of crude ore, fresh from Nature's crucible, but a bit of jeweler's handicraft in the form of a plain gold ring. | |||
| Looking at it more attentively, he saw that it bore the inscription, "May to Cass." | |||
| Like most of his fellow gold-seekers, Cass was superstitious. | |||
| The fountain of classic wisdom, Hypatia herself. | |||
| As the ancient sage--the name is unimportant to a monk--pumped water nightly that he might study by day, so I, the guardian of cloaks and parasols, at the sacred doors of her lecture-room, imbibe celestial knowledge. | |||
| From my youth I felt in me a soul above the matter-entangled herd. | |||
| She revealed to me the glorious fact, that I am a spark of Divinity itself. | |||
| A fallen star, I am, sir!' continued he, pensively, stroking his lean stomach--'a fallen star!--fallen, if the dignity of philosophy will allow of the simile, among the hogs of the lower world--indeed, even into the hog-bucket itself. Well, after all, I will show you the way to the Archbishop's. | |||
| There is a philosophic pleasure in opening one's treasures to the modest young. | |||
| Perhaps you will assist me by carrying this basket of fruit?' And the little man jumped up, put his basket on Philammon's head, and trotted off up a neighbouring street. | |||
| Philammon followed, half contemptuous, half wondering at what this philosophy might be, which could feed the self-conceit of anything so abject as his ragged little apish guide; | |||
| but the novel roar and whirl of the street, the perpetual stream of busy faces, the line of curricles, palanquins, laden asses, camels, elephants, which met and passed him, and squeezed him up steps and into doorways, as they threaded their way through the great Moon-gate into the ample street beyond, drove everything from his mind but wondering curiosity, and a vague, helpless dread of that great living wilderness, more terrible than any dead wilderness of sand which he had left behind. | |||
| Already he longed for the repose, the silence of the Laura--for faces which knew him and smiled upon him; but it was too late to turn back now. | |||
| His guide held on for more than a mile up the great main street, crossed in the centre of the city, at right angles, by one equally magnificent, at each end of which, miles away, appeared, dim and distant over the heads of the living stream of passengers, the yellow sand-hills of the desert; | |||
| while at the end of the vista in front of them gleamed the blue harbour, through a network of countless masts. | |||
| At last they reached the quay at the opposite end of the street; | |||
| and there burst on Philammon's astonished eyes a vast semicircle of blue sea, ringed with palaces and towers. | |||
| He stopped involuntarily; and his little guide stopped also, and looked askance at the young monk, to watch the effect which that grand panorama should produce on him. | |||
| @@ -0,0 +1,394 @@ | |||
| # coding=utf-8 | |||
| # Copyright 2018 The Google AI Language Team Authors. | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| """Tokenization classes.""" | |||
| from __future__ import absolute_import | |||
| from __future__ import division | |||
| from __future__ import print_function | |||
| import collections | |||
| import re | |||
| import unicodedata | |||
| import six | |||
| # pylint: skip-file | |||
def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
    """Checks whether the casing config is consistent with the checkpoint name.

    The casing flag is supplied by the user and is not recorded in
    bert_config.json, so the only available signal is the conventional
    checkpoint directory name. Silently returns when no known model name
    can be extracted; raises ValueError on a detected mismatch.
    """
    if not init_checkpoint:
        return

    match = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
    if match is None:
        return
    model_name = match.group(1)

    # Published Google BERT checkpoints, keyed by whether they expect
    # lower-cased input.
    lower_models = [
        "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12",
        "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12"
    ]
    cased_models = [
        "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16",
        "multi_cased_L-12_H-768_A-12"
    ]

    is_bad_config = False
    if model_name in lower_models and not do_lower_case:
        is_bad_config = True
        actual_flag = "False"
        case_name = "lowercased"
        opposite_flag = "True"

    if model_name in cased_models and do_lower_case:
        is_bad_config = True
        actual_flag = "True"
        case_name = "cased"
        opposite_flag = "False"

    if is_bad_config:
        raise ValueError(
            "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. "
            "However, `%s` seems to be a %s model, so you "
            "should pass in `--do_lower_case=%s` so that the fine-tuning matches "
            "how the model was pre-training. If this error is wrong, please "
            "just comment out this check." % (actual_flag, init_checkpoint,
                                              model_name, case_name, opposite_flag))
def convert_to_unicode(text):
    """Converts `text` to a Python (unicode) string, assuming utf-8 input.

    Args:
        text: a `str`, or utf-8 encoded `bytes`.

    Returns:
        `text` as `str`; `bytes` are decoded as utf-8 with errors ignored.

    Raises:
        ValueError: if `text` is neither `str` nor `bytes`.
    """
    # Python 2 reached end of life; under Python 3 the original six-based
    # branching always took the PY3 branch, reproduced here without the
    # third-party `six` dependency. Behavior on Python 3 is unchanged.
    if isinstance(text, str):
        return text
    if isinstance(text, bytes):
        return text.decode("utf-8", "ignore")
    raise ValueError("Unsupported string type: %s" % (type(text)))
def printable_text(text):
    """Returns text encoded in a way suitable for print or `tf.logging`.

    Args:
        text: a `str`, or utf-8 encoded `bytes`.

    Returns:
        `text` as `str` (the native string type), decoding bytes as utf-8
        with errors ignored.

    Raises:
        ValueError: if `text` is neither `str` nor `bytes`.
    """
    # Python 2 reached end of life; under Python 3 the original six-based
    # branching always took the PY3 branch, reproduced here without the
    # third-party `six` dependency. Behavior on Python 3 is unchanged.
    if isinstance(text, str):
        return text
    if isinstance(text, bytes):
        return text.decode("utf-8", "ignore")
    raise ValueError("Unsupported string type: %s" % (type(text)))
def load_vocab(vocab_file):
    """Loads a vocabulary file into an ordered token -> index dictionary.

    Args:
        vocab_file: path to a utf-8 text file with one token per line.

    Returns:
        collections.OrderedDict mapping each (stripped) token to its
        zero-based line index.
    """
    vocab = collections.OrderedDict()
    # Open with an explicit utf-8 encoding: the original relied on the
    # platform default encoding, which breaks on Windows (cp1252) for
    # non-ASCII vocabulary entries such as multilingual wordpieces.
    with open(vocab_file, "r", encoding="utf-8") as reader:
        # Iterating the file replaces the original readline/break loop;
        # the per-line index assignment is identical.
        for index, line in enumerate(reader):
            token = line.strip()
            vocab[token] = index
    return vocab
def convert_by_vocab(vocab, items):
    """Converts a sequence of [tokens|ids] using the vocab."""
    # Direct comprehension; raises KeyError for out-of-vocabulary items,
    # exactly like the original explicit append loop.
    return [vocab[item] for item in items]
def convert_tokens_to_ids(vocab, tokens):
    # Map each token string to its integer id (inlined vocab lookup).
    return [vocab[token] for token in tokens]
def convert_ids_to_tokens(inv_vocab, ids):
    # Map each integer id back to its token string (inlined lookup).
    return [inv_vocab[i] for i in ids]
def whitespace_tokenize(text):
    """Runs basic whitespace cleaning and splitting on a piece of text."""
    # str.split() with no separator already discards leading/trailing
    # whitespace, collapses runs, and yields [] for empty or all-blank
    # input, so the original's explicit strip-and-empty-check is subsumed.
    return text.split()
class FullTokenizer(object):
    """Runs end-to-end tokenization: basic splitting then WordPiece."""

    def __init__(self, vocab_file, do_lower_case=True):
        # Token -> id mapping loaded from disk, plus its inverse for
        # decoding ids back to tokens.
        self.vocab = load_vocab(vocab_file)
        self.inv_vocab = {idx: tok for tok, idx in self.vocab.items()}
        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)

    def tokenize(self, text):
        # First split on whitespace/punctuation (and optionally lower-case),
        # then break every resulting word into vocabulary word pieces.
        pieces = []
        for word in self.basic_tokenizer.tokenize(text):
            pieces.extend(self.wordpiece_tokenizer.tokenize(word))
        return pieces

    def convert_tokens_to_ids(self, tokens):
        return convert_by_vocab(self.vocab, tokens)

    def convert_ids_to_tokens(self, ids):
        return convert_by_vocab(self.inv_vocab, ids)
class BasicTokenizer(object):
    """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""

    def __init__(self, do_lower_case=True):
        """Constructs a BasicTokenizer.

        Args:
            do_lower_case: Whether to lower case the input.
        """
        self.do_lower_case = do_lower_case

    def tokenize(self, text):
        """Tokenizes a piece of text.

        Args:
            text: a str, or utf-8 encoded bytes.

        Returns:
            A list of tokens split on whitespace and punctuation, with
            optional lower-casing and accent stripping applied.
        """
        text = convert_to_unicode(text)
        text = self._clean_text(text)
        # This was added on November 1st, 2018 for the multilingual and Chinese
        # models. This is also applied to the English models now, but it doesn't
        # matter since the English models were not trained on any Chinese data
        # and generally don't have any Chinese data in them (there are Chinese
        # characters in the vocabulary because Wikipedia does have some Chinese
        # words in the English Wikipedia.).
        text = self._tokenize_chinese_chars(text)
        orig_tokens = whitespace_tokenize(text)
        split_tokens = []
        for token in orig_tokens:
            if self.do_lower_case:
                # Lower-case before accent stripping so NFD normalization
                # sees the lower-case form.
                token = token.lower()
                token = self._run_strip_accents(token)
            split_tokens.extend(self._run_split_on_punc(token))
        # Re-join and re-split so punctuation pieces become standalone tokens.
        output_tokens = whitespace_tokenize(" ".join(split_tokens))
        return output_tokens

    def _run_strip_accents(self, text):
        """Strips accents from a piece of text."""
        # NFD normalization decomposes accented characters into a base
        # character plus combining marks (category "Mn"), which are dropped.
        text = unicodedata.normalize("NFD", text)
        output = []
        for char in text:
            cat = unicodedata.category(char)
            if cat == "Mn":
                continue
            output.append(char)
        return "".join(output)

    def _run_split_on_punc(self, text):
        """Splits punctuation on a piece of text."""
        # Every punctuation character becomes its own token; maximal runs of
        # non-punctuation characters between them are kept together.
        chars = list(text)
        i = 0
        start_new_word = True
        output = []
        while i < len(chars):
            char = chars[i]
            if _is_punctuation(char):
                output.append([char])
                start_new_word = True
            else:
                if start_new_word:
                    output.append([])
                start_new_word = False
                output[-1].append(char)
            i += 1
        return ["".join(x) for x in output]

    def _tokenize_chinese_chars(self, text):
        """Adds whitespace around any CJK character."""
        # Surrounding each CJK codepoint with spaces makes the later
        # whitespace split treat every such character as its own token.
        output = []
        for char in text:
            cp = ord(char)
            if self._is_chinese_char(cp):
                output.append(" ")
                output.append(char)
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)

    def _is_chinese_char(self, cp):
        """Checks whether CP is the codepoint of a CJK character."""
        # This defines a "chinese character" as anything in the CJK Unicode block:
        #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        #
        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
        # despite its name. The modern Korean Hangul alphabet is a different block,
        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
        # space-separated words, so they are not treated specially and handled
        # like the all of the other languages.
        if ((cp >= 0x4E00 and cp <= 0x9FFF) or  #
                (cp >= 0x3400 and cp <= 0x4DBF) or  #
                (cp >= 0x20000 and cp <= 0x2A6DF) or  #
                (cp >= 0x2A700 and cp <= 0x2B73F) or  #
                (cp >= 0x2B740 and cp <= 0x2B81F) or  #
                (cp >= 0x2B820 and cp <= 0x2CEAF) or
                (cp >= 0xF900 and cp <= 0xFAFF) or  #
                (cp >= 0x2F800 and cp <= 0x2FA1F)):  #
            return True
        return False

    def _clean_text(self, text):
        """Performs invalid character removal and whitespace cleanup on text."""
        output = []
        for char in text:
            cp = ord(char)
            # Drop NUL, the Unicode replacement character U+FFFD, and
            # control characters; normalize all whitespace to a space.
            if cp == 0 or cp == 0xfffd or _is_control(char):
                continue
            if _is_whitespace(char):
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)
class WordpieceTokenizer(object):
    """Runs WordPiece tokenization."""

    def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200):
        # vocab: mapping of wordpiece string -> id; only membership is used
        # here. Words longer than max_input_chars_per_word are mapped to
        # unk_token as a whole.
        self.vocab = vocab
        self.unk_token = unk_token
        self.max_input_chars_per_word = max_input_chars_per_word

    def tokenize(self, text):
        """Tokenizes a piece of text into its word pieces.

        This uses a greedy longest-match-first algorithm to perform tokenization
        using the given vocabulary.

        For example:
            input = "unaffable"
            output = ["un", "##aff", "##able"]

        Args:
            text: A single token or whitespace separated tokens. This should have
                already been passed through `BasicTokenizer`.

        Returns:
            A list of wordpiece tokens.
        """
        text = convert_to_unicode(text)
        output_tokens = []
        for token in whitespace_tokenize(text):
            chars = list(token)
            if len(chars) > self.max_input_chars_per_word:
                # Overlong words become a single unknown token.
                output_tokens.append(self.unk_token)
                continue
            is_bad = False
            start = 0
            sub_tokens = []
            while start < len(chars):
                # Greedily take the longest vocab entry starting at `start`;
                # non-initial pieces carry the "##" continuation prefix.
                end = len(chars)
                cur_substr = None
                while start < end:
                    substr = "".join(chars[start:end])
                    if start > 0:
                        substr = "##" + substr
                    if substr in self.vocab:
                        cur_substr = substr
                        break
                    end -= 1
                if cur_substr is None:
                    # No vocabulary piece matches at this position: the whole
                    # word is replaced by unk_token rather than partial pieces.
                    is_bad = True
                    break
                sub_tokens.append(cur_substr)
                start = end
            if is_bad:
                output_tokens.append(self.unk_token)
            else:
                output_tokens.extend(sub_tokens)
        return output_tokens
| def _is_whitespace(char): | |||
| """Checks whether `chars` is a whitespace character.""" | |||
| # \t, \n, and \r are technically contorl characters but we treat them | |||
| # as whitespace since they are generally considered as such. | |||
| if char == " " or char == "\t" or char == "\n" or char == "\r": | |||
| return True | |||
| cat = unicodedata.category(char) | |||
| if cat == "Zs": | |||
| return True | |||
| return False | |||
| def _is_control(char): | |||
| """Checks whether `chars` is a control character.""" | |||
| # These are technically control characters but we count them as whitespace | |||
| # characters. | |||
| if char == "\t" or char == "\n" or char == "\r": | |||
| return False | |||
| cat = unicodedata.category(char) | |||
| if cat in ("Cc", "Cf"): | |||
| return True | |||
| return False | |||
| def _is_punctuation(char): | |||
| """Checks whether `chars` is a punctuation character.""" | |||
| cp = ord(char) | |||
| # We treat all non-letter/number ASCII as punctuation. | |||
| # Characters such as "^", "$", and "`" are not in the Unicode | |||
| # Punctuation class but we treat them as punctuation anyways, for | |||
| # consistency. | |||
| if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or | |||
| (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): | |||
| return True | |||
| cat = unicodedata.category(char) | |||
| if cat.startswith("P"): | |||
| return True | |||
| return False | |||
| @@ -0,0 +1,135 @@ | |||
| # ResNet101 Example | |||
| ## Description | |||
| This is an example of training ResNet101 with ImageNet dataset in MindSpore. | |||
| ## Requirements | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the dataset [ImageNet](http://image-net.org/download). | |||
| > Unzip the ImageNet dataset to any path you want, the folder should include train and eval dataset as follows: | |||
| ``` | |||
| . | |||
| └─dataset | |||
| ├─ilsvrc | |||
| │ | |||
| └─validation_preprocess | |||
| ``` | |||
| ## Example structure | |||
| ```shell | |||
| . | |||
| ├── crossentropy.py # CrossEntropy loss function | |||
| ├── config.py # parameter configuration | |||
| ├── dataset.py # data preprocessing | |||
| ├── eval.py # eval net | |||
| ├── lr_generator.py # generate learning rate | |||
| ├── run_distribute_train.sh # launch distributed training(8p) | |||
| ├── run_infer.sh # launch evaluating | |||
| ├── run_standalone_train.sh # launch standalone training(1p) | |||
| └── train.py # train net | |||
| ``` | |||
| ## Parameter configuration | |||
| Parameters for both training and evaluating can be set in config.py. | |||
| ``` | |||
| "class_num": 1001, # dataset class number | |||
| "batch_size": 32, # batch size of input tensor | |||
| "loss_scale": 1024, # loss scale | |||
| "momentum": 0.9, # momentum optimizer | |||
| "weight_decay": 1e-4, # weight decay | |||
| "epoch_size": 120, # epoch sizes for training | |||
| "buffer_size": 1000, # number of queue size in data preprocessing | |||
| "image_height": 224, # image height | |||
| "image_width": 224, # image width | |||
| "save_checkpoint": True, # whether save checkpoint or not | |||
| "save_checkpoint_epochs": 1, # the epoch interval between two checkpoints. By default, the last checkpoint will be saved after the last epoch | |||
| "keep_checkpoint_max": 10, # only keep the last keep_checkpoint_max checkpoint | |||
| "save_checkpoint_path": "./", # path to save checkpoint relative to the executed path | |||
| "warmup_epochs": 0, # number of warmup epoch | |||
"lr_decay_mode": "cosine",            # decay mode for generating learning rate
"label_smooth": 1,                    # whether to apply label smoothing
"label_smooth_factor": 0.1,           # label smoothing factor
"lr": 0.1                             # base learning rate
| ``` | |||
| ## Running the example | |||
| ### Train | |||
| #### Usage | |||
| ``` | |||
| # distributed training | |||
| sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH] | |||
| # standalone training | |||
| sh run_standalone_train.sh [DATASET_PATH] | |||
| ``` | |||
| #### Launch | |||
| ```bash | |||
| # distributed training example(8p) | |||
| sh run_distribute_train.sh rank_table_8p.json dataset/ilsvrc | |||
| # standalone training example(1p) | |||
| sh run_standalone_train.sh dataset/ilsvrc | |||
| ``` | |||
| > About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). | |||
| #### Result | |||
Training results will be stored in the example path, whose folder name begins with "train" or "train_parallel". You can find checkpoint files together with results like the following in the log.
| ``` | |||
| # distribute training result(8p) | |||
| epoch: 1 step: 5004, loss is 4.805483 | |||
| epoch: 2 step: 5004, loss is 3.2121816 | |||
| epoch: 3 step: 5004, loss is 3.429647 | |||
| epoch: 4 step: 5004, loss is 3.3667371 | |||
| epoch: 5 step: 5004, loss is 3.1718972 | |||
| ... | |||
| epoch: 67 step: 5004, loss is 2.2768745 | |||
| epoch: 68 step: 5004, loss is 1.7223864 | |||
| epoch: 69 step: 5004, loss is 2.0665488 | |||
| epoch: 70 step: 5004, loss is 1.8717369 | |||
| ... | |||
| ``` | |||
| ### Infer | |||
| #### Usage | |||
| ``` | |||
| # infer | |||
| sh run_infer.sh [VALIDATION_DATASET_PATH] [CHECKPOINT_PATH] | |||
| ``` | |||
| #### Launch | |||
| ```bash | |||
| # infer with checkpoint | |||
| sh run_infer.sh dataset/validation_preprocess/ train_parallel0/resnet-120_5004.ckpt | |||
| ``` | |||
| > checkpoint can be produced in training process. | |||
| #### Result | |||
Inference results will be stored in the example path, whose folder name is "infer". Under this folder, you can find results like the following in the log.
| ``` | |||
| result: {'top_5_accuracy': 0.9429417413572343, 'top_1_accuracy': 0.7853513124199744} ckpt=train_parallel0/resnet-120_5004.ckpt | |||
| ``` | |||
| @@ -0,0 +1,39 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| network config setting, will be used in train.py and eval.py | |||
| """ | |||
from easydict import EasyDict as ed

# Hyper-parameters for the ResNet101 ImageNet example; imported by
# train.py and eval.py. The README documents the same list.
config = ed({
    "class_num": 1001,             # number of dataset classes
    "batch_size": 32,              # batch size of the input tensor
    "loss_scale": 1024,            # loss scale
    "momentum": 0.9,               # momentum for the optimizer
    "weight_decay": 1e-4,          # weight decay
    "epoch_size": 120,             # number of training epochs
    "buffer_size": 1000,           # queue size used in data preprocessing
    "image_height": 224,           # input image height
    "image_width": 224,            # input image width
    "save_checkpoint": True,       # whether to save checkpoints
    "save_checkpoint_epochs": 1,   # epoch interval between two checkpoints
    "keep_checkpoint_max": 10,     # keep at most this many checkpoints
    "save_checkpoint_path": "./",  # checkpoint dir, relative to the executed path
    "warmup_epochs": 0,            # number of learning-rate warmup epochs
    "lr_decay_mode": "cosine",     # decay mode for generating the learning rate
    "label_smooth": 1,             # whether to apply label smoothing (eval.py treats it as a flag)
    "label_smooth_factor": 0.1,    # label smoothing factor
    "lr": 0.1                      # base learning rate
})
| @@ -0,0 +1,36 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """define loss function for network""" | |||
| from mindspore.nn.loss.loss import _Loss | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops import functional as F | |||
| from mindspore import Tensor | |||
| from mindspore.common import dtype as mstype | |||
| import mindspore.nn as nn | |||
class CrossEntropy(_Loss):
    """the redefined loss function with SoftmaxCrossEntropyWithLogits"""

    def __init__(self, smooth_factor=0., num_classes=1001):
        # Label smoothing: the true class receives 1 - smooth_factor and the
        # remaining smooth_factor probability mass is spread evenly over the
        # other num_classes - 1 classes.
        super(CrossEntropy, self).__init__()
        self.onehot = P.OneHot()
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor / (num_classes -1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)

    def construct(self, logit, label):
        # One-hot encode the sparse labels with the smoothed on/off values
        # (the class axis size is taken from the logits), compute softmax
        # cross-entropy, then average the per-sample losses over axis 0.
        one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
        loss = self.ce(logit, one_hot_label)
        loss = self.mean(loss, 0)
        return loss
| @@ -0,0 +1,89 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| create train or eval dataset. | |||
| """ | |||
| import os | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.dataset.engine as de | |||
| import mindspore.dataset.transforms.vision.c_transforms as C | |||
| import mindspore.dataset.transforms.c_transforms as C2 | |||
| from config import config | |||
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or evaluate dataset.

    Args:
        dataset_path (str): the path of the dataset.
        do_train (bool): whether the dataset is used for train or eval;
            selects the augmentation pipeline below.
        repeat_num (int): the repeat times of dataset. Default: 1.
        batch_size (int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """
    # Fall back to single-device values when the distributed launcher has
    # not exported RANK_SIZE/RANK_ID: the original int(os.getenv(...))
    # raised TypeError on an unset variable.
    device_num = int(os.getenv("RANK_SIZE", "1"))
    rank_id = int(os.getenv("RANK_ID", "0"))

    if device_num == 1:
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        # Shard the dataset so each device reads a distinct slice.
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                     num_shards=device_num, shard_id=rank_id)

    resize_height = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    decode_op = C.Decode()
    random_resize_crop_op = C.RandomResizedCrop(resize_height, (0.08, 1.0), (0.75, 1.33), max_attempts=100)
    # NOTE(review): the flip probability rank_id / (rank_id + 1) is 0 on
    # rank 0 and grows toward 1 on higher ranks — presumably intentional
    # per-rank decorrelation, but confirm; 0.5 is the conventional value.
    horizontal_flip_op = C.RandomHorizontalFlip(rank_id / (rank_id + 1))
    resize_op_256 = C.Resize((256, 256))
    center_crop = C.CenterCrop(224)
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize((0.475, 0.451, 0.392), (0.275, 0.267, 0.278))
    changeswap_op = C.HWC2CHW()

    if do_train:
        # Training: random crop/flip augmentation.
        trans = [decode_op,
                 random_resize_crop_op,
                 horizontal_flip_op,
                 rescale_op,
                 normalize_op,
                 changeswap_op]
    else:
        # Evaluation: deterministic resize + center crop.
        trans = [decode_op,
                 resize_op_256,
                 center_crop,
                 rescale_op,
                 normalize_op,
                 changeswap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans)
    ds = ds.map(input_columns="label", operations=type_cast_op)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=config.buffer_size)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
| @@ -0,0 +1,78 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| eval. | |||
| """ | |||
| import os | |||
| import argparse | |||
| import random | |||
| import numpy as np | |||
| from dataset import create_dataset | |||
| from config import config | |||
| from mindspore import context | |||
| from mindspore.model_zoo.resnet import resnet101 | |||
| from mindspore.parallel._auto_parallel_context import auto_parallel_context | |||
| from mindspore.train.model import Model, ParallelMode | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| import mindspore.dataset.engine as de | |||
| from mindspore.communication.management import init | |||
| from crossentropy import CrossEntropy | |||
# Fix all random seeds so evaluation is reproducible.
random.seed(1)
np.random.seed(1)
de.config.set_seed(1)

parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
parser.add_argument('--device_num', type=int, default=1, help='Device num.')
parser.add_argument('--do_train', type=bool, default=False, help='Do train or not.')
parser.add_argument('--do_eval', type=bool, default=True, help='Do eval or not.')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()

# Default to device 0 when DEVICE_ID is not exported: the original
# int(os.getenv('DEVICE_ID')) raised TypeError on an unset variable.
device_id = int(os.getenv('DEVICE_ID', '0'))

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id)
context.set_context(enable_task_sink=True)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)

if __name__ == '__main__':
    # Distributed/auto-parallel setup applies only when training; the
    # evaluation path below runs on a single device.
    if not args_opt.do_eval and args_opt.run_distribute:
        context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
                                          mirror_mean=True, parameter_broadcast=True)
        auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313])
        init()

    epoch_size = config.epoch_size
    net = resnet101(class_num=config.class_num)

    # config.label_smooth acts as an on/off flag for label smoothing.
    if not config.label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)

    if args_opt.do_eval:
        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size)
        step_size = dataset.get_dataset_size()

        if args_opt.checkpoint_path:
            # Restore trained weights before evaluation.
            param_dict = load_checkpoint(args_opt.checkpoint_path)
            load_param_into_net(net, param_dict)
        net.set_train(False)

        model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'})
        res = model.eval(dataset)
        print("result:", res, "ckpt=", args_opt.checkpoint_path)
| @@ -0,0 +1,52 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """learning rate generator""" | |||
| import math | |||
| import numpy as np | |||
def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
    """Linearly interpolates from init_lr toward base_lr over warmup_steps."""
    # Per-step increment; at current_step == warmup_steps the result is
    # exactly base_lr.
    slope = (float(base_lr) - float(init_lr)) / float(warmup_steps)
    return float(init_lr) + slope * current_step
def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch):
    """
    generate learning rate array with cosine

    Args:
        lr(float): base learning rate
        steps_per_epoch(int): steps size of one epoch
        warmup_epochs(int): number of warmup epochs
        max_epoch(int): total epochs of training

    Returns:
        np.array, learning rate array
    """
    base_lr = lr
    warmup_init_lr = 0
    total_steps = int(max_epoch * steps_per_epoch)
    warmup_steps = int(warmup_epochs * steps_per_epoch)
    decay_steps = total_steps - warmup_steps

    schedule = []
    for step in range(total_steps):
        if step < warmup_steps:
            # Linear warmup from warmup_init_lr to base_lr (inlined from
            # linear_warmup_lr so this function stands alone).
            increment = (float(base_lr) - float(warmup_init_lr)) / float(warmup_steps)
            value = float(warmup_init_lr) + increment * (step + 1)
        else:
            # Cosine annealing modulated by a linear decay envelope, with a
            # small floor (1e-5 of base_lr) so the rate never reaches zero.
            linear_decay = (total_steps - step) / decay_steps
            cosine_decay = 0.5 * (1 + math.cos(math.pi * 2 * 0.47 * step / decay_steps))
            value = base_lr * (linear_decay * cosine_decay + 0.00001)
        schedule.append(value)

    return np.array(schedule).astype(np.float32)
| @@ -0,0 +1,66 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 2 ] | |||
| then | |||
| echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]" | |||
| exit 1 | |||
| fi | |||
| get_real_path(){ | |||
| if [ "${1:0:1}" == "/" ]; then | |||
| echo "$1" | |||
| else | |||
| echo "$(realpath -m $PWD/$1)" | |||
| fi | |||
| } | |||
| PATH1=$(get_real_path $1) | |||
| PATH2=$(get_real_path $2) | |||
| echo $PATH1 | |||
| echo $PATH2 | |||
| if [ ! -f $PATH1 ] | |||
| then | |||
| echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file" | |||
| exit 1 | |||
| fi | |||
| if [ ! -d $PATH2 ] | |||
| then | |||
| echo "error: DATASET_PATH=$PATH2 is not a directory" | |||
| exit 1 | |||
| fi | |||
| ulimit -u unlimited | |||
| export DEVICE_NUM=8 | |||
| export RANK_SIZE=8 | |||
| export MINDSPORE_HCCL_CONFIG_PATH=$PATH1 | |||
| export RANK_TABLE_FILE=$PATH1 | |||
| for((i=0; i<${DEVICE_NUM}; i++)) | |||
| do | |||
| export DEVICE_ID=$i | |||
| export RANK_ID=$i | |||
| rm -rf ./train_parallel$i | |||
| mkdir ./train_parallel$i | |||
| cp *.py ./train_parallel$i | |||
| cp *.sh ./train_parallel$i | |||
| cd ./train_parallel$i || exit | |||
| echo "start training for rank $RANK_ID, device $DEVICE_ID" | |||
| env > env.log | |||
| python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log & | |||
| cd .. | |||
| done | |||
| @@ -0,0 +1,64 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 2 ] | |||
| then | |||
| echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]" | |||
| exit 1 | |||
| fi | |||
| get_real_path(){ | |||
| if [ "${1:0:1}" == "/" ]; then | |||
| echo "$1" | |||
| else | |||
| echo "$(realpath -m $PWD/$1)" | |||
| fi | |||
| } | |||
| PATH1=$(get_real_path $1) | |||
| PATH2=$(get_real_path $2) | |||
| echo $PATH1 | |||
| echo $PATH2 | |||
| if [ ! -d $PATH1 ] | |||
| then | |||
| echo "error: DATASET_PATH=$PATH1 is not a directory" | |||
| exit 1 | |||
| fi | |||
| if [ ! -f $PATH2 ] | |||
| then | |||
| echo "error: CHECKPOINT_PATH=$PATH2 is not a file" | |||
| exit 1 | |||
| fi | |||
| ulimit -u unlimited | |||
| export DEVICE_NUM=1 | |||
| export DEVICE_ID=0 | |||
| export RANK_SIZE=$DEVICE_NUM | |||
| export RANK_ID=0 | |||
| if [ -d "infer" ]; | |||
| then | |||
| rm -rf ./infer | |||
| fi | |||
| mkdir ./infer | |||
| cp *.py ./infer | |||
| cp *.sh ./infer | |||
| cd ./infer || exit | |||
| env > env.log | |||
| echo "start infering for device $DEVICE_ID" | |||
| python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log & | |||
| cd .. | |||
| @@ -0,0 +1,56 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 1 ] | |||
| then | |||
| echo "Usage: sh run_standalone_train.sh [DATASET_PATH]" | |||
| exit 1 | |||
| fi | |||
| get_real_path(){ | |||
| if [ "${1:0:1}" == "/" ]; then | |||
| echo "$1" | |||
| else | |||
| echo "$(realpath -m $PWD/$1)" | |||
| fi | |||
| } | |||
| PATH1=$(get_real_path $1) | |||
| echo $PATH1 | |||
| if [ ! -d $PATH1 ] | |||
| then | |||
| echo "error: DATASET_PATH=$PATH1 is not a directory" | |||
| exit 1 | |||
| fi | |||
| ulimit -u unlimited | |||
| export DEVICE_NUM=1 | |||
| export DEVICE_ID=0 | |||
| export RANK_ID=0 | |||
| export RANK_SIZE=1 | |||
| if [ -d "train" ]; | |||
| then | |||
| rm -rf ./train | |||
| fi | |||
| mkdir ./train | |||
| cp *.py ./train | |||
| cp *.sh ./train | |||
| cd ./train || exit | |||
| echo "start training for device $DEVICE_ID" | |||
| env > env.log | |||
| python train.py --do_train=True --dataset_path=$PATH1 &> log & | |||
| cd .. | |||
| @@ -0,0 +1,98 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """train_imagenet.""" | |||
| import os | |||
| import argparse | |||
| import random | |||
| import numpy as np | |||
| from dataset import create_dataset | |||
| from lr_generator import warmup_cosine_annealing_lr | |||
| from config import config | |||
| from mindspore import context | |||
| from mindspore import Tensor | |||
| from mindspore.model_zoo.resnet import resnet101 | |||
| from mindspore.parallel._auto_parallel_context import auto_parallel_context | |||
| from mindspore.nn.optim.momentum import Momentum | |||
| from mindspore.train.model import Model, ParallelMode | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor | |||
| from mindspore.train.loss_scale_manager import FixedLossScaleManager | |||
| import mindspore.dataset.engine as de | |||
| from mindspore.communication.management import init | |||
| import mindspore.nn as nn | |||
| import mindspore.common.initializer as weight_init | |||
| from crossentropy import CrossEntropy | |||
| random.seed(1) | |||
| np.random.seed(1) | |||
| de.config.set_seed(1) | |||
| parser = argparse.ArgumentParser(description='Image classification') | |||
| parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') | |||
| parser.add_argument('--device_num', type=int, default=1, help='Device num.') | |||
| parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.') | |||
| parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.') | |||
| parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') | |||
| args_opt = parser.parse_args() | |||
| device_id = int(os.getenv('DEVICE_ID')) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id) | |||
| context.set_context(enable_task_sink=True) | |||
| context.set_context(enable_loop_sink=True) | |||
| context.set_context(enable_mem_reuse=True) | |||
| if __name__ == '__main__': | |||
| if not args_opt.do_eval and args_opt.run_distribute: | |||
| context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| mirror_mean=True, parameter_broadcast=True) | |||
| auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313]) | |||
| init() | |||
| epoch_size = config.epoch_size | |||
| net = resnet101(class_num=config.class_num) | |||
| # weight init | |||
| for _, cell in net.cells_and_names(): | |||
| if isinstance(cell, nn.Conv2d): | |||
| cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(), | |||
| cell.weight.default_input.shape(), | |||
| cell.weight.default_input.dtype()) | |||
| if isinstance(cell, nn.Dense): | |||
| cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(), | |||
| cell.weight.default_input.shape(), | |||
| cell.weight.default_input.dtype()) | |||
| if not config.label_smooth: | |||
| config.label_smooth_factor = 0.0 | |||
| loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) | |||
| if args_opt.do_train: | |||
| dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, | |||
| repeat_num=epoch_size, batch_size=config.batch_size) | |||
| step_size = dataset.get_dataset_size() | |||
| loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) | |||
| # learning rate strategy with cosine | |||
| lr = Tensor(warmup_cosine_annealing_lr(config.lr, step_size, config.warmup_epochs, config.epoch_size)) | |||
| opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, | |||
| config.weight_decay, config.loss_scale) | |||
| model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', keep_batchnorm_fp32=False, | |||
| loss_scale_manager=loss_scale, metrics={'acc'}) | |||
| time_cb = TimeMonitor(data_size=step_size) | |||
| loss_cb = LossMonitor() | |||
| cb = [time_cb, loss_cb] | |||
| if config.save_checkpoint: | |||
| config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs*step_size, | |||
| keep_checkpoint_max=config.keep_checkpoint_max) | |||
| ckpt_cb = ModelCheckpoint(prefix="resnet", directory=config.save_checkpoint_path, config=config_ck) | |||
| cb += [ckpt_cb] | |||
| model.train(epoch_size, dataset, callbacks=cb) | |||
| @@ -0,0 +1,125 @@ | |||
| # ResNet-50 Example | |||
| ## Description | |||
| This is an example of training ResNet-50 with CIFAR-10 dataset in MindSpore. | |||
| ## Requirements | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the dataset [CIFAR-10](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz). | |||
| > Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows: | |||
| > ``` | |||
| > . | |||
| > ├── cifar-10-batches-bin # train dataset | |||
| > └── cifar-10-verify-bin # infer dataset | |||
| > ``` | |||
| ## Example structure | |||
| ```shell | |||
| . | |||
| ├── config.py # parameter configuration | |||
| ├── dataset.py # data preprocessing | |||
| ├── eval.py # infer script | |||
| ├── lr_generator.py # generate learning rate for each step | |||
| ├── run_distribute_train.sh # launch distributed training | |||
| ├── run_infer.sh # launch inferring | |||
| ├── run_standalone_train.sh # launch standalone training | |||
| └── train.py # train script | |||
| ``` | |||
| ## Parameter configuration | |||
| Parameters for both training and inference can be set in config.py. | |||
| ``` | |||
| "class_num": 10, # dataset class num | |||
| "batch_size": 32, # batch size of input tensor | |||
| "loss_scale": 1024, # loss scale | |||
| "momentum": 0.9, # momentum | |||
| "weight_decay": 1e-4, # weight decay | |||
| "epoch_size": 90, # only valid for training, which is always 1 for inference | |||
| "buffer_size": 100, # number of queue size in data preprocessing | |||
| "image_height": 224, # image height | |||
| "image_width": 224, # image width | |||
| "save_checkpoint": True, # whether save checkpoint or not | |||
| "save_checkpoint_steps": 195, # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step | |||
| "keep_checkpoint_max": 10, # only keep the last keep_checkpoint_max checkpoint | |||
| "save_checkpoint_path": "./", # path to save checkpoint | |||
| "lr_init": 0.01, # initial learning rate | |||
| "lr_end": 0.00001, # final learning rate | |||
| "lr_max": 0.1, # maximum learning rate | |||
| "warmup_epochs": 5, # number of warmup epoch | |||
| "lr_decay_mode": "poly" # decay mode can be selected in steps, poly and default | |||
| ``` | |||
| ## Running the example | |||
| ### Train | |||
| #### Usage | |||
| ``` | |||
| # distribute training | |||
| Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH] | |||
| # standalone training | |||
| Usage: sh run_standalone_train.sh [DATASET_PATH] | |||
| ``` | |||
| #### Launch | |||
| ``` | |||
| # distribute training example | |||
| sh run_distribute_train.sh rank_table.json ~/cifar-10-batches-bin | |||
| # standalone training example | |||
| sh run_standalone_train.sh ~/cifar-10-batches-bin | |||
| ``` | |||
| > About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). | |||
| #### Result | |||
| Training result will be stored in the example path, whose folder name begins with "train" or "train_parallel". Under this, you can find checkpoint file together with result like the following in log. | |||
| ``` | |||
| # distribute training result(8p) | |||
| epoch: 1 step: 195, loss is 1.9601055 | |||
| epoch: 2 step: 195, loss is 1.8555021 | |||
| epoch: 3 step: 195, loss is 1.6707983 | |||
| epoch: 4 step: 195, loss is 1.8162166 | |||
| epoch: 5 step: 195, loss is 1.393667 | |||
| ``` | |||
| ### Infer | |||
| #### Usage | |||
| ``` | |||
| # infer | |||
| Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH] | |||
| ``` | |||
| #### Launch | |||
| ``` | |||
| # infer example | |||
| sh run_infer.sh ~/cifar-10-verify-bin ~/resnet50_cifar10/train_parallel0/resnet-90_195.ckpt | |||
| ``` | |||
| > checkpoint can be produced in training process. | |||
| #### Result | |||
| Inference result will be stored in the example path, whose folder name is "infer". Under this, you can find result like the following in log. | |||
| ``` | |||
| result: {'acc': 0.91446314102564111} ckpt=~/resnet50_cifar10/train_parallel0/resnet-90_195.ckpt | |||
| ``` | |||
| @@ -51,17 +51,11 @@ context.set_context(enable_loop_sink=True) | |||
| context.set_context(enable_mem_reuse=True) | |||
| if __name__ == '__main__': | |||
| if args_opt.do_eval: | |||
| context.set_context(enable_hccl=False) | |||
| else: | |||
| if args_opt.run_distribute: | |||
| context.set_context(enable_hccl=True) | |||
| context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| mirror_mean=True) | |||
| auto_parallel_context().set_all_reduce_fusion_split_indices([140]) | |||
| init() | |||
| else: | |||
| context.set_context(enable_hccl=False) | |||
| if not args_opt.do_eval and args_opt.run_distribute: | |||
| context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| mirror_mean=True) | |||
| auto_parallel_context().set_all_reduce_fusion_split_indices([140]) | |||
| init() | |||
| epoch_size = config.epoch_size | |||
| net = resnet50(class_num=config.class_num) | |||
| @@ -20,22 +20,33 @@ then | |||
| exit 1 | |||
| fi | |||
| if [ ! -f $1 ] | |||
| get_real_path(){ | |||
| if [ "${1:0:1}" == "/" ]; then | |||
| echo "$1" | |||
| else | |||
| echo "$(realpath -m $PWD/$1)" | |||
| fi | |||
| } | |||
| PATH1=$(get_real_path $1) | |||
| PATH2=$(get_real_path $2) | |||
| if [ ! -f "$PATH1" ] | |||
| then | |||
| echo "error: DMINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" | |||
| echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file" | |||
| exit 1 | |||
| fi | |||
| if [ ! -d $2 ] | |||
| if [ ! -d "$PATH2" ] | |||
| then | |||
| echo "error: DATASET_PATH=$2 is not a directory" | |||
| echo "error: DATASET_PATH=$PATH2 is not a directory" | |||
| exit 1 | |||
| fi | |||
| ulimit -u unlimited | |||
| export DEVICE_NUM=8 | |||
| export RANK_SIZE=8 | |||
| export MINDSPORE_HCCL_CONFIG_PATH=$1 | |||
| export MINDSPORE_HCCL_CONFIG_PATH=$PATH1 | |||
| for((i=0; i<${DEVICE_NUM}; i++)) | |||
| do | |||
| @@ -48,6 +59,6 @@ do | |||
| cd ./train_parallel$i || exit | |||
| echo "start training for rank $RANK_ID, device $DEVICE_ID" | |||
| env > env.log | |||
| python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$2 &> log & | |||
| python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log & | |||
| cd .. | |||
| done | |||
| @@ -20,9 +20,19 @@ then | |||
| exit 1 | |||
| fi | |||
| if [ ! -d $1 ] | |||
| get_real_path(){ | |||
| if [ "${1:0:1}" == "/" ]; then | |||
| echo "$1" | |||
| else | |||
| echo "$(realpath -m $PWD/$1)" | |||
| fi | |||
| } | |||
| PATH1=$(get_real_path $1) | |||
| if [ ! -d "$PATH1" ] | |||
| then | |||
| echo "error: DATASET_PATH=$1 is not a directory" | |||
| echo "error: DATASET_PATH=$PATH1 is not a directory" | |||
| exit 1 | |||
| fi | |||
| @@ -41,5 +51,5 @@ cp *.sh ./train | |||
| cd ./train || exit | |||
| echo "start training for device $DEVICE_ID" | |||
| env > env.log | |||
| python train.py --do_train=True --dataset_path=$1 &> log & | |||
| python train.py --do_train=True --dataset_path=$PATH1 &> log & | |||
| cd .. | |||
| @@ -54,21 +54,15 @@ context.set_context(enable_loop_sink=True) | |||
| context.set_context(enable_mem_reuse=True) | |||
| if __name__ == '__main__': | |||
| if args_opt.do_eval: | |||
| context.set_context(enable_hccl=False) | |||
| else: | |||
| if args_opt.run_distribute: | |||
| context.set_context(enable_hccl=True) | |||
| context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| mirror_mean=True) | |||
| auto_parallel_context().set_all_reduce_fusion_split_indices([140]) | |||
| init() | |||
| else: | |||
| context.set_context(enable_hccl=False) | |||
| if not args_opt.do_eval and args_opt.run_distribute: | |||
| context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| mirror_mean=True) | |||
| auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160]) | |||
| init() | |||
| epoch_size = config.epoch_size | |||
| net = resnet50(class_num=config.class_num) | |||
| loss = SoftmaxCrossEntropyWithLogits(sparse=True) | |||
| loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') | |||
| if args_opt.do_train: | |||
| @@ -0,0 +1,106 @@ | |||
| # VGG16 Example | |||
| ## Description | |||
| This example is for VGG16 model training and evaluation. | |||
| ## Requirements | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the dataset [CIFAR-10](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz). | |||
| > Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows: | |||
| > ``` | |||
| > . | |||
| > ├── cifar-10-batches-bin # train dataset | |||
| > └── cifar-10-verify-bin # infer dataset | |||
| > ``` | |||
| ## Running the Example | |||
| ### Training | |||
| ``` | |||
| python train.py --data_path=your_data_path --device_id=6 > out.train.log 2>&1 & | |||
| ``` | |||
| The python command above will run in the background, you can view the results through the file `out.train.log`. | |||
| After training, you'll get some checkpoint files under the script folder by default. | |||
| You will get the loss value as following: | |||
| ``` | |||
| # grep "loss is " out.train.log | |||
| epoch: 1 step: 781, loss is 2.093086 | |||
| epcoh: 2 step: 781, loss is 1.827582 | |||
| ... | |||
| ``` | |||
| ### Evaluation | |||
| ``` | |||
| python eval.py --data_path=your_data_path --device_id=6 --checkpoint_path=./train_vgg_cifar10-70-781.ckpt > out.eval.log 2>&1 & | |||
| ``` | |||
| The above python command will run in the background, you can view the results through the file `out.eval.log`. | |||
| You will get the accuracy as following: | |||
| ``` | |||
| # grep "result: " out.eval.log | |||
| result: {'acc': 0.92} | |||
| ``` | |||
| ### Distribute Training | |||
| ``` | |||
| sh run_distribute_train.sh rank_table.json your_data_path | |||
| ``` | |||
| The above shell script will run distribute training in the background, you can view the results through the file `train_parallel[X]/log`. | |||
| You will get the loss value as following: | |||
| ``` | |||
| # grep "result: " train_parallel*/log | |||
| train_parallel0/log:epoch: 1 step: 97, loss is 1.9060308 | |||
| train_parallel0/log:epcoh: 2 step: 97, loss is 1.6003821 | |||
| ... | |||
| train_parallel1/log:epoch: 1 step: 97, loss is 1.7095519 | |||
| train_parallel1/log:epcoh: 2 step: 97, loss is 1.7133579 | |||
| ... | |||
| ... | |||
| ``` | |||
| > About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). | |||
| ## Usage: | |||
| ### Training | |||
| ``` | |||
| usage: train.py [--device_target TARGET][--data_path DATA_PATH] | |||
| [--device_id DEVICE_ID] | |||
| parameters/options: | |||
| --device_target the training backend type, default is Ascend. | |||
| --data_path the storage path of dataset | |||
| --device_id the device which used to train model. | |||
| ``` | |||
| ### Evaluation | |||
| ``` | |||
| usage: eval.py [--device_target TARGET][--data_path DATA_PATH] | |||
| [--device_id DEVICE_ID][--checkpoint_path CKPT_PATH] | |||
| parameters/options: | |||
| --device_target the evaluation backend type, default is Ascend. | |||
| --data_path the storage path of dataset | |||
| --device_id the device which used to evaluate model. | |||
| --checkpoint_path the checkpoint file path used to evaluate model. | |||
| ``` | |||
| ### Distribute Training | |||
| ``` | |||
| Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH] | |||
| parameters/options: | |||
| MINDSPORE_HCCL_CONFIG_PATH HCCL configuration file path. | |||
| DATA_PATH the storage path of dataset. | |||
| ``` | |||
| @@ -28,7 +28,11 @@ def create_dataset(data_home, repeat_num=1, training=True): | |||
| data_dir = os.path.join(data_home, "cifar-10-batches-bin") | |||
| if not training: | |||
| data_dir = os.path.join(data_home, "cifar-10-verify-bin") | |||
| data_set = ds.Cifar10Dataset(data_dir) | |||
| rank_size = int(os.environ.get("RANK_SIZE")) if os.environ.get("RANK_SIZE") else None | |||
| rank_id = int(os.environ.get("RANK_ID")) if os.environ.get("RANK_ID") else None | |||
| data_set = ds.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id) | |||
| resize_height = cfg.image_height | |||
| resize_width = cfg.image_width | |||
| rescale = 1.0 / 255.0 | |||
| @@ -37,9 +37,9 @@ if __name__ == '__main__': | |||
| context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) | |||
| context.set_context(device_id=args_opt.device_id) | |||
| context.set_context(enable_mem_reuse=True, enable_hccl=False) | |||
| context.set_context(enable_mem_reuse=True) | |||
| net = vgg16(batch_size=cfg.batch_size, num_classes=cfg.num_classes) | |||
| net = vgg16(num_classes=cfg.num_classes) | |||
| opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, cfg.momentum, | |||
| weight_decay=cfg.weight_decay) | |||
| loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) | |||
| @@ -0,0 +1,53 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 2 ] | |||
| then | |||
| echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]" | |||
| exit 1 | |||
| fi | |||
| if [ ! -f $1 ] | |||
| then | |||
| echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" | |||
| exit 1 | |||
| fi | |||
| if [ ! -d $2 ] | |||
| then | |||
| echo "error: DATA_PATH=$2 is not a directory" | |||
| exit 1 | |||
| fi | |||
| ulimit -u unlimited | |||
| export DEVICE_NUM=8 | |||
| export RANK_SIZE=8 | |||
| export MINDSPORE_HCCL_CONFIG_PATH=$1 | |||
| for((i=0; i<${DEVICE_NUM}; i++)) | |||
| do | |||
| export DEVICE_ID=$i | |||
| export RANK_ID=$i | |||
| rm -rf ./train_parallel$i | |||
| mkdir ./train_parallel$i | |||
| cp *.py ./train_parallel$i | |||
| cp *.sh ./train_parallel$i | |||
| cd ./train_parallel$i || exit | |||
| echo "start training for rank $RANK_ID, device $DEVICE_ID" | |||
| env > env.log | |||
| python train.py --data_path=$2 --device_id=$i &> log & | |||
| cd .. | |||
| done | |||
| @@ -17,16 +17,18 @@ | |||
| python train.py --data_path=$DATA_HOME --device_id=$DEVICE_ID | |||
| """ | |||
| import argparse | |||
| import os | |||
| import random | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.communication.management import init | |||
| from mindspore.nn.optim.momentum import Momentum | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.model import Model, ParallelMode | |||
| from mindspore import context | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor | |||
| from mindspore.model_zoo.vgg import vgg16 | |||
| import dataset | |||
| from dataset import create_dataset | |||
| from config import cifar_cfg as cfg | |||
| random.seed(1) | |||
| np.random.seed(1) | |||
| @@ -62,17 +64,31 @@ if __name__ == '__main__': | |||
| context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) | |||
| context.set_context(device_id=args_opt.device_id) | |||
| context.set_context(enable_mem_reuse=True, enable_hccl=False) | |||
| context.set_context(enable_task_sink=True) | |||
| context.set_context(enable_loop_sink=True) | |||
| context.set_context(enable_mem_reuse=True) | |||
| net = vgg16(batch_size=cfg.batch_size, num_classes=cfg.num_classes) | |||
| lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=50000 // cfg.batch_size) | |||
| device_num = int(os.environ.get("DEVICE_NUM", 1)) | |||
| if device_num > 1: | |||
| context.reset_auto_parallel_context() | |||
| context.set_context(enable_hccl=True) | |||
| context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| mirror_mean=True) | |||
| init() | |||
| dataset = create_dataset(args_opt.data_path, cfg.epoch_size) | |||
| batch_num = dataset.get_dataset_size() | |||
| net = vgg16(num_classes=cfg.num_classes) | |||
| lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num) | |||
| opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay) | |||
| loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) | |||
| model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) | |||
| model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}, | |||
| amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None) | |||
| dataset = dataset.create_dataset(args_opt.data_path, cfg.epoch_size) | |||
| batch_num = dataset.get_dataset_size() | |||
| config_ck = CheckpointConfig(save_checkpoint_steps=batch_num * 5, keep_checkpoint_max=cfg.keep_checkpoint_max) | |||
| time_cb = TimeMonitor(data_size=batch_num) | |||
| ckpoint_cb = ModelCheckpoint(prefix="train_vgg_cifar10", directory="./", config=config_ck) | |||
| loss_cb = LossMonitor() | |||
| model.train(cfg.epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb]) | |||
| model.train(cfg.epoch_size, dataset, callbacks=[time_cb, ckpoint_cb, loss_cb]) | |||
| print("train success") | |||
| @@ -0,0 +1,94 @@ | |||
| # YOLOv3 Example | |||
| ## Description | |||
| YOLOv3 network based on ResNet-18, with support for training and evaluation. | |||
| ## Requirements | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Dataset | |||
| We use coco2017 as training dataset. | |||
| 1. Download coco2017: [train2017](http://images.cocodataset.org/zips/train2017.zip), [val2017](http://images.cocodataset.org/zips/val2017.zip), [test2017](http://images.cocodataset.org/zips/test2017.zip), [annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip). The directory structure is as follows: | |||
| > ``` | |||
| > . | |||
| > ├── annotations # annotation jsons | |||
| > ├── train2017 # train dataset | |||
| > └── val2017 # infer dataset | |||
| > ``` | |||
| 2. Organize the dataset information into a TXT file, each row in the file is as follows: | |||
| ``` | |||
| train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2 | |||
| ``` | |||
| Each row is an image annotation which split by space, the first column is a relative path of image, the others are box and class information of the format [xmin,ymin,xmax,ymax,class]. `dataset.py` is the parsing script, we read image from an image path joined by the `image_dir`(dataset directory) and the relative path in `anno_path`(the TXT file path), `image_dir` and `anno_path` are external inputs. | |||
| ## Running the Example | |||
| ### Training | |||
| To train the model, run `train.py` with the dataset `image_dir`, `anno_path` and `mindrecord_dir`. If the `mindrecord_dir` is empty, it will generate [mindrecord](https://www.mindspore.cn/tutorial/en/master/use/data_preparation/converting_datasets.html) file by `image_dir` and `anno_path`(the absolute image path is joined by the `image_dir` and the relative path in `anno_path`). **Note if `mindrecord_dir` isn't empty, it will use `mindrecord_dir` rather than `image_dir` and `anno_path`.** | |||
| - Stand alone mode | |||
| ``` | |||
| sh run_standalone_train.sh 0 50 ./Mindrecord_train ./dataset ./dataset/train.txt | |||
| ``` | |||
| The input variables are device id, epoch size, mindrecord directory path, dataset directory path and train TXT file path. | |||
| - Distributed mode | |||
| ``` | |||
| sh run_distribute_train.sh 8 150 /data/Mindrecord_train /data /data/train.txt /data/hccl.json | |||
| ``` | |||
| The input variables are device numbers, epoch size, mindrecord directory path, dataset directory path, train TXT file path and [hccl json configuration file](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). **It is better to use absolute path.** | |||
| You will get the loss value and time of each step as following: | |||
| ``` | |||
| epoch: 145 step: 156, loss is 12.202981 | |||
| epoch time: 25599.22742843628, per step time: 164.0976117207454 | |||
| epoch: 146 step: 156, loss is 16.91706 | |||
| epoch time: 23199.971675872803, per step time: 148.7177671530308 | |||
| epoch: 147 step: 156, loss is 13.04007 | |||
| epoch time: 23801.95164680481, per step time: 152.57661312054364 | |||
| epoch: 148 step: 156, loss is 10.431475 | |||
| epoch time: 23634.241580963135, per step time: 151.50154859591754 | |||
| epoch: 149 step: 156, loss is 14.665991 | |||
| epoch time: 24118.8325881958, per step time: 154.60790120638333 | |||
| epoch: 150 step: 156, loss is 10.779521 | |||
| epoch time: 25319.57221031189, per step time: 162.30495006610187 | |||
| ``` | |||
| Note the results are for two-classification (person and face) using our own annotations with coco2017, you can change `num_classes` in `config.py` to train your dataset. And we will support 80 classifications in coco2017 in the near future. | |||
| ### Evaluation | |||
| To eval, run `eval.py` with the dataset `image_dir`, `anno_path`(eval txt), `mindrecord_dir` and `ckpt_path`. `ckpt_path` is the path of [checkpoint](https://www.mindspore.cn/tutorial/en/master/use/saving_and_loading_model_parameters.html) file. | |||
| ``` | |||
| sh run_eval.sh 0 yolo.ckpt ./Mindrecord_eval ./dataset ./dataset/eval.txt | |||
| ``` | |||
| The input variables are device id, checkpoint path, mindrecord directory path, dataset directory path and train TXT file path. | |||
| You will get the precision and recall value of each class: | |||
| ``` | |||
| class 0 precision is 88.18%, recall is 66.00% | |||
| class 1 precision is 85.34%, recall is 79.13% | |||
| ``` | |||
| Note the precision and recall values are results of two-classification (person and face) using our own annotations with coco2017. | |||
| @@ -18,8 +18,8 @@ from __future__ import division | |||
| import os | |||
| import numpy as np | |||
| from PIL import Image | |||
| from matplotlib.colors import rgb_to_hsv, hsv_to_rgb | |||
| from PIL import Image | |||
| import mindspore.dataset as de | |||
| from mindspore.mindrecord import FileWriter | |||
| import mindspore.dataset.transforms.vision.c_transforms as C | |||
| @@ -90,13 +90,11 @@ if __name__ == '__main__': | |||
| if args_opt.distribute: | |||
| device_num = args_opt.device_num | |||
| context.reset_auto_parallel_context() | |||
| context.set_context(enable_hccl=True) | |||
| context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, | |||
| device_num=device_num) | |||
| init() | |||
| rank = args_opt.device_id % device_num | |||
| else: | |||
| context.set_context(enable_hccl=False) | |||
| rank = 0 | |||
| device_num = 1 | |||
| @@ -13,51 +13,6 @@ | |||
| # limitations under the License. | |||
| """__init__""" | |||
| from __future__ import absolute_import as _abs | |||
| import sys | |||
| import os | |||
| def AKGAddPath(): | |||
| """_akg add path.""" | |||
| pwd = os.path.dirname(os.path.realpath(__file__)) | |||
| tvm_path = os.path.realpath(pwd) | |||
| if tvm_path not in sys.path: | |||
| sys.path.insert(0, tvm_path) | |||
| else: | |||
| sys.path.remove(tvm_path) | |||
| sys.path.insert(0, tvm_path) | |||
| class AKGMetaPathFinder: | |||
| """class AKGMetaPath finder.""" | |||
| def find_module(self, fullname, path=None): | |||
| """method _akg find module.""" | |||
| if fullname.startswith("_akg.tvm"): | |||
| rname = fullname[5:] | |||
| return AKGMetaPathLoader(rname) | |||
| if fullname.startswith("_akg.topi"): | |||
| rname = fullname[5:] | |||
| return AKGMetaPathLoader(rname) | |||
| return None | |||
| class AKGMetaPathLoader: | |||
| """class AKGMetaPathLoader loader.""" | |||
| def __init__(self, rname): | |||
| self.__rname = rname | |||
| def load_module(self, fullname): | |||
| if self.__rname in sys.modules: | |||
| sys.modules.pop(self.__rname) | |||
| AKGAddPath() | |||
| __import__(self.__rname, globals(), locals()) | |||
| self.__target_module = sys.modules[self.__rname] | |||
| sys.modules[fullname] = self.__target_module | |||
| return self.__target_module | |||
| sys.meta_path.insert(0, AKGMetaPathFinder()) | |||
| from . import add_path | |||
| from .op_build import op_build | |||
| from .message import compilewithjson | |||
| @@ -0,0 +1,61 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| """add tvm path""" | |||
| import sys | |||
| import os | |||
| def AKGAddPath(): | |||
| """_akg add path.""" | |||
| pwd = os.path.dirname(os.path.realpath(__file__)) | |||
| tvm_path = os.path.realpath(pwd) | |||
| if tvm_path not in sys.path: | |||
| sys.path.insert(0, tvm_path) | |||
| else: | |||
| sys.path.remove(tvm_path) | |||
| sys.path.insert(0, tvm_path) | |||
| class AKGMetaPathFinder: | |||
| """class AKGMetaPath finder.""" | |||
| def find_module(self, fullname, path=None): | |||
| """method _akg find module.""" | |||
| if fullname.startswith("_akg.tvm"): | |||
| rname = fullname[5:] | |||
| return AKGMetaPathLoader(rname) | |||
| if fullname.startswith("_akg.topi"): | |||
| rname = fullname[5:] | |||
| return AKGMetaPathLoader(rname) | |||
| return None | |||
| class AKGMetaPathLoader: | |||
| """class AKGMetaPathLoader loader.""" | |||
| def __init__(self, rname): | |||
| self.__rname = rname | |||
| def load_module(self, fullname): | |||
| if self.__rname in sys.modules: | |||
| sys.modules.pop(self.__rname) | |||
| AKGAddPath() | |||
| __import__(self.__rname, globals(), locals()) | |||
| self.__target_module = sys.modules[self.__rname] | |||
| sys.modules[fullname] = self.__target_module | |||
| return self.__target_module | |||
| sys.meta_path.insert(0, AKGMetaPathFinder()) | |||
| @@ -15,14 +15,14 @@ | |||
| """squeeze grad""" | |||
| import _akg.topi as topi | |||
| def SqueezeGrad(y_grad, x_shape, axis=None): | |||
| def SqueezeGrad(y_grad, x_shape): | |||
| """ | |||
| Computes gradients for squeeze op. | |||
| Args: | |||
| y_grad (tvm.tensor.Tensor): the gradient needed to be propagation. | |||
| x_shape (Union[list, tuple]): output Tensor shape. | |||
| axis (Union[list, tuple, int, None], optional): eliminated axis by squeeze. | |||
| Returns: | |||
| tvm.tensor.Tensor: output gradient. | |||
| @@ -46,7 +46,8 @@ def compilewithjson(json_str): | |||
| impl_path = os.path.realpath(kernel_info['impl_path']) | |||
| if os.path.isfile(impl_path): | |||
| custom_mod_name = Path(impl_path).resolve().stem | |||
| mod_spec = importlib.util.spec_from_file_location(custom_mod_name, impl_path) | |||
| mod_spec = importlib.util.spec_from_file_location( | |||
| custom_mod_name, impl_path) | |||
| custom_mod = importlib.util.module_from_spec(mod_spec) | |||
| mod_spec.loader.exec_module(custom_mod) | |||
| op_func = getattr(custom_mod, op_name, None) | |||
| @@ -57,7 +58,8 @@ def compilewithjson(json_str): | |||
| op_func = getattr(gpu, op_name, None) | |||
| if op_func is None: | |||
| logging.error("this op not supported, please check op name %s", str(op_name)) | |||
| logging.error( | |||
| "this op not supported, please check op name %s", str(op_name)) | |||
| return False | |||
| args = {} | |||
| @@ -87,25 +89,16 @@ def compilewithjson(json_str): | |||
| output = op_func(**args) | |||
| schedule_func = None | |||
| attrs = {} | |||
| if isinstance(output, (list, tuple)): | |||
| from inspect import isfunction | |||
| tmp_outputs = [] | |||
| for elem in output: | |||
| if isfunction(elem): | |||
| schedule_func = elem | |||
| elif isinstance(elem, dict): | |||
| for key, value in elem.items(): | |||
| if key not in attrs or not attrs[key]: | |||
| attrs[key] = value | |||
| else: | |||
| if not isfunction(elem) or isinstance(elem, dict): | |||
| tmp_outputs.append(elem) | |||
| output = tmp_outputs | |||
| else: | |||
| output = [output] | |||
| tsr = tsr + [i for i in output if TensorUtils.is_output_value(i)] | |||
| return op_build([op_name], output, tsr, schedule_func, processor, kernel_info['op'], attrs) | |||
| return op_build([op_name], output, tsr, processor, kernel_info['op']) | |||
| @@ -24,13 +24,13 @@ import _akg | |||
| from _akg import save_gpu_param as gpu_utils | |||
| from _akg.utils import validation_check as vc_util | |||
| MS_CUDA_KERNEL_PATH = "/tmp/cuda_meta/" | |||
| @vc_util.check_input_type(list, (list, tuple), (list, tuple), (types.FunctionType, type(None)), str, str, dict) | |||
| def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attrs): | |||
| @vc_util.check_input_type(list, (list, tuple), (list, tuple), str, str) | |||
| def op_build(opnames, computes, args, device, kernel_name): | |||
| """op_build""" | |||
| kernel_meta_path = "./cuda_meta_" + str(os.getpid()) + "/" | |||
| if device == "cuda": | |||
| cuda_path = os.path.realpath(MS_CUDA_KERNEL_PATH) | |||
| cuda_path = os.path.realpath(kernel_meta_path) | |||
| if not os.path.isdir(cuda_path): | |||
| os.makedirs(cuda_path) | |||
| if not opnames: | |||
| @@ -43,7 +43,7 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr | |||
| logging.error("no schedule func found %s", str(schedule_name)) | |||
| return None | |||
| ptx_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".ptx") | |||
| ptx_file = os.path.realpath(kernel_meta_path + kernel_name + ".ptx") | |||
| if os.path.exists(ptx_file): | |||
| os.chmod(ptx_file, 0o600) | |||
| try: | |||
| @@ -55,11 +55,12 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr | |||
| foo = _akg.tvm.build(s, args, device, name=kernel_name) | |||
| ptx_code = foo.imported_modules[0].get_source("ptx") | |||
| file.write(ptx_code) | |||
| json_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".json") | |||
| json_file = os.path.realpath( | |||
| kernel_meta_path + kernel_name + ".json") | |||
| kernel_info = (ptx_code, json_file, kernel_name) | |||
| gpu_utils.save_gpu_params(s, args, kernel_info) | |||
| os.chmod(ptx_file, 0o400) | |||
| except Exception: | |||
| except IOError: | |||
| logging.error(traceback.format_exc()) | |||
| return None | |||
| return True | |||
| @@ -17,7 +17,7 @@ import _akg.topi | |||
| import _akg.tvm | |||
| from _akg.utils import format_transform as ft_util | |||
| from _akg.utils import validation_check as vc_util | |||
| from _akg.ops.math import sum | |||
| from _akg.ops.math import sum_value | |||
| @vc_util.check_input_type(_akg.tvm.tensor.Tensor, (list, tuple, int, type(None)), (bool, type(None))) | |||
| @@ -41,7 +41,7 @@ def mean(data, axis=None, keepdims=False): | |||
| count = 1 | |||
| for i in axis: | |||
| count *= shape[i] | |||
| output, _ = sum.sum_value(data, axis, keepdims) | |||
| output, _ = sum_value.sum_value(data, axis, keepdims) | |||
| res = _akg.topi.divide(output, count) | |||
| return res | |||
| @@ -128,7 +128,7 @@ class Validator: | |||
| @staticmethod | |||
| def check_number(arg_name, arg_value, value, rel, prim_name): | |||
| """Integer value judgment.""" | |||
| """Number value judgment.""" | |||
| rel_fn = Rel.get_fns(rel) | |||
| if not rel_fn(arg_value, value): | |||
| rel_str = Rel.get_strs(rel).format(value) | |||
| @@ -210,7 +210,7 @@ class Validator: | |||
| type_names = [] | |||
| for t in valid_values: | |||
| type_names.append(str(t)) | |||
| types_info = '[' + ", ".join(type_names) + ']' | |||
| types_info = '[' + ', '.join(type_names) + ']' | |||
| raise TypeError(f'For \'{prim_name}\' type of `{arg_key}` should be in {types_info},' | |||
| f' but got {elem_type}.') | |||
| return (arg_key, elem_type) | |||
| @@ -653,30 +653,6 @@ def check_output_data(data): | |||
| raise RuntimeError('Executor return data ' + str(data) + ', please check your net or input data.') | |||
| def check_axis_type_int(axis): | |||
| """Check axis type.""" | |||
| if not isinstance(axis, int): | |||
| raise TypeError('Wrong type for axis, should be int.') | |||
| def check_axis_range(axis, rank): | |||
| """Check axis range.""" | |||
| if not -rank <= axis < rank: | |||
| raise ValueError('The axis should be in range [{}, {}),'' but got {}.'.format(-rank, rank, axis)) | |||
| def check_attr_int(attr_name, attr): | |||
| """Check int type.""" | |||
| if not isinstance(attr, int): | |||
| raise TypeError("The attr {} should be int, but got {}.".format(attr_name, type(attr))) | |||
| def check_t_in_range(t): | |||
| """Check input range.""" | |||
| if t not in (mstype.float16, mstype.float32, mstype.float64, mstype.int32, mstype.int64): | |||
| raise ValueError("The param T should be (float16, float32, float64, int32, int64).") | |||
| once = _expand_tuple(1) | |||
| twice = _expand_tuple(2) | |||
| triple = _expand_tuple(3) | |||
| @@ -86,7 +86,7 @@ def identity(x): | |||
| def zeros_like_tensor(x): | |||
| """Implement `zeros_like_tensor`.""" | |||
| x = x.asnumpy() | |||
| value = Tensor(np.zeros(x.shape)) | |||
| value = Tensor(np.zeros(x.shape).astype(np.float32)) | |||
| return value | |||
| @@ -122,10 +122,12 @@ def get_args(op_info, arg_type): | |||
| elif arg_type == 'attrs': | |||
| for item in op_info[arg_type]: | |||
| if 'value' not in item: | |||
| raise ValueError("Json string Errors, attr key:value not found.") | |||
| if item["name"] != "isRef": | |||
| args.append(item['value']) | |||
| if item["valid"]: | |||
| if 'value' not in item: | |||
| raise ValueError("Json string Errors, attr key:value not found.") | |||
| if item["name"] != "isRef": | |||
| args.append(item['value']) | |||
| return args | |||
| @@ -18,15 +18,15 @@ Interfaces for parser module in c++. | |||
| from .parser import (Parser, create_obj_instance, generate_scope, | |||
| get_bprop_method_of_class, get_class_instance_type, | |||
| get_class_member_namespace_symbol, | |||
| get_class_member_namespace_symbol, create_slice_obj, | |||
| get_dataclass_attributes, get_dataclass_methods, | |||
| get_module_namespace, get_obj_type, get_object_key, | |||
| get_parse_method_of_class, get_scope_name, | |||
| is_class_member, parse_cb, resolve_symbol) | |||
| is_class_member, parse_cb, resolve_symbol, create_ellipsis_obj) | |||
| from .serialize import * | |||
| __all__ = ['parse_cb', 'get_parse_method_of_class', 'get_bprop_method_of_class', 'resolve_symbol', | |||
| 'get_object_key', 'get_class_instance_type', 'is_class_member', 'get_obj_type', | |||
| 'create_obj_instance', 'get_module_namespace', 'get_class_member_namespace_symbol', | |||
| 'Parser', 'get_dataclass_attributes', 'get_dataclass_methods', 'dump_obj', 'load_obj', | |||
| 'get_dataclass_methods', 'get_scope_name'] | |||
| 'get_dataclass_methods', 'get_scope_name', 'create_slice_obj', 'create_ellipsis_obj'] | |||
| @@ -29,6 +29,7 @@ from mindspore.common.dtype import pytype_to_dtype | |||
| from mindspore.common.api import _MindSporeFunction | |||
| from .namespace import CellNamespace, ClosureNamespace, ClassMemberNamespace | |||
| from .resources import parse_object_map, convert_object_map, trope_ns, SYMBOL_UNDEFINE, NO_IMPLEMENT | |||
| from ..utils import Slice, Ellipsis_ | |||
| # define return value | |||
| RET_SUCCESS = 0 | |||
| @@ -69,6 +70,15 @@ parse_expr_statement_white_list = ( | |||
| "append", | |||
| ) | |||
| def create_ellipsis_obj(): | |||
| """Create Slice object""" | |||
| return Ellipsis_() | |||
| def create_slice_obj(start, end, step): | |||
| """Create Slice object""" | |||
| return Slice(start, end, step) | |||
| def parse_cb(func, parse_method=None): | |||
| """Implements the function of parse.""" | |||
| @@ -19,6 +19,7 @@ import logging | |||
| import os | |||
| import inspect | |||
| from functools import wraps | |||
| from dataclasses import dataclass | |||
| def cal_sha256(file_path): | |||
| @@ -99,3 +100,20 @@ def cell_attr_register(fn=None, attrs=None): | |||
| if fn is not None: | |||
| return wrap_cell(fn) | |||
| return wrap_cell | |||
| @dataclass | |||
| class Slice: | |||
| """ | |||
| Slice class | |||
| """ | |||
| start: int | |||
| end: int | |||
| step: int | |||
| @dataclass | |||
| class Ellipsis_: | |||
| """ | |||
| Ellipsis class | |||
| """ | |||
| @@ -1,14 +1,11 @@ | |||
| ## common setting | |||
| include_directories(${CMAKE_CURRENT_SOURCE_DIR}) | |||
| if(ENABLE_CPU) | |||
| include(ExternalProject) | |||
| add_compile_definitions(CPUSESSION) | |||
| file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "device/cpu/*.cc" | |||
| ) | |||
| if (CMAKE_SYSTEM_NAME MATCHES "Windows") | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes -DHAVE_SNPRINTF") | |||
| add_compile_definitions(BUILDING_DLL) | |||
| endif() | |||
| include_directories(${CMAKE_BINARY_DIR}) | |||
| link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine) | |||
| if (CMAKE_SYSTEM_NAME MATCHES "Windows") | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes -DHAVE_SNPRINTF") | |||
| add_compile_definitions(BUILDING_DLL) | |||
| endif() | |||
| if(ENABLE_GPU) | |||
| @@ -20,7 +17,7 @@ if(ENABLE_GPU) | |||
| enable_language(CUDA) | |||
| if(NOT CUDA_PATH OR CUDA_PATH STREQUAL "") | |||
| if(DEFINED ENV{CUDA_HOME}) | |||
| set(CUDA_PATH $ENV{CUDA_HOME}) | |||
| set(CUDA_PATH $ENV{CUDA_HOME}) | |||
| else() | |||
| set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR}) | |||
| endif() | |||
| @@ -41,261 +38,101 @@ if(ENABLE_GPU) | |||
| "kernel/akg/akgkernelbuild.cc" | |||
| "kernel/akg/akg_kernel_attrs_process.cc" | |||
| ) | |||
| file(GLOB_RECURSE GPU_KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "kernel/gpu/*.cc" | |||
| ) | |||
| list(APPEND CUDA_NVCC_FLAGS -arch=sm_53) | |||
| list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc") | |||
| add_library(gpu_queue SHARED "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc") | |||
| target_link_libraries(gpu_queue ${CMAKE_THREAD_LIBS_INIT} ${CUDA_PATH}/lib64/libcudart.so) | |||
| file(GLOB_RECURSE MS_STEPS_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "session/gpu_session.cc" | |||
| ) | |||
| list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/mpi/mpi_initializer.cc" | |||
| "device/gpu/distribution/collective_wrapper.cc" | |||
| "device/gpu/distribution/mpi_wrapper.cc" | |||
| "device/gpu/distribution/nccl_wrapper.cc" | |||
| ) | |||
| list(REMOVE_ITEM GPU_KERNEL_SRC_LIST "device/gpu/mpi/mpi_initializer.cc" | |||
| "kernel/gpu/nccl/nccl_gpu_kernel.cc" | |||
| ) | |||
| set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) | |||
| string(REPLACE "-std=c++17" "-std=c++11" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") | |||
| cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST}) | |||
| set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS}) | |||
| endif () | |||
| if(ENABLE_MPI) | |||
| include(ExternalProject) | |||
| file(GLOB_RECURSE GPU_NCCL_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "kernel/gpu/nccl/*.cc" | |||
| ) | |||
| file(GLOB_RECURSE GPU_MPI_PYTHON_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "device/gpu/mpi/mpi_initializer.cc" | |||
| ) | |||
| add_library(gpu_collective SHARED "device/gpu/distribution/collective_wrapper.cc" | |||
| "device/gpu/distribution/mpi_wrapper.cc" | |||
| "device/gpu/distribution/nccl_wrapper.cc" | |||
| ) | |||
| endif() | |||
| endif() | |||
| ## make flatbuffer files | |||
| include_directories("${CMAKE_BINARY_DIR}/predict/schema/inner") | |||
| file(GLOB_RECURSE FLATBUFFER_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/schema/*.fbs") | |||
| set(FLATBUFFER_OU "${CMAKE_BINARY_DIR}/predict/schema/inner") | |||
| ms_build_flatbuffers("${FLATBUFFER_IN}" "${FLATBUFFER_IN}" GENERATED_OUTPUT_DIR "${FLATBUFFER_OU}") | |||
| ms_build_flatbuffers("${FLATBUFFER_IN}" "${FLATBUFFER_IN}" flat_input "${FLATBUFFER_OU}") | |||
| file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "ir/*.cc" | |||
| "ir/dtype/*.cc" | |||
| "utils/context/ms_context.cc" | |||
| "utils/symbolic.cc" | |||
| "utils/tensorprint_utils.cc" | |||
| "utils/convert_utils.cc" | |||
| "utils/graph_utils.cc" | |||
| "utils/misc.cc" | |||
| "utils/callbacks.cc" | |||
| "utils/profile.cc" | |||
| "utils/base_ref.cc" | |||
| "utils/summary/event_writer.cc" | |||
| "utils/log_adapter.cc" | |||
| "utils/comm_manager.cc" | |||
| "utils/any.cc" | |||
| "utils/config_manager.cc" | |||
| "utils/system/file_system.cc" | |||
| "utils/system/crc32c.cc" | |||
| "common/*.cc" | |||
| "parallel/*.cc" | |||
| "pipeline/pipeline.cc" | |||
| "pipeline/resource.cc" | |||
| "pipeline/pass.cc" | |||
| "pipeline/action.cc" | |||
| "pipeline/validator.cc" | |||
| "pipeline/remove_value_node_dup.cc" | |||
| "pipeline/parse/*.cc" | |||
| "pipeline/static_analysis/*.cc" | |||
| "optimizer/*.cc" | |||
| "debug/*.cc" | |||
| "onnx/onnx_exporter.cc" | |||
| "operator/*.cc" | |||
| "session/kernel_graph.cc" | |||
| "utils/node_utils.cc" | |||
| "session/session_basic.cc" | |||
| "session/session_factory.cc" | |||
| "session/anf_runtime_algorithm.cc" | |||
| "vm/*.cc" | |||
| "pynative/base.cc" | |||
| "pynative/pynative_execute.cc" | |||
| "pybind_api/*.cc" | |||
| "device/common/*.cc" | |||
| "kernel/kernel_query.cc" | |||
| "kernel/kernel_build_info.cc" | |||
| "kernel/kash/*.cc" | |||
| "device/kernel_info.cc" | |||
| "device/kernel_runtime.cc" | |||
| "device/memory_manager.cc" | |||
| "device/kernel_runtime_manager.cc" | |||
| "device/convert_tensor_utils.cc" | |||
| "pre_activate/common/*.cc" | |||
| "pre_activate/pass/*.cc" | |||
| "pre_activate/gpu/*.cc" | |||
| "pre_activate/mem_reuse/*.cc" | |||
| "predict/predict.cc" | |||
| "predict/generator/utils/ir_model_util.cc" | |||
| "predict/converter/*.cc" | |||
| "predict/converter/attr_utils/*.cc" | |||
| "predict/converter/lite_model/*.cc" | |||
| "predict/converter/lite_model/operations/*.cc" | |||
| "kernel/common_utils.cc" | |||
| "kernel/oplib/*.cc" | |||
| "kernel/kash/*.cc" | |||
| "device/gpu/distribution/collective_init.cc" | |||
| ) | |||
| if (ENABLE_CPU) | |||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "device/gpu/distribution/collective_init.cc") | |||
| if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") | |||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "kernel/kernel_query.cc") | |||
| endif() | |||
| endif() | |||
| if (NOT ENABLE_GPU) | |||
| list(APPEND MINDSPORE_SRC_LIST "device/gpu/distribution/collective_fake_init.cc") | |||
| endif() | |||
| file(GLOB_RECURSE MEM_REUSE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "pre_activate/mem_reuse/*.cc" | |||
| ) | |||
| if(NOT ENABLE_DUMP_E2E) | |||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "debug/e2e_dump.cc") | |||
| endif() | |||
| file(COPY "${ms_onnx_INC}/onnx/onnx.proto" DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}) | |||
| file(GLOB_RECURSE ONNX_PROTO RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/onnx.proto") | |||
| message("onnx proto path is : ${ONNX_PROTO}") | |||
| ## make protobuf files | |||
| file(COPY "${ms_onnx_INC}/onnx/onnx.proto" DESTINATION ${CMAKE_BINARY_DIR}/proto) | |||
| file(GLOB ONNX_PROTO "" ${CMAKE_BINARY_DIR}/proto/onnx.proto) | |||
| message("onnx proto path is :" ${ONNX_PROTO}) | |||
| ms_protobuf_generate(ONNX_PROTO_SRCS ONNX_PROTO_HDRS ${ONNX_PROTO}) | |||
| list(APPEND MINDSPORE_PROTO_LIST ${ONNX_PROTO_SRCS}) | |||
| if(ENABLE_DUMP_PROTO) | |||
| if (ENABLE_DUMP_PROTO) | |||
| include_directories(${CMAKE_BINARY_DIR}) | |||
| file(GLOB_RECURSE PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "utils/node_strategy.proto" | |||
| ) | |||
| file(GLOB_RECURSE PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "utils/node_strategy.proto") | |||
| ms_protobuf_generate(PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
| file(GLOB_RECURSE PROTO_PY RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "utils/anf_ir.proto" | |||
| "utils/summary.proto" | |||
| "utils/checkpoint.proto" | |||
| ) | |||
| "utils/anf_ir.proto" | |||
| "utils/summary.proto" | |||
| "utils/checkpoint.proto" | |||
| ) | |||
| ms_protobuf_generate_py(PY_SRCS PY_HDRS PY_PYS ${PROTO_PY}) | |||
| list(APPEND MINDSPORE_PROTO_DUMP_LIST ${PROTO_SRCS}) | |||
| list(APPEND MINDSPORE_PROTO_DUMP_LIST ${PY_SRCS}) | |||
| list(APPEND MINDSPORE_SRC_LIST "debug/dump_proto.cc") | |||
| list(APPEND MINDSPORE_SRC_LIST "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") | |||
| add_compile_definitions(ENABLE_DUMP_PROTO) | |||
| endif() | |||
| if(ENABLE_GE) | |||
| file(GLOB_RECURSE GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "transform/*.cc" | |||
| "pynative/pynative_execute_ge.cc" | |||
| "utils/callbacks_ge.cc" | |||
| "pipeline/pipeline_ge.cc" | |||
| ) | |||
| list(APPEND MINDSPORE_SRC_LIST ${GE_SRC_LIST}) | |||
| endif() | |||
| list(APPEND MINDSPORE_PROTO_LIST ${PROTO_SRCS}) | |||
| list(APPEND MINDSPORE_PROTO_LIST ${PY_SRCS}) | |||
| endif () | |||
| if(ENABLE_D) | |||
| if (ENABLE_D) | |||
| include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu") | |||
| file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "kernel/aicpu/proto/*.proto" | |||
| ) | |||
| ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN}) | |||
| include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir") | |||
| file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "predict/proto/*.proto" | |||
| ) | |||
| file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/aicpu/proto/*.proto") | |||
| ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN}) | |||
| file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto") | |||
| ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER}) | |||
| file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "device/ascend/*.cc" | |||
| "device/ascend/profiling/*.cc" | |||
| "device/ascend/tasksink/*.cc" | |||
| "device/kernel_adjust.cc" | |||
| "kernel/kernel_fusion.cc" | |||
| "kernel/tbe/*.cc" | |||
| "pre_activate/ascend/*.cc" | |||
| "transform/*.cc" | |||
| "pipeline/pipeline_ge.cc" | |||
| ) | |||
| list(APPEND MINDSPORE_SRC_LIST ${D_SRC_LIST}) | |||
| list(APPEND MINDSPORE_PROTO_AICPU_LIST ${PROTOSRCS}) | |||
| list(APPEND MINDSPORE_PROTO_PREDICT_LIST ${PREDICT_PROTOSRCS}) | |||
| list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS}) | |||
| list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS}) | |||
| file(GLOB_RECURSE MS_STEPS_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "session/ascend_session.cc" | |||
| ) | |||
| file(GLOB_RECURSE MS_TASKINFO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "device/ascend/tasksink/taskinfo/*.cc") | |||
| file(GLOB_RECURSE MS_AICPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "kernel/aicpu/*.cc" | |||
| ) | |||
| file(GLOB_RECURSE MS_RT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "kernel/mng/*.cc" | |||
| ) | |||
| file(GLOB_RECURSE MS_HCCL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "kernel/hccl/*.cc" | |||
| ) | |||
| file(GLOB_RECURSE MS_PREDICT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "predict/generator/ir/*.cc" | |||
| ) | |||
| add_compile_definitions(ENABLE_D) | |||
| endif() | |||
| file(GLOB_RECURSE MS_GVAR_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "gvar/*.cc" | |||
| ) | |||
| add_library(mindspore_gvar SHARED ${MS_GVAR_SRC_LIST}) | |||
| add_library(mindspore STATIC ${MINDSPORE_SRC_LIST}) | |||
| add_dependencies(mindspore GENERATED_OUTPUT_DIR) | |||
| endif () | |||
| if(ENABLE_D) | |||
| list(APPEND MINDSPORE_PROTO_LIST ${MINDSPORE_PROTO_AICPU_LIST}) | |||
| endif() | |||
| if(ENABLE_DUMP_PROTO) | |||
| list(APPEND MINDSPORE_PROTO_LIST ${MINDSPORE_PROTO_DUMP_LIST}) | |||
| endif() | |||
| list(APPEND MINDSPORE_PROTO_LIST ${MINDSPORE_PROTO_PREDICT_LIST}) | |||
| if(MINDSPORE_PROTO_LIST) | |||
| if (MINDSPORE_PROTO_LIST) | |||
| add_library(proto_input STATIC ${MINDSPORE_PROTO_LIST}) | |||
| set_target_properties(proto_input PROPERTIES COMPILE_FLAGS "-Wno-unused-variable") | |||
| target_link_libraries(mindspore proto_input) | |||
| endif() | |||
| if(APPLE) | |||
| set_target_properties(mindspore_gvar PROPERTIES MACOSX_RPATH ON) | |||
| endif() | |||
| ## make sub objects | |||
| set(SUB_COMP | |||
| transform pre_activate parallel pipeline device kernel common debug gvar ir onnx operator optimizer predict | |||
| pybind_api pynative session utils vm | |||
| ) | |||
| link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine) | |||
| foreach (_comp ${SUB_COMP}) | |||
| add_subdirectory(${_comp}) | |||
| if (TARGET _mindspore_${_comp}_obj) | |||
| list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${_comp}_obj>) | |||
| add_dependencies(_mindspore_${_comp}_obj proto_input flat_input) | |||
| endif () | |||
| endforeach () | |||
| add_library(mindspore STATIC ${SUB_OBJECTS_SRC}) | |||
| target_link_libraries(mindspore proto_input) | |||
| target_link_libraries(mindspore securec mindspore::flatbuffers) | |||
| if (NOT WIN32) | |||
| target_link_libraries(mindspore dl) | |||
| endif() | |||
| if (ENABLE_GE) | |||
| if(ENABLE_TRAIN) | |||
| target_link_libraries(mindspore graph ge_client_train) | |||
| else() | |||
| target_link_libraries(mindspore graph ge_client) | |||
| endif() | |||
| target_link_libraries(mindspore tsdclient) | |||
| elseif(ENABLE_D) | |||
| add_compile_definitions(NO_GE_CLIENT) | |||
| target_link_libraries(mindspore graph) | |||
| else() | |||
| add_compile_definitions(NO_GE_CLIENT) | |||
| target_link_libraries(mindspore ge_client_train) | |||
| else () | |||
| target_link_libraries(mindspore ge_client) | |||
| endif () | |||
| target_link_libraries(mindspore graph tsdclient) | |||
| endif() | |||
| if(ENABLE_D) | |||
| if (ENABLE_D) | |||
| if (DEFINED ENV{D_LINK_PATH}) | |||
| if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") | |||
| MESSAGE("system processor matches aarch64") | |||
| @@ -306,13 +143,13 @@ if(ENABLE_D) | |||
| else () | |||
| MESSAGE("system ${CMAKE_HOST_SYSTEM_PROCESSOR} not support") | |||
| endif() | |||
| else() | |||
| else () | |||
| MESSAGE("use system default lib") | |||
| if(DEFINED ENV{ASCEND_CUSTOM_PATH}) | |||
| if (DEFINED ENV{ASCEND_CUSTOM_PATH}) | |||
| set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) | |||
| else() | |||
| else () | |||
| set(ASCEND_PATH /usr/local/Ascend) | |||
| endif() | |||
| endif () | |||
| set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) | |||
| set(ASCEND_DRIVER_BACK_PATH ${ASCEND_PATH}/driver/lib64/driver) | |||
| set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64) | |||
| @@ -327,37 +164,14 @@ if(ENABLE_D) | |||
| target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${PROFILING} ${HCCL} ${TSDCLIENT}) | |||
| endif() | |||
| target_link_libraries(mindspore securec) | |||
| if (NOT WIN32) | |||
| target_link_libraries(mindspore dl) | |||
| endif() | |||
| target_link_libraries(mindspore mindspore::flatbuffers) | |||
| # link protobuf | |||
| if (ENABLE_D) | |||
| target_link_libraries(mindspore mindspore::protobuf) | |||
| endif() | |||
| if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") | |||
| target_link_libraries(mindspore ${PYTHON_LIBRARIES} mindspore_gvar) | |||
| endif() | |||
| # set c_expression building | |||
| if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") | |||
| set(PYTHON_MODULE_SOURCE ${MS_GVAR_SRC_LIST} | |||
| pipeline/init.cc | |||
| kernel/oplib/oplib.cc | |||
| ${MINDSPORE_SRC_LIST} ${MS_STEPS_SRC_LIST} ${MS_CCE_SRC_LIST} ${MS_AICPU_SRC_LIST} ${MS_TASKINFO_LIST} ${MS_RT_SRC_LIST} | |||
| ${GPU_NCCL_LIST} ${MS_HCCL_SRC_LIST} ${MS_PREDICT_SRC_LIST} ${CPU_SRC_LIST} ${MEM_REUSE_SRC_LIST} ${GPU_KERNEL_SRC_LIST}) | |||
| else() | |||
| set(PYTHON_MODULE_SOURCE | |||
| pipeline/init.cc | |||
| kernel/oplib/oplib.cc | |||
| ${MS_STEPS_SRC_LIST} ${MS_CCE_SRC_LIST} ${MS_AICPU_SRC_LIST} ${MS_TASKINFO_LIST} ${MS_RT_SRC_LIST} | |||
| ${GPU_NCCL_LIST} ${MS_HCCL_SRC_LIST} ${MS_PREDICT_SRC_LIST} ${CPU_SRC_LIST} ${MEM_REUSE_SRC_LIST} ${GPU_KERNEL_SRC_LIST}) | |||
| endif() | |||
| set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) | |||
| pybind11_add_module(_c_expression ${PYTHON_MODULE_SOURCE}) | |||
| pybind11_add_module(_c_expression "pipeline/init.cc") | |||
| MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}") | |||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux") | |||
| @@ -372,55 +186,41 @@ else () | |||
| MESSAGE(FATAL_ERROR "other platform: ${CMAKE_SYSTEM_NAME}") | |||
| endif () | |||
| set(ORIGIN_PATH ${ORIGIN_PATH}/lib) | |||
| set_target_properties(_c_expression PROPERTIES INSTALL_RPATH ${ORIGIN_PATH}) | |||
| if (WIN32) | |||
| target_link_libraries(_c_expression PRIVATE | |||
| mindspore::pybind11_module | |||
| securec | |||
| proto_input | |||
| mindspore::flatbuffers | |||
| ) | |||
| else() | |||
| target_link_libraries(_c_expression PRIVATE | |||
| mindspore::pybind11_module | |||
| mindspore | |||
| mindspore_gvar | |||
| ) | |||
| endif() | |||
| if(USE_GLOG) | |||
| if (CMAKE_SYSTEM_NAME MATCHES "Windows") | |||
| target_link_libraries(mindspore mindspore::pybind11_module) | |||
| target_link_libraries(mindspore mindspore_gvar) | |||
| target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive) | |||
| else () | |||
| target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive) | |||
| target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module) | |||
| target_link_libraries(_c_expression PRIVATE mindspore_gvar) | |||
| endif () | |||
| if (USE_GLOG) | |||
| target_link_libraries(_c_expression PRIVATE mindspore::glog) | |||
| endif() | |||
| endif () | |||
| if(ENABLE_DUMP_PROTO) | |||
| if (ENABLE_DUMP_PROTO) | |||
| target_link_libraries(_c_expression PRIVATE mindspore::protobuf) | |||
| endif() | |||
| endif () | |||
| if(ENABLE_GPU) | |||
| if (ENABLE_GPU) | |||
| message("add gpu lib to c_expression") | |||
| target_link_libraries(_c_expression PRIVATE | |||
| gpu_cuda_lib | |||
| gpu_queue | |||
| cublas | |||
| target_link_libraries(_c_expression PRIVATE gpu_cuda_lib gpu_queue cublas | |||
| ${CUDA_PATH}/lib64/libcurand.so | |||
| ${CUDNN_PATH}/lib64/libcudnn.so | |||
| ${CUDA_PATH}/lib64/libcudart.so | |||
| ${CUDA_PATH}/lib64/stubs/libcuda.so) | |||
| if(ENABLE_MPI) | |||
| pybind11_add_module(_ms_mpi ${GPU_MPI_PYTHON_LIST}) | |||
| target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi) | |||
| target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl) | |||
| endif() | |||
| endif() | |||
| endif () | |||
| if(ENABLE_CPU) | |||
| if (ENABLE_CPU) | |||
| target_link_libraries(_c_expression PRIVATE mindspore::dnnl mindspore::mkldnn) | |||
| endif() | |||
| endif () | |||
| if(ENABLE_MINDDATA) | |||
| if (ENABLE_MINDDATA) | |||
| add_subdirectory(mindrecord) | |||
| add_subdirectory(dataset) | |||
| endif() | |||
| endif () | |||
| @@ -1,2 +1,2 @@ | |||
| add_library(_mindspore_common_obj OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/*.cc) | |||
| file(GLOB_RECURSE _COMMON_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | |||
| add_library(_mindspore_common_obj OBJECT ${_COMMON_ALL_SRC_FILES}) | |||
| @@ -101,13 +101,20 @@ const std::map<std::pair<TypeId, TypeId>, DataTypeTransMode> mode_map{ | |||
| {std::pair<TypeId, TypeId>(kNumberTypeInt64, kNumberTypeInt32), FROM_INT64_TO_INT32}, | |||
| {std::pair<TypeId, TypeId>(kNumberTypeUInt16, kNumberTypeInt32), FROM_UINT16_TO_INT32}}; | |||
| template <typename SrcT, typename DstT> | |||
| void TransDataSrc2Dst(const TypeIdArgs &args, void *dst, const size_t data_size) { | |||
| auto src_id = TypeIdSize(args.src_type); | |||
| auto dst_id = TypeIdSize(args.dst_type); | |||
| if (args.src_size / src_id != args.src_shape_size || args.dst_size / dst_id != args.dst_shape_size) { | |||
| void CheckMemSize(const TypeIdArgs &args) { | |||
| auto src_type_size = TypeIdSize(args.host_data_type); | |||
| auto dst_type_size = TypeIdSize(args.device_data_type); | |||
| if (src_type_size < 1 || dst_type_size < 1) { | |||
| MS_LOG(EXCEPTION) << "Invalid src or dst data type."; | |||
| } | |||
| if (args.data_size / src_type_size != args.host_shape_size) { | |||
| MS_LOG(EXCEPTION) << "Invalid src or dst data size."; | |||
| } | |||
| } | |||
| template <typename SrcT, typename DstT> | |||
| void TransDataSrc2Dst(const TypeIdArgs &args, void *dst, const size_t data_size) { | |||
| CheckMemSize(args); | |||
| for (size_t idx = 0; idx != data_size; idx++) { | |||
| SrcT src_data = static_cast<const SrcT *>(args.data)[idx]; | |||
| static_cast<DstT *>(dst)[idx] = static_cast<DstT>(src_data); | |||
| @@ -116,11 +123,7 @@ void TransDataSrc2Dst(const TypeIdArgs &args, void *dst, const size_t data_size) | |||
| template <typename SrcT> | |||
| void TransDataSrc2Fp16(const TypeIdArgs &args, void *dst, const size_t data_size) { | |||
| auto src_id = TypeIdSize(args.src_type); | |||
| auto dst_id = TypeIdSize(args.dst_type); | |||
| if (args.src_size / src_id != args.src_shape_size || args.dst_size / dst_id != args.dst_shape_size) { | |||
| MS_LOG(EXCEPTION) << "Invalid src or dst data size."; | |||
| } | |||
| CheckMemSize(args); | |||
| auto src_data = static_cast<const SrcT *>(args.data); | |||
| auto half_data = static_cast<Eigen::half *>(dst); | |||
| for (size_t i = 0; i < data_size; i++) { | |||
| @@ -394,27 +397,18 @@ bool CheckArgs(const FormatArgs &args, size_t *size, size_t *total_size) { | |||
| } | |||
| bool TransDataType(const TypeIdArgs &args, void *result) { | |||
| MS_LOG(DEBUG) << "Begin trans datatype from " << TypeIdLabel(args.src_type) << " to " << TypeIdLabel(args.dst_type); | |||
| MS_LOG(DEBUG) << "Begin trans datatype from " << TypeIdLabel(args.host_data_type) << " to " | |||
| << TypeIdLabel(args.device_data_type); | |||
| MS_EXCEPTION_IF_NULL(result); | |||
| std::pair<TypeId, TypeId> type_info(args.src_type, args.dst_type); | |||
| std::pair<TypeId, TypeId> type_info(args.host_data_type, args.device_data_type); | |||
| auto iter = mode_map.find(type_info); | |||
| if (iter == mode_map.end()) { | |||
| MS_LOG(ERROR) << "Unsupported datatype trans. src_type :" << TypeIdLabel(args.src_type) | |||
| << ", dst_type:" << TypeIdLabel(args.dst_type); | |||
| MS_LOG(ERROR) << "Unsupported datatype trans. src_type :" << TypeIdLabel(args.host_data_type) | |||
| << ", dst_type:" << TypeIdLabel(args.device_data_type); | |||
| return false; | |||
| } | |||
| auto trans_mode = iter->second; | |||
| auto src_id = TypeIdSize(args.src_type); | |||
| auto dst_id = TypeIdSize(args.dst_type); | |||
| if (src_id < 1 || dst_id < 1) { | |||
| MS_LOG(ERROR) << "Invalid src or dst data type."; | |||
| return false; | |||
| } | |||
| if (args.src_size / src_id != args.src_shape_size || args.dst_size / dst_id != args.dst_shape_size) { | |||
| MS_LOG(ERROR) << "Invalid src or dst data size."; | |||
| return false; | |||
| } | |||
| if (!CastKernel(args, result, args.dst_shape_size, trans_mode)) { | |||
| if (!CastKernel(args, result, args.host_shape_size, trans_mode)) { | |||
| MS_LOG(ERROR) << "Failed to trans datatype.."; | |||
| return false; | |||
| } | |||
| @@ -31,12 +31,10 @@ namespace mindspore { | |||
| namespace trans { | |||
| struct TypeIdArgs { | |||
| const void *data; | |||
| size_t src_size; | |||
| size_t dst_size; | |||
| TypeId src_type; | |||
| TypeId dst_type; | |||
| size_t src_shape_size; | |||
| size_t dst_shape_size; | |||
| size_t host_shape_size; // Multiply each dimension elements. [a, b, c, d] => a*b*c*d | |||
| TypeId host_data_type; | |||
| TypeId device_data_type; | |||
| size_t data_size; | |||
| }; | |||
| struct FormatArgs { | |||
| @@ -74,7 +74,6 @@ else () | |||
| add_library(_c_dataengine SHARED ${submodules}) | |||
| endif () | |||
| set_target_properties(_c_dataengine PROPERTIES | |||
| PREFIX "${PYTHON_MODULE_PREFIX}" | |||
| SUFFIX "${PYTHON_MODULE_EXTENSION}" | |||
| @@ -113,5 +112,10 @@ endif() | |||
| if (USE_GLOG) | |||
| target_link_libraries(_c_dataengine PRIVATE mindspore::glog) | |||
| else() | |||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux") | |||
| target_link_options(_c_dataengine PRIVATE -Wl,-init,mindspore_log_init) | |||
| elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") | |||
| set_target_properties(_c_dataengine PROPERTIES MACOSX_RPATH ON) | |||
| endif () | |||
| endif() | |||
| @@ -28,6 +28,7 @@ | |||
| #include "dataset/engine/datasetops/source/manifest_op.h" | |||
| #include "dataset/engine/datasetops/source/cifar_op.h" | |||
| #include "dataset/engine/datasetops/source/celeba_op.h" | |||
| #include "dataset/engine/datasetops/source/random_data_op.h" | |||
| #include "dataset/engine/datasetops/source/text_file_op.h" | |||
| #include "dataset/engine/datasetops/filter_op.h" | |||
| #include "mindrecord/include/shard_category.h" | |||
| @@ -65,6 +66,7 @@ static std::unordered_map<uint32_t, pFunction> g_parse_op_func_ = {{kStorage, &D | |||
| {kCifar10, &DEPipeline::ParseCifar10Op}, | |||
| {kCifar100, &DEPipeline::ParseCifar100Op}, | |||
| {kCelebA, &DEPipeline::ParseCelebAOp}, | |||
| {kRandomData, &DEPipeline::ParseRandomDataOp}, | |||
| {kTextFile, &DEPipeline::ParseTextFileOp}}; | |||
| DEPipeline::DEPipeline() : iterator_(nullptr) { | |||
| @@ -972,6 +974,45 @@ Status DEPipeline::ParseCifar100Op(const py::dict &args, std::shared_ptr<Dataset | |||
| return Status::OK(); | |||
| } | |||
| Status DEPipeline::ParseRandomDataOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) { | |||
| // Required arguments | |||
| RandomDataOp::Builder builder; | |||
| if (args["num_samples"].is_none()) { | |||
| std::string err_msg = "Error: num_samples is a required argument"; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| std::vector<std::string> columns_to_load; | |||
| bool schema_exists = false; | |||
| // Optional arguments | |||
| for (auto arg : args) { | |||
| std::string key = py::str(arg.first); | |||
| py::handle value = arg.second; | |||
| if (key == "num_parallel_workers") { | |||
| (void)builder.SetNumWorkers(ToInt(value)); | |||
| } else if (key == "schema_file_path" || key == "schema_json_string") { | |||
| schema_exists = true; | |||
| } else if (key == "num_samples") { | |||
| (void)builder.SetTotalRows(ToInt(value)); | |||
| } else if (key == "columns_list") { | |||
| columns_to_load = ToStringVector(value); | |||
| } | |||
| } | |||
| if (schema_exists) { | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| if (args.contains("schema_file_path")) { | |||
| RETURN_IF_NOT_OK(schema->LoadSchemaFile(ToString(args["schema_file_path"]), columns_to_load)); | |||
| } else { | |||
| RETURN_IF_NOT_OK(schema->LoadSchemaString(ToString(args["schema_json_string"]), columns_to_load)); | |||
| } | |||
| (void)builder.SetDataSchema(std::move(schema)); | |||
| } | |||
| std::shared_ptr<RandomDataOp> op; | |||
| RETURN_IF_NOT_OK(builder.Build(&op)); | |||
| *ptr = op; | |||
| return Status::OK(); | |||
| } | |||
| int32_t DEPipeline::GetNumClasses() const { return num_classes_; } | |||
| Status DEPipeline::ParseMnistOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) { | |||
| @@ -60,6 +60,7 @@ enum OpName { | |||
| kCifar10, | |||
| kCifar100, | |||
| kCelebA, | |||
| kRandomData, | |||
| kTextFile | |||
| }; | |||
| @@ -142,6 +143,8 @@ class DEPipeline { | |||
| Status ParseCifar100Op(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| Status ParseRandomDataOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| void PrintTree(); | |||
| int32_t GetNumClasses() const; | |||
| @@ -47,6 +47,7 @@ | |||
| #include "dataset/engine/datasetops/source/mnist_op.h" | |||
| #include "dataset/engine/datasetops/source/manifest_op.h" | |||
| #include "dataset/engine/datasetops/source/mindrecord_op.h" | |||
| #include "dataset/engine/datasetops/source/random_data_op.h" | |||
| #include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/pk_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/random_sampler.h" | |||
| @@ -435,12 +436,12 @@ void bindSamplerOps(py::module *m) { | |||
| .def(py::init<std::vector<int64_t>, uint32_t>(), py::arg("indices"), py::arg("seed") = GetSeed()); | |||
| (void)py::class_<mindrecord::ShardPkSample, mindrecord::ShardOperator, std::shared_ptr<mindrecord::ShardPkSample>>( | |||
| *m, "MindrecordPkSampler") | |||
| .def(py::init([](int64_t kVal, bool shuffle) { | |||
| .def(py::init([](int64_t kVal, std::string kColumn, bool shuffle) { | |||
| if (shuffle == true) { | |||
| return std::make_shared<mindrecord::ShardPkSample>("label", kVal, std::numeric_limits<int64_t>::max(), | |||
| return std::make_shared<mindrecord::ShardPkSample>(kColumn, kVal, std::numeric_limits<int64_t>::max(), | |||
| GetSeed()); | |||
| } else { | |||
| return std::make_shared<mindrecord::ShardPkSample>("label", kVal); | |||
| return std::make_shared<mindrecord::ShardPkSample>(kColumn, kVal); | |||
| } | |||
| })); | |||
| @@ -489,6 +490,7 @@ PYBIND11_MODULE(_c_dataengine, m) { | |||
| .value("VOC", OpName::kVoc) | |||
| .value("CIFAR10", OpName::kCifar10) | |||
| .value("CIFAR100", OpName::kCifar100) | |||
| .value("RANDOMDATA", OpName::kRandomData) | |||
| .value("CELEBA", OpName::kCelebA) | |||
| .value("TEXTFILE", OpName::kTextFile); | |||