diff --git a/.gitignore b/.gitignore
index 77ff222a1a..057169ec42 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,7 @@ cmake-build-debug
*_pb2.py
*.pb.h
*.pb.cc
+*.pb
# Object files
*.o
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 37c3288f12..987e4ae709 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,7 +12,7 @@ if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
endif ()
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
- set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Werror -Wno-return-std-move -Wno-unused-private-field -Wno-unused-lambda-capture -Wno-sign-compare -Wno-overloaded-virtual -Wno-unneeded-internal-declaration -Wno-unused-variable -Wno-pessimizing-move -Wno-inconsistent-missing-override -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
+ set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Werror -Wno-return-std-move -Wno-unused-private-field -Wno-unused-lambda-capture -Wno-sign-compare -Wno-overloaded-virtual -Wno-unneeded-internal-declaration -Wno-unused-variable -Wno-pessimizing-move -Wno-inconsistent-missing-override -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
else()
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
endif()
@@ -38,6 +38,10 @@ if (NOT Patch_FOUND)
endif ()
message(PATCH_EXECUTABLE = ${Patch_EXECUTABLE})
+if (ENABLE_AKG AND ENABLE_D)
+ add_subdirectory("${CMAKE_SOURCE_DIR}/akg")
+endif()
+
include(${CMAKE_SOURCE_DIR}/cmake/mind_expression.cmake)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/flatbuffers/include)
@@ -86,10 +90,6 @@ if (ENABLE_GE OR ENABLE_D OR ENABLE_TESTCASES)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc/toolchain)
endif()
-if (ENABLE_AKG AND ENABLE_D)
- add_subdirectory("${CMAKE_SOURCE_DIR}/akg")
-endif()
-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
add_subdirectory(mindspore/ccsrc)
if (ENABLE_TESTCASES)
diff --git a/README.md b/README.md
index a6bfd1ebbb..25abdd6fcb 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ enrichment of the AI software/hardware application ecosystem.
-For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.3.0-alpha/architecture.html).
+For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/master/architecture.html).
### Automatic Differentiation
@@ -66,7 +66,6 @@ MindSpore offers build options across multiple backends:
| Ascend910 | Ubuntu-x86 | ✔️ |
| | EulerOS-x86 | ✔️ |
| | EulerOS-aarch64 | ✔️ |
-| GPU CUDA 9.2 | Ubuntu-x86 | ✔️ |
| GPU CUDA 10.1 | Ubuntu-x86 | ✔️ |
| CPU | Ubuntu-x86 | ✔️ |
| | Windows-x86 | ✔️ |
@@ -76,7 +75,7 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex
1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.
```
- pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.3.0-alpha/MindSpore/cpu/ubuntu_x86/mindspore-0.3.0-cp37-cp37m-linux_x86_64.whl
+ pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.5.0-beta/MindSpore/cpu/ubuntu_x86/mindspore-0.5.0-cp37-cp37m-linux_x86_64.whl
```
2. Run the following command to verify the install.
@@ -133,8 +132,8 @@ currently the containerized build options are supported as follows:
For `CPU` backend, you can directly pull and run the latest stable image using the below command:
```
- docker pull mindspore/mindspore-cpu:0.3.0-alpha
- docker run -it mindspore/mindspore-cpu:0.3.0-alpha /bin/bash
+ docker pull mindspore/mindspore-cpu:0.5.0-beta
+ docker run -it mindspore/mindspore-cpu:0.5.0-beta /bin/bash
```
* GPU
@@ -151,8 +150,8 @@ currently the containerized build options are supported as follows:
Then you can pull and run the latest stable image using the below command:
```
- docker pull mindspore/mindspore-gpu:0.3.0-alpha
- docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.3.0-alpha /bin/bash
+ docker pull mindspore/mindspore-gpu:0.5.0-beta
+ docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.5.0-beta /bin/bash
```
To test if the docker image works, please execute the python code below and check the output:
@@ -187,7 +186,7 @@ please check out [docker](docker/README.md) repo for the details.
## Quickstart
-See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.3.0-alpha/quick_start/quick_start.html)
+See the [Quick Start](https://www.mindspore.cn/tutorial/en/master/quick_start/quick_start.html)
to implement the image classification.
## Docs
diff --git a/RELEASE.md b/RELEASE.md
index 9824f803f0..4b829152a2 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,3 +1,75 @@
+# Release 0.5.0-beta
+
+## Major Features and Improvements
+
+### Ascend 910 Training and Inference Framework
+* New models
+  * ResNext50: a simple, highly modularized network architecture using aggregated residual transformations for image classification on the ImageNet 2012 dataset.
+  * MASS: a pre-training method for sequence-to-sequence language generation tasks, covering text summarization and conversational response generation, using the News Crawl 2007-2017 dataset, the Gigaword corpus and the Cornell Movie Dialog corpus.
+  * Transformer: a neural network architecture for language understanding on the WMT 2014 English-German dataset.
+  * GCN: Graph Convolutional Networks for node classification on the Cora and CiteSeer datasets.
+  * GAT: an attention-based graph neural network for node classification on the Cora and CiteSeer datasets.
+* Frontend and user interface
+  * Support getting and assigning tensor values through mixed tensor indices in graph mode.
+  * Support tensor comparison, the len operator, constexpr syntax, and getting and assigning tensor values through tensor indices in pynative mode (see the sketch after this list).
+  * Support converting MindSpore IR to pb format for inference models.
+  * Support the print operator writing data directly to the hard disk.
+  * Add a double recursive programming solution for very fast parallel strategy search in automatic parallel.
+  * User interface change log
+    * Allow the learning rate of AdamWeightDecayDynamicLR and Lamb to be 0([!1826](https://gitee.com/mindspore/mindspore/pulls/1826))
+    * Restrict the inputs of the entire network to be Tensors([!1967](https://gitee.com/mindspore/mindspore/pulls/1967))
+    * Turn `shape` and `dtype` into attributes instead of interfaces([!1919](https://gitee.com/mindspore/mindspore/pulls/1919))
+    * Delete `multitypefungraph`([!2116](https://gitee.com/mindspore/mindspore/pulls/2116))
+    * Refactor the callback module in an encapsulated way, using `_CallbackManager` instead of `_build_callbacks`([!2236](https://gitee.com/mindspore/mindspore/pulls/2236))
+    * Delete `EmbeddingLookup`([!2163](https://gitee.com/mindspore/mindspore/pulls/2163))
+    * Add `model_type` to checkpoint([!2517](https://gitee.com/mindspore/mindspore/pulls/2517))
+* Executor and performance optimization
+  * Support heterogeneous execution on CPU and Ascend devices, verified with the Wide&Deep model.
+  * Support quantization training of MobileNetV2, LeNet and ResNet50 on Ascend 910.
+  * Support a new fusion architecture, which performs fusion optimization across graphs and kernels to improve execution speed.
+* Data processing, augmentation, and save format
+ * Support data processing pipeline performance profiling.
+  * Support public dataset loading, such as CLUE and COCO.
+  * Support more text processing, such as additional tokenizers and vocab data.
+ * Support MindRecord padded data.
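+
+A minimal sketch of the new tensor syntax in pynative mode (illustrative only, assuming the 0.5.0 Python API; the indexed reads and writes are hedged on the features listed above):
+
+```python
+import numpy as np
+from mindspore import Tensor, context
+
+context.set_context(mode=context.PYNATIVE_MODE)
+
+x = Tensor(np.arange(12).reshape(3, 4).astype(np.float32))
+print(len(x))            # len operator now works on a Tensor
+print(x.shape, x.dtype)  # shape and dtype are attributes rather than interfaces
+x[0] = Tensor(np.ones(4).astype(np.float32))  # assignment through a tensor index
+print(x[0])              # reading a value through a tensor index
+```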
+
+### Other Hardware Support
+* GPU platform
+ * New model supported: Bert / Wide&Deep.
+  * Support setting max device memory (see the sketch after this list).
+* CPU platform
+ * New model supported: LSTM.
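+
+A minimal sketch of capping device memory on the GPU backend (illustrative only; the `max_device_memory` keyword of `context.set_context` is an assumption based on the feature above):
+
+```python
+from mindspore import context
+
+# limit the memory pool MindSpore may allocate on the GPU to 3.5 GB
+context.set_context(device_target="GPU", max_device_memory="3.5GB")
+```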
+
+## Bugfixes
+* Models
+  * Bert: moved from `example` to `model_zoo`, optimized the network for better performance. ([!1902](https://gitee.com/mindspore/mindspore/pulls/1902))
+  * VGG16: moved from `example` to `model_zoo`, optimized the network for better accuracy. ([!2645](https://gitee.com/mindspore/mindspore/pulls/2645))
+  * AlexNet: modified parameter settings to improve accuracy ([!1364](https://gitee.com/mindspore/mindspore/pulls/2370))
+  * Wide&Deep: moved from `example` to `model_zoo`, optimized the network for better performance. ([!2221](https://gitee.com/mindspore/mindspore/pulls/2221))
+* Python API
+ * Fix bug in auto cast([!1766](https://gitee.com/mindspore/mindspore/pulls/1766))
+ * Fix bug of register_backward_hook([!2148](https://gitee.com/mindspore/mindspore/pulls/2148))
+ * Fix bug of tuple args in pynative mode([!1878](https://gitee.com/mindspore/mindspore/pulls/1878))
+ * Fix bug of checking numbers of arguments and graph parameters([!1701](https://gitee.com/mindspore/mindspore/pulls/1701))
+* Executor
+ * Fix bug of loading input data repeatedly in pynative mode([!1966](https://gitee.com/mindspore/mindspore/pulls/1966))
+  * Fix bug where a list could not be used as input in pynative mode([!1765](https://gitee.com/mindspore/mindspore/pulls/1765))
+ * Fix bug of kernel select ([!2103](https://gitee.com/mindspore/mindspore/pulls/2103))
+  * Fix bug of pattern matching for batchnorm fusion in the case of auto mixed precision.([!1851](https://gitee.com/mindspore/mindspore/pulls/1851))
+  * Fix bug of generating hccl's kernel info.([!2393](https://gitee.com/mindspore/mindspore/pulls/2393))
+* GPU platform
+  * Fix bug where the summary feature was invalid([!2173](https://gitee.com/mindspore/mindspore/pulls/2173))
+* Data processing
+ * Fix bug of Cifar dataset reading([!2096](https://gitee.com/mindspore/mindspore/pulls/2096))
+ * Fix bug of C++ behavior in RandomCropAndResize([!2026](https://gitee.com/mindspore/mindspore/pulls/2026))
+ * Fix the bug of mindrecord shuffle([!2420](https://gitee.com/mindspore/mindspore/pulls/2420))
+
+## Contributors
+Thanks goes to these wonderful people:
+
+Alexey Shevlyakov, avakh, baihuawei, BowenK, buxue, caifubi, caojian05, Cathy Wong, changzherui, chenfei, chengxianbin, chenhaozhe, chenjianping, chentingting, chenzomi, chujinjin, Danish Farid, dayschan, dengwentao, dinghao, etone-chan, fangzehua, fary86, geekun, Giancarlo Colmenares, gong chen, gukecai, guohongzilong, hangangqiang, heleiwang, hesham, He Wei, hexia, hongxing, huangdongrun, huanghui, islam_amin, Jamie Nisbet, Jesse Lee, jiangjinsheng, jiangzhiwen, jinyaohui, jjfeing, jojobugfree, Jonathan Yan, jonyguo, Junhan Hu, Kang, kingfo, kouzhenzhong, kpy, kswang, laiyongqiang, leopz, liangzelang, lichenever, lihongkang, Li Hongzhang, lilei, limingqi107, lirongzhen1, liubuyu, liuchongming74, liuwenhao4, liuxiao, Lixia Chen, liyanliu, liyong, lizhenyu, lvliang, Mahdi, Margaret_wangrui, meixiaowei, ms_yan, nhussain, ougongchang, panfengfeng, panyifeng, peilinwang, Peilin Wang, pkuliuliu, qianlong, rick_sanchez, shibeiji, Shida He, shijianning, simson, sunsuodong, suteng, Tinazhang, Tron Zhang, unknown, VectorSL, wandongdong, wangcong, wangdongxu, wangdongxu6, wanghua, wangnan39, Wei Luning, wenchunjiang, wenkai, wilfChen, WilliamLian, wukesong, Xian Weizhao, Xiaoda Zhang, xiefangqi, xulei2020, xunxue, xutianchun, Yang, yanghaitao, yanghaitao1, yanghaoran, yangjie, yangjie159, YangLuo, Yanjun Peng, yankai, yanzhenxiang2020, yao_yf, Yi Huaijie, yoonlee666, yuchaojie, yujianfeng, zhangzhongpeng, zhangdengcheng, Zhang Qinghua, zhangyinxia, zhangz0911gm, zhaojichen, zhaoting, zhaozhenlong, zhoufeng, zhouneng, zhousiyi, Zirui Wu, Ziyan, zjun, ZPaC, lihongzhang, wangdongxu
+
+Contributions of any kind are welcome!
+
# Release 0.3.0-alpha
## Major Features and Improvements
diff --git a/akg b/akg
index c460176523..df57a6cf94 160000
--- a/akg
+++ b/akg
@@ -1 +1 @@
-Subproject commit c460176523d039c8995f1d71089753725ebc0792
+Subproject commit df57a6cf9450e347d1854687d1fe66a420ee3b35
diff --git a/build.sh b/build.sh
index 70718bf89b..059478b9af 100755
--- a/build.sh
+++ b/build.sh
@@ -25,7 +25,7 @@ usage()
echo "Usage:"
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
- echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K]"
+ echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]"
echo ""
echo "Options:"
echo " -d Debug mode"
@@ -50,10 +50,12 @@ usage()
echo " -D Enable dumping of function graph ir, default on"
echo " -z Compile dataset & mindrecord, default on"
echo " -M Enable MPI and NCCL for GPU training, gpu default on"
- echo " -V Specify the minimum required cuda version, default CUDA 9.2"
+ echo " -V Specify the minimum required cuda version, default CUDA 10.1"
echo " -I Compile predict, default off"
- echo " -K Compile with AKG, default off"
+ echo " -K Compile with AKG, default on"
echo " -s Enable serving module, default off"
+ echo " -B Enable debugger, default off"
+ echo " -E Enable IBVERBS for parameter server, default off"
}
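+
+# Illustrative invocations (examples only, based on the options documented above):
+#   bash build.sh -e gpu -V 10.1 -j12   # GPU build against CUDA 10.1
+#   bash build.sh -e d -B on            # Ascend build with the debugger enabled
+#   bash build.sh -e gpu -E             # GPU build with IBVERBS for the parameter server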
# check value of input is 'on' or 'off'
@@ -88,14 +90,17 @@ checkopts()
ENABLE_DUMP_IR="on"
COMPILE_MINDDATA="on"
ENABLE_MPI="off"
- CUDA_VERSION="9.2"
+ CUDA_VERSION="10.1"
COMPILE_PREDICT="off"
USE_GLOG="on"
PREDICT_PLATFORM=""
ENABLE_AKG="on"
ENABLE_SERVING="off"
+ ENABLE_DEBUGGER="off"
+ ENABLE_IBVERBS="off"
+
# Process the options
- while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:s' opt
+ while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt
do
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
case "${opt}" in
@@ -191,6 +196,10 @@ checkopts()
usage
exit 1
fi
+ if [[ "X$OPTARG" == "X9.2" ]]; then
+ echo "Unsupported CUDA version 9.2"
+ exit 1
+ fi
CUDA_VERSION="$OPTARG"
;;
P)
@@ -240,6 +249,15 @@ checkopts()
ENABLE_SERVING="on"
echo "enable serving"
;;
+ B)
+ check_on_off $OPTARG B
+ ENABLE_DEBUGGER="on"
+ echo "enable debugger"
+ ;;
+ E)
+ ENABLE_IBVERBS="on"
+ echo "enable IBVERBS for parameter server"
+ ;;
*)
echo "Unknown option ${opt}!"
usage
@@ -322,7 +340,13 @@ build_mindspore()
if [[ "X$ENABLE_SERVING" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_SERVING=ON"
fi
+ if [[ "X$ENABLE_DEBUGGER" = "Xon" ]]; then
+ CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DEBUGGER=ON"
+ fi
+ if [[ "X$ENABLE_IBVERBS" = "Xon" ]]; then
+ CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_IBVERBS=ON"
+ fi
echo "${CMAKE_ARGS}"
if [[ "X$INC_BUILD" = "Xoff" ]]; then
cmake ${CMAKE_ARGS} ../..
@@ -446,9 +470,9 @@ build_predict()
cd "${BASEPATH}/predict/output/"
if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then
- tar -cf MSPredict-0.3.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
+ tar -cf MSPredict-0.5.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then
- tar -cf MSPredict-0.3.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
+ tar -cf MSPredict-0.5.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
fi
echo "success to build predict project!"
}
diff --git a/cmake/external_libs/absl.cmake b/cmake/external_libs/absl.cmake
new file mode 100644
index 0000000000..6087b65128
--- /dev/null
+++ b/cmake/external_libs/absl.cmake
@@ -0,0 +1,14 @@
+mindspore_add_pkg(absl
+ VER 20200225.2
+ LIBS absl_strings absl_throw_delegate absl_raw_logging_internal absl_int128 absl_bad_optional_access
+ URL https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz
+ MD5 73f2b6e72f1599a9139170c29482ddc4
+ CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=TRUE)
+
+include_directories(${absl_INC})
+
+add_library(mindspore::absl_strings ALIAS absl::absl_strings)
+add_library(mindspore::absl_throw_delegate ALIAS absl::absl_throw_delegate)
+add_library(mindspore::absl_raw_logging_internal ALIAS absl::absl_raw_logging_internal)
+add_library(mindspore::absl_int128 ALIAS absl::absl_int128)
+add_library(mindspore::absl_bad_optional_access ALIAS absl::absl_bad_optional_access)
diff --git a/cmake/external_libs/c-ares.cmake b/cmake/external_libs/c-ares.cmake
new file mode 100644
index 0000000000..9bb547f2db
--- /dev/null
+++ b/cmake/external_libs/c-ares.cmake
@@ -0,0 +1,12 @@
+mindspore_add_pkg(c-ares
+ VER 1.15.0
+ LIBS cares
+ URL https://github.com/c-ares/c-ares/releases/download/cares-1_15_0/c-ares-1.15.0.tar.gz
+ MD5 d2391da274653f7643270623e822dff7
+ CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release
+ -DCARES_SHARED:BOOL=OFF
+ -DCARES_STATIC:BOOL=ON
+ -DCARES_STATIC_PIC:BOOL=ON)
+
+include_directories(${c-ares_INC})
+add_library(mindspore::cares ALIAS c-ares::cares)
diff --git a/cmake/external_libs/grpc.cmake b/cmake/external_libs/grpc.cmake
new file mode 100644
index 0000000000..7496cfd88e
--- /dev/null
+++ b/cmake/external_libs/grpc.cmake
@@ -0,0 +1,110 @@
+set(grpc_USE_STATIC_LIBS ON)
+if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+ set(grpc_CXXFLAGS "-fstack-protector-all -Wno-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -O2")
+elseif (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
+ set(grpc_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -O2")
+else()
+ set(grpc_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
+endif()
+
+set(grpc_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
+
+
+if (EXISTS ${protobuf_ROOT}/lib64)
+ set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${protobuf_ROOT}/lib64/cmake/protobuf")
+else()
+ set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${protobuf_ROOT}/lib/cmake/protobuf")
+endif()
+message("grpc using Protobuf_DIR : " ${_FINDPACKAGE_PROTOBUF_CONFIG_DIR})
+
+if (EXISTS ${absl_ROOT}/lib64)
+ set(_FINDPACKAGE_ABSL_CONFIG_DIR "${absl_ROOT}/lib64/cmake/absl")
+else()
+ set(_FINDPACKAGE_ABSL_CONFIG_DIR "${absl_ROOT}/lib/cmake/absl")
+endif()
+message("grpc using absl_DIR : " ${_FINDPACKAGE_ABSL_CONFIG_DIR})
+
+set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "")
+if (OPENSSL_ROOT_DIR)
+ set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}")
+endif()
+
+mindspore_add_pkg(grpc
+ VER 1.27.3
+ LIBS grpc++ grpc gpr upb address_sorting
+ EXE grpc_cpp_plugin
+ URL https://github.com/grpc/grpc/archive/v1.27.3.tar.gz
+ MD5 0c6c3fc8682d4262dd0e5e6fabe1a7e2
+ CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release
+ -DgRPC_INSTALL:BOOL=ON
+ -DgRPC_BUILD_TESTS:BOOL=OFF
+ -DgRPC_PROTOBUF_PROVIDER:STRING=package
+ -DgRPC_PROTOBUF_PACKAGE_TYPE:STRING=CONFIG
+ -DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
+ -DgRPC_ZLIB_PROVIDER:STRING=package
+ -DZLIB_ROOT:PATH=${zlib_ROOT}
+ -DgRPC_ABSL_PROVIDER:STRING=package
+ -Dabsl_DIR:PATH=${_FINDPACKAGE_ABSL_CONFIG_DIR}
+ -DgRPC_CARES_PROVIDER:STRING=package
+ -Dc-ares_DIR:PATH=${c-ares_ROOT}/lib/cmake/c-ares
+ -DgRPC_SSL_PROVIDER:STRING=package
+ ${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
+ )
+
+include_directories(${grpc_INC})
+
+add_library(mindspore::grpc++ ALIAS grpc::grpc++)
+
+# link other grpc libs
+target_link_libraries(grpc::grpc++ INTERFACE grpc::grpc grpc::gpr grpc::upb grpc::address_sorting)
+
+# link built dependencies
+target_link_libraries(grpc::grpc++ INTERFACE mindspore::z)
+target_link_libraries(grpc::grpc++ INTERFACE mindspore::cares)
+target_link_libraries(grpc::grpc++ INTERFACE mindspore::absl_strings mindspore::absl_throw_delegate
+ mindspore::absl_raw_logging_internal mindspore::absl_int128 mindspore::absl_bad_optional_access)
+
+# link system openssl
+find_package(OpenSSL REQUIRED)
+target_link_libraries(grpc::grpc++ INTERFACE OpenSSL::SSL OpenSSL::Crypto)
+
+
+function(ms_grpc_generate c_var h_var)
+ if(NOT ARGN)
+ message(SEND_ERROR "Error: ms_grpc_generate() called without any proto files")
+ return()
+ endif()
+
+ set(${c_var})
+ set(${h_var})
+
+ foreach(file ${ARGN})
+ get_filename_component(abs_file ${file} ABSOLUTE)
+ get_filename_component(file_name ${file} NAME_WE)
+ get_filename_component(file_dir ${abs_file} PATH)
+ file(RELATIVE_PATH rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${file_dir})
+
+ list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc")
+ list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h")
+ list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.grpc.pb.cc")
+ list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.grpc.pb.h")
+
+ add_custom_command(
+ OUTPUT "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc"
+ "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h"
+ "${CMAKE_BINARY_DIR}/proto/${file_name}.grpc.pb.cc"
+ "${CMAKE_BINARY_DIR}/proto/${file_name}.grpc.pb.h"
+ WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto"
+ COMMAND protobuf::protoc --version
+ COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto
+                --grpc_out=${CMAKE_BINARY_DIR}/proto --plugin=protoc-gen-grpc=$<TARGET_FILE:grpc::grpc_cpp_plugin> ${abs_file}
+ DEPENDS protobuf::protoc grpc::grpc_cpp_plugin ${abs_file}
+ COMMENT "Running C++ gRPC compiler on ${file}" VERBATIM)
+ endforeach()
+
+ set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
+ set(${c_var} ${${c_var}} PARENT_SCOPE)
+ set(${h_var} ${${h_var}} PARENT_SCOPE)
+
+endfunction()
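+
+# Illustrative usage (the proto path and variable names below are hypothetical):
+#   ms_grpc_generate(DEBUGGER_PROTO_SRCS DEBUGGER_PROTO_HDRS "debugger/debug_grpc.proto")
+#   list(APPEND MS_SRC_LIST ${DEBUGGER_PROTO_SRCS})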
diff --git a/cmake/external_libs/pslite.cmake b/cmake/external_libs/pslite.cmake
new file mode 100644
index 0000000000..28c851b094
--- /dev/null
+++ b/cmake/external_libs/pslite.cmake
@@ -0,0 +1,14 @@
+set(pslite_USE_STATIC_LIBS ON)
+if (${ENABLE_IBVERBS} STREQUAL "ON")
+ set(pslite_CXXFLAGS "USE_IBVERBS=1")
+endif()
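+# ps-lite ships no CMake build: ONLY_MAKE below runs plain make and copies the listed headers/libs into the package layout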
+mindspore_add_pkg(pslite
+ LIBS ps
+ URL https://github.com/dmlc/ps-lite/archive/34fd45cae457d59850fdcb2066467778d0673f21.zip
+ MD5 393c0e27b68bfaf96718caa3aa96f5a3
+ PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/pslite/ps_lite.patch001
+ ONLY_MAKE True
+ ONLY_MAKE_INCS include/*
+ ONLY_MAKE_LIBS build/*)
+include_directories(${pslite_INC})
+add_library(mindspore::pslite ALIAS pslite::ps)
diff --git a/cmake/external_libs/zeromq.cmake b/cmake/external_libs/zeromq.cmake
new file mode 100644
index 0000000000..122f1ee90c
--- /dev/null
+++ b/cmake/external_libs/zeromq.cmake
@@ -0,0 +1,5 @@
+mindspore_add_pkg(zeromq
+ VER 4.1.4
+ HEAD_ONLY ./
+ URL https://raw.githubusercontent.com/mli/deps/master/build/zeromq-4.1.4.tar.gz
+ MD5 a611ecc93fffeb6d058c0e6edf4ad4fb)
diff --git a/cmake/external_libs/zlib.cmake b/cmake/external_libs/zlib.cmake
new file mode 100644
index 0000000000..06532ed8d7
--- /dev/null
+++ b/cmake/external_libs/zlib.cmake
@@ -0,0 +1,9 @@
+mindspore_add_pkg(zlib
+ VER 1.2.11
+ LIBS z
+ URL https://github.com/madler/zlib/archive/v1.2.11.tar.gz
+ MD5 0095d2d2d1f3442ce1318336637b695f
+ CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release)
+
+include_directories(${zlib_INC})
+add_library(mindspore::z ALIAS zlib::z)
diff --git a/cmake/mind_expression.cmake b/cmake/mind_expression.cmake
index 86337c1dd2..63a65cd533 100644
--- a/cmake/mind_expression.cmake
+++ b/cmake/mind_expression.cmake
@@ -14,12 +14,26 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/eigen.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/dependency_securec.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
+
+if (ENABLE_DEBUGGER)
+ # build dependencies of gRPC
+ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/absl.cmake)
+ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/c-ares.cmake)
+ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zlib.cmake)
+ # build gRPC
+ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/grpc.cmake)
+endif()
+
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pybind11.cmake)
MESSAGE("go to link flatbuffers")
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/flatbuffers.cmake)
if(USE_GLOG)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/glog.cmake)
endif()
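+# the parameter-server stack (ps-lite and its zeromq dependency) is built only on non-Windows platforms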
+if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
+ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zeromq.cmake)
+ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pslite.cmake)
+endif()
find_package(Python3)
include_directories(${Python3_INCLUDE_DIRS})
diff --git a/cmake/options.cmake b/cmake/options.cmake
index 3e03ed3339..18db942d68 100644
--- a/cmake/options.cmake
+++ b/cmake/options.cmake
@@ -17,6 +17,8 @@ option(ENABLE_DUMP_E2E "Enable dump e2e file, default on" OFF)
option(ENABLE_DUMP_IR "Enable dump function graph ir, default on" ON)
option(ENABLE_MPI "enable mpi" OFF)
option(ENABLE_AKG "enable akg" OFF)
+option(ENABLE_DEBUGGER "enable debugger" OFF)
+option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (WIN32)
@@ -112,3 +114,7 @@ endif()
if(ENABLE_DUMP_E2E)
add_compile_definitions(ENABLE_DUMP_E2E)
endif()
+
+if(ENABLE_DEBUGGER)
+ add_compile_definitions(ENABLE_DEBUGGER)
+endif()
diff --git a/cmake/package.cmake b/cmake/package.cmake
index 1cff396ef1..42821cf41d 100644
--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@@ -128,6 +128,11 @@ if (ENABLE_MPI)
DESTINATION ${INSTALL_BASE_DIR}
COMPONENT mindspore
)
+ install(
+ TARGETS mpi_adapter
+ DESTINATION ${INSTALL_LIB_DIR}
+ COMPONENT mindspore
+ )
endif ()
if (ENABLE_GPU)
diff --git a/cmake/utils.cmake b/cmake/utils.cmake
index f0a5dc594c..cf8f6ebb46 100644
--- a/cmake/utils.cmake
+++ b/cmake/utils.cmake
@@ -206,7 +206,7 @@ function(mindspore_add_pkg pkg_name )
set(options )
set(oneValueArgs URL MD5 GIT_REPOSITORY GIT_TAG VER EXE DIR HEAD_ONLY CMAKE_PATH RELEASE LIB_PATH CUSTOM_CMAKE)
- set(multiValueArgs CMAKE_OPTION LIBS PRE_CONFIGURE_COMMAND CONFIGURE_COMMAND BUILD_OPTION INSTALL_INCS INSTALL_LIBS PATCHES SUBMODULES SOURCEMODULES)
+ set(multiValueArgs CMAKE_OPTION LIBS PRE_CONFIGURE_COMMAND CONFIGURE_COMMAND BUILD_OPTION INSTALL_INCS INSTALL_LIBS PATCHES SUBMODULES SOURCEMODULES ONLY_MAKE ONLY_MAKE_INCS ONLY_MAKE_LIBS)
cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
if (NOT PKG_LIB_PATH)
@@ -290,7 +290,7 @@ function(mindspore_add_pkg pkg_name )
foreach(_PATCH_FILE ${PKG_PATCHES})
get_filename_component(_PATCH_FILE_NAME ${_PATCH_FILE} NAME)
set(_LF_PATCH_FILE ${CMAKE_BINARY_DIR}/_ms_patch/${_PATCH_FILE_NAME})
- configure_file(${_PATCH_FILE} ${_LF_PATCH_FILE} NEWLINE_STYLE LF)
+ configure_file(${_PATCH_FILE} ${_LF_PATCH_FILE} NEWLINE_STYLE LF @ONLY)
message("patching ${${pkg_name}_SOURCE_DIR} -p1 < ${_LF_PATCH_FILE}")
execute_process(COMMAND ${Patch_EXECUTABLE} -p1 INPUT_FILE ${_LF_PATCH_FILE}
@@ -324,6 +324,16 @@ function(mindspore_add_pkg pkg_name )
target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC})
endif ()
+ elseif (PKG_ONLY_MAKE)
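+        # ONLY_MAKE: build in the source tree with plain make, then copy the globbed headers/libs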
+ __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} ${${pkg_name}_CXXFLAGS} -j${THNUM}
+ WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR})
+ set(PKG_INSTALL_INCS ${PKG_ONLY_MAKE_INCS})
+ set(PKG_INSTALL_LIBS ${PKG_ONLY_MAKE_LIBS})
+ file(GLOB ${pkg_name}_INSTALL_INCS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_INCS})
+ file(GLOB ${pkg_name}_INSTALL_LIBS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_LIBS})
+ file(COPY ${${pkg_name}_INSTALL_INCS} DESTINATION ${${pkg_name}_BASE_DIR}/include)
+ file(COPY ${${pkg_name}_INSTALL_LIBS} DESTINATION ${${pkg_name}_BASE_DIR}/lib)
+
elseif (PKG_CMAKE_OPTION)
# in cmake
file(MAKE_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build)
diff --git a/docker/mindspore-cpu/0.5.0-beta/Dockerfile b/docker/mindspore-cpu/0.5.0-beta/Dockerfile
new file mode 100644
index 0000000000..4da6294296
--- /dev/null
+++ b/docker/mindspore-cpu/0.5.0-beta/Dockerfile
@@ -0,0 +1,67 @@
+FROM ubuntu:18.04
+
+MAINTAINER leonwanghui
+
+# Set env
+ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
+ENV PATH /usr/local/bin:$PATH
+
+# Install base tools
+RUN apt update \
+ && DEBIAN_FRONTEND=noninteractive apt install -y \
+ vim \
+ wget \
+ curl \
+ xz-utils \
+ net-tools \
+ openssh-client \
+ git \
+ ntpdate \
+ tzdata \
+ tcl \
+ sudo \
+ bash-completion
+
+# Install compile tools
+RUN DEBIAN_FRONTEND=noninteractive apt install -y \
+ gcc \
+ g++ \
+ zlibc \
+ make \
+ libgmp-dev \
+ patch \
+ autoconf \
+ libtool \
+ automake \
+ flex
+
+# Set bash
+RUN echo "dash dash/sh boolean false" | debconf-set-selections
+RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
+
+# Install python (v3.7.5)
+RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
+ libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
+ && cd /tmp \
+ && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
+ && tar -xvf v3.7.5.tar.gz \
+ && cd /tmp/cpython-3.7.5 \
+ && mkdir -p ${PYTHON_ROOT_PATH} \
+ && ./configure --prefix=${PYTHON_ROOT_PATH} \
+ && make -j4 \
+ && make install -j4 \
+ && rm -f /usr/local/bin/python \
+ && rm -f /usr/local/bin/pip \
+ && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
+ && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
+ && rm -rf /tmp/cpython-3.7.5 \
+ && rm -f /tmp/v3.7.5.tar.gz
+
+# Set pip source
+RUN mkdir -pv /root/.pip \
+ && echo "[global]" > /root/.pip/pip.conf \
+ && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
+ && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
+
+# Install MindSpore cpu whl package
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.5.0-beta/MindSpore/cpu/ubuntu_x86/mindspore-0.5.0-cp37-cp37m-linux_x86_64.whl
diff --git a/docker/mindspore-gpu/0.5.0-beta/Dockerfile b/docker/mindspore-gpu/0.5.0-beta/Dockerfile
new file mode 100644
index 0000000000..dae6d16370
--- /dev/null
+++ b/docker/mindspore-gpu/0.5.0-beta/Dockerfile
@@ -0,0 +1,83 @@
+FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
+
+MAINTAINER leonwanghui
+
+# Set env
+ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
+ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5
+ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH
+ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH
+
+# Install base tools
+RUN apt update \
+ && DEBIAN_FRONTEND=noninteractive apt install -y \
+ vim \
+ wget \
+ curl \
+ xz-utils \
+ net-tools \
+ openssh-client \
+ git \
+ ntpdate \
+ tzdata \
+ tcl \
+ sudo \
+ bash-completion
+
+# Install compile tools
+RUN DEBIAN_FRONTEND=noninteractive apt install -y \
+ gcc \
+ g++ \
+ zlibc \
+ make \
+ libgmp-dev \
+ patch \
+ autoconf \
+ libtool \
+ automake \
+ flex \
+ libnccl2=2.4.8-1+cuda10.1 \
+ libnccl-dev=2.4.8-1+cuda10.1
+
+# Set bash
+RUN echo "dash dash/sh boolean false" | debconf-set-selections
+RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
+
+# Install python (v3.7.5)
+RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
+ libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
+ && cd /tmp \
+ && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
+ && tar -xvf v3.7.5.tar.gz \
+ && cd /tmp/cpython-3.7.5 \
+ && mkdir -p ${PYTHON_ROOT_PATH} \
+ && ./configure --prefix=${PYTHON_ROOT_PATH} \
+ && make -j4 \
+ && make install -j4 \
+ && rm -f /usr/local/bin/python \
+ && rm -f /usr/local/bin/pip \
+ && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
+ && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
+ && rm -rf /tmp/cpython-3.7.5 \
+ && rm -f /tmp/v3.7.5.tar.gz
+
+# Set pip source
+RUN mkdir -pv /root/.pip \
+ && echo "[global]" > /root/.pip/pip.conf \
+ && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
+ && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
+
+# Install openmpi (v3.1.5)
+RUN cd /tmp \
+ && wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \
+ && tar -xvf openmpi-3.1.5.tar.gz \
+ && cd /tmp/openmpi-3.1.5 \
+ && mkdir -p ${OMPI_ROOT_PATH} \
+ && ./configure --prefix=${OMPI_ROOT_PATH} \
+ && make -j4 \
+ && make install -j4 \
+ && rm -rf /tmp/openmpi-3.1.5 \
+ && rm -f /tmp/openmpi-3.1.5.tar.gz
+
+# Install MindSpore cuda-10.1 whl package
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.5.0-beta/MindSpore/gpu/ubuntu_x86/cuda-10.1/mindspore_gpu-0.5.0-cp37-cp37m-linux_x86_64.whl
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/README.md b/example/nlp_to_mindrecord/CLUERNER2020/README.md
deleted file mode 100644
index c862156a47..0000000000
--- a/example/nlp_to_mindrecord/CLUERNER2020/README.md
+++ /dev/null
@@ -1,82 +0,0 @@
-# Guideline to Convert Training Data CLUERNER2020 to MindRecord For Bert Fine Tuning
-
-
-
-- [What does the example do](#what-does-the-example-do)
-- [How to use the example to process CLUERNER2020](#how-to-use-the-example-to-process-cluerner2020)
- - [Download CLUERNER2020 and unzip](#download-cluerner2020-and-unzip)
- - [Generate MindRecord](#generate-mindrecord)
- - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord)
-
-
-
-
-## What does the example do
-
-This example is based on [CLUERNER2020](https://www.cluebenchmarks.com/introduce.html) training data, generating MindRecord file, and finally used for Bert Fine Tuning progress.
-
-1. run.sh: generate MindRecord entry script
-2. run_read.py: create MindDataset by MindRecord entry script.
- - create_dataset.py: use MindDataset to read MindRecord to generate dataset.
-
-## How to use the example to process CLUERNER2020
-
-Download CLUERNER2020, convert it to MindRecord, use MindDataset to read MindRecord.
-
-### Download CLUERNER2020 and unzip
-
-1. Download the training data zip.
- > [CLUERNER2020 dataset download address](https://www.cluebenchmarks.com/introduce.html) **-> 任务介绍 -> CLUENER 细粒度命名实体识别 -> cluener下载链接**
-
-2. Unzip the training data to dir example/nlp_to_mindrecord/CLUERNER2020/cluener_public.
- ```
- unzip -d {your-mindspore}/example/nlp_to_mindrecord/CLUERNER2020/data/cluener_public cluener_public.zip
- ```
-
-### Generate MindRecord
-
-1. Run the run.sh script.
- ```bash
- bash run.sh
- ```
-
-2. Output like this:
- ```
- ...
- [INFO] ME(17603:139620983514944,MainProcess):2020-04-28-16:56:12.498.235 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['data/train.mindrecord'], and the list of index files are: ['data/train.mindrecord.db']
- ...
- [INFO] ME(17603,python):2020-04-28-16:56:13.400.175 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
- [INFO] ME(17603,python):2020-04-28-16:56:13.400.863 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
- [INFO] ME(17603,python):2020-04-28-16:56:13.401.534 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
- [INFO] ME(17603,python):2020-04-28-16:56:13.402.179 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
- [INFO] ME(17603,python):2020-04-28-16:56:13.402.702 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
- ...
- [INFO] ME(17603:139620983514944,MainProcess):2020-04-28-16:56:13.431.208 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['data/dev.mindrecord'], and the list of index files are: ['data/dev.mindrecord.db']
- ```
-
-3. Generate files like this:
- ```bash
- $ ls output/
- dev.mindrecord dev.mindrecord.db README.md train.mindrecord train.mindrecord.db
- ```
-
-### Create MindDataset By MindRecord
-
-1. Run the run_read.sh script.
- ```bash
- bash run_read.sh
- ```
-
-2. Output like this:
- ```
- ...
- example 1340: input_ids: [ 101 3173 1290 4852 7676 3949 122 3299 123 126 3189 4510 8020 6381 5442 7357 2590 3636 8021 7676 3949 4294 1166 6121 3124 1277 6121 3124 7270 2135 3295 5789 3326 123 126 3189 1355 6134 1093 1325 3173 2399 6590 6791 8024 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
- example 1340: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
- example 1340: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
- example 1340: label_ids: [ 0 18 19 20 2 4 0 0 0 0 0 0 0 34 36 26 27 28 0 34 35 35 35 35 35 35 35 35 35 36 26 27 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
- example 1341: input_ids: [ 101 1728 711 4293 3868 1168 2190 2150 3791 934 3633 3428 4638 6237 7025 8024 3297 1400 5310 3362 6206 5023 5401 1744 3297 7770 3791 7368 976 1139 1104 2137 511 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
- example 1341: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
- example 1341: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
- example 1341: label_ids: [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 19 19 19 19 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
- ...
- ```
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py b/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py
deleted file mode 100644
index 616bc71028..0000000000
--- a/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""create MindDataset by MindRecord"""
-import mindspore.dataset as ds
-
-def create_dataset(data_file):
- """create MindDataset"""
- num_readers = 4
- data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True)
- index = 0
- for item in data_set.create_dict_iterator():
- # print("example {}: {}".format(index, item))
- print("example {}: input_ids: {}".format(index, item['input_ids']))
- print("example {}: input_mask: {}".format(index, item['input_mask']))
- print("example {}: segment_ids: {}".format(index, item['segment_ids']))
- print("example {}: label_ids: {}".format(index, item['label_ids']))
- index += 1
- if index % 1000 == 0:
- print("read rows: {}".format(index))
- print("total rows: {}".format(index))
-
-if __name__ == '__main__':
- create_dataset('output/train.mindrecord')
- create_dataset('output/dev.mindrecord')
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/data/.gitignore b/example/nlp_to_mindrecord/CLUERNER2020/data/.gitignore
deleted file mode 100644
index cbbd6256c0..0000000000
--- a/example/nlp_to_mindrecord/CLUERNER2020/data/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-cluener_public
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/output/README.md b/example/nlp_to_mindrecord/CLUERNER2020/output/README.md
deleted file mode 100644
index 7904933f43..0000000000
--- a/example/nlp_to_mindrecord/CLUERNER2020/output/README.md
+++ /dev/null
@@ -1 +0,0 @@
-## output dir
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/run.sh b/example/nlp_to_mindrecord/CLUERNER2020/run.sh
deleted file mode 100644
index 15c6aa4362..0000000000
--- a/example/nlp_to_mindrecord/CLUERNER2020/run.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-rm -f output/train.mindrecord*
-rm -f output/dev.mindrecord*
-
-if [ ! -d "../../../third_party/to_mindrecord/CLUERNER2020" ]; then
- echo "The patch base dir ../../../third_party/to_mindrecord/CLUERNER2020 is not exist."
- exit 1
-fi
-
-if [ ! -f "../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch" ]; then
- echo "The patch file ../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch is not exist."
- exit 1
-fi
-
-# patch for data_processor_seq.py
-patch -p0 -d ../../../third_party/to_mindrecord/CLUERNER2020/ -o data_processor_seq_patched.py < ../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch
-if [ $? -ne 0 ]; then
- echo "Patch ../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq.py failed"
- exit 1
-fi
-
-# use patched script
-python ../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq_patched.py \
---vocab_file=../../../third_party/to_mindrecord/CLUERNER2020/vocab.txt \
---label2id_file=../../../third_party/to_mindrecord/CLUERNER2020/label2id.json
diff --git a/example/nlp_to_mindrecord/aclImdb_preprocess/data/README.md b/example/nlp_to_mindrecord/aclImdb_preprocess/data/README.md
deleted file mode 100644
index b54948808e..0000000000
--- a/example/nlp_to_mindrecord/aclImdb_preprocess/data/README.md
+++ /dev/null
@@ -1 +0,0 @@
-## The input dataset
diff --git a/example/nlp_to_mindrecord/enwiki/README.md b/example/nlp_to_mindrecord/enwiki/README.md
deleted file mode 100644
index e92e8dbcc6..0000000000
--- a/example/nlp_to_mindrecord/enwiki/README.md
+++ /dev/null
@@ -1,173 +0,0 @@
-# Guideline to Convert Training Data enwiki to MindRecord For Bert Pre Training
-
-
-
-- [What does the example do](#what-does-the-example-do)
-- [How to use the example to process enwiki](#how-to-use-the-example-to-process-enwiki)
- - [Download enwiki training data](#download-enwiki-training-data)
- - [Process the enwiki](#process-the-enwiki)
- - [Generate MindRecord](#generate-mindrecord)
- - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord)
-
-
-
-
-## What does the example do
-
-This example is based on [enwiki](https://dumps.wikimedia.org/enwiki) training data, generating MindRecord file, and finally used for Bert network training.
-
-1. run.sh: generate MindRecord entry script.
-2. run_read.py: create MindDataset by MindRecord entry script.
- - create_dataset.py: use MindDataset to read MindRecord to generate dataset.
-
-## How to use the example to process enwiki
-
-Download enwiki data, process it, convert it to MindRecord, use MindDataset to read MindRecord.
-
-### Download enwiki training data
-
-> [enwiki dataset download address](https://dumps.wikimedia.org/enwiki) **-> 20200501 -> enwiki-20200501-pages-articles-multistream.xml.bz2**
-
-### Process the enwiki
-
-1. Please follow the steps in [process enwiki](https://github.com/mlperf/training/tree/master/language_model/tensorflow/bert)
-- All permissions of this step belong to the link address website.
-
-### Generate MindRecord
-
-1. Run the run.sh script.
- ```
- bash run.sh input_dir output_dir vocab_file
- ```
- - input_dir: the directory which contains files like 'part-00251-of-00500'.
- - output_dir: which will store the output mindrecord files.
- - vocab_file: the vocab file which you can download from other opensource project.
-
-2. The output like this:
- ```
- ...
- Begin preprocess Wed Jun 10 09:21:23 CST 2020
- Begin preprocess input file: /mnt/data/results/part-00000-of-00500
- Begin output file: part-00000-of-00500.mindrecord
- Total task: 510, processing: 1
- Begin preprocess input file: /mnt/data/results/part-00001-of-00500
- Begin output file: part-00001-of-00500.mindrecord
- Total task: 510, processing: 2
- Begin preprocess input file: /mnt/data/results/part-00002-of-00500
- Begin output file: part-00002-of-00500.mindrecord
- Total task: 510, processing: 3
- Begin preprocess input file: /mnt/data/results/part-00003-of-00500
- Begin output file: part-00003-of-00500.mindrecord
- Total task: 510, processing: 4
- Begin preprocess input file: /mnt/data/results/part-00004-of-00500
- Begin output file: part-00004-of-00500.mindrecord
- Total task: 510, processing: 4
- ...
- ```
-
-3. Generate files like this:
- ```bash
- $ ls {your_output_dir}/
- part-00000-of-00500.mindrecord part-00000-of-00500.mindrecord.db part-00001-of-00500.mindrecord part-00001-of-00500.mindrecord.db part-00002-of-00500.mindrecord part-00002-of-00500.mindrecord.db ...
- ```
-
-### Create MindDataset By MindRecord
-
-1. Run the run_read.sh script.
- ```bash
- bash run_read.sh input_dir
- ```
- - input_dir: the directory which contains mindrecord files.
-
-2. The output like this:
- ```
- ...
- example 633: input_ids: [ 101 2043 19781 4305 2140 4520 2041 1010 103 2034 2455 2002
- 7879 2003 1996 2455 1997 103 26378 4160 1012 102 7291 2001
- 1996 103 1011 2343 1997 6327 1010 3423 1998 103 4262 2005
- 1996 2118 1997 2329 3996 103 102 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0]
- example 633: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
- 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
- example 633: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
- 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
- example 633: masked_lm_positions: [ 8 17 20 25 33 41 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0]
- example 633: masked_lm_ids: [ 1996 16137 1012 3580 2451 1012 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0 0 0 0 0 0 0 0 0
- 0 0 0 0]
- example 633: masked_lm_weights: [1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
- 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
- 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
- 0. 0. 0. 0.]
- example 633: next_sentence_labels: [1]
- ...
- ```
diff --git a/example/nlp_to_mindrecord/enwiki/create_dataset.py b/example/nlp_to_mindrecord/enwiki/create_dataset.py
deleted file mode 100644
index d90d12b7f2..0000000000
--- a/example/nlp_to_mindrecord/enwiki/create_dataset.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""create MindDataset by MindRecord"""
-import argparse
-import mindspore.dataset as ds
-
-def create_dataset(data_file):
- """create MindDataset"""
- num_readers = 4
- data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True)
- index = 0
- for item in data_set.create_dict_iterator():
- # print("example {}: {}".format(index, item))
- print("example {}: input_ids: {}".format(index, item['input_ids']))
- print("example {}: input_mask: {}".format(index, item['input_mask']))
- print("example {}: segment_ids: {}".format(index, item['segment_ids']))
- print("example {}: masked_lm_positions: {}".format(index, item['masked_lm_positions']))
- print("example {}: masked_lm_ids: {}".format(index, item['masked_lm_ids']))
- print("example {}: masked_lm_weights: {}".format(index, item['masked_lm_weights']))
- print("example {}: next_sentence_labels: {}".format(index, item['next_sentence_labels']))
- index += 1
- if index % 1000 == 0:
- print("read rows: {}".format(index))
- print("total rows: {}".format(index))
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser()
- parser.add_argument("-i", "--input_file", nargs='+', type=str, help='Input mindreord file')
- args = parser.parse_args()
-
- create_dataset(args.input_file)
diff --git a/example/nlp_to_mindrecord/enwiki/run.sh b/example/nlp_to_mindrecord/enwiki/run.sh
deleted file mode 100644
index cf66bed0fd..0000000000
--- a/example/nlp_to_mindrecord/enwiki/run.sh
+++ /dev/null
@@ -1,133 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-if [ $# -ne 3 ]; then
- echo "Usage: $0 input_dir output_dir vocab_file"
- exit 1
-fi
-
-if [ ! -d $1 ]; then
- echo "The input dir: $1 is not exist."
- exit 1
-fi
-
-if [ ! -d $2 ]; then
- echo "The output dir: $2 is not exist."
- exit 1
-fi
-rm -fr $2/*.mindrecord*
-
-if [ ! -f $3 ]; then
- echo "The vocab file: $3 is not exist."
- exit 1
-fi
-
-data_dir=$1
-output_dir=$2
-vocab_file=$3
-file_list=()
-output_filename=()
-file_index=0
-
-function getdir() {
- elements=`ls $1`
- for element in ${elements[*]};
- do
- dir_or_file=$1"/"$element
- if [ -d $dir_or_file ];
- then
- getdir $dir_or_file
- else
- file_list[$file_index]=$dir_or_file
- echo "${dir_or_file}" | tr '/' '\n' > dir_file_list.txt # dir dir file to mapfile
- mapfile parent_dir < dir_file_list.txt
- rm dir_file_list.txt >/dev/null 2>&1
- tmp_output_filename=${parent_dir[${#parent_dir[@]}-1]}".mindrecord"
- output_filename[$file_index]=`echo ${tmp_output_filename} | sed 's/ //g'`
- file_index=`expr $file_index + 1`
- fi
- done
-}
-
-getdir "${data_dir}"
-# echo "The input files: "${file_list[@]}
-# echo "The output files: "${output_filename[@]}
-
-if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
- echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
- exit 1
-fi
-
-if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
- echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
- exit 1
-fi
-
-# patch for create_pretraining_data.py
-patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
-if [ $? -ne 0 ]; then
- echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
- exit 1
-fi
-
-# get the cpu core count
-num_cpu_core=`cat /proc/cpuinfo | grep "processor" | wc -l`
-avaiable_core_size=`expr $num_cpu_core / 3 \* 2`
-
-echo "Begin preprocess `date`"
-
-# using patched script to generate mindrecord
-file_list_len=`expr ${#file_list[*]} - 1`
-for index in $(seq 0 $file_list_len); do
- echo "Begin preprocess input file: ${file_list[$index]}"
- echo "Begin output file: ${output_filename[$index]}"
- python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
- --input_file=${file_list[$index]} \
- --output_file=${output_dir}/${output_filename[$index]} \
- --partition_number=1 \
- --vocab_file=${vocab_file} \
- --do_lower_case=True \
- --max_seq_length=512 \
- --max_predictions_per_seq=76 \
- --masked_lm_prob=0.15 \
- --random_seed=12345 \
- --dupe_factor=10 >/tmp/${output_filename[$index]}.log 2>&1 &
- process_count=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
- echo "Total task: ${#file_list[*]}, processing: ${process_count}"
- if [ $process_count -ge $avaiable_core_size ]; then
- while [ 1 ]; do
- process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
- if [ $process_count -gt $process_num ]; then
- process_count=$process_num
- break;
- fi
- sleep 2
- done
- fi
-done
-
-process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
-while [ 1 ]; do
- if [ $process_num -eq 0 ]; then
- break;
- fi
- echo "There are still ${process_num} preprocess running ..."
- sleep 2
- process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
-done
-
-echo "Preprocess all the data success."
-echo "End preprocess `date`"
diff --git a/example/nlp_to_mindrecord/enwiki/run_read.sh b/example/nlp_to_mindrecord/enwiki/run_read.sh
deleted file mode 100644
index 737e9375c4..0000000000
--- a/example/nlp_to_mindrecord/enwiki/run_read.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-if [ $# -ne 1 ]; then
- echo "Usage: $0 input_dir"
- exit 1
-fi
-
-if [ ! -d $1 ]; then
- echo "The input dir: $1 is not exist."
- exit 1
-fi
-
-file_list=()
-file_index=0
-
-# get all the mindrecord file from output dir
-function getdir() {
- elements=`ls $1/part-*.mindrecord`
- for element in ${elements[*]};
- do
- file_list[$file_index]=$element
- file_index=`expr $file_index + 1`
- done
-}
-
-getdir $1
-echo "Get all the mindrecord files: "${file_list[*]}
-
-# create dataset for train
-python create_dataset.py --input_file ${file_list[*]}
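The getdir helper above simply collects the part-*.mindrecord shards and hands them to create_dataset.py; an equivalent sketch in Python, assuming the same output layout (the directory name is illustrative):

```python
import glob
import subprocess

# Collect all MindRecord shards from the output directory, as getdir() does.
files = sorted(glob.glob("output/part-*.mindrecord"))
subprocess.run(["python", "create_dataset.py", "--input_file", *files], check=True)
```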
diff --git a/example/nlp_to_mindrecord/zhwiki/README.md b/example/nlp_to_mindrecord/zhwiki/README.md
deleted file mode 100644
index 1a9de05114..0000000000
--- a/example/nlp_to_mindrecord/zhwiki/README.md
+++ /dev/null
@@ -1,113 +0,0 @@
-# Guideline to Convert zhwiki Training Data to MindRecord for BERT Pre-Training
-
-
-
-- [What does the example do](#what-does-the-example-do)
-- [Run simple test](#run-simple-test)
-- [How to use the example to process zhwiki](#how-to-use-the-example-to-process-zhwiki)
- - [Download zhwiki training data](#download-zhwiki-training-data)
- - [Extract the zhwiki](#extract-the-zhwiki)
- - [Generate MindRecord](#generate-mindrecord)
- - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord)
-
-
-
-
-## What does the example do
-
-This example converts the [zhwiki](https://dumps.wikimedia.org/zhwiki) training data into MindRecord files, which are then used for BERT network training.
-
-1. run.sh: entry script for generating MindRecord files.
-2. run_read.sh: entry script for creating a MindDataset from MindRecord files.
- - create_dataset.py: uses MindDataset to read MindRecord files and build the dataset.
-
-## Run simple test
-
-Follow these steps:
-
-```bash
-bash run_simple.sh # generate output/simple.mindrecord* by ../../../third_party/to_mindrecord/zhwiki/sample_text.txt
-bash run_read_simple.sh # use MindDataset to read output/simple.mindrecord*
-```
-
-## How to use the example to process zhwiki
-
-Download the zhwiki data, extract it, convert it to MindRecord, and then use MindDataset to read it.
-
-### Download zhwiki training data
-
-> [zhwiki dataset download address](https://dumps.wikimedia.org/zhwiki) **-> 20200401 -> zhwiki-20200401-pages-articles-multistream.xml.bz2**
-
-- Put zhwiki-20200401-pages-articles-multistream.xml.bz2 in the {your-mindspore}/example/nlp_to_mindrecord/zhwiki/data directory.
-
-### Extract the zhwiki
-
-1. Download the [wikiextractor](https://github.com/attardi/wikiextractor) script to the {your-mindspore}/example/nlp_to_mindrecord/zhwiki/data directory.
-
- ```
- $ ls data/
- README.md wikiextractor zhwiki-20200401-pages-articles-multistream.xml.bz2
- ```
-
-2. Extract the zhwiki.
- ```bash
- python data/wikiextractor/WikiExtractor.py data/zhwiki-20200401-pages-articles-multistream.xml.bz2 --processes 4 --templates data/template --bytes 8M --min_text_length 0 --filter_disambig_pages --output data/extract
- ```
-
-3. The extracted output looks like this:
- ```
- $ ls data/extract
- AA AB
- ```
-
-### Generate MindRecord
-
-1. Run the run.sh script.
- ```
- bash run.sh
- ```
- > Caution: This process may be slow; please wait patiently. If you do not have a machine with enough memory and CPU cores, it is recommended that you modify the script to generate the MindRecord files step by step.
-
-2. The output looks like this:
- ```
- patching file create_pretraining_data_patched.py (read from create_pretraining_data.py)
- Begin preprocess input file: ./data/extract/AA/wiki_00
- Begin output file: AAwiki_00.mindrecord
- Total task: 5, processing: 1
- Begin preprocess input file: ./data/extract/AA/wiki_01
- Begin output file: AAwiki_01.mindrecord
- Total task: 5, processing: 2
- Begin preprocess input file: ./data/extract/AA/wiki_02
- Begin output file: AAwiki_02.mindrecord
- Total task: 5, processing: 3
- Begin preprocess input file: ./data/extract/AB/wiki_02
- Begin output file: ABwiki_02.mindrecord
- Total task: 5, processing: 4
- ...
- ```
-
-3. The generated files look like this:
- ```bash
- $ ls output/
- AAwiki_00.mindrecord AAwiki_00.mindrecord.db AAwiki_01.mindrecord AAwiki_01.mindrecord.db AAwiki_02.mindrecord AAwiki_02.mindrecord.db ... ABwiki_00.mindrecord ABwiki_00.mindrecord.db ...
- ```
-
-### Create MindDataset By MindRecord
-
-1. Run the run_read.sh script.
- ```bash
- bash run_read.sh
- ```
-
-2. The output looks like this:
- ```
- ...
- example 74: input_ids: [ 101 8168 118 12847 8783 9977 15908 117 8256 9245 11643 8168 8847 8588 11575 8154 8228 143 8384 8376 9197 10241 103 10564 11421 8199 12268 112 161 8228 11541 9586 8436 8174 8363 9864 9702 103 103 119 103 9947 10564 103 8436 8806 11479 103 8912 119 103 103 103 12209 8303 103 8757 8824 117 8256 103 8619 8168 11541 102 11684 8196 103 8228 8847 11523 117 9059 9064 12410 8358 8181 10764 117 11167 11706 9920 148 8332 11390 8936 8205 10951 11997 103 8154 117 103 8670 10467 112 161 10951 13139 12413 117 10288 143 10425 8205 152 10795 8472 8196 103 161 12126 9172 13129 12106 8217 8174 12244 8205 143 103 8461 8277 10628 160 8221 119 102]
- example 74: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
- example 74: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
- example 74: masked_lm_positions: [ 6 22 37 38 40 43 47 50 51 52 55 60 67 76 89 92 98 109 120 0]
- example 74: masked_lm_ids: [ 8118 8165 8329 8890 8554 8458 119 8850 8565 10392 8174 11467 10291 8181 8549 12718 13139 112 158 0]
- example 74: masked_lm_weights: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.]
- example 74: next_sentence_labels: [0]
- ...
- ```
diff --git a/example/nlp_to_mindrecord/zhwiki/create_dataset.py b/example/nlp_to_mindrecord/zhwiki/create_dataset.py
deleted file mode 100644
index d90d12b7f2..0000000000
--- a/example/nlp_to_mindrecord/zhwiki/create_dataset.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""create MindDataset by MindRecord"""
-import argparse
-import mindspore.dataset as ds
-
-def create_dataset(data_file):
- """create MindDataset"""
- num_readers = 4
- data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True)
- index = 0
- for item in data_set.create_dict_iterator():
- # print("example {}: {}".format(index, item))
- print("example {}: input_ids: {}".format(index, item['input_ids']))
- print("example {}: input_mask: {}".format(index, item['input_mask']))
- print("example {}: segment_ids: {}".format(index, item['segment_ids']))
- print("example {}: masked_lm_positions: {}".format(index, item['masked_lm_positions']))
- print("example {}: masked_lm_ids: {}".format(index, item['masked_lm_ids']))
- print("example {}: masked_lm_weights: {}".format(index, item['masked_lm_weights']))
- print("example {}: next_sentence_labels: {}".format(index, item['next_sentence_labels']))
- index += 1
- if index % 1000 == 0:
- print("read rows: {}".format(index))
- print("total rows: {}".format(index))
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser()
- parser.add_argument("-i", "--input_file", nargs='+', type=str, help='Input mindreord file')
- args = parser.parse_args()
-
- create_dataset(args.input_file)
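In an actual pre-training pipeline the records read above would be batched rather than printed; a minimal sketch, where the shard name and column list are illustrative:

```python
import mindspore.dataset as ds

# Read one shard (name illustrative) and batch it for training instead of printing.
data_set = ds.MindDataset(dataset_file=["output/AAwiki_00.mindrecord"],
                          columns_list=["input_ids", "input_mask", "segment_ids"],
                          num_parallel_workers=4, shuffle=True)
data_set = data_set.batch(32, drop_remainder=True)
```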
diff --git a/example/nlp_to_mindrecord/zhwiki/data/.gitignore b/example/nlp_to_mindrecord/zhwiki/data/.gitignore
deleted file mode 100644
index f15cab0c89..0000000000
--- a/example/nlp_to_mindrecord/zhwiki/data/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-wikiextractor/
-zhwiki-20200401-pages-articles-multistream.xml.bz2
-extract/
diff --git a/example/nlp_to_mindrecord/zhwiki/data/README.md b/example/nlp_to_mindrecord/zhwiki/data/README.md
deleted file mode 100644
index b54948808e..0000000000
--- a/example/nlp_to_mindrecord/zhwiki/data/README.md
+++ /dev/null
@@ -1 +0,0 @@
-## The input dataset
diff --git a/example/nlp_to_mindrecord/zhwiki/output/README.md b/example/nlp_to_mindrecord/zhwiki/output/README.md
deleted file mode 100644
index b7cfba1b47..0000000000
--- a/example/nlp_to_mindrecord/zhwiki/output/README.md
+++ /dev/null
@@ -1 +0,0 @@
-## The output MindRecord files
diff --git a/example/nlp_to_mindrecord/zhwiki/run.sh b/example/nlp_to_mindrecord/zhwiki/run.sh
deleted file mode 100644
index a057031e6b..0000000000
--- a/example/nlp_to_mindrecord/zhwiki/run.sh
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-rm -f output/*.mindrecord*
-
-data_dir="./data/extract"
-file_list=()
-output_filename=()
-file_index=0
-
-function getdir() {
- elements=`ls $1`
- for element in ${elements[*]};
- do
- dir_or_file=$1"/"$element
- if [ -d $dir_or_file ];
- then
- getdir $dir_or_file
- else
- file_list[$file_index]=$dir_or_file
- echo "${dir_or_file}" | tr '/' '\n' > dir_file_list.txt # dir dir file to mapfile
- mapfile parent_dir < dir_file_list.txt
- rm dir_file_list.txt >/dev/null 2>&1
- tmp_output_filename=${parent_dir[${#parent_dir[@]}-2]}${parent_dir[${#parent_dir[@]}-1]}".mindrecord"
- output_filename[$file_index]=`echo ${tmp_output_filename} | sed 's/ //g'`
- file_index=`expr $file_index + 1`
- fi
- done
-}
-
-getdir "${data_dir}"
-# echo "The input files: "${file_list[@]}
-# echo "The output files: "${output_filename[@]}
-
-if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
- echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
- exit 1
-fi
-
-if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
- echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
- exit 1
-fi
-
-# patch for create_pretraining_data.py
-patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
-if [ $? -ne 0 ]; then
- echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
- exit 1
-fi
-
-# get the cpu core count
-num_cpu_core=`cat /proc/cpuinfo | grep "processor" | wc -l`
-available_core_size=`expr $num_cpu_core / 3 \* 2`
-
-echo "Begin preprocess `date`"
-
-# using patched script to generate mindrecord
-file_list_len=`expr ${#file_list[*]} - 1`
-for index in $(seq 0 $file_list_len); do
- echo "Begin preprocess input file: ${file_list[$index]}"
- echo "Begin output file: ${output_filename[$index]}"
- python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
- --input_file=${file_list[$index]} \
- --output_file=output/${output_filename[$index]} \
- --partition_number=1 \
- --vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \
- --do_lower_case=True \
- --max_seq_length=128 \
- --max_predictions_per_seq=20 \
- --masked_lm_prob=0.15 \
- --random_seed=12345 \
- --dupe_factor=10 >/tmp/${output_filename[$index]}.log 2>&1 & # user defined
- process_count=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
- echo "Total task: ${#file_list[*]}, processing: ${process_count}"
- if [ $process_count -ge $available_core_size ]; then
- while [ 1 ]; do
- process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
- if [ $process_count -gt $process_num ]; then
- process_count=$process_num
- break;
- fi
- sleep 2
- done
- fi
-done
-
-process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
-while [ 1 ]; do
- if [ $process_num -eq 0 ]; then
- break;
- fi
- echo "There are still ${process_num} preprocess running ..."
- sleep 2
- process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
-done
-
-echo "Preprocess all the data success."
-echo "End preprocess `date`"
diff --git a/example/nlp_to_mindrecord/zhwiki/run_read_simple.sh b/example/nlp_to_mindrecord/zhwiki/run_read_simple.sh
deleted file mode 100644
index 1c26dec449..0000000000
--- a/example/nlp_to_mindrecord/zhwiki/run_read_simple.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-# create dataset for train
-python create_dataset.py --input_file=output/simple.mindrecord0
diff --git a/example/nlp_to_mindrecord/zhwiki/run_simple.sh b/example/nlp_to_mindrecord/zhwiki/run_simple.sh
deleted file mode 100644
index 20c1d98d66..0000000000
--- a/example/nlp_to_mindrecord/zhwiki/run_simple.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-rm -f output/simple.mindrecord*
-
-if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
- echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
- exit 1
-fi
-
-if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
- echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
- exit 1
-fi
-
-# patch for create_pretraining_data.py
-patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
-if [ $? -ne 0 ]; then
- echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
- exit 1
-fi
-
-# using patched script to generate mindrecord
-python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
---input_file=../../../third_party/to_mindrecord/zhwiki/sample_text.txt \
---output_file=output/simple.mindrecord \
---partition_number=4 \
---vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \
---do_lower_case=True \
---max_seq_length=128 \
---max_predictions_per_seq=20 \
---masked_lm_prob=0.15 \
---random_seed=12345 \
---dupe_factor=10 # user defined
diff --git a/example/resnet50_cifar10/README.md b/example/resnet50_cifar10/README.md
deleted file mode 100644
index abb0ba4090..0000000000
--- a/example/resnet50_cifar10/README.md
+++ /dev/null
@@ -1,137 +0,0 @@
-# ResNet-50 Example
-
-## Description
-
-This is an example of training ResNet-50 with the CIFAR-10 dataset in MindSpore.
-
-## Requirements
-
-- Install [MindSpore](https://www.mindspore.cn/install/en).
-
-- Download the dataset CIFAR-10
-
-> Unzip the CIFAR-10 dataset to any path you want; the folder structure should include the train and eval datasets as follows:
-> ```
-> .
-> ├── cifar-10-batches-bin # train dataset
-> └── cifar-10-verify-bin # infer dataset
-> ```
-
-
-## Example structure
-
-```shell
-.
-├── config.py # parameter configuration
-├── dataset.py # data preprocessing
-├── eval.py # infer script
-├── lr_generator.py # generate learning rate for each step
-├── run_distribute_train.sh # launch distributed training (8 pcs)
-├── run_infer.sh # launch inference
-├── run_standalone_train.sh # launch standalone training (1 pc)
-└── train.py # train script
-```
-
-
-## Parameter configuration
-
-Parameters for both training and inference can be set in config.py.
-
-```
-"class_num": 10, # dataset class num
-"batch_size": 32, # batch size of input tensor
-"loss_scale": 1024, # loss scale
-"momentum": 0.9, # momentum
-"weight_decay": 1e-4, # weight decay
-"epoch_size": 90, # only valid for taining, which is always 1 for inference
-"buffer_size": 100, # number of queue size in data preprocessing
-"image_height": 224, # image height
-"image_width": 224, # image width
-"save_checkpoint": True, # whether save checkpoint or not
-"save_checkpoint_steps": 195, # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step
-"keep_checkpoint_max": 10, # only keep the last keep_checkpoint_max checkpoint
-"save_checkpoint_path": "./", # path to save checkpoint
-"warmup_epochs": 5, # number of warmup epoch
-"lr_decay_mode": "poly" # decay mode can be selected in steps, ploy and default
-"lr_init": 0.01, # initial learning rate
-"lr_end": 0.00001, # final learning rate
-"lr_max": 0.1, # maximum learning rate
-```
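These values are typically gathered in config.py as a single attribute-accessible object; a minimal sketch, assuming the easydict package (the example's actual config.py may differ in fields):

```python
from easydict import EasyDict as ed

# Training/inference hyper-parameters for ResNet-50 on CIFAR-10 (illustrative subset).
config = ed({
    "class_num": 10,
    "batch_size": 32,
    "loss_scale": 1024,
    "momentum": 0.9,
    "weight_decay": 1e-4,
    "epoch_size": 90,
    "lr_decay_mode": "poly",
    "lr_init": 0.01,
    "lr_end": 0.00001,
    "lr_max": 0.1,
})
```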
-
-## Running the example
-
-### Train
-
-#### Usage
-
-```
-# distributed training
-Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]
-
-# standalone training
-Usage: sh run_standalone_train.sh [DATASET_PATH]
-```
-
-
-#### Launch
-
-```
-# distributed training example
-sh run_distribute_train.sh rank_table.json ~/cifar-10-batches-bin
-
-# standalone training example
-sh run_standalone_train.sh ~/cifar-10-batches-bin
-```
-
-> About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).
-
-#### Result
-
-Training results will be stored in the example path, in a folder whose name begins with "train" or "train_parallel". There you can find checkpoint files together with results like the following in the log.
-
-```
-# distributed training result (8 pcs)
-epoch: 1 step: 195, loss is 1.9601055
-epoch: 2 step: 195, loss is 1.8555021
-epoch: 3 step: 195, loss is 1.6707983
-epoch: 4 step: 195, loss is 1.8162166
-epoch: 5 step: 195, loss is 1.393667
-```
-
-### Infer
-
-#### Usage
-
-```
-# infer
-Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]
-```
-
-#### Launch
-
-```
-# infer example
-sh run_infer.sh ~/cifar-10-verify-bin ~/resnet50_cifar10/train_parallel0/resnet-90_195.ckpt
-```
-
-> The checkpoint is produced during the training process.
-
-#### Result
-
-Inference results will be stored in the example path, in a folder named "infer". There you can find results like the following in the log.
-
-```
-result: {'acc': 0.91446314102564111} ckpt=~/resnet50_cifar10/train_parallel0/resnet-90_195.ckpt
-```
-
-### Running on GPU
-```
-# distributed training example
-mpirun -n 8 python train.py --dataset_path=~/cifar-10-batches-bin --device_target="GPU" --run_distribute=True
-
-# standalone training example
-python train.py --dataset_path=~/cifar-10-batches-bin --device_target="GPU"
-
-# infer example
-python eval.py --dataset_path=~/cifar-10-verify-bin --device_target="GPU" --checkpoint_path=resnet-90_195.ckpt
-```
\ No newline at end of file
diff --git a/example/resnet50_cifar10/dataset.py b/example/resnet50_cifar10/dataset.py
deleted file mode 100755
index 8a66ec573a..0000000000
--- a/example/resnet50_cifar10/dataset.py
+++ /dev/null
@@ -1,81 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""
-create train or eval dataset.
-"""
-import os
-import mindspore.common.dtype as mstype
-import mindspore.dataset.engine as de
-import mindspore.dataset.transforms.vision.c_transforms as C
-import mindspore.dataset.transforms.c_transforms as C2
-from mindspore.communication.management import init, get_rank, get_group_size
-from config import config
-
-
-def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
- """
- create a train or eval dataset
-
- Args:
- dataset_path(string): the path of dataset.
- do_train(bool): whether dataset is used for train or eval.
- repeat_num(int): the repeat times of dataset. Default: 1
- batch_size(int): the batch size of dataset. Default: 32
- target(str): the device target. Default: Ascend
-
- Returns:
- dataset
- """
- if target == "Ascend":
- device_num = int(os.getenv("DEVICE_NUM"))
- rank_id = int(os.getenv("RANK_ID"))
- else:
- init("nccl")
- rank_id = get_rank()
- device_num = get_group_size()
-
- if device_num == 1:
- ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)
- else:
- ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True,
- num_shards=device_num, shard_id=rank_id)
-
- # define map operations
- trans = []
- if do_train:
- trans += [
- C.RandomCrop((32, 32), (4, 4, 4, 4)),
- C.RandomHorizontalFlip(prob=0.5)
- ]
-
- trans += [
- C.Resize((config.image_height, config.image_width)),
- C.Rescale(1.0 / 255.0, 0.0),
- C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
- C.HWC2CHW()
- ]
-
- type_cast_op = C2.TypeCast(mstype.int32)
-
- ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
- ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
-
- # apply batch operations
- ds = ds.batch(batch_size, drop_remainder=True)
-
- # apply dataset repeat operation
- ds = ds.repeat(repeat_num)
-
- return ds
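A quick single-device smoke test of the function above; the environment variables satisfy the Ascend branch, and the import and dataset path are illustrative:

```python
import os
from dataset import create_dataset  # the module shown above (illustrative import)

os.environ.setdefault("DEVICE_NUM", "1")  # satisfy the single-device Ascend branch
os.environ.setdefault("RANK_ID", "0")

dataset = create_dataset(os.path.expanduser("~/cifar-10-batches-bin"),
                         do_train=True, batch_size=32)
print("steps per epoch:", dataset.get_dataset_size())
```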
diff --git a/example/resnet50_cifar10/eval.py b/example/resnet50_cifar10/eval.py
deleted file mode 100755
index f7d71c8d29..0000000000
--- a/example/resnet50_cifar10/eval.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""
-eval.
-"""
-import os
-import argparse
-from dataset import create_dataset
-from config import config
-from mindspore import context
-from mindspore.model_zoo.resnet import resnet50
-from mindspore.parallel._auto_parallel_context import auto_parallel_context
-from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
-from mindspore.train.model import Model, ParallelMode
-from mindspore.train.serialization import load_checkpoint, load_param_into_net
-from mindspore.communication.management import init, get_group_size
-
-parser = argparse.ArgumentParser(description='Image classification')
-parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
-parser.add_argument('--device_num', type=int, default=1, help='Device num.')
-parser.add_argument('--do_train', type=bool, default=False, help='Do train or not.')
-parser.add_argument('--do_eval', type=bool, default=True, help='Do eval or not.')
-parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
-parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
-parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
-args_opt = parser.parse_args()
-
-if __name__ == '__main__':
- target = args_opt.device_target
- context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False)
- if not args_opt.do_eval and args_opt.run_distribute:
- if target == "Ascend":
- device_id = int(os.getenv('DEVICE_ID'))
- context.set_context(device_id=device_id)
- context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
- mirror_mean=True)
- auto_parallel_context().set_all_reduce_fusion_split_indices([140])
- init()
- elif target == "GPU":
- init("nccl")
- context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
- mirror_mean=True)
-
- epoch_size = config.epoch_size
- net = resnet50(class_num=config.class_num)
- loss = SoftmaxCrossEntropyWithLogits(sparse=True)
-
- if args_opt.do_eval:
- dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size,
- target=target)
- step_size = dataset.get_dataset_size()
-
- if args_opt.checkpoint_path:
- param_dict = load_checkpoint(args_opt.checkpoint_path)
- load_param_into_net(net, param_dict)
- net.set_train(False)
-
- model = Model(net, loss_fn=loss, metrics={'acc'})
- res = model.eval(dataset)
- print("result:", res, "ckpt=", args_opt.checkpoint_path)
diff --git a/example/resnet50_cifar10/run_infer.sh b/example/resnet50_cifar10/run_infer.sh
deleted file mode 100755
index 14d7faf981..0000000000
--- a/example/resnet50_cifar10/run_infer.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-if [ $# != 2 ]
-then
- echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]"
-exit 1
-fi
-
-get_real_path(){
- if [ "${1:0:1}" == "/" ]; then
- echo "$1"
- else
- echo "$(realpath -m $PWD/$1)"
- fi
-}
-
-PATH1=$(get_real_path $1)
-PATH2=$(get_real_path $2)
-
-
-if [ ! -d $PATH1 ]
-then
- echo "error: DATASET_PATH=$1 is not a directory"
-exit 1
-fi
-
-if [ ! -f $PATH2 ]
-then
- echo "error: CHECKPOINT_PATH=$2 is not a file"
-exit 1
-fi
-
-ulimit -u unlimited
-export DEVICE_NUM=1
-export DEVICE_ID=0
-export RANK_SIZE=$DEVICE_NUM
-export RANK_ID=0
-
-if [ -d "infer" ];
-then
- rm -rf ./infer
-fi
-mkdir ./infer
-cp *.py ./infer
-cp *.sh ./infer
-cd ./infer || exit
-env > env.log
-echo "start infering for device $DEVICE_ID"
-python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log &
-cd ..
diff --git a/example/resnet50_cifar10/train.py b/example/resnet50_cifar10/train.py
deleted file mode 100755
index 323695ae29..0000000000
--- a/example/resnet50_cifar10/train.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""train_imagenet."""
-import os
-import argparse
-import numpy as np
-from dataset import create_dataset
-from lr_generator import get_lr
-from config import config
-from mindspore import context
-from mindspore import Tensor
-from mindspore.model_zoo.resnet import resnet50
-from mindspore.parallel._auto_parallel_context import auto_parallel_context
-from mindspore.nn.optim.momentum import Momentum
-from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
-
-from mindspore.train.model import Model, ParallelMode
-
-from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
-from mindspore.train.loss_scale_manager import FixedLossScaleManager
-from mindspore.communication.management import init, get_rank, get_group_size
-
-parser = argparse.ArgumentParser(description='Image classification')
-parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
-parser.add_argument('--device_num', type=int, default=1, help='Device num.')
-parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.')
-parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.')
-parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
-parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
-args_opt = parser.parse_args()
-
-
-if __name__ == '__main__':
- target = args_opt.device_target
- ckpt_save_dir = config.save_checkpoint_path
- context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False)
- np.random.seed(1)
- if not args_opt.do_eval and args_opt.run_distribute:
- if target == "Ascend":
- device_id = int(os.getenv('DEVICE_ID'))
- context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id,
- enable_auto_mixed_precision=True)
- init()
- context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
- mirror_mean=True)
- auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
- ckpt_save_dir = config.save_checkpoint_path
- elif target == "GPU":
- context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=False)
- init("nccl")
- context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
- mirror_mean=True)
- ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"
- epoch_size = config.epoch_size
- net = resnet50(class_num=config.class_num)
-
- if args_opt.do_train:
- dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
- repeat_num=epoch_size, batch_size=config.batch_size, target=target)
- step_size = dataset.get_dataset_size()
-
- loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
- lr = Tensor(get_lr(global_step=0, lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
- warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size,
- lr_decay_mode='poly'))
- opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
- config.weight_decay, config.loss_scale)
- if target == 'GPU':
- loss = SoftmaxCrossEntropyWithLogits(sparse=True, is_grad=False, reduction='mean')
- opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum)
- model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
- else:
- loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
- model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'},
- amp_level="O2", keep_batchnorm_fp32=False)
-
- time_cb = TimeMonitor(data_size=step_size)
- loss_cb = LossMonitor()
- cb = [time_cb, loss_cb]
- if config.save_checkpoint:
- config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs*step_size,
- keep_checkpoint_max=config.keep_checkpoint_max)
- ckpt_cb = ModelCheckpoint(prefix="resnet", directory=ckpt_save_dir, config=config_ck)
- cb += [ckpt_cb]
- model.train(epoch_size, dataset, callbacks=cb)
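lr_generator.py is not shown in this diff; conceptually, the 'poly' mode used above ramps up linearly during warmup and then decays polynomially from lr_max to lr_end. A hedged sketch of that schedule (get_lr's real signature and decay exponent may differ):

```python
import numpy as np

def poly_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """Linear warmup to lr_max, then polynomial (square) decay to lr_end."""
    total = total_epochs * steps_per_epoch
    warmup = warmup_epochs * steps_per_epoch
    lr = np.zeros(total, dtype=np.float32)
    for i in range(total):
        if i < warmup:
            lr[i] = lr_init + (lr_max - lr_init) * (i + 1) / warmup
        else:
            frac = 1.0 - (i - warmup) / max(1, total - warmup)
            lr[i] = lr_end + (lr_max - lr_end) * frac ** 2
    return lr
```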
diff --git a/example/resnet50_imagenet2012/README.md b/example/resnet50_imagenet2012/README.md
deleted file mode 100644
index 6baf863544..0000000000
--- a/example/resnet50_imagenet2012/README.md
+++ /dev/null
@@ -1,150 +0,0 @@
-# ResNet-50 Example
-
-## Description
-
-This is an example of training ResNet-50 with the ImageNet2012 dataset in MindSpore.
-
-## Requirements
-
-- Install [MindSpore](https://www.mindspore.cn/install/en).
-
-- Download the dataset ImageNet2012
-
-> Unzip the ImageNet2012 dataset to any path you want; the folder structure should include the train and eval datasets as follows:
-> ```
-> .
-> ├── ilsvrc # train dataset
-> └── ilsvrc_eval # infer dataset
-> ```
-
-
-## Example structure
-
-```shell
-.
-├── crossentropy.py # CrossEntropy loss function
-├── config.py # parameter configuration
-├── dataset.py # data preprocessing
-├── eval.py # infer script
-├── lr_generator.py # generate learning rate for each step
-├── run_distribute_train.sh # launch distributed training (8 pcs)
-├── run_infer.sh # launch inference
-├── run_standalone_train.sh # launch standalone training (1 pc)
-└── train.py # train script
-```
-
-
-## Parameter configuration
-
-Parameters for both training and inference can be set in config.py.
-
-```
-"class_num": 1001, # dataset class number
-"batch_size": 32, # batch size of input tensor
-"loss_scale": 1024, # loss scale
-"momentum": 0.9, # momentum optimizer
-"weight_decay": 1e-4, # weight decay
-"epoch_size": 90, # only valid for taining, which is always 1 for inference
-"pretrained_epoch_size": 1, # epoch size that model has been trained before load pretrained checkpoint
-"buffer_size": 1000, # number of queue size in data preprocessing
-"image_height": 224, # image height
-"image_width": 224, # image width
-"save_checkpoint": True, # whether save checkpoint or not
-"save_checkpoint_epochs": 1, # the epoch interval between two checkpoints. By default, the last checkpoint will be saved after the last epoch
-"keep_checkpoint_max": 10, # only keep the last keep_checkpoint_max checkpoint
-"save_checkpoint_path": "./", # path to save checkpoint relative to the executed path
-"warmup_epochs": 0, # number of warmup epoch
-"lr_decay_mode": "cosine", # decay mode for generating learning rate
-"label_smooth": True, # label smooth
-"label_smooth_factor": 0.1, # label smooth factor
-"lr_init": 0, # initial learning rate
-"lr_max": 0.1, # maximum learning rate
-```
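crossentropy.py (see the example structure above) implements cross entropy with label smoothing: with smoothing factor ε and K classes, the one-hot target uses on-value 1 − ε and off-value ε/(K − 1). A NumPy sketch of the computation, not the file's exact code:

```python
import numpy as np

def smoothed_cross_entropy(logits, labels, smooth_factor=0.1, num_classes=1001):
    # Smoothed one-hot targets: on-value 1 - eps, off-value eps / (K - 1).
    on_value = 1.0 - smooth_factor
    off_value = smooth_factor / (num_classes - 1)
    targets = np.full((len(labels), num_classes), off_value)
    targets[np.arange(len(labels)), labels] = on_value
    # Numerically stable log-softmax, then cross entropy against the targets.
    z = logits - logits.max(axis=1, keepdims=True)
    log_probs = z - np.log(np.exp(z).sum(axis=1, keepdims=True))
    return -(targets * log_probs).sum(axis=1).mean()
```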
-
-## Running the example
-
-### Train
-
-#### Usage
-
-```
-# distributed training
-Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
-
-# standalone training
-Usage: sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
-
-```
-
-
-#### Launch
-
-```bash
-# distributed training example (8 pcs)
-sh run_distribute_train.sh rank_table_8p.json dataset/ilsvrc
-
-# If you want to load pretrained ckpt file
-sh run_distribute_train.sh rank_table_8p.json dataset/ilsvrc ./pretrained.ckpt
-
-# standalone training example (1 pc)
-sh run_standalone_train.sh dataset/ilsvrc
-
-# If you want to load pretrained ckpt file
-sh run_standalone_train.sh dataset/ilsvrc ./pretrained.ckpt
-```
-
-> About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).
-
-#### Result
-
-Training results will be stored in the example path, in a folder whose name begins with "train" or "train_parallel". There you can find checkpoint files together with results like the following in the log.
-
-```
-# distributed training result (8 pcs)
-epoch: 1 step: 5004, loss is 4.8995576
-epoch: 2 step: 5004, loss is 3.9235563
-epoch: 3 step: 5004, loss is 3.833077
-epoch: 4 step: 5004, loss is 3.2795618
-epoch: 5 step: 5004, loss is 3.1978393
-```
-
-### Infer
-
-#### Usage
-
-```
-# infer
-Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]
-```
-
-#### Launch
-
-```bash
-# infer with checkpoint
-sh run_infer.sh dataset/ilsvrc_eval train_parallel0/resnet-90_5004.ckpt
-```
-
-> The checkpoint is produced during the training process.
-
-#### Result
-
-Inference results will be stored in the example path, in a folder named "infer". There you can find results like the following in the log.
-
-```
-result: {'acc': 0.7671054737516005} ckpt=train_parallel0/resnet-90_5004.ckpt
-```
-
-### Running on GPU
-```
-# distributed training example
-mpirun -n 8 python train.py --dataset_path=dataset/ilsvrc/train --device_target="GPU" --run_distribute=True
-
-# standalone training example
-python train.py --dataset_path=dataset/ilsvrc/train --device_target="GPU"
-
-# standalone training example with pretrained checkpoint
-python train.py --dataset_path=dataset/ilsvrc/train --device_target="GPU" --pre_trained=pretrained.ckpt
-
-# infer example
-python eval.py --dataset_path=dataset/ilsvrc/val --device_target="GPU" --checkpoint_path=resnet-90_5004.ckpt
-```
\ No newline at end of file
diff --git a/example/resnet50_imagenet2012/dataset.py b/example/resnet50_imagenet2012/dataset.py
deleted file mode 100755
index 0691985e0b..0000000000
--- a/example/resnet50_imagenet2012/dataset.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""
-create train or eval dataset.
-"""
-import os
-import mindspore.common.dtype as mstype
-import mindspore.dataset.engine as de
-import mindspore.dataset.transforms.vision.c_transforms as C
-import mindspore.dataset.transforms.c_transforms as C2
-from mindspore.communication.management import init, get_rank, get_group_size
-
-def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
- """
- create a train or eval dataset
-
- Args:
- dataset_path(string): the path of dataset.
- do_train(bool): whether dataset is used for train or eval.
- repeat_num(int): the repeat times of dataset. Default: 1
- batch_size(int): the batch size of dataset. Default: 32
- target(str): the device target. Default: Ascend
-
- Returns:
- dataset
- """
- if target == "Ascend":
- device_num = int(os.getenv("DEVICE_NUM"))
- rank_id = int(os.getenv("RANK_ID"))
- else:
- init("nccl")
- rank_id = get_rank()
- device_num = get_group_size()
-
- if device_num == 1:
- ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
- else:
- ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
- num_shards=device_num, shard_id=rank_id)
-
- image_size = 224
- mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
- std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
-
- # define map operations
- if do_train:
- trans = [
- C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
- C.RandomHorizontalFlip(prob=0.5),
- C.Normalize(mean=mean, std=std),
- C.HWC2CHW()
- ]
- else:
- trans = [
- C.Decode(),
- C.Resize((256, 256)),
- C.CenterCrop(image_size),
- C.Normalize(mean=mean, std=std),
- C.HWC2CHW()
- ]
-
- type_cast_op = C2.TypeCast(mstype.int32)
-
- ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
- ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
-
- # apply batch operations
- ds = ds.batch(batch_size, drop_remainder=True)
-
- # apply dataset repeat operation
- ds = ds.repeat(repeat_num)
-
- return ds
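Note that mean and std above are the usual ImageNet statistics scaled by 255: unlike the CIFAR-10 pipeline there is no Rescale op, so Normalize runs directly on 0–255 pixel values. A quick check of the equivalence:

```python
# Normalizing 0-255 pixels with 255-scaled stats equals
# normalizing 0-1 pixels with the standard ImageNet stats.
pixel = 128.0
assert abs((pixel - 0.485 * 255) / (0.229 * 255)
           - (pixel / 255 - 0.485) / 0.229) < 1e-9
```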
diff --git a/example/resnet50_imagenet2012/eval.py b/example/resnet50_imagenet2012/eval.py
deleted file mode 100755
index 3f7961e786..0000000000
--- a/example/resnet50_imagenet2012/eval.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""
-eval.
-"""
-import os
-import argparse
-from dataset import create_dataset
-from config import config
-from mindspore import context
-from mindspore.model_zoo.resnet import resnet50
-from mindspore.train.model import Model
-from mindspore.train.serialization import load_checkpoint, load_param_into_net
-from crossentropy import CrossEntropy
-
-parser = argparse.ArgumentParser(description='Image classification')
-parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
-parser.add_argument('--device_num', type=int, default=1, help='Device num.')
-parser.add_argument('--do_train', type=bool, default=False, help='Do train or not.')
-parser.add_argument('--do_eval', type=bool, default=True, help='Do eval or not.')
-parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
-parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
-parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
-args_opt = parser.parse_args()
-target = args_opt.device_target
-context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False)
-if target == "Ascend":
- device_id = int(os.getenv('DEVICE_ID'))
- context.set_context(device_id=device_id)
-
-if __name__ == '__main__':
-
- net = resnet50(class_num=config.class_num)
- if not config.use_label_smooth:
- config.label_smooth_factor = 0.0
- loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
-
- if args_opt.do_eval:
- dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size,
- target=target)
- step_size = dataset.get_dataset_size()
-
- if args_opt.checkpoint_path:
- param_dict = load_checkpoint(args_opt.checkpoint_path)
- load_param_into_net(net, param_dict)
- net.set_train(False)
-
- model = Model(net, loss_fn=loss, metrics={'acc'})
- res = model.eval(dataset)
- print("result:", res, "ckpt=", args_opt.checkpoint_path)
diff --git a/example/resnet50_imagenet2012/train.py b/example/resnet50_imagenet2012/train.py
deleted file mode 100755
index 6896320ece..0000000000
--- a/example/resnet50_imagenet2012/train.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""train_imagenet."""
-import os
-import argparse
-import numpy as np
-from dataset import create_dataset
-from lr_generator import get_lr
-from config import config
-from mindspore import context
-from mindspore import Tensor
-from mindspore.model_zoo.resnet import resnet50
-from mindspore.parallel._auto_parallel_context import auto_parallel_context
-from mindspore.nn.optim.momentum import Momentum
-
-from mindspore.train.model import Model, ParallelMode
-
-from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
-from mindspore.train.loss_scale_manager import FixedLossScaleManager
-from mindspore.train.serialization import load_checkpoint, load_param_into_net
-from mindspore.communication.management import init, get_rank, get_group_size
-import mindspore.nn as nn
-import mindspore.common.initializer as weight_init
-from crossentropy import CrossEntropy
-
-parser = argparse.ArgumentParser(description='Image classification')
-parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
-parser.add_argument('--device_num', type=int, default=1, help='Device num.')
-parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.')
-parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.')
-parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
-parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
-parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path')
-args_opt = parser.parse_args()
-
-if __name__ == '__main__':
- target = args_opt.device_target
- ckpt_save_dir = config.save_checkpoint_path
- context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False)
- np.random.seed(1)
- if not args_opt.do_eval and args_opt.run_distribute:
- if target == "Ascend":
- device_id = int(os.getenv('DEVICE_ID'))
- context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id,
- enable_auto_mixed_precision=True)
- init()
- context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
- mirror_mean=True)
- auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
- ckpt_save_dir = config.save_checkpoint_path
- elif target == "GPU":
- context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=False)
- init("nccl")
- context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
- mirror_mean=True)
- ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"
-
- epoch_size = config.epoch_size
- net = resnet50(class_num=config.class_num)
-
- # weight init
- if args_opt.pre_trained:
- param_dict = load_checkpoint(args_opt.pre_trained)
- load_param_into_net(net, param_dict)
- epoch_size = config.epoch_size - config.pretrained_epoch_size
- else:
- for _, cell in net.cells_and_names():
- if isinstance(cell, nn.Conv2d):
- cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
- cell.weight.default_input.shape,
- cell.weight.default_input.dtype).to_tensor()
- if isinstance(cell, nn.Dense):
- cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
- cell.weight.default_input.shape,
- cell.weight.default_input.dtype).to_tensor()
- if not config.use_label_smooth:
- config.label_smooth_factor = 0.0
-
- loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
-
- if args_opt.do_train:
- dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
- repeat_num=epoch_size, batch_size=config.batch_size, target=target)
- step_size = dataset.get_dataset_size()
-
- loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
- lr = get_lr(lr_init=config.lr_init, lr_end=0.0, lr_max=config.lr_max, warmup_epochs=config.warmup_epochs,
- total_epochs=config.epoch_size, steps_per_epoch=step_size, lr_decay_mode='cosine')
- if args_opt.pre_trained:
- lr = lr[config.pretrained_epoch_size * step_size:]
- lr = Tensor(lr)
-
- opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
- config.weight_decay, config.loss_scale)
- if target == "Ascend":
- model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'},
- amp_level="O2", keep_batchnorm_fp32=False)
- elif target == "GPU":
- model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
-
-
- time_cb = TimeMonitor(data_size=step_size)
- loss_cb = LossMonitor()
- cb = [time_cb, loss_cb]
- if config.save_checkpoint:
- config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs*step_size,
- keep_checkpoint_max=config.keep_checkpoint_max)
- ckpt_cb = ModelCheckpoint(prefix="resnet", directory=ckpt_save_dir, config=config_ck)
- cb += [ckpt_cb]
- model.train(epoch_size, dataset, callbacks=cb)
diff --git a/mindspore/_checkparam.py b/mindspore/_checkparam.py
index 880d26bfad..d5ac7c3e33 100644
--- a/mindspore/_checkparam.py
+++ b/mindspore/_checkparam.py
@@ -593,6 +593,17 @@ def check_bool(input_param):
raise TypeError("Input type must be bool!")
+def check_string(input_param, valid_values):
+ """String type judgment."""
+ if isinstance(input_param, str) and input_param in valid_values:
+ return input_param
+ if len(valid_values) == 1:
+ raise ValueError(f'Input should be str and must be {valid_values[0]},'
+ f' but got {input_param}.')
+ raise ValueError(f'Input should be str and must be one of {valid_values},'
+ f' but got {input_param}.')
+
+
def check_input_format(input_param):
"""Judge input format."""
if input_param == "NCHW":
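A usage sketch for the new check_string helper, assuming a MindSpore build that includes this change:

```python
from mindspore._checkparam import check_string

mode = check_string("poly", ["steps", "poly", "default"])  # returns "poly"
try:
    check_string("cosine", ["poly"])
except ValueError as e:
    print(e)  # Input should be str and must be poly, but got cosine.
```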
diff --git a/mindspore/_extends/parse/parser.py b/mindspore/_extends/parse/parser.py
index 2a1c9e0943..a6043eb787 100644
--- a/mindspore/_extends/parse/parser.py
+++ b/mindspore/_extends/parse/parser.py
@@ -19,6 +19,7 @@
import ast
import types
import inspect
+import hashlib
from textwrap import dedent
from dataclasses import is_dataclass
import asttokens
@@ -319,7 +320,6 @@ def get_dataclass_methods(cls):
if isinstance(getattr(cls, name), (types.FunctionType,))}
return methods
-
class Parser:
"""
Parser python code to ast tree.
@@ -327,7 +327,10 @@ class Parser:
Args:
fn(FunctionType/MethodType): Need parse object instance.
parse_method(ExtendInfoOfParseObj): Extend information for parse the function.
+ ast_cache: Dictionary for caching ast tree.
"""
+ ast_cache = {}
+
def __init__(self, fn: (types.FunctionType, types.MethodType), parse_method=None) -> None:
self.fn = fn
self.parse_method = parse_method
@@ -348,11 +351,15 @@ class Parser:
tree = None
if isinstance(self.fn, (types.FunctionType, types.MethodType)):
original_src = inspect.getsource(self.fn)
- src = dedent(original_src)
- self.col_offset = \
- len(original_src.split('\n')[0]) - len(src.split('\n')[0])
- logger.debug("get source = %s", src)
- tree = asttokens.ASTTokens(src, parse=True).tree
+ hexstr = hashlib.sha256(original_src.encode()).hexdigest()
+ tree = Parser.ast_cache.get(hexstr)
+ if not tree:
+ src = dedent(original_src)
+ self.col_offset = \
+ len(original_src.split('\n')[0]) - len(src.split('\n')[0])
+ logger.debug("get source = %s", src)
+ tree = asttokens.ASTTokens(src, parse=True).tree
+ Parser.ast_cache[hexstr] = tree
else:
logger.error("Fn type is invalid")
return tree
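The parser change introduces a class-level AST cache keyed by the SHA-256 of the raw function source, so functions parsed repeatedly skip the asttokens work. The caching idea in isolation, as a standalone sketch:

```python
import ast
import hashlib
from textwrap import dedent

_ast_cache = {}

def parse_cached(source):
    # Key the cache on a hash of the original source, like Parser.ast_cache.
    key = hashlib.sha256(source.encode()).hexdigest()
    tree = _ast_cache.get(key)
    if tree is None:
        tree = ast.parse(dedent(source))
        _ast_cache[key] = tree
    return tree
```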
diff --git a/mindspore/_extends/parse/resources.py b/mindspore/_extends/parse/resources.py
index 2ae8b7172f..e60b70efac 100644
--- a/mindspore/_extends/parse/resources.py
+++ b/mindspore/_extends/parse/resources.py
@@ -17,6 +17,7 @@
"""Resources for ast tree parse."""
import ast
import math
+from mindspore import IndexedSlices
from mindspore.ops.composite import multitype_ops
from mindspore.ops import functional as F, composite as C
from . import standard_method as M
@@ -111,10 +112,11 @@ convert_object_map = {
# system function
T.len: M.ms_len,
T.bool: M.bool_,
- T.map: C.HyperMap(),
+ T.map: C.Map(),
T.partial: F.partial,
T.zip: C.zip_operation,
T.print: F.print_,
+ T.enumerate: M.enumerate_,
# custom define operation
T.iter: M.ms_iter,
@@ -135,4 +137,7 @@ convert_object_map = {
math.sin: NO_IMPLEMENT,
math.cos: NO_IMPLEMENT,
math.tan: NO_IMPLEMENT,
+
+ # user defined
+ IndexedSlices: F.make_indexed_slices,
}
diff --git a/mindspore/_extends/parse/standard_method.py b/mindspore/_extends/parse/standard_method.py
index 0f3f843b63..936099a4fb 100644
--- a/mindspore/_extends/parse/standard_method.py
+++ b/mindspore/_extends/parse/standard_method.py
@@ -104,6 +104,15 @@ def bool_(x):
return x.__bool__()
+def enumerate_(x, start=0):
+ """Enumerate list or tuple."""
+ x_type = F.typeof(x)
+ ret = ()
+ if check_is_tuple_or_list(x_type, "enumerate"):
+ ret = zip(range(start, start + len(x)), x)
+ return ret
+
+
def while_cond(x):
"""For while condtion, if the condition is a tensor, the loop will not be unrolled"""
if F.issubclass_(F.typeof(x), F.typeof(mstype.tensor)):
@@ -113,6 +122,13 @@ def while_cond(x):
return x
+@constexpr
+def check_is_tuple_or_list(x, op_name):
+ """check whether x is list or tuple."""
+ if isinstance(x, (mstype.list_type, mstype.tuple_type)):
+ return True
+ raise TypeError(f"For '{op_name}', the input parameter should be tuple or list, but got {x}.")
+
@constexpr
def check_is_tensor_bool_cond(shp):
"""check if tensor is a bool condition"""
diff --git a/mindspore/_extends/parse/trope.py b/mindspore/_extends/parse/trope.py
index f169c58fb9..28f3196975 100644
--- a/mindspore/_extends/parse/trope.py
+++ b/mindspore/_extends/parse/trope.py
@@ -27,7 +27,7 @@ from operator import ( # noqa
# support system function call
from builtins import ( # noqa
- bool, getattr, setattr, len, iter, next, pow, range, map, zip, print
+ bool, getattr, setattr, len, iter, next, pow, range, map, zip, print, enumerate
)
# support functools
@@ -44,7 +44,7 @@ __all__ = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'mod', 'eq', 'ne', 'lt',
'not_', 'and_', 'or_', 'xor', 'lshift', 'rshift', 'invert', 'is_', 'is_not', 'contains',
'matmul', 'getitem', 'setitem',
'bool', 'getattr', 'setattr', 'len', 'iter', 'next', 'pow', 'range', 'map', 'zip',
- 'partial', 'print',
+ 'partial', 'print', 'enumerate',
'exp', 'log', 'sin', 'cos', 'tan']
diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt
index c435672bde..3f9965c042 100644
--- a/mindspore/ccsrc/CMakeLists.txt
+++ b/mindspore/ccsrc/CMakeLists.txt
@@ -71,6 +71,17 @@ message("onnx proto path is :" ${ONNX_PROTO})
ms_protobuf_generate(ONNX_PROTO_SRCS ONNX_PROTO_HDRS ${ONNX_PROTO})
list(APPEND MINDSPORE_PROTO_LIST ${ONNX_PROTO_SRCS})
+if (ENABLE_DEBUGGER)
+ # debugger: compile proto files
+ include_directories("${CMAKE_BINARY_DIR}/debug/debugger")
+ file(GLOB_RECURSE DEBUGGER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/debug_graph.proto")
+ ms_protobuf_generate(DEBUGGER_PROTO_SRCS DEBUGGER_PROTO_HDRS ${DEBUGGER_PROTO_LIST})
+ file(GLOB_RECURSE DEBUGGER_GRPC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/debug_grpc.proto")
+ ms_grpc_generate(DEBUGGER_GRPC_SRCS DEBUGGER_GRPC_HDRS ${DEBUGGER_GRPC_LIST})
+ list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_PROTO_SRCS})
+ list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_GRPC_SRCS})
+endif ()
+
if (ENABLE_DUMP_PROTO)
include_directories(${CMAKE_BINARY_DIR})
file(GLOB_RECURSE PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "utils/node_strategy.proto")
@@ -125,12 +136,21 @@ endforeach ()
set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
+
+target_link_libraries(proto_input mindspore::protobuf)
+
+if (ENABLE_DEBUGGER)
+ # debugger: link grpc
+ target_link_libraries(proto_input mindspore::grpc++)
+endif()
+
target_link_libraries(mindspore proto_input)
-if (ENABLE_CPU AND ENABLE_MPI)
- target_link_libraries(mindspore securec mindspore::flatbuffers mindspore::ompi)
+if (ENABLE_MPI)
+ target_link_libraries(mindspore securec mindspore::flatbuffers mpi_adapter)
else ()
target_link_libraries(mindspore securec mindspore::flatbuffers)
endif ()
+
if (NOT WIN32)
target_link_libraries(mindspore dl)
endif()
@@ -210,6 +230,10 @@ else ()
target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive)
target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module)
target_link_libraries(_c_expression PRIVATE mindspore_gvar)
+ target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
+ if (${ENABLE_IBVERBS} STREQUAL "ON")
+ target_link_libraries(_c_expression PRIVATE ibverbs rdmacm)
+ endif()
endif ()
if (USE_GLOG)
@@ -217,6 +241,7 @@ if (USE_GLOG)
endif ()
if (ENABLE_DUMP_PROTO)
+ message("add protobuf lib to c_expression")
target_link_libraries(_c_expression PRIVATE mindspore::protobuf)
endif ()
@@ -256,10 +281,11 @@ endif ()
if (USE_GLOG)
target_link_libraries(inference PRIVATE mindspore::glog)
-else()
- if (CMAKE_SYSTEM_NAME MATCHES "Linux")
- target_link_options(inference PRIVATE -Wl,-init,mindspore_log_init)
- elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
- set_target_properties(inference PROPERTIES MACOSX_RPATH ON)
- endif ()
endif()
+
+if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+ target_link_options(inference PRIVATE -Wl,-init,common_log_init)
+elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
+ set_target_properties(inference PROPERTIES MACOSX_RPATH ON)
+endif ()
+
diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.cc b/mindspore/ccsrc/dataset/api/de_pipeline.cc
index ce70476423..78fcdb7dd4 100644
--- a/mindspore/ccsrc/dataset/api/de_pipeline.cc
+++ b/mindspore/ccsrc/dataset/api/de_pipeline.cc
@@ -15,6 +15,7 @@
*/
#include "dataset/api/de_pipeline.h"
+#include
#include
#include