Merge pull request !1708 from chenjianping/host_reduce (tags/v0.5.0-beta)
@@ -86,7 +86,7 @@ checkopts()
     ENABLE_DUMPE2E="off"
     ENABLE_DUMP_IR="on"
     COMPILE_MINDDATA="on"
-    ENABLE_MPI="on"
+    ENABLE_MPI="off"
     CUDA_VERSION="9.2"
     COMPILE_PREDICT="off"
     USE_GLOG="on"
@@ -168,6 +168,7 @@ checkopts()
        if [[ "X$OPTARG" == "Xgpu" ]]; then
            ENABLE_GPU="on"
            ENABLE_CPU="on"
+            ENABLE_MPI="on"
        elif [[ "X$OPTARG" == "Xd" || "X$OPTARG" == "Xascend" ]]; then
            ENABLE_D="on"
            ENABLE_CPU="on"
@@ -26,6 +26,9 @@ include_directories(${Python3_INCLUDE_DIRS})
include_directories(${CMAKE_SOURCE_DIR}/third_party)

if (ENABLE_CPU)
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/mkl_dnn.cmake)
+    if (ENABLE_MPI)
+        include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ompi.cmake)
+    endif()
endif()

if (ENABLE_GPU)
@@ -36,7 +39,6 @@ if (ENABLE_GPU)
    if (ENABLE_MPI)
        include(${CMAKE_SOURCE_DIR}/cmake/external_libs/nccl.cmake)
-        include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ompi.cmake)
    endif()
endif()
@@ -109,19 +109,20 @@ if (ENABLE_CPU)
    )
endif ()

+if (ENABLE_MPI)
+    install(
+        TARGETS _ms_mpi
+        DESTINATION ${INSTALL_BASE_DIR}
+        COMPONENT mindspore
+    )
+endif ()
+
if (ENABLE_GPU)
-    if (ENABLE_MPI)
-        install(
-            TARGETS _ms_mpi
-            DESTINATION ${INSTALL_BASE_DIR}
-            COMPONENT mindspore
-        )
    install(
        TARGETS gpu_collective
        DESTINATION ${INSTALL_LIB_DIR}
        COMPONENT mindspore
    )
-    endif ()
    install(
        TARGETS gpu_queue
        DESTINATION ${INSTALL_LIB_DIR}
@@ -8,6 +8,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Windows")
    add_compile_definitions(BUILDING_DLL)
endif()

+if (ENABLE_MPI)
+    add_compile_definitions(ENABLE_MPI)
+endif ()
+
if(ENABLE_GPU)
    find_package(CUDA REQUIRED)
    find_package(Threads)
@@ -120,7 +124,11 @@ endforeach ()
set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
target_link_libraries(mindspore proto_input)
-target_link_libraries(mindspore securec mindspore::flatbuffers)
+if (ENABLE_CPU AND ENABLE_MPI)
+    target_link_libraries(mindspore securec mindspore::flatbuffers mindspore::ompi)
+else ()
+    target_link_libraries(mindspore securec mindspore::flatbuffers)
+endif ()
if (NOT WIN32)
    target_link_libraries(mindspore dl)
endif()
@@ -14,6 +14,15 @@ endif ()
if (ENABLE_CPU)
    file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cpu/*.cc")
+    if (ENABLE_MPI)
+        # _ms_mpi
+        set_property(SOURCE "gpu/mpi/mpi_initializer.cc"
+            PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
+        pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc")
+        target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi)
+    else ()
+        list(REMOVE_ITEM CPU_SRC_LIST "cpu/mpi/mpi_adapter.cc")
+    endif ()
endif ()

# gpu
@@ -39,11 +48,6 @@ if (ENABLE_GPU)
    set_property(SOURCE ${GPU_COLLECTIVE_SRCS}
        PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
    add_library(gpu_collective SHARED ${GPU_COLLECTIVE_SRCS})
-    # _ms_mpi
-    set_property(SOURCE "gpu/mpi/mpi_initializer.cc"
-        PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
-    pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc")
-    target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi)
    target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl)
endif ()
@@ -15,7 +15,6 @@
 */
#include "device/ascend/ascend_kernel_runtime.h"
#include <string>
#include <vector>
#include <memory>
@@ -24,6 +23,7 @@
#include <algorithm>
#include "device/ascend/ascend_device_address.h"
+#include "device/cpu/mpi/mpi_adapter.h"
#include "utils/context/ms_context.h"
#include "device/ascend/profiling/profiling_manager.h"
#include "hccl/hcom.h"
@@ -510,11 +510,19 @@ bool AscendKernelRuntime::HcclInit() {
    MS_LOG(ERROR) << "file path " << config_path_str << " does not exist";
    return false;
  }
+#ifdef ENABLE_MPI
+  int rank_id = device::cpu::MPIAdapter::Instance().GetRankId();
+  const char *offset = std::getenv("RANK_OFFSET");
+  if (offset != nullptr) {
+    int rank_offset = std::stoi(offset);
+    rank_id += rank_offset;
+  }
+  // Keep the string alive for as long as `identify` is used; calling c_str()
+  // on a temporary std::string would leave a dangling pointer.
+  const std::string rank_id_str = std::to_string(rank_id);
+  const char *identify = rank_id_str.c_str();
+#else
  const char *identify = std::getenv("RANK_ID");
+#endif
  if (identify == nullptr) {
    MS_LOG(ERROR) << "get hccl rank id failed, please set env RANK_ID";
+    free(full_path);
    return false;
  }
  MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << identify;
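
For illustration, the mapping above boils down to the following sketch (a hypothetical helper, not part of this PR); RANK_OFFSET presumably lets one MPI launch be shifted into a higher HCCL rank range:

// Hypothetical sketch: HCCL rank = MPI rank + optional RANK_OFFSET.
#include <cstdlib>
#include <string>

int HcclRankFromMpiRank(int mpi_rank) {
  int rank = mpi_rank;
  if (const char *offset = std::getenv("RANK_OFFSET")) {
    rank += std::stoi(offset);  // throws std::invalid_argument on malformed input
  }
  return rank;
}
// e.g. with mpirun -np 4 and RANK_OFFSET=4, MPI ranks 0..3 map to HCCL ranks 4..7.
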
@@ -0,0 +1,191 @@ new file: device/cpu/mpi/mpi_adapter.cc
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "device/cpu/mpi/mpi_adapter.h"
#include <algorithm>
#include "utils/log_adapter.h"

namespace mindspore {
namespace device {
namespace cpu {
namespace {
MPI_Op GetMpiOp(const std::string &op_type) {
  if (op_type == "sum") {
    return MPI_SUM;
  } else if (op_type == "max") {
    return MPI_MAX;
  } else if (op_type == "min") {
    return MPI_MIN;
  } else if (op_type == "prod") {
    return MPI_PROD;
  }
  MS_LOG(EXCEPTION) << "unsupported op_type: " << op_type;
  return MPI_SUM;
}
}  // namespace
MPIAdapter::MPIAdapter() : rank_id_(0), rank_size_(0), comm_group_world_(MPI_GROUP_NULL) { Init(); }

MPIAdapter::~MPIAdapter() {
  for (auto iter = ranks_group_.begin(); iter != ranks_group_.end(); ++iter) {
    MPI_Group_free(&iter->second);
  }
  if (comm_group_world_ != MPI_GROUP_NULL) {
    MPI_Group_free(&comm_group_world_);
  }
  int finalized;
  MPI_Finalized(&finalized);
  if (finalized == 0) {
    MPI_Finalize();
  }
}

MPIAdapter &MPIAdapter::Instance() {
  static MPIAdapter instance;
  return instance;
}

int MPIAdapter::GetRankId() const { return rank_id_; }
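
// Initialize MPI for this process exactly once; repeated calls return immediately.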
void MPIAdapter::Init() {
  static bool init = false;
  if (init) {
    return;
  }
  int init_flag = 0;
  if (MPI_Initialized(&init_flag) != MPI_SUCCESS) {
    MS_LOG(EXCEPTION) << "Check mpi initialized fail!";
  }
  if (init_flag == 0) {
    auto ret = MPI_Init(nullptr, nullptr);
    if (ret != MPI_SUCCESS) {
      MS_LOG(EXCEPTION) << "Failed to init mpi!";
    }
  }
  MPI_Comm_group(MPI_COMM_WORLD, &comm_group_world_);
  if (comm_group_world_ == MPI_GROUP_NULL) {
    MS_LOG(EXCEPTION) << "comm_group_world_ init fail!";
  }
  auto ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank_id_);
  if (ret != MPI_SUCCESS) {
    MS_LOG(EXCEPTION) << "Failed to init mpi rank id!";
  }
  ret = MPI_Comm_size(MPI_COMM_WORLD, &rank_size_);
  if (ret != MPI_SUCCESS) {
    MS_LOG(EXCEPTION) << "Failed to init mpi rank size! rank id: " << rank_id_;
  }
  init = true;
}
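
// Return the cached MPI group for `ranks`, creating and caching it on first use.
// The calling rank must itself be a member of `ranks`.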
MPI_Group MPIAdapter::AddGroup(const std::vector<int> &ranks) {
  if (ranks.size() > static_cast<size_t>(rank_size_) || ranks.empty()) {
    MS_LOG(EXCEPTION) << "input rank size: " << ranks.size() << ", max rank size: " << rank_size_;
  }
  if (std::find(ranks.begin(), ranks.end(), rank_id_) == ranks.end()) {
    MS_LOG(ERROR) << "rank id: " << rank_id_ << " is not in the input group.";
    return MPI_GROUP_NULL;
  }
  std::lock_guard<std::mutex> lock(group_mutex_);
  auto iter = ranks_group_.find(ranks);
  if (iter != ranks_group_.end()) {
    return iter->second;
  }
  std::vector<int> ranks_input(ranks.begin(), ranks.end());
  MPI_Group group = MPI_GROUP_NULL;
  MPI_Group_incl(comm_group_world_, static_cast<int>(ranks_input.size()), ranks_input.data(), &group);
  if (group == MPI_GROUP_NULL) {
    MS_LOG(EXCEPTION) << "create mpi group fail! rank id: " << rank_id_;
  }
  ranks_group_[ranks] = group;
  MS_LOG(INFO) << "rank: " << rank_id_ << " add group: " << group;
  return group;
}
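
// Element-wise reduce (sum/max/min/prod) across the group, then scatter `data_num`
// results to each rank; `input` must hold data_num * group size floats.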
bool MPIAdapter::ReduceScatter(float *input, float *output, const std::vector<int> &ranks_group, size_t data_num,
                               const std::string &op_type) {
  if (ranks_group.empty()) {
    MS_LOG(ERROR) << "input rank group is empty!";
    return false;
  }
  auto group = AddGroup(ranks_group);
  if (group == MPI_GROUP_NULL) {
    MS_LOG(EXCEPTION) << "Get mpi group fail! rank id: " << rank_id_;
  }
  MPI_Comm comm;
  MPI_Comm_create_group(MPI_COMM_WORLD, group, 0, &comm);
  if (comm == MPI_COMM_NULL) {
    MS_LOG(EXCEPTION) << "create mpi comm fail! rank id: " << rank_id_;
  }
  std::vector<int> receive_count(ranks_group.size(), static_cast<int>(data_num));
  auto op = GetMpiOp(op_type);
  auto ret = MPI_Reduce_scatter(input, output, receive_count.data(), MPI_FLOAT, op, comm);
  bool result = true;
  if (ret != MPI_SUCCESS) {
    MS_LOG(ERROR) << "mpi reduce_scatter fail! ret = " << ret << ", rank id: " << rank_id_;
    result = false;
  }
  ret = MPI_Comm_free(&comm);
  if (ret != MPI_SUCCESS) {
    MS_LOG(WARNING) << "mpi comm free fail! ret = " << ret << ", rank id: " << rank_id_;
  }
  return result;
}
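
// Gather `data_num` floats from every rank in the group; `output` must hold
// data_num * group size floats.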
bool MPIAdapter::AllGather(float *input, float *output, const std::vector<int> &ranks_group, size_t data_num) {
  if (ranks_group.empty()) {
    MS_LOG(ERROR) << "input rank group is empty!";
    return false;
  }
  auto group = AddGroup(ranks_group);
  if (group == MPI_GROUP_NULL) {
    MS_LOG(EXCEPTION) << "Get mpi group fail! rank id: " << rank_id_;
  }
  MPI_Comm comm;
  MPI_Comm_create_group(MPI_COMM_WORLD, group, 0, &comm);
  if (comm == MPI_COMM_NULL) {
    MS_LOG(EXCEPTION) << "create mpi comm fail! rank id: " << rank_id_;
  }
  auto ret = MPI_Allgather(input, static_cast<int>(data_num), MPI_FLOAT, output, static_cast<int>(data_num),
                           MPI_FLOAT, comm);
  bool result = true;
  if (ret != MPI_SUCCESS) {
    MS_LOG(ERROR) << "mpi allgather fail! ret = " << ret << ", rank id: " << rank_id_;
    result = false;
  }
  ret = MPI_Comm_free(&comm);
  if (ret != MPI_SUCCESS) {
    MS_LOG(WARNING) << "mpi comm free fail! ret = " << ret << ", rank id: " << rank_id_;
  }
  return result;
}
}  // namespace cpu
}  // namespace device
}  // namespace mindspore
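
A minimal usage sketch of the adapter (hypothetical demo, not part of this PR), assuming an ENABLE_MPI build and a launch such as `mpirun -np 3 ./demo`:

#include <vector>
#include "device/cpu/mpi/mpi_adapter.h"

int main() {
  auto &adapter = mindspore::device::cpu::MPIAdapter::Instance();
  const std::vector<int> group = {0, 1, 2};  // all three ranks participate
  std::vector<float> input(12, 0.1f);        // each rank contributes 12 floats
  std::vector<float> output(4, 0.0f);        // 12 / 3 ranks = 4 floats per rank
  // Sum-reduce the 12 inputs element-wise across ranks, then scatter 4 results
  // to each rank: every output element becomes 0.1 * 3 = 0.3.
  adapter.ReduceScatter(input.data(), output.data(), group, output.size());
  return 0;
}
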
@@ -0,0 +1,55 @@ new file: device/cpu/mpi/mpi_adapter.h
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_DEVICE_CPU_MPI_MPI_ADAPTER_H_
#define MINDSPORE_CCSRC_DEVICE_CPU_MPI_MPI_ADAPTER_H_
#ifdef ENABLE_MPI
#include <mpi.h>
#include <vector>
#include <map>
#include <string>
#include <mutex>

namespace mindspore {
namespace device {
namespace cpu {
constexpr auto kOpTypeSum = "sum";

class MPIAdapter {
 public:
  ~MPIAdapter();
  static MPIAdapter &Instance();
  int GetRankId() const;
  bool ReduceScatter(float *input, float *output, const std::vector<int> &ranks_group, size_t data_num,
                     const std::string &op_type = kOpTypeSum);
  bool AllGather(float *input, float *output, const std::vector<int> &ranks_group, size_t data_num);

 private:
  MPIAdapter();
  void Init();
  MPI_Group AddGroup(const std::vector<int> &ranks);

  int rank_id_;
  int rank_size_;
  MPI_Group comm_group_world_;
  // key: ranks group, value: mpi group
  std::map<std::vector<int>, MPI_Group> ranks_group_;
  std::mutex group_mutex_;
};
}  // namespace cpu
}  // namespace device
}  // namespace mindspore
#endif  // ENABLE_MPI
#endif  // MINDSPORE_CCSRC_DEVICE_CPU_MPI_MPI_ADAPTER_H_
@@ -20,7 +20,6 @@
namespace mindspore {
namespace device {
namespace gpu {
class CollectiveFakeInitializer {
 public:
  CollectiveFakeInitializer() = default;
@@ -24,10 +24,28 @@ namespace mindspore {
namespace device {
namespace gpu {
MPIInitializer::MPIInitializer() {
+  int init_flag = 0;
+  if (MPI_Initialized(&init_flag) != MPI_SUCCESS) {
+    return;
+  }
+  if (init_flag == 0) {
+    auto ret = MPI_Init(nullptr, nullptr);
+    if (ret != MPI_SUCCESS) {
+      return;
+    }
+  }
  MPI_Comm_rank(MPI_COMM_WORLD, &rank_id_);
  MPI_Comm_size(MPI_COMM_WORLD, &rank_size_);
}
+
+MPIInitializer::~MPIInitializer() {
+  int finalized_flag = 0;
+  (void)MPI_Finalized(&finalized_flag);
+  if (finalized_flag == 0) {
+    (void)MPI_Finalize();
+  }
+}
+
MPIInitializer &MPIInitializer::GetInstance() {
  static MPIInitializer instance;
  return instance;
@@ -30,7 +30,7 @@ class MPIInitializer {
 private:
  MPIInitializer();
-  ~MPIInitializer() = default;
+  ~MPIInitializer();
  int rank_id_;
  int rank_size_;
@@ -21,6 +21,11 @@ if (ENABLE_CPU)
    file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
        "cpu/*.cc"
    )
+    if (NOT ENABLE_MPI)
+        list(REMOVE_ITEM CPU_SRC_LIST "cpu/allgather_cpu_kernel.cc")
+        list(REMOVE_ITEM CPU_SRC_LIST "cpu/reduce_scatter_cpu_kernel.cc")
+    endif ()
endif ()

if (ENABLE_GPU)
@@ -0,0 +1,62 @@ new file: kernel/cpu/allgather_cpu_kernel.cc
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "kernel/cpu/allgather_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "device/cpu/mpi/mpi_adapter.h"
#include "ir/primitive.h"
#include "utils/log_adapter.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr auto kRanksGroup = "group";
constexpr auto kAllGatherInputNum = 1;
}  // namespace

AllGatherCPUKernel::AllGatherCPUKernel() : input_data_number_(0) {}
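
// Count the float elements across all inputs; this is the number of elements
// this rank contributes to the gather.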
void AllGatherCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != kAllGatherInputNum) {
    MS_LOG(EXCEPTION) << "allgather expects " << kAllGatherInputNum << " input, but got " << input_num;
  }
  for (size_t i = 0; i < input_num; ++i) {
    auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
    size_t count = 1;
    for (size_t j = 0; j < shape.size(); j++) {
      count *= IntToSize(shape[j]);
    }
    input_data_number_ += count;
  }

  auto ranks_group = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kRanksGroup);
  if (ranks_group != nullptr) {
    ranks_group_ = GetValue<std::vector<int>>(ranks_group);
  } else {
    MS_LOG(EXCEPTION) << "Miss attribute " << kRanksGroup;
  }
}

bool AllGatherCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                const std::vector<kernel::AddressPtr> & /*workspace*/,
                                const std::vector<kernel::AddressPtr> &outputs) {
  auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
  auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
  return device::cpu::MPIAdapter::Instance().AllGather(input_addr, output_addr, ranks_group_, input_data_number_);
}
}  // namespace kernel
}  // namespace mindspore
@@ -0,0 +1,45 @@ new file: kernel/cpu/allgather_cpu_kernel.h
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_ALLGATHER_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_ALLGATHER_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class AllGatherCPUKernel : public CPUKernel {
 public:
  AllGatherCPUKernel();
  ~AllGatherCPUKernel() override = default;

  void InitKernel(const CNodePtr &kernel_node) override;
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;

 private:
  size_t input_data_number_;
  std::vector<int> ranks_group_;
};

MS_REG_CPU_KERNEL(HostAllGather, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                  AllGatherCPUKernel);
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_KERNEL_CPU_ALLGATHER_CPU_KERNEL_H_
@@ -0,0 +1,62 @@ new file: kernel/cpu/reduce_scatter_cpu_kernel.cc
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "kernel/cpu/reduce_scatter_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "device/cpu/mpi/mpi_adapter.h"
#include "ir/primitive.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr auto kRanksGroup = "group";
}  // namespace

ReduceScatterCPUKernel::ReduceScatterCPUKernel() : output_data_number_(0), op_type_(device::cpu::kOpTypeSum) {}
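
// Count the float elements across all outputs; HostReduceScatter leaves this
// many reduced elements on each rank.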
void ReduceScatterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  for (size_t i = 0; i < output_num; ++i) {
    auto shape = AnfAlgo::GetOutputInferShape(kernel_node, i);
    size_t size = 1;
    for (size_t j = 0; j < shape.size(); j++) {
      size *= IntToSize(shape[j]);
    }
    output_data_number_ += size;
  }

  auto op = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("op");
  if (op != nullptr) {
    op_type_ = GetValue<std::string>(op);
  }

  auto ranks_group = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kRanksGroup);
  if (ranks_group != nullptr) {
    ranks_group_ = GetValue<std::vector<int>>(ranks_group);
  } else {
    MS_LOG(EXCEPTION) << "Miss attribute " << kRanksGroup;
  }
}

bool ReduceScatterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                    const std::vector<kernel::AddressPtr> & /*workspace*/,
                                    const std::vector<kernel::AddressPtr> &outputs) {
  auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
  auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
  return device::cpu::MPIAdapter::Instance().ReduceScatter(input_addr, output_addr, ranks_group_, output_data_number_,
                                                           op_type_);
}
}  // namespace kernel
}  // namespace mindspore
@@ -0,0 +1,46 @@ new file: kernel/cpu/reduce_scatter_cpu_kernel.h
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
#include <vector>
#include <string>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class ReduceScatterCPUKernel : public CPUKernel {
 public:
  ReduceScatterCPUKernel();
  ~ReduceScatterCPUKernel() override = default;

  void InitKernel(const CNodePtr &kernel_node) override;
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;

 private:
  size_t output_data_number_;
  std::string op_type_;
  std::vector<int> ranks_group_;
};

MS_REG_CPU_KERNEL(HostReduceScatter, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                  ReduceScatterCPUKernel);
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
@@ -0,0 +1,70 @@ new file: test_reduce_scatter.py
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common import dtype as mstype
from mindspore.ops import operations as P
import mindspore._ms_mpi as mpi

# run command:
# mpirun -np 3 python test_reduce_scatter.py
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')


class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.op = "sum"
        self.reducescatter = P.HostReduceScatter(op=self.op, group=[0, 1, 2])

    def construct(self, x):
        return self.reducescatter(x)


class AllGatherNet(nn.Cell):
    def __init__(self):
        super(AllGatherNet, self).__init__()
        self.hostallgather = P.HostAllGather(group=(0, 1, 2))

    def construct(self, x):
        return self.hostallgather(x)


def test_net_reduce_scatter():
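    # 3 ranks each contribute 12 floats of 0.1: the sum reduce-scatter leaves every
    # rank with 4 floats of 0.3, and the allgather then rebuilds 12 floats of 0.3.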
    x = np.ones(12).astype(np.float32) * 0.1
    reducescatter = Net()
    rankid = mpi.get_rank_id()
    print("self rankid:", rankid)
    output = reducescatter(Tensor(x, mstype.float32))
    print("output:\n", output)
    expect_result = np.ones(4).astype(np.float32) * 0.3
    diff = abs(output.asnumpy() - expect_result)
    error = np.ones(shape=expect_result.shape) * 1.0e-6
    assert np.all(diff < error)

    allgather = AllGatherNet()
    allgather_output = allgather(output)
    print("allgather result:\n", allgather_output)
    expect_allgather_result = np.ones(12).astype(np.float32) * 0.3
    diff = abs(allgather_output.asnumpy() - expect_allgather_result)
    error = np.ones(shape=expect_allgather_result.shape) * 1.0e-6
    assert np.all(diff < error)


if __name__ == '__main__':
    test_net_reduce_scatter()