| @@ -1,4 +1,5 @@ | |||||
| file(GLOB_RECURSE _PARALLEL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | file(GLOB_RECURSE _PARALLEL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | ||||
| list(REMOVE_ITEM _PARALLEL_SRC_FILES "ps/util.cc" "ps/scheduler.cc") | |||||
| if (ENABLE_DUMP_PROTO) | if (ENABLE_DUMP_PROTO) | ||||
| list(REMOVE_ITEM _PARALLEL_SRC_FILES "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") | list(REMOVE_ITEM _PARALLEL_SRC_FILES "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") | ||||
| endif () | endif () | ||||
| @@ -0,0 +1,32 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "parallel/ps/scheduler.h" | |||||
| #include <unistd.h> | |||||
| #include "ps/ps.h" | |||||
| namespace mindspore { | |||||
| namespace parallel { | |||||
| namespace ps { | |||||
| void Scheduler::Run() { | |||||
| ::ps::Start(0); | |||||
| while (true) { | |||||
| sleep(1); | |||||
| } | |||||
| } | |||||
| } // namespace ps | |||||
| } // namespace parallel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,40 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_ | |||||
| #define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_ | |||||
| namespace mindspore { | |||||
| namespace parallel { | |||||
| namespace ps { | |||||
// Process-wide singleton exposing the scheduler entry point.
// The only way to obtain an object is GetInstance(); construction, destruction and copying
// are all inaccessible to outside code.
class Scheduler {
 public:
  // Returns the one shared Scheduler; it is a function-local static created on first use.
  static Scheduler &GetInstance() {
    static Scheduler singleton;
    return singleton;
  }

  // Entry point of the scheduler; declared here, defined out of line.
  void Run();

 private:
  // Copying is disabled so that the instance handed out by GetInstance() stays unique.
  Scheduler(const Scheduler &) = delete;
  Scheduler &operator=(const Scheduler &) = delete;

  // Lifetime is managed exclusively by GetInstance().
  Scheduler() = default;
  ~Scheduler() = default;
};
| } // namespace ps | |||||
| } // namespace parallel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_ | |||||
| @@ -0,0 +1,128 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
#include "parallel/ps/util.h"
#include <cmath>
#include <cstring>
#include <string>
#include <unordered_map>
#include "parallel/ps/common.h"
#include "common/utils.h"
| namespace mindspore { | |||||
| namespace parallel { | |||||
| namespace ps { | |||||
| std::unordered_map<std::string, int> Util::optimizer_to_ids{ | |||||
| {kApplyMomentum, 0}, | |||||
| {kSparseAdam, 1}, | |||||
| {kSparseFtrl, 2}, | |||||
| }; | |||||
| std::unordered_map<int, std::string> Util::id_to_optimizers{ | |||||
| {0, kApplyMomentum}, | |||||
| {1, kSparseAdam}, | |||||
| {2, kSparseFtrl}, | |||||
| }; | |||||
| bool Util::IsParamServerMode() { return IsRoleOfWorker() || IsRoleOfPServer() || IsRoleOfScheduler(); } | |||||
| bool Util::IsRoleOfWorker() { | |||||
| auto role = common::GetEnv(kEnvRole); | |||||
| if (strcmp(role.c_str(), kEnvRoleOfWorker) == 0) { | |||||
| return true; | |||||
| } else { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| bool Util::IsRoleOfPServer() { | |||||
| auto role = common::GetEnv(kEnvRole); | |||||
| if (strcmp(role.c_str(), kEnvRoleOfPServer) == 0) { | |||||
| return true; | |||||
| } else { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| bool Util::IsRoleOfScheduler() { | |||||
| auto role = common::GetEnv(kEnvRole); | |||||
| if (strcmp(role.c_str(), kEnvRoleOfScheduler) == 0) { | |||||
| return true; | |||||
| } else { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| void Util::SetInternalEnvVar() { | |||||
| if (IsParamServerMode()) { | |||||
| auto comm_type = common::GetEnv(kEnvCommType); | |||||
| if (comm_type.size() > 0) { | |||||
| (void)common::SetEnv(kDmlcCommType, comm_type.c_str()); | |||||
| } | |||||
| auto interface = common::GetEnv(kEnvInterface); | |||||
| if (interface.size() > 0) { | |||||
| (void)common::SetEnv(kDmlcInterface, interface.c_str()); | |||||
| } | |||||
| auto server_num = common::GetEnv(kEnvPServerNum); | |||||
| if (server_num.size() > 0) { | |||||
| (void)common::SetEnv(kDmlcPServerNum, server_num.c_str()); | |||||
| } | |||||
| auto worker_num = common::GetEnv(kEnvWorkerNum); | |||||
| if (worker_num.size() > 0) { | |||||
| (void)common::SetEnv(kDmlcWorkerNum, worker_num.c_str()); | |||||
| } | |||||
| if (IsRoleOfScheduler()) { | |||||
| (void)common::SetEnv(kDmlcRole, kRoleOfScheduler); | |||||
| } else if (IsRoleOfPServer()) { | |||||
| (void)common::SetEnv(kDmlcRole, kRoleOfPServer); | |||||
| } else if (IsRoleOfWorker()) { | |||||
| (void)common::SetEnv(kDmlcRole, kRoleOfWorker); | |||||
| } | |||||
| auto scheduler_host = common::GetEnv(kEnvSchedulerHost); | |||||
| if (scheduler_host.size() > 0) { | |||||
| (void)common::SetEnv(kDmlcSchedulerHost, scheduler_host.c_str()); | |||||
| } | |||||
| auto scheduler_port = common::GetEnv(kEnvSchedulerPort); | |||||
| if (scheduler_port.size() > 0) { | |||||
| (void)common::SetEnv(kDmlcSchedulerPort, scheduler_port.c_str()); | |||||
| } | |||||
| } | |||||
| } | |||||
| int Util::optimizer_id(std::string name) { | |||||
| if (optimizer_to_ids.count(name) > 0) { | |||||
| return optimizer_to_ids[name]; | |||||
| } | |||||
| return -1; | |||||
| } | |||||
| std::string Util::optimizer_name(int id) { | |||||
| if (id_to_optimizers.count(id) > 0) { | |||||
| return id_to_optimizers[id]; | |||||
| } | |||||
| return ""; | |||||
| } | |||||
| bool Util::is_optimizer(std::string name) { return optimizer_to_ids.count(name) > 0; } | |||||
| int Util::LocalShard(int first_dim, int rank_id, int server_num) { | |||||
| int shard_size = std::round((static_cast<float>(first_dim)) / server_num); | |||||
| int remain_size = first_dim % server_num; | |||||
| if (remain_size == 0 || rank_id < server_num - 1) { | |||||
| return shard_size; | |||||
| } else { | |||||
| return first_dim - (shard_size * (server_num - 1)); | |||||
| } | |||||
| } | |||||
| } // namespace ps | |||||
| } // namespace parallel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,47 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_ | |||||
| #define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_ | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| namespace mindspore { | |||||
| namespace parallel { | |||||
| namespace ps { | |||||
// Collection of static helpers used in parameter-server mode: role queries, environment set-up,
// optimizer name/id translation and shard-size computation. The class only declares static members.
class Util {
 public:
  // --- Role queries -------------------------------------------------------------------------------
  // True when the process acts as worker, parameter server or scheduler respectively.
  static bool IsRoleOfWorker();
  static bool IsRoleOfPServer();
  static bool IsRoleOfScheduler();
  // True when any of the three role queries is true.
  static bool IsParamServerMode();

  // --- Environment --------------------------------------------------------------------------------
  // Prepares internal environment variables for parameter-server mode.
  static void SetInternalEnvVar();

  // --- Optimizer name <-> id translation ----------------------------------------------------------
  // Id for an optimizer name.
  static int optimizer_id(std::string name);
  // Optimizer name for an id.
  static std::string optimizer_name(int id);
  // Whether `name` is a known optimizer.
  static bool is_optimizer(std::string name);

  // --- Sharding -----------------------------------------------------------------------------------
  // Size of the shard of a dimension of extent `first_dim` owned by `rank_id` among `server_num` servers.
  static int LocalShard(int first_dim, int rank_id, int server_num);

 private:
  // Lookup tables backing the optimizer translation helpers; defined out of line.
  static std::unordered_map<std::string, int> optimizer_to_ids;
  static std::unordered_map<int, std::string> id_to_optimizers;
};
| } // namespace ps | |||||
| } // namespace parallel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_ | |||||
| @@ -115,6 +115,8 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/dump_proto.cc") | list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/dump_proto.cc") | ||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ir/lite/tensor.cc") | list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ir/lite/tensor.cc") | ||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") | list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") | ||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/ps/util.cc") | |||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/ps/scheduler.cc") | |||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/anf_ir.pb.cc") | list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/anf_ir.pb.cc") | ||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/node_strategy.pb.cc") | list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/node_strategy.pb.cc") | ||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc") | list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc") | ||||
| @@ -12,16 +12,7 @@ diff -Npur ps-lite-master/include/dmlc/base.h ps-lite-master-new/include/dmlc/ba | |||||
| /*! | /*! | ||||
| diff -Npur ps-lite-master/include/dmlc/logging.h ps-lite-master-new/include/dmlc/logging.h | diff -Npur ps-lite-master/include/dmlc/logging.h ps-lite-master-new/include/dmlc/logging.h | ||||
| --- ps-lite-master/include/dmlc/logging.h 2020-02-29 13:59:55.000000000 +0800 | --- ps-lite-master/include/dmlc/logging.h 2020-02-29 13:59:55.000000000 +0800 | ||||
| +++ ps-lite-master-new/include/dmlc/logging.h 2020-07-01 11:58:00.015919207 +0800 | |||||
| @@ -13,7 +13,7 @@ | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| -#include "./base.h" | |||||
| +//#include "./base.h" | |||||
| #if DMLC_LOG_STACK_TRACE | |||||
| #include <cxxabi.h> | |||||
| +++ ps-lite-master-new/include/dmlc/logging.h 2020-07-08 21:35:33.334584767 +0800 | |||||
| @@ -52,7 +52,7 @@ struct Error : public std::runtime_error | @@ -52,7 +52,7 @@ struct Error : public std::runtime_error | ||||
| namespace dmlc { | namespace dmlc { | ||||