| @@ -1,4 +1,5 @@ | |||
# Gather every .cc file below this directory; entries are stored relative to it.
file(GLOB_RECURSE _PARALLEL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")

# Drop the parameter-server helper sources from this source list.
list(REMOVE_ITEM _PARALLEL_SRC_FILES "ps/util.cc")
list(REMOVE_ITEM _PARALLEL_SRC_FILES "ps/scheduler.cc")

# NOTE(review): the entries above are written relative to this directory ("ps/..."),
# while the path below carries a "parallel/" prefix -- confirm that it really matches
# an element of the globbed list.
if (ENABLE_DUMP_PROTO)
    list(REMOVE_ITEM _PARALLEL_SRC_FILES "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
endif ()
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "parallel/ps/scheduler.h" | |||
| #include <unistd.h> | |||
| #include "ps/ps.h" | |||
| namespace mindspore { | |||
| namespace parallel { | |||
| namespace ps { | |||
| void Scheduler::Run() { | |||
| ::ps::Start(0); | |||
| while (true) { | |||
| sleep(1); | |||
| } | |||
| } | |||
| } // namespace ps | |||
| } // namespace parallel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,40 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_ | |||
| namespace mindspore { | |||
| namespace parallel { | |||
| namespace ps { | |||
// Singleton wrapper around the scheduler entry point.
// The only way to obtain the object is GetInstance(); it cannot be copied or assigned.
class Scheduler {
 public:
  // Copying is disabled so that exactly one instance exists.
  Scheduler(const Scheduler &) = delete;
  Scheduler &operator=(const Scheduler &) = delete;

  // Returns the single instance, created on first use.
  static Scheduler &GetInstance() {
    static Scheduler singleton;
    return singleton;
  }

  // Runs the scheduler; defined in scheduler.cc.
  void Run();

 private:
  // Construction and destruction are reserved for GetInstance().
  Scheduler() = default;
  ~Scheduler() = default;
};
| } // namespace ps | |||
| } // namespace parallel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_ | |||
| @@ -0,0 +1,128 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "parallel/ps/util.h" | |||
| #include <unordered_map> | |||
| #include "parallel/ps/common.h" | |||
| #include "common/utils.h" | |||
| namespace mindspore { | |||
| namespace parallel { | |||
| namespace ps { | |||
| std::unordered_map<std::string, int> Util::optimizer_to_ids{ | |||
| {kApplyMomentum, 0}, | |||
| {kSparseAdam, 1}, | |||
| {kSparseFtrl, 2}, | |||
| }; | |||
| std::unordered_map<int, std::string> Util::id_to_optimizers{ | |||
| {0, kApplyMomentum}, | |||
| {1, kSparseAdam}, | |||
| {2, kSparseFtrl}, | |||
| }; | |||
| bool Util::IsParamServerMode() { return IsRoleOfWorker() || IsRoleOfPServer() || IsRoleOfScheduler(); } | |||
| bool Util::IsRoleOfWorker() { | |||
| auto role = common::GetEnv(kEnvRole); | |||
| if (strcmp(role.c_str(), kEnvRoleOfWorker) == 0) { | |||
| return true; | |||
| } else { | |||
| return false; | |||
| } | |||
| } | |||
| bool Util::IsRoleOfPServer() { | |||
| auto role = common::GetEnv(kEnvRole); | |||
| if (strcmp(role.c_str(), kEnvRoleOfPServer) == 0) { | |||
| return true; | |||
| } else { | |||
| return false; | |||
| } | |||
| } | |||
| bool Util::IsRoleOfScheduler() { | |||
| auto role = common::GetEnv(kEnvRole); | |||
| if (strcmp(role.c_str(), kEnvRoleOfScheduler) == 0) { | |||
| return true; | |||
| } else { | |||
| return false; | |||
| } | |||
| } | |||
| void Util::SetInternalEnvVar() { | |||
| if (IsParamServerMode()) { | |||
| auto comm_type = common::GetEnv(kEnvCommType); | |||
| if (comm_type.size() > 0) { | |||
| (void)common::SetEnv(kDmlcCommType, comm_type.c_str()); | |||
| } | |||
| auto interface = common::GetEnv(kEnvInterface); | |||
| if (interface.size() > 0) { | |||
| (void)common::SetEnv(kDmlcInterface, interface.c_str()); | |||
| } | |||
| auto server_num = common::GetEnv(kEnvPServerNum); | |||
| if (server_num.size() > 0) { | |||
| (void)common::SetEnv(kDmlcPServerNum, server_num.c_str()); | |||
| } | |||
| auto worker_num = common::GetEnv(kEnvWorkerNum); | |||
| if (worker_num.size() > 0) { | |||
| (void)common::SetEnv(kDmlcWorkerNum, worker_num.c_str()); | |||
| } | |||
| if (IsRoleOfScheduler()) { | |||
| (void)common::SetEnv(kDmlcRole, kRoleOfScheduler); | |||
| } else if (IsRoleOfPServer()) { | |||
| (void)common::SetEnv(kDmlcRole, kRoleOfPServer); | |||
| } else if (IsRoleOfWorker()) { | |||
| (void)common::SetEnv(kDmlcRole, kRoleOfWorker); | |||
| } | |||
| auto scheduler_host = common::GetEnv(kEnvSchedulerHost); | |||
| if (scheduler_host.size() > 0) { | |||
| (void)common::SetEnv(kDmlcSchedulerHost, scheduler_host.c_str()); | |||
| } | |||
| auto scheduler_port = common::GetEnv(kEnvSchedulerPort); | |||
| if (scheduler_port.size() > 0) { | |||
| (void)common::SetEnv(kDmlcSchedulerPort, scheduler_port.c_str()); | |||
| } | |||
| } | |||
| } | |||
| int Util::optimizer_id(std::string name) { | |||
| if (optimizer_to_ids.count(name) > 0) { | |||
| return optimizer_to_ids[name]; | |||
| } | |||
| return -1; | |||
| } | |||
| std::string Util::optimizer_name(int id) { | |||
| if (id_to_optimizers.count(id) > 0) { | |||
| return id_to_optimizers[id]; | |||
| } | |||
| return ""; | |||
| } | |||
| bool Util::is_optimizer(std::string name) { return optimizer_to_ids.count(name) > 0; } | |||
| int Util::LocalShard(int first_dim, int rank_id, int server_num) { | |||
| int shard_size = std::round((static_cast<float>(first_dim)) / server_num); | |||
| int remain_size = first_dim % server_num; | |||
| if (remain_size == 0 || rank_id < server_num - 1) { | |||
| return shard_size; | |||
| } else { | |||
| return first_dim - (shard_size * (server_num - 1)); | |||
| } | |||
| } | |||
| } // namespace ps | |||
| } // namespace parallel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,47 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_ | |||
| #include <map> | |||
| #include <string> | |||
| #include <unordered_map> | |||
| #include "session/anf_runtime_algorithm.h" | |||
| namespace mindspore { | |||
| namespace parallel { | |||
| namespace ps { | |||
// Static helpers for parameter-server mode: role detection from the environment,
// optimizer name/id lookup and shard-size computation.
class Util {
 public:
  // --- Role detection ---
  // True when the process is a worker, a parameter server or a scheduler.
  static bool IsParamServerMode();
  static bool IsRoleOfWorker();
  static bool IsRoleOfPServer();
  static bool IsRoleOfScheduler();

  // Forwards the parameter-server environment settings to the internal variables.
  static void SetInternalEnvVar();

  // --- Optimizer registry ---
  // Id for an optimizer name; -1 when the name is not registered.
  static int optimizer_id(std::string name);
  // Name for an optimizer id; empty string when the id is not registered.
  static std::string optimizer_name(int id);
  // True when `name` is a registered optimizer.
  static bool is_optimizer(std::string name);

  // --- Sharding ---
  // Number of the `first_dim` rows assigned to server `rank_id` out of `server_num` servers.
  static int LocalShard(int first_dim, int rank_id, int server_num);

 private:
  // Two lookup tables that are inverses of each other.
  static std::unordered_map<std::string, int> optimizer_to_ids;
  static std::unordered_map<int, std::string> id_to_optimizers;
};
| } // namespace ps | |||
| } // namespace parallel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_ | |||
| @@ -115,6 +115,8 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
# Sources removed from MINDSPORE_SRC_LIST, listed in a single REMOVE_ITEM call.
list(REMOVE_ITEM MINDSPORE_SRC_LIST
    "../../../mindspore/ccsrc/debug/dump_proto.cc"
    "../../../mindspore/ccsrc/ir/lite/tensor.cc"
    "../../../mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc"
    "../../../mindspore/ccsrc/parallel/ps/util.cc"
    "../../../mindspore/ccsrc/parallel/ps/scheduler.cc"
    "../../../mindspore/ccsrc/utils/anf_ir.pb.cc"
    "../../../mindspore/ccsrc/utils/node_strategy.pb.cc"
    "../../../mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc")
| @@ -12,16 +12,7 @@ diff -Npur ps-lite-master/include/dmlc/base.h ps-lite-master-new/include/dmlc/ba | |||
| /*! | |||
| diff -Npur ps-lite-master/include/dmlc/logging.h ps-lite-master-new/include/dmlc/logging.h | |||
| --- ps-lite-master/include/dmlc/logging.h 2020-02-29 13:59:55.000000000 +0800 | |||
| +++ ps-lite-master-new/include/dmlc/logging.h 2020-07-01 11:58:00.015919207 +0800 | |||
| @@ -13,7 +13,7 @@ | |||
| #include <string> | |||
| #include <vector> | |||
| #include <memory> | |||
| -#include "./base.h" | |||
| +//#include "./base.h" | |||
| #if DMLC_LOG_STACK_TRACE | |||
| #include <cxxabi.h> | |||
| +++ ps-lite-master-new/include/dmlc/logging.h 2020-07-08 21:35:33.334584767 +0800 | |||
| @@ -52,7 +52,7 @@ struct Error : public std::runtime_error | |||
| namespace dmlc { | |||