You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

parameter_aggregator.h 6.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_PS_SERVER_PARAMETER_AGGREGATOR_H_
  17. #define MINDSPORE_CCSRC_PS_SERVER_PARAMETER_AGGREGATOR_H_
  18. #include <map>
  19. #include <memory>
  20. #include <string>
  21. #include <vector>
  22. #include <utility>
  23. #include "ps/server/common.h"
  24. #include "ps/server/memory_register.h"
  25. #include "ps/server/kernel/aggregation_kernel_factory.h"
  26. #include "ps/server/kernel/optimizer_kernel_factory.h"
  27. namespace mindspore {
  28. namespace ps {
  29. namespace server {
  30. // Encapsulate the parameters for a kernel into a struct to make it convenient for ParameterAggregator to launch server
  31. // kernels.
  32. typedef struct {
  33. std::vector<AddressPtr> inputs;
  34. std::vector<AddressPtr> workspace;
  35. std::vector<AddressPtr> outputs;
  36. } KernelParams;
  37. // ParameterAggregator includes methods for aggregating gradients and optimizing weights(launching aggregation and
  38. // optimizer kernels), getting weights, etc. It's not thread-safe, which means the caller must acquire lock before
  39. // calling ParameterAggregator methods concurrently.
  40. // Each ParameterAggregator is corresponding to one weight for now.
  41. // ParameterAggregator is stateful because the process of aggregation and optimizing could be stateful.
  42. // For example, the finite-state machine for the ParameterAggregator in parameter server training mode is below:
  43. // Initial->Aggregating->Aggregation done->Optimizing->Optimizing done->Pulling->Pull done->Initial.
  44. class ParameterAggregator {
  45. public:
  46. ParameterAggregator()
  47. : server_mode_(ServerMode::PARAMETER_SERVER),
  48. required_push_count_(0),
  49. required_pull_count_(0),
  50. current_pull_count_(0),
  51. aggregation_done_(false),
  52. optimizing_done_(false),
  53. pulling_done_(true),
  54. memory_register_(nullptr) {}
  55. ~ParameterAggregator() = default;
  56. // Initialize ParameterAggregator with a cnode. This cnode is normally a optimizer kernel for now.
  57. // The parameter threshold_count helps ParameterAggregator to judge the current status if it's stateful.
  58. bool Init(const CNodePtr &cnode, size_t threshold_count = 0);
  59. // Update old data stored in ParameterAggregator with new data.
  60. // The data could have many meanings: weights, gradients, learning_rate, momentum, etc.
  61. bool UpdateData(const std::map<std::string, Address> &new_data);
  62. // Launch aggregators/optimizers of this ParameterAggregator in order.
  63. bool LaunchAggregators();
  64. bool LaunchOptimizers();
  65. // The implementation for primitive Pull in parameter server training mode.
  66. // Every call of this method will increase the count for pull by 1.
  67. AddressPtr Pull();
  68. // Different from the method Pull, this method simply returns the weight of this ParameterAggregator without causing
  69. // any change of status.
  70. AddressPtr GetWeight();
  71. // After aggregation/optimizing/pulling of one iteration is done, caller must reset the status to ensure the
  72. // correctness of the aggregation/optimizing/pulling for next iteration.
  73. void ResetAggregationStatus();
  74. void ResetOptimizingStatus();
  75. void ResetPullingStatus();
  76. // Returns the aggregation/optimizing/pulling status to the caller.
  77. bool IsAggregationDone() const;
  78. bool IsOptimizingDone() const;
  79. bool IsPullingDone() const;
  80. private:
  81. // Initializing aggregation/optimizer kenerls based on the cnode. The reason of this is described in the file
  82. // kernel/kernel_factory.h.
  83. bool InitAggregationKernels(const CNodePtr &cnode);
  84. bool InitOptimizerKernels(const CNodePtr &cnode);
  85. // Assign memory for server kernel K(AggregationKernel/OptimizerKernel).
  86. // The memory assigned can be accessed by MemoryRegister. The memory could be weights, gradients, learning_rate,
  87. // momentum, etc.
  88. template <typename K>
  89. bool AssignMemory(K server_kernel, const CNodePtr &cnode, const ReuseKernelNodeInfo &reuse_kernel_node_inputs_info,
  90. std::shared_ptr<MemoryRegister> memory_register);
  91. // Generate kernel parameters for aggregation/optimizer kernels. All the parameters is registered and stored in
  92. // memory_register.
  93. bool GenerateAggregationKernelParams(const std::shared_ptr<kernel::AggregationKernel> aggr_kernel,
  94. const std::shared_ptr<MemoryRegister> memory_register);
  95. bool GenerateOptimizerKernelParams(const std::shared_ptr<kernel::OptimizerKernel> optim_kernel,
  96. const std::shared_ptr<MemoryRegister> memory_register);
  97. // The selection of the aggregation algorithm depends on multiple factors. For example, server mode, user
  98. // configuration, etc.
  99. std::vector<std::string> SelectAggregationAlgorithm(const CNodePtr &cnode);
  100. ServerMode server_mode_;
  101. size_t required_push_count_;
  102. size_t required_pull_count_;
  103. size_t current_pull_count_;
  104. // The status of aggregation/optimizing/pulling.
  105. bool aggregation_done_;
  106. bool optimizing_done_;
  107. bool pulling_done_;
  108. // ParameterAggregator stores all data that it needs for aggregation, optimizing, etc.
  109. std::shared_ptr<MemoryRegister> memory_register_;
  110. // Update could have multiple aggregation and optimizer server kernels.
  111. // Here stores multiple pairs of server kernels to parameters of their Launch function.
  112. std::vector<std::pair<std::shared_ptr<kernel::AggregationKernel>, KernelParams>> aggregation_kernel_parameters_;
  113. std::vector<std::pair<std::shared_ptr<kernel::OptimizerKernel>, KernelParams>> optimizer_kernel_parameters_;
  114. };
  115. } // namespace server
  116. } // namespace ps
  117. } // namespace mindspore
  118. #endif // MINDSPORE_CCSRC_PS_SERVER_PARAMETER_AGGREGATOR_H_