| @@ -947,5 +947,122 @@ double GatherV2PCost::GetBackwardComputationCost(const std::vector<TensorInfo> & | |||||
| return result; | return result; | ||||
| } | } | ||||
| // The forward communication is determined by whether the slice is column split or row split | |||||
| // The number of segments is actually the shape[0] of the output, which is the cost of the AllReduce | |||||
| double UnsortedSegmentSumCost::GetForwardCommCost(const std::vector<TensorInfo> &inputs, | |||||
| const std::vector<TensorInfo> &outputs, int32_t stage_id) const { | |||||
| TensorInfo input0 = inputs[0]; | |||||
| TensorInfo input1 = inputs[1]; | |||||
| TensorInfo output0 = outputs[0]; | |||||
| Shape input0_shape = input0.shape(); | |||||
| Shape input0_slice_shape = inputs[0].slice_shape(); | |||||
| double result = 0.0; | |||||
| if (inputs_type_lengths_.size() != inputs.size()) { | |||||
| MS_LOG(EXCEPTION) << "Invalid inputs type size " << inputs_type_lengths_.size() << " for UnsortedSegmentSum cost"; | |||||
| } | |||||
| // If the shape b is not the same as the shape a, we regard it as column slice | |||||
| for (size_t i = 0; i < input1.shape().size(); ++i) { | |||||
| if (input0_shape[i] != input0_slice_shape[i]) { | |||||
| result = ListProduct(output0.slice_shape()) * static_cast<double>(outputs_type_lengths_[0]); | |||||
| return result; | |||||
| } | |||||
| } | |||||
| return result; | |||||
| } | |||||
| double UnsortedSegmentSumCost::GetBackwardCommCost(const std::vector<TensorInfo> &inputs, | |||||
| const std::vector<TensorInfo> &outputs, int32_t stage_id) const { | |||||
| TensorInfo input0 = inputs[0]; | |||||
| TensorInfo input1 = inputs[1]; | |||||
| TensorInfo output0 = outputs[0]; | |||||
| Shape input0_shape = input0.shape(); | |||||
| Shape input0_slice_shape = inputs[0].slice_shape(); | |||||
| double result = 0.0; | |||||
| if (inputs_type_lengths_.size() != inputs.size()) { | |||||
| MS_LOG(EXCEPTION) << "Invalid inputs type size " << inputs_type_lengths_.size() << " for UnsortedSegmentSum cost"; | |||||
| } | |||||
| if (is_parameter_[0]) { | |||||
| // If the forward process has a AllReduce, then the backward also needs one. | |||||
| for (size_t i = 0; i < input1.shape().size(); ++i) { | |||||
| if (input0_shape[i] != input0_slice_shape[i]) { | |||||
| result = ListProduct(output0.slice_shape()) * static_cast<double>(outputs_type_lengths_[0]); | |||||
| return result; | |||||
| } | |||||
| } | |||||
| } | |||||
| return result; | |||||
| } | |||||
| double UnsortedSegmentSumCost::GetForwardComputationCost(const std::vector<TensorInfo> &inputs, | |||||
| const std::vector<TensorInfo> &outputs, int32_t) const { | |||||
| // In forward phase, the computation cost = slice(A) + slice(B) | |||||
| Shape input0_slice_shape = inputs[0].slice_shape(); | |||||
| Shape input1_slice_shape = inputs[1].slice_shape(); | |||||
| Shape output_slice_shape = outputs[0].slice_shape(); | |||||
| double result = ListProduct(input0_slice_shape) * static_cast<double>(inputs_type_lengths_[0]) + | |||||
| ListProduct(input1_slice_shape) * static_cast<double>(inputs_type_lengths_[1]) + | |||||
| ListProduct(output_slice_shape) * static_cast<double>(outputs_type_lengths_[0]); | |||||
| return result; | |||||
| } | |||||
| double UnsortedSegmentMinCost::GetForwardCommCost(const std::vector<TensorInfo> &inputs, | |||||
| const std::vector<TensorInfo> &outputs, int32_t stage_id) const { | |||||
| TensorInfo input0 = inputs[0]; | |||||
| TensorInfo input1 = inputs[1]; | |||||
| TensorInfo output0 = outputs[0]; | |||||
| Shape input0_shape = input0.shape(); | |||||
| Shape input0_slice_shape = inputs[0].slice_shape(); | |||||
| double result = 0.0; | |||||
| if (inputs_type_lengths_.size() != inputs.size()) { | |||||
| MS_LOG(EXCEPTION) << "Invalid inputs type size " << inputs_type_lengths_.size() | |||||
| << " for UnsortedSegmentMinCost cost"; | |||||
| } | |||||
| // If the shape b is not the same as the shape a, we regard it as column slice | |||||
| // The cost is a AllGather operation, the shape is the same as the output of UnsortedSegmentMin. | |||||
| for (size_t i = 0; i < input1.shape().size(); ++i) { | |||||
| if (input0_shape[i] != input0_slice_shape[i]) { | |||||
| result = ListProduct(output0.slice_shape()) * static_cast<double>(outputs_type_lengths_[0]); | |||||
| return result; | |||||
| } | |||||
| } | |||||
| return result; | |||||
| } | |||||
| double UnsortedSegmentMinCost::GetBackwardCommCost(const std::vector<TensorInfo> &inputs, | |||||
| const std::vector<TensorInfo> &outputs, int32_t stage_id) const { | |||||
| TensorInfo input0 = inputs[0]; | |||||
| TensorInfo input1 = inputs[1]; | |||||
| TensorInfo output0 = outputs[0]; | |||||
| Shape input0_shape = input0.shape(); | |||||
| Shape input0_slice_shape = inputs[0].slice_shape(); | |||||
| double result = 0.0; | |||||
| if (inputs_type_lengths_.size() != inputs.size()) { | |||||
| MS_LOG(EXCEPTION) << "Invalid inputs type size " << inputs_type_lengths_.size() | |||||
| << " for UnsortedSegmentMinCost cost"; | |||||
| } | |||||
| if (is_parameter_[0]) { | |||||
| // If the forward process has a AllGather, then the backward also needs one ReduceScatter. | |||||
| for (size_t i = 0; i < input1.shape().size(); ++i) { | |||||
| if (input0_shape[i] != input0_slice_shape[i]) { | |||||
| result = ListProduct(output0.slice_shape()) * static_cast<double>(outputs_type_lengths_[0]); | |||||
| return result; | |||||
| } | |||||
| } | |||||
| } | |||||
| return result; | |||||
| } | |||||
| double UnsortedSegmentMinCost::GetForwardComputationCost(const std::vector<TensorInfo> &inputs, | |||||
| const std::vector<TensorInfo> &outputs, int32_t) const { | |||||
| // In forward phase, the computation cost = slice(A) + slice(B) | |||||
| Shape input0_slice_shape = inputs[0].slice_shape(); | |||||
| Shape input1_slice_shape = inputs[1].slice_shape(); | |||||
| Shape output_slice_shape = outputs[0].slice_shape(); | |||||
| // The forward operation is UnsortedSegmentMin + ReudceMin | |||||
| double result = ListProduct(input0_slice_shape) * static_cast<double>(inputs_type_lengths_[0]) + | |||||
| ListProduct(input1_slice_shape) * static_cast<double>(inputs_type_lengths_[1]) + | |||||
| ListProduct(output_slice_shape) * static_cast<double>(outputs_type_lengths_[0]) + | |||||
| ListProduct(output_slice_shape) * static_cast<double>(outputs_type_lengths_[0]); // ReduceMin | |||||
| return result; | |||||
| } | |||||
| } // namespace parallel | } // namespace parallel | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -578,6 +578,58 @@ class DropOutCost : public OperatorCost { | |||||
| using DropOutCostPtr = std::shared_ptr<DropOutCost>; | using DropOutCostPtr = std::shared_ptr<DropOutCost>; | ||||
| class UnsortedSegmentSumCost : public OperatorCost { | |||||
| public: | |||||
| explicit UnsortedSegmentSumCost(bool is_inputs_related) : OperatorCost(is_inputs_related) {} | |||||
| UnsortedSegmentSumCost() : OperatorCost(true) {} | |||||
| ~UnsortedSegmentSumCost() override = default; | |||||
| double GetCommCost(const std::vector<TensorInfo> &inputs, const std::vector<TensorInfo> &outputs, | |||||
| int32_t stage_id) const override { | |||||
| return GetForwardCommCost(inputs, outputs, stage_id) + GetBackwardCommCost(inputs, outputs, stage_id); | |||||
| } | |||||
| double GetForwardCommCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &, int32_t) const override; | |||||
| double GetBackwardCommCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &, int32_t) const override; | |||||
| double GetComputationCost(const std::vector<TensorInfo> &inputs, const std::vector<TensorInfo> &outputs, | |||||
| int32_t stage_id) const override { | |||||
| return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); | |||||
| } | |||||
| double GetForwardComputationCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &, | |||||
| int32_t) const override; | |||||
| double GetBackwardComputationCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &, | |||||
| int32_t) const override { | |||||
| return 0.0; | |||||
| } | |||||
| }; | |||||
| using UnsortedSegmentSumCostPtr = std::shared_ptr<UnsortedSegmentSumCost>; | |||||
| class UnsortedSegmentMinCost : public OperatorCost { | |||||
| public: | |||||
| explicit UnsortedSegmentMinCost(bool is_inputs_related) : OperatorCost(is_inputs_related) {} | |||||
| UnsortedSegmentMinCost() : OperatorCost(true) {} | |||||
| ~UnsortedSegmentMinCost() override = default; | |||||
| double GetCommCost(const std::vector<TensorInfo> &inputs, const std::vector<TensorInfo> &outputs, | |||||
| int32_t stage_id) const override { | |||||
| return GetForwardCommCost(inputs, outputs, stage_id) + GetBackwardCommCost(inputs, outputs, stage_id); | |||||
| } | |||||
| double GetForwardCommCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &, int32_t) const override; | |||||
| double GetBackwardCommCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &, int32_t) const override; | |||||
| double GetComputationCost(const std::vector<TensorInfo> &inputs, const std::vector<TensorInfo> &outputs, | |||||
| int32_t stage_id) const override { | |||||
| return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id); | |||||
| } | |||||
| double GetForwardComputationCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &, | |||||
| int32_t) const override; | |||||
| double GetBackwardComputationCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &, | |||||
| int32_t) const override { | |||||
| return 0.0; | |||||
| } | |||||
| }; | |||||
| using UnsortedSegmentMinCostPtr = std::shared_ptr<UnsortedSegmentMinCost>; | |||||
| class LayerNormCost : public OperatorCost { | class LayerNormCost : public OperatorCost { | ||||
| public: | public: | ||||
| explicit LayerNormCost(bool is_inputs_related) : OperatorCost(is_inputs_related) {} | explicit LayerNormCost(bool is_inputs_related) : OperatorCost(is_inputs_related) {} | ||||
| @@ -173,6 +173,8 @@ REGISTER(ExpandDimsInfo); | |||||
| REGISTER(SqueezeInfo); | REGISTER(SqueezeInfo); | ||||
| REGISTER(SigmoidCrossEntropyWithLogitsInfo); | REGISTER(SigmoidCrossEntropyWithLogitsInfo); | ||||
| REGISTER(SquareInfo); | REGISTER(SquareInfo); | ||||
| REGISTER(UnsortedSegmentSumInfo); | |||||
| REGISTER(UnsortedSegmentMinInfo); | |||||
| REGISTER(GatherV2PInfo); | REGISTER(GatherV2PInfo); | ||||
| REGISTER(EmbeddingLookupInfo); | REGISTER(EmbeddingLookupInfo); | ||||
| REGISTER(TileInfo); | REGISTER(TileInfo); | ||||
| @@ -35,6 +35,7 @@ | |||||
| #include "frontend/parallel/ops_info/reduce_method_info.h" | #include "frontend/parallel/ops_info/reduce_method_info.h" | ||||
| #include "frontend/parallel/ops_info/reshape_info.h" | #include "frontend/parallel/ops_info/reshape_info.h" | ||||
| #include "frontend/parallel/ops_info/transpose_info.h" | #include "frontend/parallel/ops_info/transpose_info.h" | ||||
| #include "frontend/parallel/ops_info/unsorted_segment_op_info.h" | |||||
| #include "frontend/parallel/ops_info/virtual_dataset_info.h" | #include "frontend/parallel/ops_info/virtual_dataset_info.h" | ||||
| #include "frontend/parallel/ops_info/gather_v2_p_info.h" | #include "frontend/parallel/ops_info/gather_v2_p_info.h" | ||||
| #include "frontend/parallel/ops_info/tile_info.h" | #include "frontend/parallel/ops_info/tile_info.h" | ||||
| @@ -283,6 +283,8 @@ constexpr char IN_TOPK[] = "InTopK"; | |||||
| constexpr char GATHER_ND[] = "GatherNd"; | constexpr char GATHER_ND[] = "GatherNd"; | ||||
| constexpr char UNSORTEF_SEGMENT_MIND[] = "UnsortedSegmentMinD"; | constexpr char UNSORTEF_SEGMENT_MIND[] = "UnsortedSegmentMinD"; | ||||
| constexpr char UNSORTEF_SEGMENT_PRODD[] = "UnsortedSegmentProdD"; | constexpr char UNSORTEF_SEGMENT_PRODD[] = "UnsortedSegmentProdD"; | ||||
| constexpr char UNSORTED_SEGMENT_SUM[] = "UnsortedSegmentSum"; | |||||
| constexpr char UNSORTED_SEGMENT_MIN[] = "UnsortedSegmentMin"; | |||||
| constexpr char DEPTHWISE_CONV2D_NATIVE[] = "DepthwiseConv2dNative"; | constexpr char DEPTHWISE_CONV2D_NATIVE[] = "DepthwiseConv2dNative"; | ||||
| constexpr char DEPTHWISE_CONV2D[] = "DepthwiseConv2D"; | constexpr char DEPTHWISE_CONV2D[] = "DepthwiseConv2D"; | ||||
| constexpr char ADD[] = "Add"; | constexpr char ADD[] = "Add"; | ||||
| @@ -0,0 +1,313 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "frontend/parallel/ops_info/unsorted_segment_op_info.h" | |||||
| #include <memory> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "frontend/parallel/auto_parallel/costmodel.h" | |||||
| #include "frontend/parallel/device_matrix.h" | |||||
| #include "frontend/parallel/graph_util/generate_graph.h" | |||||
| #include "frontend/parallel/strategy.h" | |||||
| #include "ir/tensor.h" | |||||
| #include "ir/value.h" | |||||
| #include "utils/log_adapter.h" | |||||
| namespace mindspore { | |||||
| namespace parallel { | |||||
| // The operator UnsortedSegment accepts three inputs: | |||||
| // input0 : vector, the shape is x1,x2,x3,...,xr | |||||
| // input1 : segment id, the shape is x1,x2,..,xn | |||||
| // input2 : value, the number of the segments | |||||
| // For Sum: r >= n | |||||
| // For Min: r >=n, n=1 | |||||
| Status UnsortedSegmentOpInfo::GetAttrs() { | |||||
| if (inputs_shape_.size() != UNSORTEDSEGMENTOP_INPUTS_SIZE) { | |||||
| MS_LOG(ERROR) << name_ << ": inputs shape size must be 2, but is " << inputs_shape_.size(); | |||||
| return FAILED; | |||||
| } | |||||
| if (outputs_shape_.size() != UNSORTEDSEGMENTOP_OUTPUTS_SIZE) { | |||||
| MS_LOG(ERROR) << name_ << ": outputs shape size must be 1, but is " << outputs_shape_.size(); | |||||
| return FAILED; | |||||
| } | |||||
| if (input_value_.at(2) == nullptr) { | |||||
| MS_LOG(ERROR) << name_ << ": the third input value is nullptr, is not a ValueNode!"; | |||||
| return FAILED; | |||||
| } | |||||
| if (inputs_shape_.at(0).empty()) { | |||||
| MS_LOG(ERROR) << name_ << ": input can not be a scalar!"; | |||||
| return FAILED; | |||||
| } | |||||
| int num_segments = GetValue<int>(input_value_.at(2)); | |||||
| if (num_segments < 0) { | |||||
| MS_LOG(ERROR) << name_ << ": the number of segments should be non negative value."; | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status UnsortedSegmentOpInfo::CheckStrategy(const StrategyPtr &strategy) { | |||||
| // Check size | |||||
| if (inputs_shape_.size() != UNSORTEDSEGMENTOP_INPUTS_SIZE) { | |||||
| MS_LOG(ERROR) << name_ << ": inputs shape size must be " << UNSORTEDSEGMENTOP_INPUTS_SIZE << ", but is " | |||||
| << inputs_shape_.size(); | |||||
| return FAILED; | |||||
| } | |||||
| if (outputs_shape_.size() != UNSORTEDSEGMENTOP_OUTPUTS_SIZE) { | |||||
| MS_LOG(ERROR) << name_ << ": outputs shape size must be " << UNSORTEDSEGMENTOP_OUTPUTS_SIZE << ", but is " | |||||
| << outputs_shape_.size(); | |||||
| return FAILED; | |||||
| } | |||||
| // The strategy of the first and the second input should be set. | |||||
| if (CheckStrategyValue(strategy, {inputs_shape_.at(0), inputs_shape_.at(1)}) != SUCCESS) { | |||||
| MS_LOG(ERROR) << name_ << ": Invalid strategy."; | |||||
| return FAILED; | |||||
| } | |||||
| Strategys stra = strategy->GetInputDim(); | |||||
| Dimensions sub_a_strategy = stra.at(0); | |||||
| Dimensions sub_b_strategy = stra.at(1); | |||||
| Shape input_a_shape = inputs_shape_.at(0); | |||||
| Shape input_b_shape = inputs_shape_.at(1); | |||||
| // The size of the input b must be equal or smaller than input a | |||||
| for (size_t i = 0; i < input_b_shape.size(); ++i) { | |||||
| if ((sub_a_strategy[i] != sub_b_strategy[i]) && (input_a_shape[i] != input_b_shape[i])) { | |||||
| MS_LOG(ERROR) << name_ | |||||
| << " : Invalid strategy. The shape and the strategy of the input0 and input1 " | |||||
| "should be same before the front size of the input[1]"; | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status UnsortedSegmentOpInfo::InferDevMatrixShape() { | |||||
| Strategys stra = strategy_->GetInputDim(); | |||||
| dev_matrix_shape_ = stra.at(0); | |||||
| return SUCCESS; | |||||
| } | |||||
| // As the op converts the vector x1,x2,x3...,xr -> number of segments, xn,..,xr | |||||
| // the dimension x1,x2,x3,..,xn is eliminated | |||||
| // suppose the strategy of the inputs is (a,b,c,d), (a,b) | |||||
| // the tensor map of the input vector is (3,2,1,0), id:(1, 0) | |||||
| // the output vector is (-1, 1, 0) | |||||
| Status UnsortedSegmentOpInfo::InferTensorMap() { | |||||
| Shape tensor_map_in; | |||||
| Shape tensor_map_in_index; | |||||
| Shape tensor_map_out; | |||||
| size_t input0_size = inputs_shape_.at(0).size(); | |||||
| // such as 4: tensor_map_index [3,2,1,0] | |||||
| for (size_t i = 0; i < input0_size; ++i) { | |||||
| tensor_map_in.push_back(SizeToInt(input0_size - i - 1)); | |||||
| tensor_map_in_index.push_back(SizeToInt(input0_size - i - 1)); | |||||
| tensor_map_out.push_back(SizeToInt(input0_size - i - 1)); | |||||
| } | |||||
| (void)tensor_map_out.erase(tensor_map_out.begin(), tensor_map_out.begin() + inputs_shape_.at(1).size() - 1); | |||||
| // A special case: the input vector (a,) id (a,) or input vector (a,b,c), id(a,b,c) | |||||
| // The output vector will be a 1-dim vector, | |||||
| // These two kinds of situations as row slice. | |||||
| tensor_map_out[0] = -1; | |||||
| (void)tensor_map_in_index.erase(tensor_map_in_index.begin() + inputs_shape_.at(1).size(), tensor_map_in_index.end()); | |||||
| if (tensor_map_out.size() != outputs_shape_.at(0).size()) { | |||||
| MS_LOG(ERROR) << "Out tensor map size is not equal to output size! Out tensor map size is " << tensor_map_out.size() | |||||
| << " output size is " << outputs_shape_.at(0).size(); | |||||
| return FAILED; | |||||
| } | |||||
| inputs_tensor_map_.emplace_back(std::move(tensor_map_in)); | |||||
| inputs_tensor_map_.emplace_back(std::move(tensor_map_in_index)); | |||||
| outputs_tensor_map_.emplace_back(std::move(tensor_map_out)); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status UnsortedSegmentOpInfo::InferTensorInfo() { | |||||
| // infer tensor shape | |||||
| Shape input_shape = inputs_shape_.at(0); | |||||
| Shape input_index_shape = inputs_shape_.at(1); | |||||
| Shape output_shape = outputs_shape_.at(0); | |||||
| TensorLayout input_tensor_layout, input_index_layout, output_tensor_layout; | |||||
| if ((input_tensor_layout.InitFromVector(dev_matrix_shape_, inputs_tensor_map_.at(0), input_shape) != SUCCESS) || | |||||
| (input_index_layout.InitFromVector(dev_matrix_shape_, inputs_tensor_map_.at(1), input_index_shape) != SUCCESS) || | |||||
| (output_tensor_layout.InitFromVector(dev_matrix_shape_, outputs_tensor_map_.at(0), output_shape) != SUCCESS)) { | |||||
| return FAILED; | |||||
| } | |||||
| TensorInfo input_tensor_info(input_tensor_layout); | |||||
| TensorInfo input_index_info(input_index_layout); | |||||
| TensorInfo output_tensor_info(output_tensor_layout); | |||||
| inputs_tensor_info_.push_back(input_tensor_info); | |||||
| inputs_tensor_info_.push_back(input_index_info); | |||||
| outputs_tensor_info_.push_back(output_tensor_info); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status UnsortedSegmentOpInfo::Init(const StrategyPtr &strategy) { | |||||
| if (InitWithAutoRepeatCalc(strategy) != SUCCESS) { | |||||
| MS_LOG(ERROR) << name_ << ": Init failed."; | |||||
| return FAILED; | |||||
| } | |||||
| MS_LOG(INFO) << name_ << ": Init success."; | |||||
| return SUCCESS; | |||||
| } | |||||
| Status UnsortedSegmentOpInfo::InitForCostModel(const StrategyPtr &strategy) { | |||||
| if (InitForCostModelWithAutoRepeatCalc(strategy) != SUCCESS) { | |||||
| MS_LOG(ERROR) << name_ << ": Init for cost model failed."; | |||||
| return FAILED; | |||||
| } | |||||
| MS_LOG(INFO) << name_ << ": Init for cost model success."; | |||||
| return SUCCESS; | |||||
| } | |||||
| // Set the default strategy | |||||
| Status UnsortedSegmentOpInfo::GenerateStrategies(int32_t stage_id) { | |||||
| Shape input0_split(inputs_shape_[0].size(), 1); | |||||
| Shapes splittable_inputs = {input0_split}; | |||||
| std::vector<StrategyPtr> sp_vector; | |||||
| if (GenerateStrategiesForIndependentInputs(stage_id, {inputs_shape_.at(0)}, splittable_inputs, &sp_vector) != | |||||
| SUCCESS) { | |||||
| MS_LOG(ERROR) << name_ << " : Generate strategies for independent inputs() failed."; | |||||
| return FAILED; | |||||
| } | |||||
| for (auto &sp : sp_vector) { | |||||
| Strategys tmp_strategy; | |||||
| Dimensions first_input_strategy = sp->GetInputDim()[0]; | |||||
| Dimensions second_input_strategy; | |||||
| for (size_t i = 0; i < inputs_shape_[1].size(); ++i) { | |||||
| second_input_strategy.push_back(first_input_strategy[i]); | |||||
| } | |||||
| tmp_strategy.push_back(first_input_strategy); | |||||
| tmp_strategy.push_back(second_input_strategy); | |||||
| sp->ResetInputs(tmp_strategy); | |||||
| } | |||||
| size_t success = 0; | |||||
| for (auto &sp : sp_vector) { | |||||
| PrintStrategy(sp); | |||||
| if (SetCostUnderStrategy(sp) == SUCCESS) { | |||||
| success++; | |||||
| MS_LOG(INFO) << name_ << " : Successfully generated " << success << " strategy"; | |||||
| PrintStrategy(sp); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| // if the dimension of the input b is split, we regarded it as the row slice, thus requires a AllReduce | |||||
| // otherwise it is column slice, | |||||
| Status UnsortedSegmentOpInfo::InferForwardCommunication() { | |||||
| forward_op_.clear(); | |||||
| std::vector<Group> group_list; | |||||
| Shape tmp_group_tensor_map = outputs_tensor_map_.at(0); | |||||
| if (repeated_calc_num_ > 1) { | |||||
| for (size_t i = 1; i < tmp_group_tensor_map.size(); ++i) { | |||||
| tmp_group_tensor_map[i] += 1; | |||||
| } | |||||
| tmp_group_tensor_map.push_back(0); | |||||
| } | |||||
| if (CreateGroupByTensorMap(tmp_group_tensor_map, &group_list) != SUCCESS) { | |||||
| MS_LOG(ERROR) << name_ << " : Infer forward communication, create group failed."; | |||||
| return FAILED; | |||||
| } else if (group_list.empty()) { | |||||
| MS_LOG(INFO) << name_ << " : Forward all reduce is not required."; | |||||
| return SUCCESS; | |||||
| } | |||||
| Operator op; | |||||
| op = CreateAllReduceOp(REDUCE_OP_SUM, group_list[0].name()); | |||||
| forward_op_.push_back(op); | |||||
| MS_LOG(INFO) << name_ << " : The group name of forward communication is " << group_list[0].name(); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status UnsortedSegmentOpInfo::SetCostUnderStrategy(const StrategyPtr &strategy) { | |||||
| return SetCostUnderStrategyBase(strategy); | |||||
| } | |||||
| std::shared_ptr<Strategys> UnsortedSegmentOpInfo::GenerateBatchStrategies() { | |||||
| if (inputs_shape_.size() != UNSORTEDSEGMENTOP_INPUTS_SIZE) { | |||||
| MS_LOG(EXCEPTION) << name_ << ": inputs shape size must be " << UNSORTEDSEGMENTOP_INPUTS_SIZE << ", but is " | |||||
| << inputs_shape_.size(); | |||||
| } | |||||
| CheckGlobalDeviceManager(); | |||||
| size_t dev_num = g_device_manager->GetDeviceListByStageId(0).size(); | |||||
| if (GetAttrs() != SUCCESS) { | |||||
| MS_LOG(EXCEPTION) << "GetAttrs failed!"; | |||||
| } | |||||
| Dimensions strategy_a; | |||||
| Dimensions strategy_b; | |||||
| strategy_a.push_back(SizeToInt(dev_num)); | |||||
| for (size_t i = 1; i < inputs_shape_[0].size(); i++) { | |||||
| strategy_a.push_back(1); | |||||
| } | |||||
| strategy_b.push_back(SizeToInt(dev_num)); | |||||
| for (size_t i = 1; i < inputs_shape_[1].size(); i++) { | |||||
| strategy_b.push_back(1); | |||||
| } | |||||
| Strategys strategy_v = {strategy_a, strategy_b}; | |||||
| return std::make_shared<Strategys>(strategy_v); | |||||
| } | |||||
| // When the index is splited, the graph should be replaced | |||||
| // a special case is when the shape input equals the shape of ids, we regard it as column slice, | |||||
| // thus there is no need for repalce graphs | |||||
| ReplaceGraphPtr UnsortedSegmentMinInfo::replace_graph(const CNodePtr &cnode) { | |||||
| auto input_id_strategy = strategy_->GetInputDim().at(1); | |||||
| // 1. the two input shapes are same, and the strategy is not all ones | |||||
| if (std::any_of(input_id_strategy.begin(), input_id_strategy.end(), [](const int32_t &shard) { return shard > 1; })) { | |||||
| if (ComputeReplaceGraph(cnode) != SUCCESS) { | |||||
| MS_LOG(EXCEPTION) << name_ << ": ComputeReplaceGraph failed."; | |||||
| } | |||||
| } | |||||
| return replace_graph_; | |||||
| } | |||||
| Status UnsortedSegmentMinInfo::ComputeReplaceGraph(const CNodePtr &cnode) { | |||||
| GenerateGraph gen_g = GenerateGraph(); | |||||
| if (gen_g.Init(cnode) != SUCCESS) { | |||||
| MS_LOG(ERROR) << "GenerateGraph Init failed"; | |||||
| return FAILED; | |||||
| } | |||||
| // Get the attributes of the UnsortedSegmentMin | |||||
| auto num_segments = GetValue<int>(input_value_.at(2)); | |||||
| // Step1: Output branch | |||||
| auto segment_min = gen_g.PushBack({gen_g.NewOpInst(UNSORTED_SEGMENT_MIN), gen_g.virtual_input_node(), | |||||
| gen_g.virtual_input_node(), CreatInt32Imm(num_segments)}); | |||||
| auto expandim_output = gen_g.PushBack({gen_g.NewOpInst(EXPAND_DIMS), segment_min, CreatInt32Imm(0)}); | |||||
| auto all_gather_output = gen_g.PushBack({gen_g.NewOpInst(ALL_GATHER), expandim_output}); | |||||
| auto final_output = gen_g.PushBack({gen_g.NewOpInst(REDUCE_MIN), all_gather_output, CreatInt32Imm(0)}); | |||||
| std::vector<std::pair<AnfNodePtr, int>> input_nodes = {std::make_pair(segment_min, 1), | |||||
| std::make_pair(segment_min, 2)}; | |||||
| replace_graph_ = std::make_shared<std::pair<std::vector<std::pair<AnfNodePtr, int>>, AnfNodePtr>>( | |||||
| std::make_pair(input_nodes, final_output)); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace parallel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,84 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_OPS_INFO_UNSORTEDSEGMENTOP_INFO_H_ | |||||
| #define MINDSPORE_CCSRC_FRONTEND_PARALLEL_OPS_INFO_UNSORTEDSEGMENTOP_INFO_H_ | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include <vector> | |||||
| #include "frontend/parallel/auto_parallel/operator_costmodel.h" | |||||
| #include "frontend/parallel/ops_info/operator_info.h" | |||||
| #include "frontend/parallel/strategy.h" | |||||
| #include "ir/value.h" | |||||
| namespace mindspore { | |||||
| namespace parallel { | |||||
| constexpr size_t UNSORTEDSEGMENTOP_INPUTS_SIZE = 2; | |||||
| constexpr size_t UNSORTEDSEGMENTOP_OUTPUTS_SIZE = 1; | |||||
| class UnsortedSegmentOpInfo : public OperatorInfo { | |||||
| public: | |||||
| UnsortedSegmentOpInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, | |||||
| const PrimitiveAttrs &attrs, OperatorCostPtr cost) | |||||
| : OperatorInfo(name, inputs_shape, outputs_shape, attrs, cost) {} | |||||
| ~UnsortedSegmentOpInfo() override = default; | |||||
| Status Init(const StrategyPtr &strategy) override; | |||||
| Status InitForCostModel(const StrategyPtr &strategy) override; | |||||
| Status GenerateStrategies(int32_t stage_id) override; | |||||
| Status SetCostUnderStrategy(const StrategyPtr &strategy) override; | |||||
| std::shared_ptr<Strategys> GenerateBatchStrategies() override; | |||||
| protected: | |||||
| Status CheckStrategy(const StrategyPtr &strategy) override; | |||||
| Status InferForwardCommunication() override; | |||||
| Status InferMirrorOps() override { return SUCCESS; } | |||||
| Status InferTensorInfo() override; | |||||
| Status InferDevMatrixShape() override; | |||||
| Status InferTensorMap() override; | |||||
| Status GetAttrs() override; | |||||
| private: | |||||
| Status ComputeReplaceGraph(const CNodePtr &cnode); | |||||
| }; | |||||
| class UnsortedSegmentSumInfo : public UnsortedSegmentOpInfo { | |||||
| public: | |||||
| UnsortedSegmentSumInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, | |||||
| const PrimitiveAttrs &attrs) | |||||
| : UnsortedSegmentOpInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared<UnsortedSegmentSumCost>()) {} | |||||
| ~UnsortedSegmentSumInfo() override = default; | |||||
| }; | |||||
| class UnsortedSegmentMinInfo : public UnsortedSegmentOpInfo { | |||||
| public: | |||||
| UnsortedSegmentMinInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, | |||||
| const PrimitiveAttrs &attrs) | |||||
| : UnsortedSegmentOpInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared<UnsortedSegmentMinCost>()) {} | |||||
| ~UnsortedSegmentMinInfo() override = default; | |||||
| ReplaceGraphPtr replace_graph(const CNodePtr &cnode) override; | |||||
| Status InferForwardCommunication() override { return SUCCESS; } | |||||
| protected: | |||||
| Status ComputeReplaceGraph(const CNodePtr &cnode); | |||||
| }; | |||||
| } // namespace parallel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_FRONTEND_PARALLEL_OPS_INFO_UNSORTEDSEGMENTOP_INFO_H_ | |||||
| @@ -312,7 +312,8 @@ bool IsSplittableOperator(const std::string &op_name) { | |||||
| EMBEDDING_LOOKUP, FUSE_BATCH_NORM_EX, SPLIT, BROADCAST_TO, ABS, ACOSH, ASIN, ASINH, ATAN, ATANH, CEIL, COSH, | EMBEDDING_LOOKUP, FUSE_BATCH_NORM_EX, SPLIT, BROADCAST_TO, ABS, ACOSH, ASIN, ASINH, ATAN, ATANH, CEIL, COSH, | ||||
| EXPM1, LOG1P, SIN, SINH, TAN, RSQRT, INV, RECIPROCAL, ROUND, FLOOR, SIGN, ERF, ERFC, ZEROSLIKE, ONESLIKE, | EXPM1, LOG1P, SIN, SINH, TAN, RSQRT, INV, RECIPROCAL, ROUND, FLOOR, SIGN, ERF, ERFC, ZEROSLIKE, ONESLIKE, | ||||
| BESSELI0E, BESSELI1E, FLOORMOD, ASSIGN, ASSIGN_ADD, ATAN2, DIVNONAN, LOGICALAND, LOGICALOR, ELU, RELU6, RELUV2, | BESSELI0E, BESSELI1E, FLOORMOD, ASSIGN, ASSIGN_ADD, ATAN2, DIVNONAN, LOGICALAND, LOGICALOR, ELU, RELU6, RELUV2, | ||||
| SOFTPLUS, SOFTSIGN, GREATEREQUAL, LESSEQUAL, LESS, APPROXIMATEEQUAL, MOD, UNIQUE}; | |||||
| SOFTPLUS, SOFTSIGN, GREATEREQUAL, LESSEQUAL, LESS, APPROXIMATEEQUAL, MOD, UNIQUE, UNSORTED_SEGMENT_SUM, | |||||
| UNSORTED_SEGMENT_MIN}; | |||||
| // clang-format on | // clang-format on | ||||
| auto iter = splittable_op.find(op_name); | auto iter = splittable_op.find(op_name); | ||||
| @@ -740,9 +740,25 @@ void StepReplaceGraph(const ReplaceGraphPtr &replace_graph, const CNodePtr &node | |||||
| if (manager == nullptr) { | if (manager == nullptr) { | ||||
| MS_LOG(EXCEPTION) << "Failure:AddNode error since manager is nullptr"; | MS_LOG(EXCEPTION) << "Failure:AddNode error since manager is nullptr"; | ||||
| } | } | ||||
| // Sovle the input order | |||||
| // For example input_node:{segment_sum:1, segment_sum:2, gahter:2} | |||||
| // The Original code here will bind the all operations to the first inputs of theses operatos | |||||
| // However, the segment_sum operation needs two inputs, To sovle this | |||||
| // We maintain a dict to count the times of the same operations, | |||||
| // and bind the inputs according to the times of the op appears. | |||||
| static std::unordered_map<AnfNodePtr, int> input_map = {}; | |||||
| static int appear_count = 0; | |||||
| for (auto &replace_input : replace_graph->first) { | for (auto &replace_input : replace_graph->first) { | ||||
| auto pre_node = node->input(IntToSize(replace_input.second)); | auto pre_node = node->input(IntToSize(replace_input.second)); | ||||
| manager->SetEdge(replace_input.first, 1, pre_node); | |||||
| auto it = input_map.find(replace_input.first); | |||||
| if (it != input_map.end()) { | |||||
| appear_count = 1 + it->second; | |||||
| } else { | |||||
| appear_count = 1; | |||||
| } | |||||
| input_map[replace_input.first] = appear_count; | |||||
| manager->SetEdge(replace_input.first, appear_count, pre_node); | |||||
| } | } | ||||
| // "(void)manager->Replace(replace_graph->first, pre_node);" can not be called | // "(void)manager->Replace(replace_graph->first, pre_node);" can not be called | ||||
| auto replace_output = replace_graph->second; | auto replace_output = replace_graph->second; | ||||
| @@ -0,0 +1,71 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| import numpy as np | |||||
| import mindspore as ms | |||||
| import mindspore.nn as nn | |||||
| from mindspore import Tensor | |||||
| from mindspore import context | |||||
| from mindspore.common.api import _executor | |||||
| from mindspore.ops import composite as C | |||||
| import mindspore.ops as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||||
| grad_all = C.GradOperation(get_all=True) | |||||
| class NetWithLoss(nn.Cell): | |||||
| def __init__(self, network): | |||||
| super(NetWithLoss, self).__init__() | |||||
| self.loss = VirtualLoss() | |||||
| self.network = network | |||||
| def construct(self, vectors, index): | |||||
| predict = self.network(vectors, index) | |||||
| return self.loss(predict) | |||||
| class GradWrap(nn.Cell): | |||||
| def __init__(self, network): | |||||
| super(GradWrap, self).__init__() | |||||
| self.network = network | |||||
| def construct(self, vectors, index): | |||||
| return grad_all(self.network)(vectors, index) | |||||
| def test_auto_parallel_unsortedsegmentmin(): | |||||
| class Net(nn.Cell): | |||||
| def __init__(self, num_segments): | |||||
| super().__init__() | |||||
| self.merge_op = P.UnsortedSegmentMin() | |||||
| self.num_segments = num_segments | |||||
| def construct(self, vectors, index): | |||||
| out = self.merge_op(vectors, index, self.num_segments) | |||||
| return out | |||||
| size = 8 | |||||
| context.set_auto_parallel_context(device_num=size, global_rank=0) | |||||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||||
| x = Tensor(np.random.rand(16, 16, 32, 64), dtype=ms.float32) | |||||
| indices = Tensor(np.random.randint(16, size=(16,)), ms.int32) | |||||
| net = GradWrap(NetWithLoss(Net(16))) | |||||
| net.set_auto_parallel() | |||||
| net.set_train() | |||||
| _executor.compile(net, x, indices) | |||||
| @@ -0,0 +1,71 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| import numpy as np | |||||
| import mindspore as ms | |||||
| import mindspore.nn as nn | |||||
| from mindspore import Tensor | |||||
| from mindspore import context | |||||
| from mindspore.common.api import _executor | |||||
| from mindspore.ops import composite as C | |||||
| import mindspore.ops as P | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||||
| grad_all = C.GradOperation(get_all=True) | |||||
| class NetWithLoss(nn.Cell): | |||||
| def __init__(self, network): | |||||
| super(NetWithLoss, self).__init__() | |||||
| self.loss = VirtualLoss() | |||||
| self.network = network | |||||
| def construct(self, vectors, index): | |||||
| predict = self.network(vectors, index) | |||||
| return self.loss(predict) | |||||
| class GradWrap(nn.Cell): | |||||
| def __init__(self, network): | |||||
| super(GradWrap, self).__init__() | |||||
| self.network = network | |||||
| def construct(self, vectors, index): | |||||
| return grad_all(self.network)(vectors, index) | |||||
| def test_auto_parallel_unsortedsegmentsum(): | |||||
| class Net(nn.Cell): | |||||
| def __init__(self, num_segments): | |||||
| super().__init__() | |||||
| self.merge_op = P.UnsortedSegmentSum() | |||||
| self.num_segments = num_segments | |||||
| def construct(self, vectors, index): | |||||
| out = self.merge_op(vectors, index, self.num_segments) | |||||
| return out | |||||
| size = 8 | |||||
| context.set_auto_parallel_context(device_num=size, global_rank=0) | |||||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||||
| x = Tensor(np.random.rand(16, 16, 32, 64), dtype=ms.float32) | |||||
| indices = Tensor(np.random.randint(16, size=(16, 16))) | |||||
| net = GradWrap(NetWithLoss(Net(16))) | |||||
| net.set_auto_parallel() | |||||
| net.set_train() | |||||
| _executor.compile(net, x, indices) | |||||
| @@ -0,0 +1,161 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| import numpy as np | |||||
| import mindspore as ms | |||||
| import mindspore.nn as nn | |||||
| from mindspore import Tensor | |||||
| from mindspore import context | |||||
| from mindspore.common.api import _executor | |||||
| from mindspore.ops import composite as C | |||||
| from mindspore.ops import operations as P | |||||
| from mindspore.ops.operations.comm_ops import _VirtualDataset | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||||
| context.set_context(mode=context.GRAPH_MODE) | |||||
| grad_all = C.GradOperation(get_all=True) | |||||
| class Net(nn.Cell): | |||||
| def __init__(self, strategy1, strategy2, num_segments): | |||||
| super(Net, self).__init__() | |||||
| self.virtual_dataset = _VirtualDataset() | |||||
| self.merge_op = P.UnsortedSegmentMin().shard((strategy1, strategy2)) | |||||
| self.num_segments = num_segments | |||||
| def construct(self, vectors, segment_ids): | |||||
| predict = self.merge_op(vectors, segment_ids, self.num_segments) | |||||
| return predict | |||||
| class GradWrap(nn.Cell): | |||||
| def __init__(self, network): | |||||
| super(GradWrap, self).__init__() | |||||
| self.network = network | |||||
| def construct(self, x, y): | |||||
| return grad_all(self.network)(x, y) | |||||
| class NetWithLoss(nn.Cell): | |||||
| def __init__(self, network): | |||||
| super(NetWithLoss, self).__init__() | |||||
| self.loss = VirtualLoss() | |||||
| self.network = network | |||||
| def construct(self, x, y): | |||||
| predict = self.network(x, y) | |||||
| return self.loss(predict) | |||||
| def compile_graph(x, y, segments, strategy1, strategy2, auto=False): | |||||
| net = GradWrap(NetWithLoss(Net(strategy1, strategy2, segments))) | |||||
| net.set_auto_parallel() | |||||
| net.set_train() | |||||
| if auto: | |||||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||||
| else: | |||||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||||
| _executor.compile(net, x, y) | |||||
| def test_unsortedsegmentmin_model_parallel_slice_1d(): | |||||
| context.set_auto_parallel_context(device_num=8, global_rank=0) | |||||
| x = Tensor(np.ones(8), ms.float32) | |||||
| y = Tensor(np.ones(8), ms.int32) | |||||
| num_segments = 16 | |||||
| strategy1 = (8,) | |||||
| strategy2 = (8,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentmin_model_parallel_no_slice_1d(): | |||||
| context.set_auto_parallel_context(device_num=8, global_rank=0) | |||||
| x = Tensor(np.ones(8), ms.float32) | |||||
| y = Tensor(np.ones(8), ms.int32) | |||||
| num_segments = 16 | |||||
| strategy1 = (1,) | |||||
| strategy2 = (1,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentmin_model_parallel_index_slice_2d(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 8)), ms.float32) | |||||
| y = Tensor(np.arange(4), ms.int32) | |||||
| num_segments = 4 | |||||
| strategy1 = (4, 1) | |||||
| strategy2 = (4,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentmin_model_parallel_vector_slice_2d(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 8)), ms.float32) | |||||
| y = Tensor(np.ones(4), ms.int32) | |||||
| num_segments = 4 | |||||
| strategy1 = (1, 4) | |||||
| strategy2 = (1,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentmin_model_parallel_vector_slice_3d(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 8, 8)), ms.float32) | |||||
| y = Tensor(np.ones(4), ms.int32) | |||||
| num_segments = 4 | |||||
| strategy1 = (1, 2, 2) | |||||
| strategy2 = (1,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentmin_model_parallel_index_vector_slice_2d(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 8)), ms.float32) | |||||
| y = Tensor(np.ones(4), ms.int32) | |||||
| num_segments = 4 | |||||
| strategy1 = (2, 2) | |||||
| strategy2 = (2,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentmin_model_parallel_index_vector_slice_3d(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 4, 8)), ms.float32) | |||||
| y = Tensor(np.ones((4)), ms.int32) | |||||
| num_segments = 16 | |||||
| strategy1 = (2, 1, 2) | |||||
| strategy2 = (2,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentmin_model_parallel_float16(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 4, 8)), ms.float16) | |||||
| y = Tensor(np.ones((4)), ms.int32) | |||||
| num_segments = 16 | |||||
| strategy1 = (2, 1, 2) | |||||
| strategy2 = (2,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentmin_model_parallel_int32(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 4, 8)), ms.int32) | |||||
| y = Tensor(np.ones((4)), ms.int32) | |||||
| num_segments = 16 | |||||
| strategy1 = (2, 1, 2) | |||||
| strategy2 = (2,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| @@ -0,0 +1,153 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| import numpy as np | |||||
| import mindspore as ms | |||||
| import mindspore.nn as nn | |||||
| from mindspore import Tensor | |||||
| from mindspore import context | |||||
| from mindspore.common.api import _executor | |||||
| from mindspore.ops import composite as C | |||||
| from mindspore.ops import operations as P | |||||
| from mindspore.ops.operations.comm_ops import _VirtualDataset | |||||
| from tests.ut.python.ops.test_math_ops import VirtualLoss | |||||
| context.set_context(mode=context.GRAPH_MODE) | |||||
| grad_all = C.GradOperation(get_all=True) | |||||
| class Net(nn.Cell): | |||||
| def __init__(self, strategy1, strategy2, num_segments): | |||||
| super(Net, self).__init__() | |||||
| self.virtual_dataset = _VirtualDataset() | |||||
| self.merge_op = P.UnsortedSegmentSum().shard((strategy1, strategy2)) | |||||
| self.num_segments = num_segments | |||||
| def construct(self, vectors, segment_ids): | |||||
| predict = self.merge_op(vectors, segment_ids, self.num_segments) | |||||
| return predict | |||||
| class GradWrap(nn.Cell): | |||||
| def __init__(self, network): | |||||
| super(GradWrap, self).__init__() | |||||
| self.network = network | |||||
| def construct(self, x, y): | |||||
| return grad_all(self.network)(x, y) | |||||
| class NetWithLoss(nn.Cell): | |||||
| def __init__(self, network): | |||||
| super(NetWithLoss, self).__init__() | |||||
| self.loss = VirtualLoss() | |||||
| self.network = network | |||||
| def construct(self, x, y): | |||||
| predict = self.network(x, y) | |||||
| return self.loss(predict) | |||||
| def compile_graph(x, y, segments, strategy1, strategy2, auto=False): | |||||
| net = GradWrap(NetWithLoss(Net(strategy1, strategy2, segments))) | |||||
| net.set_auto_parallel() | |||||
| net.set_train() | |||||
| if auto: | |||||
| context.set_auto_parallel_context(parallel_mode="auto_parallel") | |||||
| else: | |||||
| context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") | |||||
| _executor.compile(net, x, y) | |||||
| def test_unsortedsegmentsum_model_parallel_slice_1d(): | |||||
| context.set_auto_parallel_context(device_num=8, global_rank=0) | |||||
| x = Tensor(np.ones(8), ms.float32) | |||||
| y = Tensor(np.ones(8), ms.int32) | |||||
| num_segments = 16 | |||||
| strategy1 = (8,) | |||||
| strategy2 = (8,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentsum_model_parallel_no_slice_1d(): | |||||
| context.set_auto_parallel_context(device_num=8, global_rank=0) | |||||
| x = Tensor(np.ones(8), ms.float32) | |||||
| y = Tensor(np.ones(8), ms.int32) | |||||
| num_segments = 16 | |||||
| strategy1 = (1,) | |||||
| strategy2 = (1,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentsum_model_parallel_index_slice_2d(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 8)), ms.float32) | |||||
| y = Tensor(np.arange(4), ms.int32) | |||||
| num_segments = 4 | |||||
| strategy1 = (4, 1) | |||||
| strategy2 = (4,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentsum_model_parallel_index_slice_3d(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 4, 8)), ms.float32) | |||||
| y = Tensor(np.ones((4, 4)), ms.int32) | |||||
| num_segments = 16 | |||||
| strategy1 = (2, 2, 1) | |||||
| strategy2 = (2, 2) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentsum_model_parallel_vector_slice_2d(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 8)), ms.float32) | |||||
| y = Tensor(np.ones(4), ms.int32) | |||||
| num_segments = 4 | |||||
| strategy1 = (1, 4) | |||||
| strategy2 = (1,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentsum_model_parallel_vector_slice_3d(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 8, 8)), ms.float32) | |||||
| y = Tensor(np.ones(4), ms.int32) | |||||
| num_segments = 4 | |||||
| strategy1 = (1, 2, 2) | |||||
| strategy2 = (1,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentsum_model_parallel_index_vector_slice_2d(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 8)), ms.float32) | |||||
| y = Tensor(np.ones(4), ms.int32) | |||||
| num_segments = 4 | |||||
| strategy1 = (2, 2) | |||||
| strategy2 = (2,) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||
| def test_unsortedsegmentsum_model_parallel_index_vector_slice_3d(): | |||||
| context.set_auto_parallel_context(device_num=4, global_rank=0) | |||||
| x = Tensor(np.ones((4, 4, 8)), ms.float32) | |||||
| y = Tensor(np.ones((4, 4)), ms.int32) | |||||
| num_segments = 16 | |||||
| strategy1 = (2, 1, 2) | |||||
| strategy2 = (2, 1) | |||||
| compile_graph(x, y, num_segments, strategy1, strategy2) | |||||