Merge pull request !30589 from hanhuifeng/dcn_dyn_ops_2
@@ -56,5 +56,22 @@ MS_REG_GPU_KERNEL_ONE(ReduceProd, KernelAttr().AddInputAttr(kNumberTypeFloat32).
ArrayReduceGpuKernelMod, float)
MS_REG_GPU_KERNEL_ONE(ReduceProd, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
ArrayReduceGpuKernelMod, double)
// dynamic
MS_REG_GPU_KERNEL_TWO(
ReduceSum,
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64),
ArrayReduceGpuKernelMod, double, int64_t)
MS_REG_GPU_KERNEL_TWO(
ReduceSum,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32),
ArrayReduceGpuKernelMod, float, int64_t)
MS_REG_GPU_KERNEL_TWO(
ReduceSum,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16),
ArrayReduceGpuKernelMod, half, int64_t)
MS_REG_GPU_KERNEL_TWO(
ReduceSum, KernelAttr().AddInputAttr(kNumberTypeBool).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeBool),
ArrayReduceGpuKernelMod, bool, int64_t)
} // namespace kernel
} // namespace mindspore
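The four MS_REG_GPU_KERNEL_TWO entries above register a second int64 input that carries the reduction axis as a tensor. A minimal usage sketch (shapes and values are illustrative, not taken from the PR; the same call pattern is exercised by test_dynamic_reduce_sum at the end of this change set):

    import numpy as np
    import mindspore as ms
    import mindspore.ops as ops

    x = ms.Tensor(np.random.rand(4, 256).astype(np.float32))
    axis = ms.Tensor(np.array([1], dtype=np.int64))  # axis arrives as a tensor input
    out = ops.ReduceSum()(x, axis)                   # matches the two-input KernelAttr above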
@@ -32,7 +32,7 @@ const std::map<std::string, cudnnReduceTensorOp_t> kReduceTypeMap = {
{"ReduceAny", CUDNN_REDUCE_TENSOR_MAX}, {"ReduceAll", CUDNN_REDUCE_TENSOR_MUL},
{"ReduceProd", CUDNN_REDUCE_TENSOR_MUL},
};
template <typename T>
template <typename T, typename S = int64_t>
class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
public:
ArrayReduceGpuKernelMod() { ResetResource(); }
@@ -43,6 +43,9 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
if (is_null_input_) {
return true;
}
if (is_dynamic_axis_ && !get_dynamic_axis_value_) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', fail to get value of the axis when axis is dynamic!";
}
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
T *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
@@ -87,8 +90,13 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
}
data_type_ = GetCudnnDataType(type_name);
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
constexpr size_t kDynamicAxisInputNum = 2;
if (input_num != 1 && input_num != kDynamicAxisInputNum) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1 or " << kDynamicAxisInputNum
<< ", but got " << input_num;
}
if (input_num == kDynamicAxisInputNum) {
is_dynamic_axis_ = true;
}
size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
@@ -96,29 +104,28 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
}
int input_dim_length = SizeToInt(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0).size());
auto prim = common::AnfAlgo::GetCNodePrimitive(kernel_node);
MS_EXCEPTION_IF_NULL(prim);
if (prim->GetAttr("axis")->isa<ValueTuple>() || prim->GetAttr("axis")->isa<ValueList>()) {
std::vector<int> attr_axis;
std::vector<int64_t> attr_axis_me = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
(void)std::transform(attr_axis_me.begin(), attr_axis_me.end(), std::back_inserter(attr_axis),
[](const int64_t &value) { return static_cast<int>(value); });
if (attr_axis.empty()) {
axis_.push_back(-1);
} else {
for (auto axis : attr_axis) {
axis < 0 ? axis_.push_back(axis + input_dim_length) : axis_.push_back(axis);
}
std::sort(axis_.begin(), axis_.end());
auto multiple_pos = std::unique(axis_.begin(), axis_.end());
axis_.erase(multiple_pos, axis_.end());
std::vector<int64_t> attr_axis;
if (is_dynamic_axis_) {
get_dynamic_axis_value_ = GetDynamicAttrIntValue(kernel_node, kAxisIndex_, &attr_axis);
if (!get_dynamic_axis_value_) {
InitSizeLists();
return true;
}
} else if (prim->GetAttr("axis")->isa<Int64Imm>()) {
int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
axis < 0 ? axis_.push_back(axis + input_dim_length) : axis_.push_back(axis);
dynamic_axis_size_ = attr_axis.size();
} else {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute 'axis' type is invalid.";
attr_axis = GetAxisValue(kernel_node);
}
if (attr_axis.empty()) {
axis_.push_back(-1);
} else {
for (auto axis : attr_axis) {
axis < 0 ? axis_.push_back(axis + input_dim_length) : axis_.push_back(axis);
}
std::sort(axis_.begin(), axis_.end());
auto multiple_pos = std::unique(axis_.begin(), axis_.end());
axis_.erase(multiple_pos, axis_.end());
}
keep_dims_ = GetAttr<bool>(kernel_node, "keep_dims");
auto inputA_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
@@ -156,6 +163,9 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
dynamic_axis_size_ = 0;
is_dynamic_axis_ = false;
get_dynamic_axis_value_ = false;
}
void DestroyResource() noexcept override {
@@ -182,6 +192,10 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
"cudnnGetTensorSizeInBytes failed.");
input_size_list_.push_back(input_size_);
if (is_dynamic_axis_) {
input_size_list_.push_back(dynamic_axis_size_ * sizeof(S));
}
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnGetTensorSizeInBytes(outputC_descriptor_, &output_size_),
"cudnnGetTensorSizeInBytes failed.");
output_size_list_.push_back(output_size_);
@@ -279,6 +293,21 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
return;
}
std::vector<int64_t> GetAxisValue(const CNodePtr &kernel_node) {
auto prim = common::AnfAlgo::GetCNodePrimitive(kernel_node);
MS_EXCEPTION_IF_NULL(prim);
std::vector<int64_t> attr_axis_me;
if (prim->GetAttr("axis")->isa<ValueTuple>() || prim->GetAttr("axis")->isa<ValueList>()) {
attr_axis_me = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
} else if (prim->GetAttr("axis")->isa<Int64Imm>()) {
auto axis = GetAttr<int64_t>(kernel_node, "axis");
attr_axis_me.push_back(axis);
} else {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute 'axis' type is invalid.";
}
return attr_axis_me;
}
cudnnHandle_t cudnn_handle_;
cudnnReduceTensorOp_t reduce_tensor_op_;
cudnnDataType_t data_type_;
@@ -295,7 +324,10 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
size_t input_size_;
size_t output_size_;
size_t workspace_size_;
std::string kernel_name_;
size_t dynamic_axis_size_;
bool is_dynamic_axis_;
bool get_dynamic_axis_value_;
static constexpr size_t kAxisIndex_{1};
};
} // namespace kernel
} // namespace mindspore
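Once the dynamic axis value is available, it flows into the same normalization path as the attribute case: an empty axis list means "reduce all dimensions" (encoded as -1), negative axes wrap around, and the result is sorted and deduplicated. A minimal Python sketch of that normalization, with hypothetical names:

    def normalize_axes(attr_axis, ndim):
        # Empty axis list -> reduce everything, encoded as -1 like the C++ code.
        if not attr_axis:
            return [-1]
        # Wrap negative axes, drop duplicates, and sort, mirroring the
        # std::sort / std::unique sequence above.
        return sorted({a + ndim if a < 0 else a for a in attr_axis})

    assert normalize_axes([], 3) == [-1]
    assert normalize_axes([-1, 1, 1], 3) == [1, 2]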
@@ -33,18 +33,18 @@ namespace kernel {
REG_SLICE_GPU_DTYPES(REG_SLICE_GPU)
#define REG_DYNAMIC_SLICE_GPU_ATTR(T0_MS_DTYPE, T0_DTYPE, T1_MS_DTYPE) \
MS_REG_GPU_KERNEL_ONE(Slice, \
KernelAttr() \
.AddInputAttr(T0_MS_DTYPE) \
.AddInputAttr(T1_MS_DTYPE) \
.AddInputAttr(T1_MS_DTYPE) \
.AddOutputAttr(T0_MS_DTYPE), \
SliceFwdGpuKernelMod, T0_DTYPE)
#define REG_DYNAMIC_SLICE_GPU_ATTR(T0_MS_DTYPE, T0_DTYPE, T1_MS_DTYPE, T1_DTYPE) \
MS_REG_GPU_KERNEL_TWO(Slice, \
KernelAttr() \
.AddInputAttr(T0_MS_DTYPE) \
.AddInputAttr(T1_MS_DTYPE) \
.AddInputAttr(T1_MS_DTYPE) \
.AddOutputAttr(T0_MS_DTYPE), \
SliceFwdGpuKernelMod, T0_DTYPE, T1_DTYPE)
#define REG_DYNAMIC_SLICE_GPU(MS_DTYPE, DTYPE) \
REG_DYNAMIC_SLICE_GPU_ATTR(MS_DTYPE, DTYPE, kNumberTypeInt32) \
REG_DYNAMIC_SLICE_GPU_ATTR(MS_DTYPE, DTYPE, kNumberTypeInt64)
#define REG_DYNAMIC_SLICE_GPU(MS_DTYPE, DTYPE) \
REG_DYNAMIC_SLICE_GPU_ATTR(MS_DTYPE, DTYPE, kNumberTypeInt32, int32_t) \
REG_DYNAMIC_SLICE_GPU_ATTR(MS_DTYPE, DTYPE, kNumberTypeInt64, int64_t)
REG_SLICE_GPU_DTYPES(REG_DYNAMIC_SLICE_GPU)
} // namespace kernel
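With the added index type parameter, begin and size can be passed to Slice as int32 or int64 tensors. A minimal usage sketch (illustrative values; assumes the front end accepts tensor begin/size, which is what these registrations serve):

    import numpy as np
    import mindspore as ms
    import mindspore.ops as ops

    x = ms.Tensor(np.arange(12, dtype=np.float32).reshape(3, 4))
    begin = ms.Tensor(np.array([1, 0], dtype=np.int64))
    size = ms.Tensor(np.array([2, 4], dtype=np.int64))
    out = ops.Slice()(x, begin, size)  # rows 1..2, all 4 columns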
@@ -40,7 +40,7 @@ constexpr auto kIdx4 = 4;
constexpr auto kIdx5 = 5;
constexpr auto kIdx6 = 6;
template <typename T>
template <typename T, typename S = int64_t>
class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
public:
SliceFwdGpuKernelMod() { kernel_name_ = "Slice"; }
@@ -106,7 +106,10 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
(void)CheckParam(kernel_node);
if (is_dynamic_attr_ && !get_dynamic_attr_value_) {
InitSizeLists();
return true;
}
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto out_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ =
@@ -118,27 +121,15 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
(void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(input_shape_),
[](const int64_t &e) { return static_cast<int32_t>(e); });
size_t input_size = sizeof(T);
input_size_ = sizeof(T);
for (size_t x : input_shape) {
input_size *= x;
}
input_size_list_.push_back(input_size);
if (is_dynamic_attr_) {
std::vector<size_t> dynamic_attr_indexs = {kBeginIndex_, kSizeIndex_};
for (size_t index : dynamic_attr_indexs) {
input_size = sizeof(T);
for (size_t x : AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, index)) {
input_size *= x;
}
input_size_list_.push_back(input_size);
}
input_size_ *= x;
}
size_t output_size = sizeof(T);
output_size_ = sizeof(T);
for (size_t x : out_shape) {
output_size *= x;
output_size_ *= x;
}
output_size_list_.push_back(output_size);
// transpose begin and size for NHWC data
auto data_format = AnfAlgo::GetInputFormat(kernel_node, 0);
@@ -166,6 +157,8 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
begin_.clear();
size_.clear();
input_shape_.clear();
input_size_ = 0;
output_size_ = 0;
is_null_input_ = false;
kernel_name_ = "Slice";
is_dynamic_attr_ = false;
@@ -173,7 +166,29 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
}
protected:
void InitSizeLists() override {}
void InitSizeLists() override {
input_size_list_.push_back(input_size_);
output_size_list_.push_back(output_size_);
if (is_dynamic_attr_) {
InitDynamicAttrSizeLists(kernel_node_.lock());
}
}
inline void InitDynamicAttrSizeLists(const CNodePtr &kernel_node) {
if (get_dynamic_attr_value_) {
std::vector<size_t> dynamic_attr_indexs = {kBeginIndex_, kSizeIndex_};
for (size_t index : dynamic_attr_indexs) {
size_t input_size = sizeof(S);
for (size_t x : AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, index)) {
input_size *= x;
}
input_size_list_.push_back(input_size);
}
} else {
input_size_list_.push_back(0);
input_size_list_.push_back(0);
}
}
private:
void CheckParam(const CNodePtr &kernel_node) {
@@ -205,13 +220,13 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
size = GetAttr<std::vector<int64_t>>(kernel_node, "size");
begin = GetAttr<std::vector<int64_t>>(kernel_node, "begin");
} else {
// The value of dynamic attr can only be obtained after the InferShape() of dynamic kernel is executed
if (depend_tensor_map_.empty()) {
if (GetDynamicAttrIntValue(kernel_node, kBeginIndex_, &begin) &&
GetDynamicAttrIntValue(kernel_node, kSizeIndex_, &size)) {
get_dynamic_attr_value_ = true;
}
if (!get_dynamic_attr_value_) {
return;
}
begin = GetDynamicAttrIntValue(kernel_node, kBeginIndex_);
size = GetDynamicAttrIntValue(kernel_node, kSizeIndex_);
get_dynamic_attr_value_ = true;
}
if (size.size() != input_shape.size() || begin.size() != input_shape.size()) {
@@ -220,9 +235,8 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
<< "of size: " << size.size() << ", the dimension of begin: " << begin.size()
<< ", the dimension of input_x: " << input_shape.size();
}
const int64_t NEG_ONE = -1;
for (size_t i = 0; i < input_shape.size(); i++) {
if (size[i] == NEG_ONE) {
if (size[i] == -1) {
size[i] = input_shape[i] - begin[i];
}
if (input_shape[i] <= 0 || size[i] <= 0) {
@@ -245,6 +259,8 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
std::vector<int32_t> size_;
std::vector<int32_t> input_shape_;
size_t input_size_{0};
size_t output_size_{0};
bool is_null_input_{false};
bool is_dynamic_attr_{false};
bool get_dynamic_attr_value_{false};
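CheckParam keeps the usual Slice convention that size[i] == -1 means "from begin[i] to the end of dimension i". A minimal sketch of that resolution (hypothetical helper, illustrative values):

    def resolve_slice_size(input_shape, begin, size):
        # size[i] == -1 is resolved to everything from begin[i] to the end,
        # mirroring the check inside CheckParam above.
        return [input_shape[i] - begin[i] if s == -1 else s
                for i, s in enumerate(size)]

    assert resolve_slice_size([3, 4], [1, 0], [-1, 4]) == [2, 4]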
@@ -346,9 +346,12 @@ class NativeGpuKernelMod : public GpuKernelMod {
return std::equal(s1.begin(), s1.end(), s2_trans.begin(), s2_trans.end());
}
inline std::vector<int64_t> GetDynamicAttrIntValue(const CNodePtr &kernel_node, const size_t input_index) {
inline bool GetDynamicAttrIntValue(const CNodePtr &kernel_node, const size_t input_index,
std::vector<int64_t> *attr_value) {
// The value of dynamic attr can only be obtained after the InferShape() is executed
if (depend_tensor_map_.empty()) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the depend_tensor_map is empty!";
MS_LOG(DEBUG) << "For '" << kernel_name_ << "', the depend_tensor_map is currently empty";
return false;
}
auto depend_iter = depend_tensor_map_.find(input_index);
if (depend_iter == depend_tensor_map_.end()) {
@@ -366,7 +369,8 @@ class NativeGpuKernelMod : public GpuKernelMod {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the format of the " << input_index
<< "th input currently should be the default format and does not support " << data_format;
}
return GetTensorIntValue(input_tensor, input_index);
*attr_value = GetTensorIntValue(input_tensor, input_index);
return true;
}
};
} // namespace kernel
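The signature change turns the throwing getter into a try-get: callers receive false while depend_tensor_map_ is still empty (i.e. before InferShape has run) and can defer initialization instead of aborting. A minimal Python sketch of the same pattern, with hypothetical names:

    def try_get_dynamic_int_value(depend_tensor_map, input_index):
        # Returns (ok, value); ok stays False until InferShape has filled the map.
        if not depend_tensor_map or input_index not in depend_tensor_map:
            return False, None
        return True, list(depend_tensor_map[input_index])

    ok, axis = try_get_dynamic_int_value({}, 1)
    assert not ok                      # value not ready yet: caller defers and retries
    ok, axis = try_get_dynamic_int_value({1: (1,)}, 1)
    assert ok and axis == [1]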
@@ -33,30 +33,7 @@ constexpr size_t NO_CUDNN_BATCHNORM_OPS_BN_INPUT_NUM = 8;
template <typename T>
class BatchNormGradGpuKernelMod : public NativeGpuKernelMod {
public:
BatchNormGradGpuKernelMod()
: batch_(0),
channel_(0),
height_(0),
width_(0),
x_size_(0),
para_size_(0),
workspace_size_(0),
reserve_size_(0),
mode_(CUDNN_BATCHNORM_SPATIAL),
bn_ops_(CUDNN_BATCHNORM_OPS_BN),
epsilon_(10e-5),
is_train_(false),
is_null_input_(false),
x_desc_(nullptr),
y_desc_(nullptr),
dy_desc_(nullptr),
dx_desc_(nullptr),
dz_desc_(nullptr),
scale_bias_diff_desc_(nullptr),
activation_desc_(nullptr),
handle_(nullptr),
cudnn_data_type_(CUDNN_DATA_FLOAT),
beta_data_diff_(0) {}
BatchNormGradGpuKernelMod() { ResetResource(); }
~BatchNormGradGpuKernelMod() override { DestroyResource(); }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
@@ -171,6 +148,33 @@ class BatchNormGradGpuKernelMod : public NativeGpuKernelMod {
return true;
}
void ResetResource() noexcept override {
ResetSizeLists();
batch_ = 0;
channel_ = 0;
height_ = 0;
width_ = 0;
x_size_ = 0;
para_size_ = 0;
workspace_size_ = 0;
reserve_size_ = 0;
mode_ = CUDNN_BATCHNORM_SPATIAL;
bn_ops_ = CUDNN_BATCHNORM_OPS_BN;
epsilon_ = 10e-5;
is_train_ = false;
is_null_input_ = false;
x_desc_ = nullptr;
y_desc_ = nullptr;
dy_desc_ = nullptr;
dx_desc_ = nullptr;
dz_desc_ = nullptr;
scale_bias_diff_desc_ = nullptr;
activation_desc_ = nullptr;
handle_ = nullptr;
cudnn_data_type_ = CUDNN_DATA_FLOAT;
beta_data_diff_ = 0;
}
protected:
void InitResource() override {
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
@@ -18,6 +18,7 @@
namespace mindspore {
namespace kernel {
namespace {
// float64
MS_REG_GPU_KERNEL_ONE(Flatten, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
FlattenFwdGpuKernelMod, double)
@@ -25,6 +26,7 @@ MS_REG_GPU_KERNEL_ONE(Reshape, KernelAttr().AddInputAttr(kNumberTypeFloat64).Add
FlattenFwdGpuKernelMod, double)
MS_REG_GPU_KERNEL_ONE(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
FlattenFwdGpuKernelMod, double)
// float32
MS_REG_GPU_KERNEL_ONE(Flatten, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
FlattenFwdGpuKernelMod, float)
@@ -32,6 +34,7 @@ MS_REG_GPU_KERNEL_ONE(Reshape, KernelAttr().AddInputAttr(kNumberTypeFloat32).Add
FlattenFwdGpuKernelMod, float)
MS_REG_GPU_KERNEL_ONE(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
FlattenFwdGpuKernelMod, float)
// float16
MS_REG_GPU_KERNEL_ONE(Flatten, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
FlattenFwdGpuKernelMod, half)
@@ -39,6 +42,7 @@ MS_REG_GPU_KERNEL_ONE(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeFloat16).
FlattenFwdGpuKernelMod, half)
MS_REG_GPU_KERNEL_ONE(Reshape, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
FlattenFwdGpuKernelMod, half)
// int64
MS_REG_GPU_KERNEL_ONE(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
FlattenFwdGpuKernelMod, int64_t)
@@ -46,13 +50,15 @@ MS_REG_GPU_KERNEL_ONE(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOu
FlattenFwdGpuKernelMod, int64_t)
MS_REG_GPU_KERNEL_ONE(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
FlattenFwdGpuKernelMod, int64_t)
// int32
MS_REG_GPU_KERNEL_ONE(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
FlattenFwdGpuKernelMod, int)
FlattenFwdGpuKernelMod, int32_t)
MS_REG_GPU_KERNEL_ONE(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
FlattenFwdGpuKernelMod, int)
FlattenFwdGpuKernelMod, int32_t)
MS_REG_GPU_KERNEL_ONE(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
FlattenFwdGpuKernelMod, int)
FlattenFwdGpuKernelMod, int32_t)
// int16
MS_REG_GPU_KERNEL_ONE(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
FlattenFwdGpuKernelMod, int16_t)
@@ -107,5 +113,71 @@ MS_REG_GPU_KERNEL_ONE(Reshape, KernelAttr().AddInputAttr(kNumberTypeBool).AddOut
FlattenFwdGpuKernelMod, bool)
MS_REG_GPU_KERNEL_ONE(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
FlattenFwdGpuKernelMod, bool)
} // namespace
// dynamic kernel:
namespace {
// float64
MS_REG_GPU_KERNEL_TWO(
Reshape,
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64),
FlattenFwdGpuKernelMod, double, int64_t)
// float32
MS_REG_GPU_KERNEL_TWO(
Reshape,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32),
FlattenFwdGpuKernelMod, float, int64_t)
// float16
MS_REG_GPU_KERNEL_TWO(
Reshape,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16),
FlattenFwdGpuKernelMod, half, int64_t)
// int64
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
FlattenFwdGpuKernelMod, int64_t, int64_t)
// int32
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32),
FlattenFwdGpuKernelMod, int32_t, int64_t)
// int16
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt16),
FlattenFwdGpuKernelMod, int16_t, int64_t)
// int8
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt8),
FlattenFwdGpuKernelMod, char, int64_t)
// uint64
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeUInt64),
FlattenFwdGpuKernelMod, uint64_t, int64_t)
// uint32
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeUInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeUInt32),
FlattenFwdGpuKernelMod, uint, int64_t)
// uint16
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeUInt16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeUInt16),
FlattenFwdGpuKernelMod, uint16_t, int64_t)
// uint8
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeUInt8),
FlattenFwdGpuKernelMod, uchar, int64_t)
// bool
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeBool).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeBool),
FlattenFwdGpuKernelMod, bool, int64_t)
} // namespace
} // namespace kernel
} // namespace mindspore
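These two-input registrations let Reshape take its target shape as an int64 tensor. A minimal usage sketch (illustrative values; the same pattern is exercised by test_dynamic_reshape below, which feeds Reshape the output of DynamicShape):

    import numpy as np
    import mindspore as ms
    import mindspore.ops as ops

    x = ms.Tensor(np.arange(6, dtype=np.float32).reshape(2, 3))
    shape = ms.Tensor(np.array([3, 2], dtype=np.int64))  # the second, int64 input
    out = ops.Reshape()(x, shape)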
@@ -25,7 +25,7 @@
namespace mindspore {
namespace kernel {
template <typename T>
template <typename T, typename S = int64_t>
class FlattenFwdGpuKernelMod : public NativeGpuKernelMod {
public:
FlattenFwdGpuKernelMod() : input_size_(0), is_null_input_(false) {}
@@ -48,6 +48,7 @@ class FlattenFwdGpuKernelMod : public NativeGpuKernelMod {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
kernel_node_ = kernel_node;
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
@@ -75,6 +76,21 @@ class FlattenFwdGpuKernelMod : public NativeGpuKernelMod {
void InitSizeLists() override {
input_size_list_.push_back(input_size_);
output_size_list_.push_back(input_size_);
InitDynamicAttrSizeLists(kernel_node_.lock());
}
inline void InitDynamicAttrSizeLists(const CNodePtr &kernel_node) {
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num == 1) {
return;
}
for (size_t index = 1; index < input_num; ++index) {
size_t input_size = sizeof(S);
for (size_t x : AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, index)) {
input_size *= x;
}
input_size_list_.push_back(input_size);
}
}
private:
@@ -49,6 +49,8 @@ class DynamicBroadcastGradientArgsGpuKernelMod : public NativeGpuKernelMod {
CHECK_CUDA_RET_WITH_EXCEPT(
kernel_node_, cudaMemcpyAsync(&x1_value[0], s1_addr, input_size_list_[1], cudaMemcpyDeviceToHost, cuda_stream),
"DynamicBroadcastGradientArgs copy s1 value failed");
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(cuda_stream),
"DynamicBroadcastGradientArgs cudaStreamSynchronized failed");
auto grad_reduce_idx = CalOut({x0_value, x1_value});
r0_size_ = SetOuputValue(r0_addr, grad_reduce_idx[0], x0_value.size(), cuda_stream);
r1_size_ = SetOuputValue(r1_addr, grad_reduce_idx[1], x1_value.size(), cuda_stream);
@@ -84,7 +84,8 @@ std::set<int64_t> GetDependsFormMap(const CNodePtr &cnode) {
{kReshape, ShapeSet{1}},
{kSlice, ShapeSet{1, 2}},
{kSliceGrad, ShapeSet{2, 3}},
{kDynamicBroadcastTo, ShapeSet{1}}};
{kDynamicBroadcastTo, ShapeSet{1}},
{kReduceSum, ShapeSet{1}}};
MS_EXCEPTION_IF_NULL(cnode);
if (cnode->inputs().empty()) {
@@ -98,7 +99,7 @@ std::set<int64_t> GetDependsFormMap(const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(ms_context);
auto device = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
// Special dynamic shape depends for Ascend.
if (device == kAscendDevice && (prim_name == kReduceSum || prim_name == kTranspose)) {
if (device == kAscendDevice && prim_name == kTranspose) {
return {1};
}
@@ -45,27 +45,34 @@ bool BatchNormGrad::get_is_training() const {
return GetValue<bool>(value_ptr);
}
AbstractBasePtr BatchNormGradInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) {
constexpr auto kInputNum = 6;
abstract::TupleShapePtr BatchNormGradInferShape(const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) {
MS_EXCEPTION_IF_NULL(primitive);
for (auto item : input_args) {
MS_EXCEPTION_IF_NULL(item);
}
const int64_t input_num = 5;
(void)CheckAndConvertUtils::CheckInteger("BatchNormGrad infer", SizeToLong(input_args.size()), kGreaterEqual,
input_num, primitive->name());
auto y_backprop_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[1]->BuildShape())[kShape];
CheckAndConvertUtils::Check("BatchNorm y_backprop_shape", y_backprop_shape, kEqual, x_shape);
auto prim_name = primitive->name();
(void)CheckAndConvertUtils::CheckInputArgs(input_args, kGreaterEqual, kInputNum, prim_name);
auto x_shape_ptr = input_args[kInputIndex1]->BuildShape();
auto scale_shape_ptr = input_args[kInputIndex2]->BuildShape();
return std::make_shared<abstract::TupleShape>(
std::vector<abstract::BaseShapePtr>{x_shape_ptr, scale_shape_ptr, scale_shape_ptr});
}
auto dx = input_args[kInputIndex1]->Broaden();
auto dscale = input_args[kInputIndex2]->Broaden();
auto reserve_1 = input_args[kInputIndex3]->Broaden();
auto reserve_2 = input_args[kInputIndex4]->Broaden();
TuplePtr BatchNormGradInferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
MS_EXCEPTION_IF_NULL(primitive);
auto prim_name = primitive->name();
(void)CheckAndConvertUtils::CheckInputArgs(input_args, kGreaterEqual, kInputNum, prim_name);
auto x_type_ptr = input_args[kInputIndex1]->BuildType();
auto scale_type_ptr = input_args[kInputIndex2]->BuildType();
return std::make_shared<Tuple>(std::vector<TypePtr>{x_type_ptr, scale_type_ptr, scale_type_ptr});
}
AbstractBasePtrList rets = {dx, dscale, dscale, reserve_1, reserve_2};
return std::make_shared<abstract::AbstractTuple>(rets);
AbstractBasePtr BatchNormGradInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) {
MS_EXCEPTION_IF_NULL(primitive);
return abstract::MakeAbstract(BatchNormGradInferShape(primitive, input_args),
BatchNormGradInferType(primitive, input_args));
}
REGISTER_PRIMITIVE_C(kNameBatchNormGrad, BatchNormGrad);
REGISTER_PRIMITIVE_EVAL_IMPL(BatchNormGrad, prim::kPrimBatchNormGrad, BatchNormGradInfer, nullptr, true);
} // namespace ops
} // namespace mindspore
@@ -118,7 +118,7 @@ class SqrtGrad(PrimitiveWithInfer):
        return x_dtype


class BatchNormGrad(PrimitiveWithInfer):
class BatchNormGrad(Primitive):
    """Performs grad of BatchNorm operation."""

    @prim_attr_register
@@ -127,13 +127,6 @@ class BatchNormGrad(PrimitiveWithInfer):
        self.epsilon = validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', self.name)
        self.data_format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)

    def infer_shape(self, y_backprop_shape, x_shape, scale_shape, save_mean_shape, save_variance_shape, reserve):
        validator.check("BatchNorm y_backprop_shape", y_backprop_shape, "BatchNorm x_shape", x_shape)
        return (x_shape, scale_shape, scale_shape)

    def infer_dtype(self, y_backprop_type, x_type, scale_type, save_mean_shape, save_variance_shape, reserve):
        return (x_type, scale_type, scale_type)


class SyncBatchNormGrad(PrimitiveWithInfer):
    """Performs grad of SyncBatchNorm operation."""
@@ -0,0 +1,52 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest

import mindspore.nn as nn
import mindspore.context as context
from mindspore.ops.operations import _inner_ops

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")


class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.args = _inner_ops.DynamicBroadcastGradientArgs()

    def construct(self, s0, s1):
        return self.args(s0, s1)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_net():
    """
    Feature: DynamicBroadcastGradientArgs op.
    Description: test cases for DynamicBroadcastGradientArgs op.
    Expectation: the results match the expected arrays.
    """
    shape0 = (4, 2, 1)
    shape1 = (2, 7)
    net = Net()
    r0, r1 = net(shape0, shape1)
    r0_expected = [2]
    r1_expected = [0]

    assert np.array_equal(r0_expected, r0.asnumpy())
    assert np.array_equal(r1_expected, r1.asnumpy())
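For reference, the expected indices follow the usual broadcast-gradient rule: each input must be reduced over every output axis where its left-padded dimension is 1 while the broadcast dimension is larger. A minimal sketch reproducing the expectation above (hypothetical helper, illustrative only):

    def broadcast_gradient_args(s0, s1):
        # Left-pad the shorter shape with 1s, broadcast, then collect the axes
        # where each input's dimension is 1 while the output dimension is not.
        n = max(len(s0), len(s1))
        p0 = (1,) * (n - len(s0)) + tuple(s0)
        p1 = (1,) * (n - len(s1)) + tuple(s1)
        out = tuple(max(a, b) for a, b in zip(p0, p1))
        r0 = [i for i, (a, o) in enumerate(zip(p0, out)) if a == 1 and o > 1]
        r1 = [i for i, (b, o) in enumerate(zip(p1, out)) if b == 1 and o > 1]
        return r0, r1

    assert broadcast_gradient_args((4, 2, 1), (2, 7)) == ([2], [0])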
@@ -102,31 +102,159 @@ class ConcatNet(nn.Cell):
        return self.op((x1, x2))


def dynamic_concat_run(is_grad):
    axis = 1
    dtype = np.float32
    data_list = []
    for i in [2, 64]:
        data = []
        data.append(np.random.rand(i, 16).astype(dtype))
        data.append(np.random.rand(i, 32).astype(dtype))
        if is_grad:
            data.append(np.random.rand(i, 48).astype(dtype))
        data_list.append(tuple(data))
    column_names = get_columns(len(data_list[0]))
    dataset = ds.GeneratorDataset(data_list, column_names, shuffle=False)
    dynamic_columns = {column_names[0]: [
        None, 16], column_names[1]: [None, 32]}
    if is_grad:
        dynamic_columns[column_names[-1]] = [None, 48]
    dataset.set_dynamic_columns(columns=dynamic_columns)
    net = ConcatNet(axis)
    if is_grad:
        net = GradNetWrtX(net)
    output = dynamic_shape_sink_process(net, dataset)
    output_cmp = fixed_shape_process(net, dataset)
    assert compare(output, output_cmp)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_dynamic_concat():
def test_dynamic_concat_forward():
    """
    Feature: Test Concat and its backward.
    Feature: Test Concat.
    Description: The shape of inputs is dynamic.
    Expectation: Assert that results are consistent with fixed shape.
    """
    axis = 1
    dynamic_concat_run(False)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_dynamic_concat_backward():
    """
    Feature: Test backward of Concat.
    Description: The shape of inputs is dynamic.
    Expectation: Assert that results are consistent with fixed shape.
    """
    dynamic_concat_run(True)
class BatchNormNet(nn.Cell):
    def __init__(self, c):
        super(BatchNormNet, self).__init__()
        self.bn = nn.BatchNorm1d(c)

    def construct(self, input_data):
        x = self.bn(input_data)
        return x


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_dynamic_batchnorm():
    """
    Feature: Test BatchNorm and its backward.
    Description: The shape of inputs is dynamic.
    Expectation: Assert that results are consistent with fixed shape.
    """
    c = 256
    dtype = np.float32
    data_list = []
    for i in [2, 64]:
        data = []
        data.append(np.random.rand(i, 16).astype(dtype))
        data.append(np.random.rand(i, 32).astype(dtype))
        data.append(np.random.rand(i, 48).astype(dtype))
        data.append(np.random.rand(i, c).astype(dtype))
        data.append(np.random.rand(i, c).astype(dtype))
        data_list.append(tuple(data))
    column_names = get_columns(len(data_list[0]))
    dataset = ds.GeneratorDataset(data_list, column_names, shuffle=False)
    dataset.set_dynamic_columns(
        columns={column_names[0]: [None, 16], column_names[1]: [None, 32], column_names[2]: [None, 48]})
    net = GradNetWrtX(ConcatNet(axis))
    dynamic_columns = {column_names[0]: [None, c], column_names[1]: [None, c]}
    dataset.set_dynamic_columns(columns=dynamic_columns)
    net = GradNetWrtX(BatchNormNet(c))
    gradients = dynamic_shape_sink_process(net, dataset)
    gradients_cmp = fixed_shape_process(net, dataset)
    assert compare(gradients, gradients_cmp)
class ReshapeNet(nn.Cell):
    def construct(self, x, y):
        shape_of_y = ops.DynamicShape()(y)
        return ops.Reshape()(x, shape_of_y)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_dynamic_reshape():
    """
    Feature: Test Reshape.
    Description: The shape of inputs is dynamic.
    Expectation: Assert that results are consistent with fixed shape.
    """
    dtype = np.float32
    data_list = []
    for i in [2, 96]:
        data = []
        data.append(np.random.rand(i, 64, 1).astype(dtype))
        data.append(np.random.rand(i, 64).astype(dtype))
        data_list.append(tuple(data))
    column_names = get_columns(len(data_list[0]))
    dataset = ds.GeneratorDataset(data_list, column_names, shuffle=False)
    dynamic_columns = {column_names[0]: [
        None, 64, 1], column_names[1]: [None, 64]}
    dataset.set_dynamic_columns(columns=dynamic_columns)
    net = ReshapeNet()
    output = dynamic_shape_sink_process(net, dataset)
    output_cmp = fixed_shape_process(net, dataset)
    assert compare(output, output_cmp)
class ReduceSumNet(nn.Cell):
    def __init__(self):
        super(ReduceSumNet, self).__init__()
        self.reduce = ops.ReduceSum()

    def construct(self, x, y):
        return self.reduce(x, y)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_dynamic_reduce_sum():
    """
    Feature: Test ReduceSum.
    Description: The shape of inputs is dynamic.
    Expectation: Assert that results are consistent with the result computed by numpy.
    """
    dtype = np.float32
    data_list = []
    for i in [2, 96]:
        data = []
        data.append(np.random.rand(i, 256).astype(dtype))
        data.append(np.array([1], dtype=np.int64))
        data_list.append(tuple(data))
    column_names = get_columns(len(data_list[0]))
    dataset = ds.GeneratorDataset(data_list, column_names, shuffle=False)
    dynamic_columns = {column_names[0]: [None, 256]}
    dataset.set_dynamic_columns(columns=dynamic_columns)
    net = ReduceSumNet()
    output = dynamic_shape_sink_process(net, dataset)
    # A tensor (dynamic) axis for ReduceSum is not yet supported under fixed
    # shape, so the result is compared against numpy instead.
    inputs = data_list[0]
    output_cmp = np.sum(inputs[0], inputs[1][0])
    assert np.allclose(output.asnumpy(), output_cmp, rtol=1.0e-4, atol=1.0e-4)