| @@ -67,7 +67,7 @@ void ArgMaxWithValueCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| num_after_axis_ *= shape_[i]; | |||
| } | |||
| } | |||
| dim_axis_ = shape_[IntToSize(axis)]; | |||
| dim_axis_ = shape_[LongToSize(axis)]; | |||
| } | |||
| template <typename T> | |||
| @@ -52,11 +52,11 @@ bool CheckValidCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &input | |||
| const size_t right_x = i * 4 + 2; | |||
| const size_t right_y = i * 4 + 3; | |||
| size_t valid_flag = 0; | |||
| valid_flag |= !IntToSize(anchor_box[left_x] >= ZERO); | |||
| valid_flag |= !IntToSize(anchor_box[left_y] >= ZERO); | |||
| valid_flag |= !IntToSize(img_metas[OFFSET_ONE] * img_metas[OFFSET_TWO] - ONE >= anchor_box[right_x]); | |||
| valid_flag |= !IntToSize(img_metas[OFFSET_ZERO] * img_metas[OFFSET_TWO] - ONE >= anchor_box[right_y]); | |||
| uint32_t valid_flag = 0; | |||
| valid_flag |= !static_cast<uint32_t>(anchor_box[left_x] >= ZERO); | |||
| valid_flag |= !static_cast<uint32_t>(anchor_box[left_y] >= ZERO); | |||
| valid_flag |= !static_cast<uint32_t>(img_metas[OFFSET_ONE] * img_metas[OFFSET_TWO] - ONE >= anchor_box[right_x]); | |||
| valid_flag |= !static_cast<uint32_t>(img_metas[OFFSET_ZERO] * img_metas[OFFSET_TWO] - ONE >= anchor_box[right_y]); | |||
| output[i] = !static_cast<bool>(valid_flag); | |||
| } | |||
| @@ -37,7 +37,7 @@ void BatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (x_shape.size() == NC) { | |||
| (void)x_shape.insert(x_shape.end(), 2, 1); | |||
| (void)x_shape.insert(x_shape.end(), (NCHW - NC), 1); | |||
| } else if (x_shape.size() != NCHW) { | |||
| MS_LOG(EXCEPTION) << "Fused batchnorm support nc or nchw input!"; | |||
| } | |||
| @@ -20,16 +20,16 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| size_t NmsRoundUpPower2(int v) { /* Rounds v up to a power of two: decrement, copy the highest set bit into every lower bit, then increment. */ | |||
| constexpr size_t ONE = 1, TWO = 2, FOUR = 4, EIGHT = 8, SIXTEEN = 16; /* NOTE(review): appears to be the pre-change declaration, superseded by the next row - confirm */ | |||
| constexpr uint32_t ONE = 1, TWO = 2, FOUR = 4, EIGHT = 8, SIXTEEN = 16; /* shift distances used by the OR cascade below */ | |||
| v--; /* decrement first so that an input which is already a power of two maps to itself */ | |||
| v = IntToSize(v); /* NOTE(review): this row and the seven after it appear to be the pre-change body (diff shown without +/- markers) - confirm */ | |||
| v |= v >> ONE; | |||
| v |= v >> TWO; | |||
| v |= v >> FOUR; | |||
| v |= v >> EIGHT; | |||
| v |= v >> SIXTEEN; | |||
| v++; | |||
| return v; | |||
| size_t value = IntToSize(v); /* work on an unsigned copy; NOTE(review): IntToSize is defined elsewhere, presumably it rejects a negative argument (e.g. v == 0 on entry) - confirm */ | |||
| value |= value >> ONE; /* each step doubles the run of 1 bits below the highest set bit */ | |||
| value |= value >> TWO; | |||
| value |= value >> FOUR; | |||
| value |= value >> EIGHT; | |||
| value |= value >> SIXTEEN; /* shifts of 1+2+4+8+16 fill all lower bits for values that fit in 32 bits */ | |||
| value++; /* a block of low 1 bits plus one is the next power of two */ | |||
| return value; | |||
| } | |||
| template <typename T> | |||
| @@ -90,7 +90,7 @@ void NMSWithMaskCPUKernel<T>::MaskInit(size_t numSq, bool *row_mask) { | |||
| // copy data from input to output array sorted by indices returned from bitonic sort | |||
| // flips boxes if asked to, default - false -> if (x1/y1 > x2/y2) | |||
| template <typename T> | |||
| void NMSWithMaskCPUKernel<T>::PopulateOutput(T *data_in, T *data_out, const int *index_buff, const int num, | |||
| void NMSWithMaskCPUKernel<T>::PopulateOutput(const T *data_in, T *data_out, const int *index_buff, const int num, | |||
| int box_size, bool flip_mode) { | |||
| auto task = [this, &index_buff, &data_in, &data_out, flip_mode, num, box_size](int start, int end) { | |||
| for (int box_num = start; box_num < end; box_num++) { | |||
| @@ -139,10 +139,10 @@ void NMSWithMaskCPUKernel<T>::Preprocess(const int num, int *sel_idx, bool *sel_ | |||
| template <typename T> | |||
| bool NMSWithMaskCPUKernel<T>::IouDecision(const T *output, int box_A_start, int box_B_start, float IOU_value) { | |||
| constexpr size_t X1_OFFSET = 0; | |||
| constexpr size_t Y1_OFFSET = 1; | |||
| constexpr size_t X2_OFFSET = 2; | |||
| constexpr size_t Y2_OFFSET = 3; | |||
| constexpr int X1_OFFSET = 0; | |||
| constexpr int Y1_OFFSET = 1; | |||
| constexpr int X2_OFFSET = 2; | |||
| constexpr int Y2_OFFSET = 3; | |||
| T x_1 = std::max(output[box_A_start + X1_OFFSET], output[box_B_start + X1_OFFSET]); | |||
| T y_1 = std::max(output[box_A_start + Y1_OFFSET], output[box_B_start + Y1_OFFSET]); | |||
| T x_2 = std::min(output[box_A_start + X2_OFFSET], output[box_B_start + X2_OFFSET]); | |||
| @@ -23,11 +23,11 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| constexpr size_t X0 = 0; | |||
| constexpr size_t Y0 = 1; | |||
| constexpr size_t X1 = 2; | |||
| constexpr size_t Y1 = 3; | |||
| constexpr size_t SCORE = 4; | |||
| constexpr int X0 = 0; | |||
| constexpr int Y0 = 1; | |||
| constexpr int X1 = 2; | |||
| constexpr int Y1 = 3; | |||
| constexpr int SCORE = 4; | |||
| constexpr size_t INPUT_NUM = 1; | |||
| constexpr size_t OUTPUT_NUM = 3; | |||
| template <typename T> | |||
| @@ -49,7 +49,8 @@ class NMSWithMaskCPUKernel : public CPUKernel { | |||
| void MaskInit(size_t numSq, bool *row_mask); | |||
| void PopulateOutput(T *data_in, T *data_out, const int *index_buff, const int num, int box_size, bool flip_mode); | |||
| void PopulateOutput(const T *data_in, T *data_out, const int *index_buff, const int num, int box_size, | |||
| bool flip_mode); | |||
| void Preprocess(const int num, int *sel_idx, bool *sel_boxes); | |||
| @@ -108,29 +108,36 @@ void RandomChoiceWithMaskCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| dims_.emplace_back(input_i_shape[j]); | |||
| } | |||
| } | |||
| input_dim_size = SizeToInt(dims_.size()); | |||
| if (input_dim_size < 1 || input_dim_size > MAX_INPUT_DIMS) { | |||
| MS_LOG(EXCEPTION) << "Input dim size is " << input_dim_size << ", which is not supported."; | |||
| } | |||
| } | |||
| void RandomChoiceWithMaskCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { /* Runs the base-class sizing, then appends four byte sizes to workspace_size_list_. */ | |||
| CPUKernel::InitInputOutputSize(kernel_node); /* base implementation runs before the kernel-specific entries are added */ | |||
| GetInputTotalCount(dims_, &input_total_count, input_dim_size); /* helper defined elsewhere; presumably writes the element count derived from dims_ into input_total_count - confirm */ | |||
| size_t temp_output_length = count_ > 0 ? count_ : input_total_count; /* count_ when it is positive, otherwise input_total_count */ | |||
| workspace_size_list_.push_back(IntToSize(input_total_count) * sizeof(int)); /* 1st size pushed: input_total_count ints; Launch casts workspace[0] to input_dim - presumably this buffer, confirm */ | |||
| workspace_size_list_.push_back(temp_output_length * sizeof(int)); /* 2nd size pushed: temp_output_length ints; Launch casts workspace[1] to tmp_output - presumably this buffer, confirm */ | |||
| workspace_size_list_.push_back(temp_output_length * sizeof(int)); /* 3rd size pushed: temp_output_length ints; Launch casts workspace[2] to mask_dim - presumably this buffer, confirm */ | |||
| workspace_size_list_.push_back(temp_output_length * IntToSize(input_dim_size) * sizeof(int)); /* 4th size pushed: temp_output_length * input_dim_size ints; Launch casts workspace[3] to output - presumably this buffer, confirm */ | |||
| } | |||
| bool RandomChoiceWithMaskCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto *input = reinterpret_cast<bool *>(inputs[0]->addr); | |||
| auto *input_dim = reinterpret_cast<int *>(workspace[0]->addr); | |||
| auto *tmp_output = reinterpret_cast<int *>(workspace[1]->addr); | |||
| auto *mask_dim = reinterpret_cast<int *>(workspace[2]->addr); | |||
| auto *output = reinterpret_cast<int *>(workspace[3]->addr); | |||
| auto *output_coordinate = reinterpret_cast<int32_t *>(outputs[0]->addr); | |||
| auto *mask = reinterpret_cast<bool *>(outputs[1]->addr); | |||
| int32_t input_dim_size = SizeToInt(dims_.size()); | |||
| int32_t non_zero_num = 0; | |||
| int32_t input_total_count = 1; | |||
| if (input_dim_size < 1 || input_dim_size > MAX_INPUT_DIMS) { | |||
| MS_LOG(EXCEPTION) << "Input dim size is " << input_dim_size << ", which is not supported."; | |||
| } | |||
| int seedc = seed2_ != 0 ? seed2_ : (seed_ != 0 ? seed_ : SizeToInt(generator_())); | |||
| GetInputTotalCount(dims_, &input_total_count, input_dim_size); | |||
| int *input_dim = new (std::nothrow) int[input_total_count]; | |||
| if (input_dim == nullptr) { | |||
| MS_LOG(EXCEPTION) << "Malloc memory failed!"; | |||
| return false; | |||
| } | |||
| for (int32_t i = 0; i < input_total_count; i++) { | |||
| if (input[i] != 0) { | |||
| input_dim[non_zero_num] = i; | |||
| @@ -139,20 +146,6 @@ bool RandomChoiceWithMaskCPUKernel::Launch(const std::vector<kernel::AddressPtr> | |||
| } | |||
| GetOutputLength(&padding_flag, &output_length, &output_non_zero_length, count_, non_zero_num); | |||
| int *tmp_output = new (std::nothrow) int[output_length]; | |||
| if (tmp_output == nullptr) { | |||
| MS_LOG(EXCEPTION) << "Malloc memory failed!"; | |||
| delete[] input_dim; | |||
| return false; | |||
| } | |||
| int *mask_dim = new (std::nothrow) int[output_length]; | |||
| if (mask_dim == nullptr) { | |||
| MS_LOG(EXCEPTION) << "Malloc memory failed!"; | |||
| delete[] input_dim; | |||
| delete[] tmp_output; | |||
| return false; | |||
| } | |||
| (void)memset_s(mask_dim, IntToSize(output_length), 0X00, IntToSize(output_length)); | |||
| (void)memset_s(tmp_output, IntToSize(output_length), 0X00, IntToSize(output_length)); | |||
| @@ -177,47 +170,27 @@ bool RandomChoiceWithMaskCPUKernel::Launch(const std::vector<kernel::AddressPtr> | |||
| int32_t copy_output_length = 0; | |||
| if (output_length * input_dim_size >= INT_MAX || output_length * input_dim_size < 0) { | |||
| MS_LOG(EXCEPTION) << "Output size exceed INT_MAX"; | |||
| delete[] input_dim; | |||
| delete[] tmp_output; | |||
| delete[] mask_dim; | |||
| return false; | |||
| } | |||
| copy_output_length = output_length * input_dim_size; | |||
| int *output = new (std::nothrow) int[copy_output_length]; | |||
| if (output == nullptr) { | |||
| MS_LOG(EXCEPTION) << "Malloc memory failed!"; | |||
| delete[] input_dim; | |||
| delete[] tmp_output; | |||
| delete[] mask_dim; | |||
| return false; | |||
| } | |||
| (void)memset_s(output, IntToSize(copy_output_length), 0X00, IntToSize(copy_output_length)); | |||
| ParseOutputCoordinate(dims_, output_length, input_dim_size, input_total_count, tmp_output, output); | |||
| int32_t actual_output_length = SizeToInt(count_ * dims_.size()); | |||
| int32_t actual_output_length = count_ * SizeToInt(dims_.size()); | |||
| copy_output_length = std::min(actual_output_length, copy_output_length); | |||
| int32_t copy_output_bytes = 0; | |||
| if (INT_MAX / static_cast<int>(sizeof(int32_t)) < copy_output_length) { | |||
| MS_LOG(EXCEPTION) << "The output length is out of range!"; | |||
| delete[] input_dim; | |||
| delete[] mask_dim; | |||
| delete[] tmp_output; | |||
| delete[] output; | |||
| return false; | |||
| } | |||
| copy_output_bytes = copy_output_length * SizeToInt(sizeof(int32_t)); | |||
| auto ret = memcpy_s(output_coordinate, outputs[0]->size, output, IntToSize(copy_output_bytes)); | |||
| size_t copy_output_bytes = IntToSize(copy_output_length) * sizeof(int32_t); | |||
| auto ret = memcpy_s(output_coordinate, outputs[0]->size, output, copy_output_bytes); | |||
| if (ret != EOK) { | |||
| MS_LOG(INFO) << "memcpy_s failed, ret = " << ret; | |||
| return false; | |||
| } | |||
| UpdateOutput(dims_, non_zero_num, count_, output_length, mask_dim, output_coordinate, mask); | |||
| delete[] input_dim; | |||
| delete[] mask_dim; | |||
| delete[] tmp_output; | |||
| delete[] output; | |||
| return true; | |||
| } | |||
| @@ -34,10 +34,15 @@ class RandomChoiceWithMaskCPUKernel : public CPUKernel { | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| private: | |||
| int32_t input_dim_size = 0; | |||
| int32_t non_zero_num = 0; | |||
| int32_t input_total_count = 1; | |||
| bool padding_flag = false; | |||
| int32_t output_length = 0; | |||
| int32_t output_non_zero_length = 0; | |||
| @@ -58,12 +58,12 @@ bool ROIAlignCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const T *rois = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto out_data = reinterpret_cast<T *>(outputs[0]->addr); | |||
| size_t elem_num = IntToSize(roi_rows_ * channels_) * pooled_height_ * pooled_width_; | |||
| size_t elem_num = IntToSize(roi_rows_ * channels_ * pooled_height_ * pooled_width_); | |||
| auto task = [this, &input, &rois, &out_data](size_t start, size_t end) { | |||
| const T OFFSET = T(0.001); | |||
| const T ZERO = T(0.0); | |||
| for (size_t thread_idx = start; thread_idx < end; thread_idx++) { | |||
| int n = SizeToInt(thread_idx / pooled_width_ / pooled_height_) / channels_; | |||
| int n = SizeToInt(thread_idx) / pooled_width_ / pooled_height_ / channels_; | |||
| const T *roi_box = rois + n * roi_cols_; | |||
| if (roi_box[1] < OFFSET && roi_box[3] < OFFSET && roi_box[1] > -OFFSET && roi_box[3] > -OFFSET) { | |||
| continue; | |||
| @@ -21,7 +21,7 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| constexpr size_t ROIS_COLS = 5; | |||
| constexpr int ROIS_COLS = 5; | |||
| constexpr size_t X_DIMS = 4; | |||
| constexpr int CHANNEL = 1; | |||
| constexpr int HEIGHT = 2; | |||
| @@ -132,9 +132,9 @@ bool ROIAlignGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inp | |||
| int c, ph, pw, roi_bin_grid_h, roi_bin_grid_w; | |||
| T bin_size_h, bin_size_w, roi_start_h, roi_start_w; | |||
| bin_box(thread_idx, rois, roi_cols_, spatial_scale_, sample_num_, roi_end_mode_, channels_, height_, width_, | |||
| pooled_height_, pooled_width_, &offset, &n, &c, &ph, &pw, &roi_bin_grid_h, &roi_bin_grid_w, &bin_size_h, | |||
| &bin_size_w, &roi_start_h, &roi_start_w); | |||
| bin_box(SizeToInt(thread_idx), rois, roi_cols_, spatial_scale_, sample_num_, roi_end_mode_, channels_, height_, | |||
| width_, pooled_height_, pooled_width_, &offset, &n, &c, &ph, &pw, &roi_bin_grid_h, &roi_bin_grid_w, | |||
| &bin_size_h, &bin_size_w, &roi_start_h, &roi_start_w); | |||
| // (n, c, ph, pw) is the base param of pooled map | |||
| const T count_points_in_grid_cell = static_cast<T>(roi_bin_grid_h) * static_cast<T>(roi_bin_grid_w); | |||
| @@ -22,7 +22,7 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| constexpr size_t ROIS_COLS = 5; | |||
| constexpr int ROIS_COLS = 5; | |||
| constexpr size_t DY_DIMS = 4; | |||
| constexpr int BATCH = 0; | |||
| constexpr int CHANNEL = 1; | |||
| @@ -86,7 +86,7 @@ void ScatterNdCPUKernel<S, T>::InitKernel(const CNodePtr &kernel_node) { | |||
| int out_stride = 1; | |||
| out_strides_.push_back(out_stride); | |||
| for (int i = indices_unit_rank_ - TWO; i >= 0; i--) { | |||
| out_stride *= SizeToInt(shape[i + 1]); | |||
| out_stride *= SizeToInt(shape[IntToSize(i + 1)]); | |||
| out_strides_.push_back(out_stride); | |||
| } | |||
| reverse(out_strides_.begin(), out_strides_.end()); | |||