From: @fuzhiye Reviewed-by: @zhang_xue_tong, @hangangqiang Signed-off-by: @zhang_xue_tong Tags: v1.1.0
| @@ -20,6 +20,7 @@ | |||
| typedef struct SpaceToBatchParameter { | |||
| OpParameter op_parameter_; | |||
| bool need_paddings_; | |||
| int m_; | |||
| int block_sizes_[4]; | |||
| int paddings_[4]; | |||
| int input_shape_[4]; | |||
| @@ -46,47 +46,40 @@ void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *bloc | |||
| } | |||
| } | |||
| void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, const int *padding, | |||
| const int *out_shape, int32_t zp) { | |||
| void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, SpaceToBatchParameter *param, int32_t zp) { | |||
| int *in_shape = param->input_shape_; | |||
| int *out_shape = param->output_shape_; | |||
| int *paddings = param->paddings_; | |||
| int block_shape_h = param->block_sizes_[0]; | |||
| int block_shape_w = param->m_ == 2 ? param->block_sizes_[1] : 1; | |||
| int in_b = in_shape[0]; | |||
| int in_h = in_shape[1]; | |||
| int in_w = in_shape[2]; | |||
| int in_c = in_shape[3]; | |||
| int channel = in_shape[3]; | |||
| int out_h = out_shape[1]; | |||
| int out_w = out_shape[2]; | |||
| int out_c = out_shape[3]; | |||
| size_t ped_h_num = out_w * out_c; | |||
| size_t ped_h_size = ped_h_num * sizeof(int8_t); | |||
| size_t ped_w_size = out_c * sizeof(int8_t); | |||
| size_t out_offset = 0; | |||
| int in_strides[4]; | |||
| ComputeStrides(in_shape, in_strides, 4); | |||
| int out_strides[4]; | |||
| ComputeStrides(out_shape, out_strides, 4); | |||
| size_t copy_size = in_c * sizeof(int8_t); | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_offset0 = i * in_strides[0]; | |||
| for (int pad_h_top = 0; pad_h_top < padding[0]; ++pad_h_top) { | |||
| memset(output + out_offset, zp, ped_h_size); | |||
| out_offset += ped_h_num; | |||
| } | |||
| for (int j = 0; j < in_h; ++j) { | |||
| size_t in_offset1 = in_offset0 + j * in_strides[1]; | |||
| for (int pad_w_left = 0; pad_w_left < padding[2]; ++pad_w_left) { | |||
| memset(output + out_offset, zp, ped_w_size); | |||
| out_offset += out_c; | |||
| } | |||
| for (int k = 0; k < in_w; ++k) { | |||
| size_t in_offset2 = in_offset1 + k * in_strides[2]; | |||
| memcpy(output + out_offset, input + in_offset2, copy_size); | |||
| out_offset += in_c; | |||
| int pad_t = paddings[0]; | |||
| int pad_l = param->m_ == 2 ? paddings[2] : 0; | |||
| for (int i = 0; i < out_shape[0]; ++i) { | |||
| int in_batch = i % in_b; | |||
| int offset_w = (i / in_b) % block_shape_w; | |||
| int offset_h = (i / in_b) / block_shape_w; | |||
| int in_b_offset = in_batch * in_h * in_w * channel; | |||
| int out_b_offset = i * out_h * out_w * channel; | |||
| for (int j = 0; j < out_h; ++j) { | |||
| int out_h_offset = out_b_offset + j * out_w * channel; | |||
| for (int k = 0; k < out_w; ++k) { | |||
| int8_t *out_ptr = output + out_h_offset + k * channel; | |||
| int index_h = j * block_shape_h + offset_h; | |||
| int index_w = k * block_shape_w + offset_w; | |||
| if (index_h < pad_t || index_h >= (pad_t + in_h) || index_w < pad_l || index_w >= (pad_l + in_w)) { | |||
| memset(out_ptr, zp, channel * sizeof(int8_t)); | |||
| } else { | |||
| int in_plane_offset = in_b_offset + ((index_h - pad_t) * in_w + (index_w - pad_l)) * channel; | |||
| const int8_t *in_ptr = input + in_plane_offset; | |||
| memcpy(out_ptr, in_ptr, channel * sizeof(int8_t)); | |||
| } | |||
| } | |||
| for (int pad_w_right = 0; pad_w_right < padding[3]; ++pad_w_right) { | |||
| memset(output + out_offset, zp, ped_w_size); | |||
| out_offset += out_c; | |||
| } | |||
| } | |||
| for (int pad_h_bottom = 0; pad_h_bottom < padding[1]; ++pad_h_bottom) { | |||
| memset(output + out_offset, zp, ped_h_size); | |||
| out_offset += ped_h_num; | |||
| } | |||
| } | |||
| } | |||
| @@ -17,14 +17,14 @@ | |||
| #define MINDSPORE_LITE_NNACL_INT8_SPACE_TO_BATCH_INT8_H_ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/fp32/space_to_batch.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *block_sizes, const int *in_shape, | |||
| const int *out_shape); | |||
| void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, const int *padding, | |||
| const int *out_shape, int32_t zp); | |||
| void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, SpaceToBatchParameter *param, int32_t zp); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -31,6 +31,7 @@ OpParameter *PopulateSpaceToBatchNDParameter(const mindspore::lite::PrimitiveC * | |||
| space_batch_param_nd->op_parameter_.type_ = primitive->Type(); | |||
| auto block_sizes = ((mindspore::lite::SpaceToBatchND *)primitive)->GetBlockShape(); | |||
| space_batch_param_nd->m_ = block_sizes.size(); | |||
| memcpy(space_batch_param_nd->block_sizes_, (block_sizes.data()), block_sizes.size() * sizeof(int)); | |||
| auto paddings = ((mindspore::lite::SpaceToBatchND *)primitive)->GetPaddings(); | |||
| memcpy(space_batch_param_nd->paddings_, (paddings.data()), paddings.size() * sizeof(int)); | |||
| @@ -33,6 +33,7 @@ OpParameter *PopulateSpaceToBatchParameter(const mindspore::lite::PrimitiveC *pr | |||
| memset(space_batch_param, 0, sizeof(SpaceToBatchParameter)); | |||
| space_batch_param->op_parameter_.type_ = primitive->Type(); | |||
| auto block_sizes = ((mindspore::lite::SpaceToBatch *)primitive)->BlockSizes(); | |||
| space_batch_param->m_ = block_sizes.size(); | |||
| memcpy(space_batch_param->block_sizes_, (block_sizes.data()), block_sizes.size() * sizeof(int)); | |||
| auto paddings = ((mindspore::lite::SpaceToBatch *)primitive)->Paddings(); | |||
| memcpy(space_batch_param->paddings_, (paddings.data()), paddings.size() * sizeof(int)); | |||
| @@ -80,8 +80,6 @@ Registry SpaceToBatchRegistry(schema::PrimitiveType_SpaceToBatch, SpaceToBatchCr | |||
| namespace { | |||
| constexpr int kSpaceToBatchNDOutputNum = 1; | |||
| constexpr int kSpaceToBatchNDInputNum = 1; | |||
| constexpr int kBlockSizesSize = 2; | |||
| constexpr int kPaddingsSize = 4; | |||
| } // namespace | |||
| int SpaceToBatch::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) { | |||
| @@ -103,20 +101,13 @@ int SpaceToBatch::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lit | |||
| } | |||
| auto input_shape = input->shape(); | |||
| if (input_shape.size() != kDimension_4d) { | |||
| MS_LOG(ERROR) << "input shape dimension size should == " << kDimension_4d; | |||
| return 1; | |||
| } | |||
| if (GetBlockShape().size() != kBlockSizesSize) { | |||
| MS_LOG(ERROR) << "Block shape size should be " << kBlockSizesSize; | |||
| return 1; | |||
| } | |||
| if (GetPaddings().size() != kPaddingsSize) { | |||
| MS_LOG(ERROR) << "Crops size should be " << kPaddingsSize; | |||
| return 1; | |||
| MS_LOG(ERROR) << "Space_to_batch op only support 4D input currently. But got %d dimensionality input." | |||
| << kDimension_4d; | |||
| return RET_ERROR; | |||
| } | |||
| for (int &iter : GetBlockShape()) { | |||
| auto block_shape_vector = GetBlockShape(); | |||
| for (int &iter : block_shape_vector) { | |||
| block_sizes_.emplace_back(iter); | |||
| } | |||
| @@ -125,7 +116,8 @@ int SpaceToBatch::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lit | |||
| paddings_.clear(); | |||
| in_shape_.emplace_back(input_shape.at(NHWC_N)); | |||
| padded_in_shape_.emplace_back(input_shape.at(NHWC_N)); | |||
| for (int i = 0; i < kBlockSizesSize; i++) { | |||
| auto block_shape_size = block_shape_vector.size(); | |||
| for (size_t i = 0; i < block_shape_size; i++) { | |||
| in_shape_.emplace_back(input_shape.at(i + 1)); | |||
| padded_in_shape_.emplace_back(input_shape.at(i + 1) + (paddings_.at(2 * i) + paddings_.at(2 * i + 1))); | |||
| paddings_.emplace_back(paddings_.at(2 * i)); | |||
| @@ -137,11 +129,19 @@ int SpaceToBatch::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lit | |||
| } | |||
| in_shape_.emplace_back(input_shape.at(NHWC_C)); | |||
| padded_in_shape_.emplace_back(input_shape.at(NHWC_C)); | |||
| int padding_left = 0; | |||
| int padding_right = 0; | |||
| int block_w = 1; | |||
| if (block_shape_size == 2) { | |||
| padding_left = paddings_[2]; | |||
| padding_right = paddings_[3]; | |||
| block_w = block_sizes_[1]; | |||
| } | |||
| std::vector<int32_t> output_shape(input_shape.size()); | |||
| output_shape[NHWC_N] = input_shape[NHWC_N] * (block_sizes_[NHWC_N] * block_sizes_[NHWC_H]); | |||
| output_shape[NHWC_H] = (input_shape[NHWC_H] + paddings_[0] + paddings_[1]) / block_sizes_[NHWC_N]; | |||
| output_shape[NHWC_W] = (input_shape[NHWC_W] + paddings_[2] + paddings_[3]) / block_sizes_[NHWC_H]; | |||
| output_shape[NHWC_N] = input_shape[NHWC_N] * (block_sizes_[0] * block_w); | |||
| output_shape[NHWC_H] = (input_shape[NHWC_H] + paddings_[0] + paddings_[1]) / block_sizes_[0]; | |||
| output_shape[NHWC_W] = (input_shape[NHWC_W] + padding_left + padding_right) / block_w; | |||
| output_shape[NHWC_C] = input_shape[NHWC_C]; | |||
| outputs[0]->set_shape(output_shape); | |||
| return RET_OK; | |||
| @@ -26,8 +26,6 @@ namespace lite { | |||
| namespace { | |||
| constexpr int kSpaceToBatchNDOutputNum = 1; | |||
| constexpr int kSpaceToBatchNDInputNum = 1; | |||
| constexpr int kBlockSizesSize = 2; | |||
| constexpr int kPaddingsSize = 4; | |||
| } // namespace | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| @@ -109,20 +107,19 @@ int SpaceToBatchND::InferShape(std::vector<lite::Tensor *> inputs, std::vector<l | |||
| return RET_ERROR; | |||
| } | |||
| auto block_shape = GetBlockShape(); | |||
| if (block_shape.size() != kBlockSizesSize) { | |||
| MS_LOG(ERROR) << "blockShape size != " << kBlockSizesSize; | |||
| return RET_ERROR; | |||
| } | |||
| auto pedding = GetPaddings(); | |||
| if (pedding.size() != kPaddingsSize) { | |||
| MS_LOG(ERROR) << "pedding size should be " << kPaddingsSize; | |||
| return RET_ERROR; | |||
| auto padding = GetPaddings(); | |||
| int padding_left = 0; | |||
| int padding_right = 0; | |||
| int block_w = 1; | |||
| if (block_shape.size() == 2) { | |||
| padding_left = padding[2]; | |||
| padding_right = padding[3]; | |||
| block_w = block_shape[1]; | |||
| } | |||
| std::vector<int32_t> output_shape(input_shape.size()); | |||
| output_shape[NHWC_N] = input_shape[NHWC_N] * block_shape[0] * block_shape[1]; | |||
| output_shape[NHWC_H] = (input_shape[NHWC_H] + pedding[0] + pedding[1]) / block_shape[0]; | |||
| output_shape[NHWC_W] = (input_shape[NHWC_W] + pedding[2] + pedding[3]) / block_shape[1]; | |||
| output_shape[NHWC_N] = input_shape[NHWC_N] * block_shape[0] * block_w; | |||
| output_shape[NHWC_H] = (input_shape[NHWC_H] + padding[0] + padding[1]) / block_shape[0]; | |||
| output_shape[NHWC_W] = (input_shape[NHWC_W] + padding_left + padding_right) / block_w; | |||
| output_shape[NHWC_C] = input_shape[NHWC_C]; | |||
| outputs[0]->set_shape(output_shape); | |||
| return RET_OK; | |||
| @@ -54,9 +54,15 @@ int SpaceToBatchCPUKernel::ReSize() { | |||
| } | |||
| } | |||
| if (param->need_paddings_) { | |||
| int padding_left = 0; | |||
| int padding_right = 0; | |||
| if (param->m_ == 2) { | |||
| padding_left = param->paddings_[2]; | |||
| padding_right = param->paddings_[3]; | |||
| } | |||
| param->padded_in_shape_[kNHWC_N] = input_tensor->shape().at(kNHWC_N); | |||
| param->padded_in_shape_[kNHWC_H] = input_tensor->shape().at(kNHWC_H) + param->paddings_[0] + param->paddings_[1]; | |||
| param->padded_in_shape_[kNHWC_W] = input_tensor->shape().at(kNHWC_W) + param->paddings_[2] + param->paddings_[3]; | |||
| param->padded_in_shape_[kNHWC_W] = input_tensor->shape().at(kNHWC_W) + padding_left + padding_right; | |||
| param->padded_in_shape_[kNHWC_C] = input_tensor->shape().at(kNHWC_C); | |||
| param->padded_input_element_num = param->padded_in_shape_[kNHWC_N] * param->padded_in_shape_[kNHWC_H] * | |||
| param->padded_in_shape_[kNHWC_W] * param->padded_in_shape_[kNHWC_C]; | |||
| @@ -38,17 +38,7 @@ int SpaceToBatchInt8CPUKernel::Run() { | |||
| auto quant_arg = output_tensor->GetQuantParams().front(); | |||
| if (param->need_paddings_) { | |||
| padded_input_ = context_->allocator->Malloc(param->padded_input_element_num * sizeof(int8_t)); | |||
| if (padded_input_ == nullptr) { | |||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||
| return RET_ERROR; | |||
| } | |||
| auto padded_input = reinterpret_cast<int8_t *>(padded_input_); | |||
| DoSpaceToBatchPaddingNHWCInt8(input_ptr, padded_input, param->input_shape_, param->paddings_, | |||
| param->padded_in_shape_, quant_arg.zeroPoint); | |||
| DoSpaceToBatchNHWCInt8(padded_input, output_ptr, param->block_sizes_, param->padded_in_shape_, | |||
| param->output_shape_); | |||
| FreeTmpBuffer(); | |||
| DoSpaceToBatchPaddingNHWCInt8(input_ptr, output_ptr, param, quant_arg.zeroPoint); | |||
| } else { | |||
| DoSpaceToBatchNHWCInt8(input_ptr, output_ptr, param->block_sizes_, param->input_shape_, param->output_shape_); | |||
| } | |||
| @@ -34,7 +34,7 @@ TEST_F(SpaceToBatchTestInt8, test1) { | |||
| std::vector<lite::Tensor *> inputs = {&in_tensor}; | |||
| std::vector<lite::Tensor *> outputs = {&out_tensor}; | |||
| SpaceToBatchParameter parameter = {{}, false, {2, 2}, {1, 1, 1, 1}}; | |||
| SpaceToBatchParameter parameter = {{}, false, 2, {2, 2}, {1, 1, 1, 1}}; | |||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SpaceToBatchND}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||