From: @fuzhiye
Reviewed-by: @zhang_xue_tong, @hangangqiang
Signed-off-by: @zhang_xue_tong
Tags: v1.1.0
@@ -20,6 +20,7 @@
 typedef struct SpaceToBatchParameter {
   OpParameter op_parameter_;
   bool need_paddings_;
+  int m_;
   int block_sizes_[4];
   int paddings_[4];
   int input_shape_[4];
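The new m_ field records how many block dimensions the primitive supplied: 2 means the block applies to both H and W, 1 means H only, so the W axis passes through untouched. A hedged illustration of how the populate code below is meant to fill it (values assumed, not from the patch):

  SpaceToBatchParameter param;
  memset(&param, 0, sizeof(param));
  param.m_ = 2;               /* two block dimensions were given */
  param.block_sizes_[0] = 2;  /* block over H */
  param.block_sizes_[1] = 2;  /* block over W; treated as 1 when m_ == 1 */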
@@ -46,47 +46,40 @@ void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *bloc
     }
   }
 }
-void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, const int *padding,
-                                   const int *out_shape, int32_t zp) {
+void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, SpaceToBatchParameter *param, int32_t zp) {
+  int *in_shape = param->input_shape_;
+  int *out_shape = param->output_shape_;
+  int *paddings = param->paddings_;
+  int block_shape_h = param->block_sizes_[0];
+  int block_shape_w = param->m_ == 2 ? param->block_sizes_[1] : 1;
+  int in_b = in_shape[0];
   int in_h = in_shape[1];
   int in_w = in_shape[2];
-  int in_c = in_shape[3];
+  int channel = in_shape[3];
+  int out_h = out_shape[1];
   int out_w = out_shape[2];
-  int out_c = out_shape[3];
-  size_t ped_h_num = out_w * out_c;
-  size_t ped_h_size = ped_h_num * sizeof(int8_t);
-  size_t ped_w_size = out_c * sizeof(int8_t);
-  size_t out_offset = 0;
-  int in_strides[4];
-  ComputeStrides(in_shape, in_strides, 4);
-  int out_strides[4];
-  ComputeStrides(out_shape, out_strides, 4);
-  size_t copy_size = in_c * sizeof(int8_t);
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_offset0 = i * in_strides[0];
-    for (int pad_h_top = 0; pad_h_top < padding[0]; ++pad_h_top) {
-      memset(output + out_offset, zp, ped_h_size);
-      out_offset += ped_h_num;
-    }
-    for (int j = 0; j < in_h; ++j) {
-      size_t in_offset1 = in_offset0 + j * in_strides[1];
-      for (int pad_w_left = 0; pad_w_left < padding[2]; ++pad_w_left) {
-        memset(output + out_offset, zp, ped_w_size);
-        out_offset += out_c;
-      }
-      for (int k = 0; k < in_w; ++k) {
-        size_t in_offset2 = in_offset1 + k * in_strides[2];
-        memcpy(output + out_offset, input + in_offset2, copy_size);
-        out_offset += in_c;
+  int pad_t = paddings[0];
+  int pad_l = param->m_ == 2 ? paddings[2] : 0;
+  for (int i = 0; i < out_shape[0]; ++i) {
+    int in_batch = i % in_b;
+    int offset_w = (i / in_b) % block_shape_w;
+    int offset_h = (i / in_b) / block_shape_w;
+    int in_b_offset = in_batch * in_h * in_w * channel;
+    int out_b_offset = i * out_h * out_w * channel;
+    for (int j = 0; j < out_h; ++j) {
+      int out_h_offset = out_b_offset + j * out_w * channel;
+      for (int k = 0; k < out_w; ++k) {
+        int8_t *out_ptr = output + out_h_offset + k * channel;
+        int index_h = j * block_shape_h + offset_h;
+        int index_w = k * block_shape_w + offset_w;
+        if (index_h < pad_t || index_h >= (pad_t + in_h) || index_w < pad_l || index_w >= (pad_l + in_w)) {
+          memset(out_ptr, zp, channel * sizeof(int8_t));
+        } else {
+          int in_plane_offset = in_b_offset + ((index_h - pad_t) * in_w + (index_w - pad_l)) * channel;
+          const int8_t *in_ptr = input + in_plane_offset;
+          memcpy(out_ptr, in_ptr, channel * sizeof(int8_t));
+        }
       }
-      for (int pad_w_right = 0; pad_w_right < padding[3]; ++pad_w_right) {
-        memset(output + out_offset, zp, ped_w_size);
-        out_offset += out_c;
-      }
     }
-    for (int pad_h_bottom = 0; pad_h_bottom < padding[1]; ++pad_h_bottom) {
-      memset(output + out_offset, zp, ped_h_size);
-      out_offset += ped_h_num;
-    }
   }
 }
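To make the fused mapping concrete, here is a minimal standalone sketch of the same pad + space-to-batch gather (NHWC, int8), written independently of the nnacl headers; the helper name and the tiny test case are illustrative assumptions, not part of the patch:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Standalone sketch of the fused mapping: each output pixel either copies
 * one input pixel or is filled with the zero point, so no padded
 * intermediate buffer is needed. */
static void SpaceToBatchPadNHWC(const int8_t *in, int8_t *out, int in_b, int in_h, int in_w, int c, int block_h,
                                int block_w, int pad_t, int pad_l, int8_t zp, int out_h, int out_w) {
  int out_b = in_b * block_h * block_w;
  for (int i = 0; i < out_b; ++i) {
    int in_batch = i % in_b;
    int offset_w = (i / in_b) % block_w;
    int offset_h = (i / in_b) / block_w;
    for (int j = 0; j < out_h; ++j) {
      for (int k = 0; k < out_w; ++k) {
        int8_t *dst = out + ((i * out_h + j) * out_w + k) * c;
        int ih = j * block_h + offset_h - pad_t; /* row in the unpadded input */
        int iw = k * block_w + offset_w - pad_l; /* column in the unpadded input */
        if (ih < 0 || ih >= in_h || iw < 0 || iw >= in_w) {
          memset(dst, zp, (size_t)c); /* padded region: fill with the zero point */
        } else {
          memcpy(dst, in + ((in_batch * in_h + ih) * in_w + iw) * c, (size_t)c);
        }
      }
    }
  }
}

int main(void) {
  /* 1x2x2x1 input, block 2x2, one pixel of padding on every side
   * -> 4x2x2x1 output; batch 3 receives the original top-left pixel. */
  const int8_t in[4] = {1, 2, 3, 4};
  int8_t out[16];
  SpaceToBatchPadNHWC(in, out, 1, 2, 2, 1, 2, 2, 1, 1, 0, 2, 2);
  for (int i = 0; i < 16; ++i) printf("%d ", out[i]); /* 0 0 0 4 0 0 3 0 0 2 0 0 1 0 0 0 */
  printf("\n");
  return 0;
}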
@@ -17,14 +17,14 @@
 #define MINDSPORE_LITE_NNACL_INT8_SPACE_TO_BATCH_INT8_H_
 #include "nnacl/op_base.h"
+#include "nnacl/fp32/space_to_batch.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *block_sizes, const int *in_shape,
                             const int *out_shape);
-void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, const int *padding,
-                                   const int *out_shape, int32_t zp);
+void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, SpaceToBatchParameter *param, int32_t zp);
 #ifdef __cplusplus
 }
 #endif
@@ -31,6 +31,7 @@ OpParameter *PopulateSpaceToBatchNDParameter(const mindspore::lite::PrimitiveC *
   space_batch_param_nd->op_parameter_.type_ = primitive->Type();
   auto block_sizes = ((mindspore::lite::SpaceToBatchND *)primitive)->GetBlockShape();
+  space_batch_param_nd->m_ = block_sizes.size();
   memcpy(space_batch_param_nd->block_sizes_, (block_sizes.data()), block_sizes.size() * sizeof(int));
   auto paddings = ((mindspore::lite::SpaceToBatchND *)primitive)->GetPaddings();
   memcpy(space_batch_param_nd->paddings_, (paddings.data()), paddings.size() * sizeof(int));
@@ -33,6 +33,7 @@ OpParameter *PopulateSpaceToBatchParameter(const mindspore::lite::PrimitiveC *pr
   memset(space_batch_param, 0, sizeof(SpaceToBatchParameter));
   space_batch_param->op_parameter_.type_ = primitive->Type();
   auto block_sizes = ((mindspore::lite::SpaceToBatch *)primitive)->BlockSizes();
+  space_batch_param->m_ = block_sizes.size();
   memcpy(space_batch_param->block_sizes_, (block_sizes.data()), block_sizes.size() * sizeof(int));
   auto paddings = ((mindspore::lite::SpaceToBatch *)primitive)->Paddings();
   memcpy(space_batch_param->paddings_, (paddings.data()), paddings.size() * sizeof(int));
@@ -80,8 +80,6 @@ Registry SpaceToBatchRegistry(schema::PrimitiveType_SpaceToBatch, SpaceToBatchCr
 namespace {
 constexpr int kSpaceToBatchNDOutputNum = 1;
 constexpr int kSpaceToBatchNDInputNum = 1;
-constexpr int kBlockSizesSize = 2;
-constexpr int kPaddingsSize = 4;
 }  // namespace
 int SpaceToBatch::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) {
@@ -103,20 +101,13 @@ int SpaceToBatch::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lit
   }
   auto input_shape = input->shape();
   if (input_shape.size() != kDimension_4d) {
-    MS_LOG(ERROR) << "input shape dimension size should == " << kDimension_4d;
-    return 1;
-  }
-  if (GetBlockShape().size() != kBlockSizesSize) {
-    MS_LOG(ERROR) << "Block shape size should be " << kBlockSizesSize;
-    return 1;
-  }
-  if (GetPaddings().size() != kPaddingsSize) {
-    MS_LOG(ERROR) << "Crops size should be " << kPaddingsSize;
-    return 1;
+    MS_LOG(ERROR) << "space_to_batch only supports 4D input currently, but got " << input_shape.size()
+                  << "D input.";
+    return RET_ERROR;
   }
-  for (int &iter : GetBlockShape()) {
+  auto block_shape_vector = GetBlockShape();
+  for (int &iter : block_shape_vector) {
     block_sizes_.emplace_back(iter);
   }
@@ -125,7 +116,8 @@ int SpaceToBatch::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lit
   paddings_.clear();
   in_shape_.emplace_back(input_shape.at(NHWC_N));
   padded_in_shape_.emplace_back(input_shape.at(NHWC_N));
-  for (int i = 0; i < kBlockSizesSize; i++) {
+  auto block_shape_size = block_shape_vector.size();
+  for (size_t i = 0; i < block_shape_size; i++) {
     in_shape_.emplace_back(input_shape.at(i + 1));
     padded_in_shape_.emplace_back(input_shape.at(i + 1) + (paddings_.at(2 * i) + paddings_.at(2 * i + 1)));
     paddings_.emplace_back(paddings_.at(2 * i));
@@ -137,11 +129,19 @@ int SpaceToBatch::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lit
   }
   in_shape_.emplace_back(input_shape.at(NHWC_C));
   padded_in_shape_.emplace_back(input_shape.at(NHWC_C));
+  int padding_left = 0;
+  int padding_right = 0;
+  int block_w = 1;
+  if (block_shape_size == 2) {
+    padding_left = paddings_[2];
+    padding_right = paddings_[3];
+    block_w = block_sizes_[1];
+  }
   std::vector<int32_t> output_shape(input_shape.size());
-  output_shape[NHWC_N] = input_shape[NHWC_N] * (block_sizes_[NHWC_N] * block_sizes_[NHWC_H]);
-  output_shape[NHWC_H] = (input_shape[NHWC_H] + paddings_[0] + paddings_[1]) / block_sizes_[NHWC_N];
-  output_shape[NHWC_W] = (input_shape[NHWC_W] + paddings_[2] + paddings_[3]) / block_sizes_[NHWC_H];
+  output_shape[NHWC_N] = input_shape[NHWC_N] * (block_sizes_[0] * block_w);
+  output_shape[NHWC_H] = (input_shape[NHWC_H] + paddings_[0] + paddings_[1]) / block_sizes_[0];
+  output_shape[NHWC_W] = (input_shape[NHWC_W] + padding_left + padding_right) / block_w;
   output_shape[NHWC_C] = input_shape[NHWC_C];
   outputs[0]->set_shape(output_shape);
   return RET_OK;
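As a concrete check of the rewritten shape arithmetic, with assumed values input_shape = {1, 4, 4, 3}, block_sizes_ = {2, 2} (m_ == 2) and paddings_ = {0, 0, 2, 2}:

  /* output_shape[NHWC_N] = 1 * (2 * 2)     = 4
     output_shape[NHWC_H] = (4 + 0 + 0) / 2 = 2
     output_shape[NHWC_W] = (4 + 2 + 2) / 2 = 4
     output_shape[NHWC_C] = 3 */

The old code indexed block_sizes_ with NHWC_N and NHWC_H, which evaluate to 0 and 1 but read as shape indices; the rewrite also handles the one-block case, where W is left untouched.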
@@ -26,8 +26,6 @@ namespace lite {
 namespace {
 constexpr int kSpaceToBatchNDOutputNum = 1;
 constexpr int kSpaceToBatchNDInputNum = 1;
-constexpr int kBlockSizesSize = 2;
-constexpr int kPaddingsSize = 4;
 }  // namespace
 #ifdef PRIMITIVE_WRITEABLE
@@ -109,20 +107,19 @@ int SpaceToBatchND::InferShape(std::vector<lite::Tensor *> inputs, std::vector<l
     return RET_ERROR;
   }
   auto block_shape = GetBlockShape();
-  if (block_shape.size() != kBlockSizesSize) {
-    MS_LOG(ERROR) << "blockShape size != " << kBlockSizesSize;
-    return RET_ERROR;
-  }
-  auto pedding = GetPaddings();
-  if (pedding.size() != kPaddingsSize) {
-    MS_LOG(ERROR) << "pedding size should be " << kPaddingsSize;
-    return RET_ERROR;
+  auto padding = GetPaddings();
+  int padding_left = 0;
+  int padding_right = 0;
+  int block_w = 1;
+  if (block_shape.size() == 2) {
+    padding_left = padding[2];
+    padding_right = padding[3];
+    block_w = block_shape[1];
   }
   std::vector<int32_t> output_shape(input_shape.size());
-  output_shape[NHWC_N] = input_shape[NHWC_N] * block_shape[0] * block_shape[1];
-  output_shape[NHWC_H] = (input_shape[NHWC_H] + pedding[0] + pedding[1]) / block_shape[0];
-  output_shape[NHWC_W] = (input_shape[NHWC_W] + pedding[2] + pedding[3]) / block_shape[1];
+  output_shape[NHWC_N] = input_shape[NHWC_N] * block_shape[0] * block_w;
+  output_shape[NHWC_H] = (input_shape[NHWC_H] + padding[0] + padding[1]) / block_shape[0];
+  output_shape[NHWC_W] = (input_shape[NHWC_W] + padding_left + padding_right) / block_w;
   output_shape[NHWC_C] = input_shape[NHWC_C];
   outputs[0]->set_shape(output_shape);
   return RET_OK;
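The one-block case now falls out of the same path: when block_shape.size() == 1, block_w stays 1 and padding_left/padding_right stay 0, so W passes through. With assumed values input_shape = {1, 4, 4, 3}, block_shape = {2} and padding = {1, 1}:

  /* output_shape[NHWC_N] = 1 * 2 * 1       = 2
     output_shape[NHWC_H] = (4 + 1 + 1) / 2 = 3
     output_shape[NHWC_W] = 4  (unchanged)
     output_shape[NHWC_C] = 3 */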
@@ -54,9 +54,15 @@ int SpaceToBatchCPUKernel::ReSize() {
     }
   }
   if (param->need_paddings_) {
+    int padding_left = 0;
+    int padding_right = 0;
+    if (param->m_ == 2) {
+      padding_left = param->paddings_[2];
+      padding_right = param->paddings_[3];
+    }
     param->padded_in_shape_[kNHWC_N] = input_tensor->shape().at(kNHWC_N);
     param->padded_in_shape_[kNHWC_H] = input_tensor->shape().at(kNHWC_H) + param->paddings_[0] + param->paddings_[1];
-    param->padded_in_shape_[kNHWC_W] = input_tensor->shape().at(kNHWC_W) + param->paddings_[2] + param->paddings_[3];
+    param->padded_in_shape_[kNHWC_W] = input_tensor->shape().at(kNHWC_W) + padding_left + padding_right;
     param->padded_in_shape_[kNHWC_C] = input_tensor->shape().at(kNHWC_C);
     param->padded_input_element_num = param->padded_in_shape_[kNHWC_N] * param->padded_in_shape_[kNHWC_H] *
                                       param->padded_in_shape_[kNHWC_W] * param->padded_in_shape_[kNHWC_C];
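The same guard matters here in ReSize(): only the H padding is applied unconditionally. With assumed values input 1x4x4x1 and paddings_ = {1, 1, 2, 2}, padded_in_shape_ becomes {1, 6, 8, 1} when m_ == 2, but {1, 6, 4, 1} when m_ == 1, since padding_left and padding_right remain 0.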
@@ -38,17 +38,7 @@ int SpaceToBatchInt8CPUKernel::Run() {
   auto quant_arg = output_tensor->GetQuantParams().front();
   if (param->need_paddings_) {
-    padded_input_ = context_->allocator->Malloc(param->padded_input_element_num * sizeof(int8_t));
-    if (padded_input_ == nullptr) {
-      MS_LOG(ERROR) << "Memory allocation failed";
-      return RET_ERROR;
-    }
-    auto padded_input = reinterpret_cast<int8_t *>(padded_input_);
-    DoSpaceToBatchPaddingNHWCInt8(input_ptr, padded_input, param->input_shape_, param->paddings_,
-                                  param->padded_in_shape_, quant_arg.zeroPoint);
-    DoSpaceToBatchNHWCInt8(padded_input, output_ptr, param->block_sizes_, param->padded_in_shape_,
-                           param->output_shape_);
-    FreeTmpBuffer();
+    DoSpaceToBatchPaddingNHWCInt8(input_ptr, output_ptr, param, quant_arg.zeroPoint);
   } else {
     DoSpaceToBatchNHWCInt8(input_ptr, output_ptr, param->block_sizes_, param->input_shape_, param->output_shape_);
   }
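A note on the design choice: because the fused DoSpaceToBatchPaddingNHWCInt8 writes each output pixel directly, Run() no longer allocates the padded intermediate (padded_input_element_num bytes from the context allocator), and the allocation-failure path and FreeTmpBuffer() call disappear with it.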
@@ -34,7 +34,7 @@ TEST_F(SpaceToBatchTestInt8, test1) {
   std::vector<lite::Tensor *> inputs = {&in_tensor};
   std::vector<lite::Tensor *> outputs = {&out_tensor};
-  SpaceToBatchParameter parameter = {{}, false, {2, 2}, {1, 1, 1, 1}};
+  SpaceToBatchParameter parameter = {{}, false, 2, {2, 2}, {1, 1, 1, 1}};
   kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SpaceToBatchND};
   auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);