diff --git a/mindspore/lite/nnacl/fp32/space_to_batch.h b/mindspore/lite/nnacl/fp32/space_to_batch.h index 8c8bac087d..9cbd526ccf 100644 --- a/mindspore/lite/nnacl/fp32/space_to_batch.h +++ b/mindspore/lite/nnacl/fp32/space_to_batch.h @@ -20,6 +20,7 @@ typedef struct SpaceToBatchParameter { OpParameter op_parameter_; bool need_paddings_; + int m_; int block_sizes_[4]; int paddings_[4]; int input_shape_[4]; diff --git a/mindspore/lite/nnacl/int8/space_to_batch_int8.c b/mindspore/lite/nnacl/int8/space_to_batch_int8.c index 3a1df54925..17bf2933ab 100644 --- a/mindspore/lite/nnacl/int8/space_to_batch_int8.c +++ b/mindspore/lite/nnacl/int8/space_to_batch_int8.c @@ -46,47 +46,40 @@ void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *bloc } } -void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, const int *padding, - const int *out_shape, int32_t zp) { +void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, SpaceToBatchParameter *param, int32_t zp) { + int *in_shape = param->input_shape_; + int *out_shape = param->output_shape_; + int *paddings = param->paddings_; + int block_shape_h = param->block_sizes_[0]; + int block_shape_w = param->m_ == 2 ? param->block_sizes_[1] : 1; + int in_b = in_shape[0]; int in_h = in_shape[1]; int in_w = in_shape[2]; - int in_c = in_shape[3]; + int channel = in_shape[3]; + int out_h = out_shape[1]; int out_w = out_shape[2]; - int out_c = out_shape[3]; - size_t ped_h_num = out_w * out_c; - size_t ped_h_size = ped_h_num * sizeof(int8_t); - size_t ped_w_size = out_c * sizeof(int8_t); - size_t out_offset = 0; - int in_strides[4]; - ComputeStrides(in_shape, in_strides, 4); - int out_strides[4]; - ComputeStrides(out_shape, out_strides, 4); - size_t copy_size = in_c * sizeof(int8_t); - for (int i = 0; i < in_shape[0]; ++i) { - size_t in_offset0 = i * in_strides[0]; - for (int pad_h_top = 0; pad_h_top < padding[0]; ++pad_h_top) { - memset(output + out_offset, zp, ped_h_size); - out_offset += ped_h_num; - } - for (int j = 0; j < in_h; ++j) { - size_t in_offset1 = in_offset0 + j * in_strides[1]; - for (int pad_w_left = 0; pad_w_left < padding[2]; ++pad_w_left) { - memset(output + out_offset, zp, ped_w_size); - out_offset += out_c; - } - for (int k = 0; k < in_w; ++k) { - size_t in_offset2 = in_offset1 + k * in_strides[2]; - memcpy(output + out_offset, input + in_offset2, copy_size); - out_offset += in_c; + int pad_t = paddings[0]; + int pad_l = param->m_ == 2 ? paddings[2] : 0; + for (int i = 0; i < out_shape[0]; ++i) { + int in_batch = i % in_b; + int offset_w = (i / in_b) % block_shape_w; + int offset_h = (i / in_b) / block_shape_w; + int in_b_offset = in_batch * in_h * in_w * channel; + int out_b_offset = i * out_h * out_w * channel; + for (int j = 0; j < out_h; ++j) { + int out_h_offset = out_b_offset + j * out_w * channel; + for (int k = 0; k < out_w; ++k) { + int8_t *out_ptr = output + out_h_offset + k * channel; + int index_h = j * block_shape_h + offset_h; + int index_w = k * block_shape_w + offset_w; + if (index_h < pad_t || index_h >= (pad_t + in_h) || index_w < pad_l || index_w >= (pad_l + in_w)) { + memset(out_ptr, zp, channel * sizeof(int8_t)); + } else { + int in_plane_offset = in_b_offset + ((index_h - pad_t) * in_w + (index_w - pad_l)) * channel; + const int8_t *in_ptr = input + in_plane_offset; + memcpy(out_ptr, in_ptr, channel * sizeof(int8_t)); + } } - for (int pad_w_right = 0; pad_w_right < padding[3]; ++pad_w_right) { - memset(output + out_offset, zp, ped_w_size); - out_offset += out_c; - } - } - for (int pad_h_bottom = 0; pad_h_bottom < padding[1]; ++pad_h_bottom) { - memset(output + out_offset, zp, ped_h_size); - out_offset += ped_h_num; } } } diff --git a/mindspore/lite/nnacl/int8/space_to_batch_int8.h b/mindspore/lite/nnacl/int8/space_to_batch_int8.h index c58f7ede80..ad9f00f9f4 100644 --- a/mindspore/lite/nnacl/int8/space_to_batch_int8.h +++ b/mindspore/lite/nnacl/int8/space_to_batch_int8.h @@ -17,14 +17,14 @@ #define MINDSPORE_LITE_NNACL_INT8_SPACE_TO_BATCH_INT8_H_ #include "nnacl/op_base.h" +#include "nnacl/fp32/space_to_batch.h" #ifdef __cplusplus extern "C" { #endif void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *block_sizes, const int *in_shape, const int *out_shape); -void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, const int *padding, - const int *out_shape, int32_t zp); +void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, SpaceToBatchParameter *param, int32_t zp); #ifdef __cplusplus } #endif diff --git a/mindspore/lite/src/ops/populate/space_to_batch_nd_populate.cc b/mindspore/lite/src/ops/populate/space_to_batch_nd_populate.cc index f6257cbf2a..a2d89ec44e 100644 --- a/mindspore/lite/src/ops/populate/space_to_batch_nd_populate.cc +++ b/mindspore/lite/src/ops/populate/space_to_batch_nd_populate.cc @@ -31,6 +31,7 @@ OpParameter *PopulateSpaceToBatchNDParameter(const mindspore::lite::PrimitiveC * space_batch_param_nd->op_parameter_.type_ = primitive->Type(); auto block_sizes = ((mindspore::lite::SpaceToBatchND *)primitive)->GetBlockShape(); + space_batch_param_nd->m_ = block_sizes.size(); memcpy(space_batch_param_nd->block_sizes_, (block_sizes.data()), block_sizes.size() * sizeof(int)); auto paddings = ((mindspore::lite::SpaceToBatchND *)primitive)->GetPaddings(); memcpy(space_batch_param_nd->paddings_, (paddings.data()), paddings.size() * sizeof(int)); diff --git a/mindspore/lite/src/ops/populate/space_to_batch_populate.cc b/mindspore/lite/src/ops/populate/space_to_batch_populate.cc index d0a1b536af..de096f1fd4 100644 --- a/mindspore/lite/src/ops/populate/space_to_batch_populate.cc +++ b/mindspore/lite/src/ops/populate/space_to_batch_populate.cc @@ -33,6 +33,7 @@ OpParameter *PopulateSpaceToBatchParameter(const mindspore::lite::PrimitiveC *pr memset(space_batch_param, 0, sizeof(SpaceToBatchParameter)); space_batch_param->op_parameter_.type_ = primitive->Type(); auto block_sizes = ((mindspore::lite::SpaceToBatch *)primitive)->BlockSizes(); + space_batch_param->m_ = block_sizes.size(); memcpy(space_batch_param->block_sizes_, (block_sizes.data()), block_sizes.size() * sizeof(int)); auto paddings = ((mindspore::lite::SpaceToBatch *)primitive)->Paddings(); memcpy(space_batch_param->paddings_, (paddings.data()), paddings.size() * sizeof(int)); diff --git a/mindspore/lite/src/ops/space_to_batch.cc b/mindspore/lite/src/ops/space_to_batch.cc index 4e4d00e4fd..b68c0309f1 100644 --- a/mindspore/lite/src/ops/space_to_batch.cc +++ b/mindspore/lite/src/ops/space_to_batch.cc @@ -80,8 +80,6 @@ Registry SpaceToBatchRegistry(schema::PrimitiveType_SpaceToBatch, SpaceToBatchCr namespace { constexpr int kSpaceToBatchNDOutputNum = 1; constexpr int kSpaceToBatchNDInputNum = 1; -constexpr int kBlockSizesSize = 2; -constexpr int kPaddingsSize = 4; } // namespace int SpaceToBatch::InferShape(std::vector inputs, std::vector outputs) { @@ -103,20 +101,13 @@ int SpaceToBatch::InferShape(std::vector inputs, std::vectorshape(); if (input_shape.size() != kDimension_4d) { - MS_LOG(ERROR) << "input shape dimension size should == " << kDimension_4d; - return 1; - } - - if (GetBlockShape().size() != kBlockSizesSize) { - MS_LOG(ERROR) << "Block shape size should be " << kBlockSizesSize; - return 1; - } - if (GetPaddings().size() != kPaddingsSize) { - MS_LOG(ERROR) << "Crops size should be " << kPaddingsSize; - return 1; + MS_LOG(ERROR) << "Space_to_batch op only support 4D input currently. But got %d dimensionality input." + << kDimension_4d; + return RET_ERROR; } - for (int &iter : GetBlockShape()) { + auto block_shape_vector = GetBlockShape(); + for (int &iter : block_shape_vector) { block_sizes_.emplace_back(iter); } @@ -125,7 +116,8 @@ int SpaceToBatch::InferShape(std::vector inputs, std::vector inputs, std::vector output_shape(input_shape.size()); - output_shape[NHWC_N] = input_shape[NHWC_N] * (block_sizes_[NHWC_N] * block_sizes_[NHWC_H]); - output_shape[NHWC_H] = (input_shape[NHWC_H] + paddings_[0] + paddings_[1]) / block_sizes_[NHWC_N]; - output_shape[NHWC_W] = (input_shape[NHWC_W] + paddings_[2] + paddings_[3]) / block_sizes_[NHWC_H]; + output_shape[NHWC_N] = input_shape[NHWC_N] * (block_sizes_[0] * block_w); + output_shape[NHWC_H] = (input_shape[NHWC_H] + paddings_[0] + paddings_[1]) / block_sizes_[0]; + output_shape[NHWC_W] = (input_shape[NHWC_W] + padding_left + padding_right) / block_w; output_shape[NHWC_C] = input_shape[NHWC_C]; outputs[0]->set_shape(output_shape); return RET_OK; diff --git a/mindspore/lite/src/ops/space_to_batch_nd.cc b/mindspore/lite/src/ops/space_to_batch_nd.cc index 640b6e50a9..50e0976df6 100644 --- a/mindspore/lite/src/ops/space_to_batch_nd.cc +++ b/mindspore/lite/src/ops/space_to_batch_nd.cc @@ -26,8 +26,6 @@ namespace lite { namespace { constexpr int kSpaceToBatchNDOutputNum = 1; constexpr int kSpaceToBatchNDInputNum = 1; -constexpr int kBlockSizesSize = 2; -constexpr int kPaddingsSize = 4; } // namespace #ifdef PRIMITIVE_WRITEABLE @@ -109,20 +107,19 @@ int SpaceToBatchND::InferShape(std::vector inputs, std::vector output_shape(input_shape.size()); - output_shape[NHWC_N] = input_shape[NHWC_N] * block_shape[0] * block_shape[1]; - output_shape[NHWC_H] = (input_shape[NHWC_H] + pedding[0] + pedding[1]) / block_shape[0]; - output_shape[NHWC_W] = (input_shape[NHWC_W] + pedding[2] + pedding[3]) / block_shape[1]; + output_shape[NHWC_N] = input_shape[NHWC_N] * block_shape[0] * block_w; + output_shape[NHWC_H] = (input_shape[NHWC_H] + padding[0] + padding[1]) / block_shape[0]; + output_shape[NHWC_W] = (input_shape[NHWC_W] + padding_left + padding_right) / block_w; output_shape[NHWC_C] = input_shape[NHWC_C]; outputs[0]->set_shape(output_shape); return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc index e2c7fd1fb1..83119458a6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc @@ -54,9 +54,15 @@ int SpaceToBatchCPUKernel::ReSize() { } } if (param->need_paddings_) { + int padding_left = 0; + int padding_right = 0; + if (param->m_ == 2) { + padding_left = param->paddings_[2]; + padding_right = param->paddings_[3]; + } param->padded_in_shape_[kNHWC_N] = input_tensor->shape().at(kNHWC_N); param->padded_in_shape_[kNHWC_H] = input_tensor->shape().at(kNHWC_H) + param->paddings_[0] + param->paddings_[1]; - param->padded_in_shape_[kNHWC_W] = input_tensor->shape().at(kNHWC_W) + param->paddings_[2] + param->paddings_[3]; + param->padded_in_shape_[kNHWC_W] = input_tensor->shape().at(kNHWC_W) + padding_left + padding_right; param->padded_in_shape_[kNHWC_C] = input_tensor->shape().at(kNHWC_C); param->padded_input_element_num = param->padded_in_shape_[kNHWC_N] * param->padded_in_shape_[kNHWC_H] * param->padded_in_shape_[kNHWC_W] * param->padded_in_shape_[kNHWC_C]; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.cc index 1460f979b4..495a781bba 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.cc @@ -38,17 +38,7 @@ int SpaceToBatchInt8CPUKernel::Run() { auto quant_arg = output_tensor->GetQuantParams().front(); if (param->need_paddings_) { - padded_input_ = context_->allocator->Malloc(param->padded_input_element_num * sizeof(int8_t)); - if (padded_input_ == nullptr) { - MS_LOG(ERROR) << "Memory allocation failed"; - return RET_ERROR; - } - auto padded_input = reinterpret_cast(padded_input_); - DoSpaceToBatchPaddingNHWCInt8(input_ptr, padded_input, param->input_shape_, param->paddings_, - param->padded_in_shape_, quant_arg.zeroPoint); - DoSpaceToBatchNHWCInt8(padded_input, output_ptr, param->block_sizes_, param->padded_in_shape_, - param->output_shape_); - FreeTmpBuffer(); + DoSpaceToBatchPaddingNHWCInt8(input_ptr, output_ptr, param, quant_arg.zeroPoint); } else { DoSpaceToBatchNHWCInt8(input_ptr, output_ptr, param->block_sizes_, param->input_shape_, param->output_shape_); } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc index a4ad5b1631..cb49866a65 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc @@ -34,7 +34,7 @@ TEST_F(SpaceToBatchTestInt8, test1) { std::vector inputs = {&in_tensor}; std::vector outputs = {&out_tensor}; - SpaceToBatchParameter parameter = {{}, false, {2, 2}, {1, 1, 1, 1}}; + SpaceToBatchParameter parameter = {{}, false, 2, {2, 2}, {1, 1, 1, 1}}; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SpaceToBatchND}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);