@@ -195,7 +195,7 @@ int GeluFp16(const float16_t *src, int length, float16_t *dst, bool approximate) {
   int C8 = UP_ROUND(length, C8NUM);
   for (; i < C8; i += C8NUM) {
     float16x8_t in = vld1q_f16(src + i);
-    float16x8_t res = 0.5 * in * (1.0 + MS_ERFX8_F16(in / (float16_t)1.4142135623730951f));
+    const float16x8_t res = 0.5 * in * (1.0 + MS_ERFX8_F16(in / (float16_t)1.4142135623730951f));
     vst1q_f16(dst + i, res);
   }
 #endif
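
Note: this vector branch computes the exact, erf-based GELU rather than the tanh approximation. A minimal scalar C equivalent of the same formula, assuming only C99 <math.h> (gelu_ref is an illustrative name, not part of the library):

    #include <math.h>

    /* gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2))); 0.70710678f ~= 1/sqrt(2). */
    static float gelu_ref(float x) {
      return 0.5f * x * (1.0f + erff(x * 0.70710678f));
    }
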
@@ -59,7 +59,7 @@ static inline void single_exp_fp16(float16_t src, float16_t *dst) {
   int integer = (float)src / param[0];
   float decimal = (float)src - integer * param[0];
   int int_exp = (integer + 127) << 23;
-  float decimal_exp =
+  const float decimal_exp =
     1.0f + decimal * (1.0f + decimal * (0.5f + decimal * (param[3] + decimal * (param[2] + decimal * param[1]))));
   *dst = (float16_t)(*((float *)&int_exp) * decimal_exp);
 }
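
Note: this routine evaluates exp() by range reduction. Assuming param[0] is ln 2, it splits x = n * ln2 + r, builds 2^n directly from IEEE-754 exponent bits, and approximates e^r with the degree-5 polynomial. A sketch of the bit trick (pow2_int is a hypothetical helper; memcpy also avoids the strict-aliasing pointer cast *((float *)&int_exp) used above):

    #include <string.h>

    /* Builds 2^n for integer n in roughly [-126, 127]: the biased exponent
       (n + 127) goes in bits 23..30, sign and mantissa stay zero. */
    static float pow2_int(int n) {
      int bits = (n + 127) << 23;
      float p;
      memcpy(&p, &bits, sizeof(p));
      return p;
    }
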
@@ -339,7 +339,7 @@ bool CheckConvDw1DWinograd(const ConvParameter *conv_param, int thread_num) {
          conv_param->stride_h_ == 1 && conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1 &&
          conv_param->pad_u_ == 1 && conv_param->pad_d_ == 1 && conv_param->pad_l_ == 1 && conv_param->pad_r_ == 1 &&
          conv_param->input_channel_ == conv_param->output_channel_ &&
-         conv_param->output_h_ / thread_num >= 4;  // better had more than 4 rows for each thread
+         conv_param->output_h_ >= thread_num * 4;  // better to have at least 4 output rows per thread
 }

 void ConvDw3x3RowLeft(const float *src, float *line, int lw, int channel) {
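
Note: for positive integers the two tests accept exactly the same inputs, since floor(output_h / thread_num) >= 4 is equivalent to output_h >= 4 * thread_num. The rewritten form needs no division, so it cannot fault if thread_num were ever zero, and it states the intent ("at least 4 output rows per thread") directly.
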
@@ -20,7 +20,7 @@
 void LayerNormGrad(const float *x, const float *dy, const float *var, const float *mean, const float *gamma,
                    int param_num, int param_size, int block_num, int block_size, float *dx, float *dg, float *db) {
   // var is actually layer_norm forward output var
-  float eps = 1e-12;
+  const float eps = 1e-12;
   const float *var_sqrt_rev = var;
   for (size_t i = 0; i < param_num; ++i) {
     float dgamma = 0.0f;
@@ -37,23 +37,23 @@ void LayerNormGrad(const float *x, const float *dy, const float *var, const float *mean, const float *gamma,
     float sum1 = 0.0f;
     float sum2 = 0.0f;
     float sum3 = 0.0f;
-    for (size_t j = i * block_size; j < (i + 1) * block_size; ++j) {
-      int param_shift = j % param_num;
-      int norm_shift = (int)(j / block_size);
-      float dxm = x[j] - mean[norm_shift];
-      float dyg = dy[j] * gamma[param_shift];
-      sum1 += -0.5f * dyg * dxm * pow(var_sqrt_rev[norm_shift] + eps, -1.5);
+    for (size_t j = 0; j < block_size; ++j) {
+      int index = i * block_size + j;
+      float dxm = x[index] - mean[i];
+      int param_shift = index % param_num;
+      float dyg = dy[index] * gamma[param_shift];
+      sum1 += -0.5f * dyg * dxm * pow(var_sqrt_rev[i] + eps, -1.5);
       sum2 += dyg;
       sum3 += -2.0f * dxm;
     }
-    for (size_t j = i * block_size; j < (i + 1) * block_size; ++j) {
-      int param_shift = j % param_num;
-      int norm_shift = (int)(j / block_size);
-      float var_sqrt = pow(var_sqrt_rev[norm_shift] + eps, -0.5);
-      float dx1 = dy[j] * gamma[param_shift] * var_sqrt;
-      float dx2 = sum1 * 2.0f / block_size * (x[j] - mean[norm_shift]);
+    for (size_t j = 0; j < block_size; ++j) {
+      int index = i * block_size + j;
+      float var_sqrt = pow(var_sqrt_rev[i] + eps, -0.5);
+      int param_shift = index % param_num;
+      float dx1 = dy[index] * gamma[param_shift] * var_sqrt;
+      float dx2 = sum1 * 2.0f / block_size * (x[index] - mean[i]);
       float dx3 = (-1.0f * var_sqrt * sum2 + (1.0f / block_size) * sum1 * sum3) * (1.0f / block_size);
-      dx[j] = dx1 + dx2 + dx3;
+      dx[index] = dx1 + dx2 + dx3;
     }
   }
 }
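
Note: the rewrite exploits the fact that the old norm_shift was constant within each block: for j = i * block_size + jj with 0 <= jj < block_size, (int)(j / block_size) is simply i. Indexing mean[i] and var_sqrt_rev[i] directly removes a per-iteration division and modulo without changing any result.
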
@@ -144,8 +144,7 @@ void MaxPoolingGrad(const float *input_ptr, const float *dy_ptr, float *output_ptr,
           int xw = yw * stride_w + kw - pad_w;
           int val_idx = (xw + in_w * xh) * channel + ic;
 #ifdef ENABLE_ARM
-          unsigned int val_idx_vec[] = {val_idx, val_idx + 1, val_idx + 2, val_idx + 3};
-          uint32x4_t index = vld1q_u32(val_idx_vec);
+          uint32x4_t index = {val_idx, val_idx + 1, val_idx + 2, val_idx + 3};
           float32x4_t in = vld1q_f32(inPtr + val_idx);
           max_idx = MaxIndex(in, &max_val, index, max_idx);
 #else
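
Note: brace-initializing a NEON vector relies on the GCC/Clang vector-extension behavior of uint32x4_t rather than on a portable intrinsic, but it lets the compiler keep the four lane values in registers instead of bouncing them through a stack array. If pure intrinsics were preferred, an equivalent sketch (assumes <arm_neon.h>, as in the ENABLE_ARM branch above):

    static const uint32_t kLaneOffsets[4] = {0, 1, 2, 3};
    uint32x4_t index = vaddq_u32(vdupq_n_u32(val_idx), vld1q_u32(kLaneOffsets));
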
@@ -52,17 +52,17 @@ void ResizeBiLinearGrad(float *in_addr, float *out_addr, int batch_size, int channel,
     size_t h = i / param->in_width_;
     size_t w = i % param->in_width_;
     for (int32_t c = 0; c < channel; ++c) {
-      float in_y = (float)h * param->height_scale_;
+      const float in_y = (float)h * param->height_scale_;
       size_t top_y_index = MSMAX((size_t)(floorf(in_y)), (size_t)(0));
       size_t bottom_y_index = MSMIN((size_t)(ceilf(in_y)), param->out_height_ - 1);
-      float y_lerp = in_y - floorf(in_y);
-      float inverse_y_lerp = 1.0 - y_lerp;
+      const float y_lerp = in_y - floorf(in_y);
+      const float inverse_y_lerp = 1.0 - y_lerp;

-      float in_x = (float)w * param->width_scale_;
+      const float in_x = (float)w * param->width_scale_;
       size_t left_x_index = MSMAX((size_t)(floorf(in_x)), (size_t)(0));
       size_t right_x_index = MSMIN((size_t)(ceilf(in_x)), param->out_width_ - 1);
-      float x_lerp = in_x - floorf(in_x);
-      float inverse_x_lerp = 1.0 - x_lerp;
+      const float x_lerp = in_x - floorf(in_x);
+      const float inverse_x_lerp = 1.0 - x_lerp;

       size_t in_offset = h * (param->in_width_ * channel) + (w * channel) + c;
       size_t out_offset_top_y_left_x = top_y_index * (param->out_width_ * channel) + (left_x_index * channel) + c;
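
Note: these lerp factors are the standard bilinear weights. The gradient for input cell (h, w) is scattered to its four neighbours with weights inverse_y_lerp * inverse_x_lerp (top-left), inverse_y_lerp * x_lerp (top-right), y_lerp * inverse_x_lerp (bottom-left), and y_lerp * x_lerp (bottom-right), which sum to 1 by construction.
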
@@ -18,6 +18,9 @@
 int UnsortedSegmentSum(const float *input, int unit_num, int input_dim1, const int *indices, float *output,
                        int output_dim0, int output_dim1) {
+  if (input_dim1 == 0) {
+    return NNACL_ERR;
+  }
   for (int i = 0; i < unit_num; ++i) {
     int j = i / input_dim1;
     int k = i % input_dim1;
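
Note: the new guard matters because i / input_dim1 and i % input_dim1 below are undefined behaviour in C when input_dim1 is 0 (typically a SIGFPE on x86). Rejecting the degenerate shape up front turns a crash into an NNACL_ERR the caller can handle.
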
@@ -368,6 +368,9 @@ int FftInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs,
 }

 int VectorCInit(VectorC *vc, size_t per_malloc_size) {
+  if (per_malloc_size == 0) {
+    return NNACL_ERR;
+  }
   vc->data_ = (int *)malloc(per_malloc_size * sizeof(int));
   if (vc->data_ == NULL) {
     return NNACL_ERR;
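
Note: rejecting per_malloc_size == 0 keeps the NULL check below meaningful. C permits malloc(0) to return either NULL or a unique pointer, so a zero chunk size could otherwise be misreported as an out-of-memory failure.
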
@@ -63,10 +63,10 @@ int PriorBoxInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs,
   size_t min_sizes_size = param->min_sizes_size;
   size_t max_sizes_size = param->max_sizes_size;
   int32_t num_priors_box = min_sizes_size * different_aspect_ratios_size + max_sizes_size;
-  int kPriorBoxPoints = 4;
-  int kPriorBoxN = 1;
-  int kPriorBoxW = 1;
-  int kPriorBoxC = 2;
+  const int kPriorBoxPoints = 4;
+  const int kPriorBoxN = 1;
+  const int kPriorBoxW = 1;
+  const int kPriorBoxC = 2;
   int32_t h = GetHeight(input) * GetWidth(input) * num_priors_box * kPriorBoxPoints;

   output->shape_size_ = 4;
@@ -68,7 +68,7 @@ int ReduceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs,
   }
   bool keep_dims = param->keep_dims_;
   int out_shape[MAX_SHAPE_SIZE];
-  size_t out_shape_size = 0;
+  const size_t out_shape_size = 0;
   // get axes from input tensor
   const TensorC *axes_input = inputs[1];
   if (axes_input->shape_size_ == 1 && axes_input->shape_[0] == 0) {
@@ -75,6 +75,9 @@ int CalNewShape(const TensorC *in_tensor, int *out_shape, size_t out_shape_size) {
 int CalShapeByType(const TensorC *const *inputs, size_t shape_size, int *out_shape, size_t *out_shape_size) {
   const TensorC *shape_tensor = inputs[1];
+  if (shape_size == 0) {
+    return NNACL_ERR;
+  }
   switch (shape_tensor->data_type_) {
     case kNumberTypeInt8: {
       int8_t *data = (int8_t *)(shape_tensor->data_);
@@ -59,8 +59,7 @@ int HandleAxesCheckNull(const TensorC *input_tensor, const TensorC *begin_tensor,
   return NNACL_OK;
 }

-int HandleAxesInputExist(const TensorC *const *inputs, int *ndim_, int *in_shape_, int *begins_, int *strides_,
-                         int *ends_) {
+int HandleAxesInputExist(const TensorC *const *inputs, int *ndim, int *in_shape, int *begins, int *strides, int *ends) {
   const TensorC *input_tensor = inputs[0];
   const TensorC *begin_tensor = inputs[1];
   int *begin_data = (int *)(begin_tensor->data_);
@@ -73,7 +72,7 @@ int HandleAxesInputExist(const TensorC *const *inputs, int *ndim_, int *in_shape_,
   }

   // when the input contains axes, begins, ends, and strides will be expanded to the same length as the input rank
-  *ndim_ = (int)(input_tensor->shape_size_);
+  *ndim = (int)(input_tensor->shape_size_);
   int begin_ndim = GetElementNum(begin_tensor);

   int *axes_data = NULL;
@@ -111,20 +110,20 @@ int HandleAxesInputExist(const TensorC *const *inputs, int *ndim_, int *in_shape_,
     }
     for (int i = 0; i < begin_ndim; ++i) {
       if (axes[i] < 0) {
-        axes[i] += *ndim_;
+        axes[i] += *ndim;
       }
     }
   }

-  for (size_t i = 0; i < *ndim_; i++) {
-    in_shape_[i] = 0;
-    begins_[i] = 0;
-    strides_[i] = 0;
+  for (size_t i = 0; i < *ndim; i++) {
+    in_shape[i] = 0;
+    begins[i] = 0;
+    strides[i] = 0;
   }
-  for (size_t i = 0; i < *ndim_; ++i) {
-    in_shape_[i] = input_tensor->shape_[i];
+  for (size_t i = 0; i < *ndim; ++i) {
+    in_shape[i] = input_tensor->shape_[i];
   }
-  for (size_t i = 0; i < *ndim_; ++i) {
+  for (size_t i = 0; i < *ndim; ++i) {
     int axes_it = 0;
     for (size_t j = 0; j < begin_ndim; j++) {
       if (axes[j] == i) {
@@ -137,13 +136,13 @@ int HandleAxesInputExist(const TensorC *const *inputs, int *ndim_, int *in_shape_,
     if (axes_it != begin_ndim) {
       int axis = axes_it;
       // begins or ends that exceed the limit will be set to the limit
-      begins_[i] = imax(imin(begin_data[axis], input_tensor->shape_[i] - 1), -input_tensor->shape_[i]);
-      ends_[i] = imax(imin(end_data[axis], input_tensor->shape_[i]), -input_tensor->shape_[i] - 1);
-      strides_[i] = stride_data[axis];
+      begins[i] = imax(imin(begin_data[axis], input_tensor->shape_[i] - 1), -input_tensor->shape_[i]);
+      ends[i] = imax(imin(end_data[axis], input_tensor->shape_[i]), -input_tensor->shape_[i] - 1);
+      strides[i] = stride_data[axis];
     } else {
-      begins_[i] = 0;
-      ends_[i] = input_tensor->shape_[i];
-      strides_[i] = 1;
+      begins[i] = 0;
+      ends[i] = input_tensor->shape_[i];
+      strides[i] = 1;
     }
   }
   return NNACL_OK;
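
Note: dropping the trailing underscores from the parameters (ndim_ to ndim, in_shape_ to in_shape, and so on) follows the codebase's convention of reserving the name_ suffix for struct and class members, such as transfer_buffer->ndim_, so parameters are no longer visually confusable with member fields.
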
@@ -174,18 +173,18 @@ void Bit2Vector(StridedSliceTransferBuffer *transfer_buffer, StridedSliceParameter *param) {
   }
 }

-void ApplyNewAxisMask(StridedSliceTransferBuffer *transfer_buffer, StridedSliceParameter *param, int *in_shape_,
-                      size_t *in_shape_size) {
+void ApplyNewAxisMask(StridedSliceTransferBuffer *transfer_buffer, StridedSliceParameter *param, int *in_shape,
+                      size_t *out_shape_size) {
   for (size_t i = 0; i < transfer_buffer->new_axis_mask_size_; i++) {
     if (transfer_buffer->new_axis_mask_[i]) {
       transfer_buffer->ndim_ += 1;
-      ShapeInsert(in_shape_, in_shape_size, i, 1);
+      ShapeInsert(in_shape, out_shape_size, i, 1);
       transfer_buffer->begins_[i] = 0;
       transfer_buffer->ends_[i] = 1;
       transfer_buffer->strides_[i] = 1;
       ShapePush(transfer_buffer->begins_, &transfer_buffer->begins_size_, 0);
-      ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, in_shape_[transfer_buffer->ndim_ - 1]);
+      ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, in_shape[transfer_buffer->ndim_ - 1]);
       ShapePush(transfer_buffer->strides_, &transfer_buffer->strides_size_, 1);

       transfer_buffer->begins_mask_[i] = false;
@@ -204,33 +203,45 @@ void ApplyBeginMask(StridedSliceTransferBuffer *transfer_buffer) {
   }
 }

-void ApplyEndMask(StridedSliceTransferBuffer *transfer_buffer, int *in_shape_) {
+int ApplyEndMask(StridedSliceTransferBuffer *transfer_buffer, const int *in_shape, size_t in_shape_size) {
   for (int i = 0; i < transfer_buffer->ndim_; i++) {
     if (transfer_buffer->ends_mask_[i]) {
-      transfer_buffer->ends_[i] = in_shape_[i];
+      if (i >= in_shape_size) {
+        return NNACL_ERR;
+      }
+      transfer_buffer->ends_[i] = in_shape[i];
     }
   }
+  return NNACL_OK;
 }

-void ApplyEllipsisMask(StridedSliceTransferBuffer *transfer_buffer, int *in_shape_) {
+int ApplyEllipsisMask(StridedSliceTransferBuffer *transfer_buffer, const int *in_shape, size_t in_shape_size) {
   for (size_t i = 0; i < transfer_buffer->ellipsis_mask_size_; i++) {
     if (transfer_buffer->ellipsis_mask_[i]) {
+      if (i >= in_shape_size) {
+        return NNACL_ERR;
+      }
       transfer_buffer->begins_[i] = 0;
-      transfer_buffer->ends_[i] = in_shape_[i];
+      transfer_buffer->ends_[i] = in_shape[i];
       break;
     }
   }
+  return NNACL_OK;
 }

-void TransIndexToPositive(StridedSliceTransferBuffer *transfer_buffer, int *in_shape_) {
-  for (int i = 0; i < (int)(transfer_buffer->begins_size_); ++i) {
+int TransIndexToPositive(StridedSliceTransferBuffer *transfer_buffer, const int *in_shape, size_t in_shape_size) {
+  for (size_t i = 0; i < transfer_buffer->begins_size_; i++) {
+    if (i >= in_shape_size) {
+      return NNACL_ERR;
+    }
     if (transfer_buffer->begins_[i] < 0) {
-      transfer_buffer->begins_[i] += in_shape_[i];
+      transfer_buffer->begins_[i] += in_shape[i];
     }
     if (transfer_buffer->ends_[i] < 0) {
-      transfer_buffer->ends_[i] += in_shape_[i];
+      transfer_buffer->ends_[i] += in_shape[i];
     }
   }
+  return NNACL_OK;
 }

 void ApplyShrinkMask(StridedSliceTransferBuffer *transfer_buffer, int *output_shape, size_t *output_shape_size) {
@@ -251,20 +262,25 @@ void ApplyShrinkMask(StridedSliceTransferBuffer *transfer_buffer, int *output_shape, size_t *output_shape_size) {
   }
 }

-void TransferBuffer2Param(StridedSliceTransferBuffer *transfer_buffer, StridedSliceParameter *param, int *in_shape_) {
+int TransferBuffer2Param(StridedSliceTransferBuffer *transfer_buffer, StridedSliceParameter *param, const int *in_shape,
+                         size_t in_shape_size) {
+  if (transfer_buffer->ndim_ >= in_shape_size || param->in_shape_length_ >= in_shape_size) {
+    return NNACL_ERR;
+  }
   for (int i = 0; i < transfer_buffer->ndim_; i++) {
     param->begins_[i] = transfer_buffer->begins_[i];
     param->ends_[i] = transfer_buffer->ends_[i];
-    param->in_shape_[i] = in_shape_[i];
+    param->in_shape_[i] = in_shape[i];
     param->strides_[i] = transfer_buffer->strides_[i];
   }
   for (int i = transfer_buffer->ndim_; i < param->in_shape_length_; i++) {
     param->begins_[i] = 0;
-    param->ends_[i] = in_shape_[i];
-    param->in_shape_[i] = in_shape_[i];
+    param->ends_[i] = in_shape[i];
+    param->in_shape_[i] = in_shape[i];
     param->strides_[i] = 1;
   }
+  return NNACL_OK;
 }

 void InitStridedSliceTransferBuffer(StridedSliceTransferBuffer *transfer_buffer) {
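
Note: the pattern in the three converted mask helpers and in TransferBuffer2Param is the same. Every function that indexes in_shape now also receives its capacity and returns int, yielding NNACL_ERR on an out-of-bounds index instead of silently reading past the array; the formerly void calls become ret-checked calls in StridedSliceInferShape below.
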
@@ -302,9 +318,9 @@ int StridedSliceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs,
     return NNACL_INFER_INVALID;
   }

-  int in_shape_[MAX_SHAPE_SIZE];
+  int in_shape[MAX_SHAPE_SIZE];
   size_t in_shape_size = 0;
-  ShapeSet(in_shape_, &in_shape_size, input->shape_, input->shape_size_);
+  ShapeSet(in_shape, &in_shape_size, input->shape_, input->shape_size_);

   StridedSliceTransferBuffer transfer_buffer;
   InitStridedSliceTransferBuffer(&transfer_buffer);
@@ -345,7 +361,7 @@ int StridedSliceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs,
   }

   if (inputs_size == 5) {
-    int ret = HandleAxesInputExist(inputs, &transfer_buffer.ndim_, in_shape_, transfer_buffer.begins_,
+    int ret = HandleAxesInputExist(inputs, &transfer_buffer.ndim_, in_shape, transfer_buffer.begins_,
                                    transfer_buffer.strides_, transfer_buffer.ends_);
     if (ret != NNACL_OK) {
       return ret;
@@ -355,15 +371,24 @@ int StridedSliceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs,
   // set all mask to original input shape
   SetMaskSize(&transfer_buffer);
   Bit2Vector(&transfer_buffer, param);
-  ApplyNewAxisMask(&transfer_buffer, param, in_shape_, &in_shape_size);
+  ApplyNewAxisMask(&transfer_buffer, param, in_shape, &in_shape_size);
   ApplyBeginMask(&transfer_buffer);
-  ApplyEndMask(&transfer_buffer, in_shape_);
-  ApplyEllipsisMask(&transfer_buffer, in_shape_);
+  int ret = ApplyEndMask(&transfer_buffer, in_shape, MAX_SHAPE_SIZE);
+  if (ret != NNACL_OK) {
+    return ret;
+  }
+  ret = ApplyEllipsisMask(&transfer_buffer, in_shape, MAX_SHAPE_SIZE);
+  if (ret != NNACL_OK) {
+    return ret;
+  }

   int output_shape[MAX_SHAPE_SIZE];
   size_t output_shape_size = 0;
-  ShapeSet(output_shape, &output_shape_size, in_shape_, in_shape_size);
-  TransIndexToPositive(&transfer_buffer, in_shape_);
+  ShapeSet(output_shape, &output_shape_size, in_shape, in_shape_size);
+  ret = TransIndexToPositive(&transfer_buffer, in_shape, MAX_SHAPE_SIZE);
+  if (ret != NNACL_OK) {
+    return ret;
+  }
   for (int i = 0; i < transfer_buffer.ndim_; i++) {
     if (transfer_buffer.strides_[i] == 0) {
       return NNACL_ERR;
@@ -374,8 +399,10 @@ int StridedSliceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs,
   }

   ApplyShrinkMask(&transfer_buffer, output_shape, &output_shape_size);
   SetShapeArray(outputs[0], output_shape, output_shape_size);
-  TransferBuffer2Param(&transfer_buffer, param, in_shape_);
+  ret = TransferBuffer2Param(&transfer_buffer, param, in_shape, MAX_SHAPE_SIZE);
+  if (ret != NNACL_OK) {
+    return ret;
+  }
   return NNACL_OK;
 }
@@ -31,6 +31,7 @@ OpParameter *PopulateSoftmaxParameter(const void *prim) {
   auto prim_softmax = primitive->value_as_Softmax();
   if (prim_softmax->axis()->size() != 1) {
     MS_LOG(ERROR) << "axis number invalid!number: " << prim_softmax->axis()->size();
+    free(softmax_param);
     return nullptr;
   }
   softmax_param->axis_ = prim_softmax->axis()->data()[0];
@@ -34,6 +34,7 @@ OpParameter *PopulateSplitParameter(const void *prim) {
   split_param->num_split_ = split_prim->numberSplit();
   if (split_param->num_split_ > std::numeric_limits<int>::max() / static_cast<int>(sizeof(int))) {
     MS_LOG(ERROR) << "The value of split_param->num_split_ is too big";
+    free(split_param);
     return nullptr;
   }
   int *split_sizes = reinterpret_cast<int *>(malloc(split_param->num_split_ * sizeof(int)));
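
Note: both additions fix the same leak pattern. These populate functions presumably allocate their parameter struct a few lines earlier, so every early return nullptr taken after that allocation must free it, just as the later error paths in the same functions already do.
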
@@ -51,7 +51,7 @@ inline ConvParameter *CreateNewConvParameter(ConvParameter *parameter) {
 inline void FreeMemory(ConvParameter *conv_param, const std::vector<lite::Tensor *> &new_inputs,
                        const std::vector<lite::Tensor *> &new_outputs) {
-  if (conv_param) {
+  if (conv_param != nullptr) {
     free(conv_param);
   }
   for (auto &in_tensor : new_inputs) {
@@ -146,7 +146,7 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32KernelSelect() {
     }
   }

-  if (kernel) {
+  if (kernel != nullptr) {
     auto ret = kernel->Init();
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "conv kernel init failed.";
@@ -112,12 +112,6 @@ kernel::LiteKernel *NpuConvKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                          const lite::InnerContext *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(op_parameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);
-  if (inputs[0]->Size() > NPU_MEMORY_MAX) {
-    MS_LOG(ERROR) << "Npu does not support input tensor size greater than 200MB";
-    free(op_parameter);
-    return nullptr;
-  }
   auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter);
   kernel::NPUKernel *kernel = nullptr;
   if (conv_param->group_ == 1) {
@@ -27,7 +27,6 @@ using mindspore::kernel::LiteKernel;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
-#define NPU_MEMORY_MAX 200 * 1024 * 1024
 class NPUKernel : public LiteKernel {
  public:
   NPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
@@ -63,11 +62,6 @@ kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
     free(op_parameter);
     return nullptr;
   }
-  if (inputs[0]->Size() > NPU_MEMORY_MAX) {
-    MS_LOG(ERROR) << "Npu does not support input tensor size greater than 200MB";
-    free(op_parameter);
-    return nullptr;
-  }
   auto *kernel = new (std::nothrow) T(op_parameter, inputs, outputs, ctx);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel " << op_parameter->name_ << "is nullptr.";