From: @gongdaguo
Reviewed-by:
Signed-off-by:
tags/v1.2.0-rc1
@@ -42,15 +42,17 @@ int ArgCompareDescFp16(const void *a, const void *b) {
   return 0;
 }
 
-void ArgMaxTopK1Fp16(const float16_t *input, float16_t *output, float16_t *output_value,
-                     const ArgMinMaxParameter *param, int pre_axis_count, int axis_count, int after_axis_count) {
+void ArgMaxTopK1Fp16(const float16_t *input, void *output, float16_t *output_value, const ArgMinMaxParameter *param,
+                     int pre_axis_count, int axis_count, int after_axis_count) {
   bool out_value = param->out_value_;
+  float16_t *outputfp16 = (float16_t *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < pre_axis_count; ++i) {
     size_t output_offset = i * after_axis_count;
     size_t input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float16_t value = -FLT_MAX;
-      float16_t index = 0.0f;
+      int index = 0;
       for (int k = 0; k < axis_count; ++k) {
         float16_t value_tmp = input[input_offset + k * after_axis_count + j];
         if (value_tmp > value) {
@@ -58,7 +60,11 @@ void ArgMaxTopK1Fp16(const float16_t *input, float16_t *output, float16_t *outpu
           index = k;
         }
       }
-      output[output_offset + j] = out_value ? value : index;
+      if (out_value) {
+        outputfp16[output_offset + j] = value;
+      } else {
+        outputint[output_offset + j] = index;
+      }
       if (output_value != NULL) {
         output_value[output_offset + j] = value;
       }
@@ -66,15 +72,17 @@ void ArgMaxTopK1Fp16(const float16_t *input, float16_t *output, float16_t *outpu
   }
 }
 
-void ArgMinTopK1Fp16(const float16_t *input, float16_t *output, float16_t *output_value,
-                     const ArgMinMaxParameter *param, int pre_axis_count, int axis_count, int after_axis_count) {
+void ArgMinTopK1Fp16(const float16_t *input, void *output, float16_t *output_value, const ArgMinMaxParameter *param,
+                     int pre_axis_count, int axis_count, int after_axis_count) {
   bool out_value = param->out_value_;
+  float16_t *outputfp16 = (float16_t *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < pre_axis_count; ++i) {
     size_t output_offset = i * after_axis_count;
     size_t input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float16_t value = FLT_MAX;
-      float16_t index = 0.0f;
+      int index = 0;
       for (int k = 0; k < axis_count; ++k) {
         float16_t value_tmp = input[input_offset + k * after_axis_count + j];
         if (value_tmp < value) {
@@ -82,7 +90,11 @@ void ArgMinTopK1Fp16(const float16_t *input, float16_t *output, float16_t *outpu
           index = k;
         }
       }
-      output[output_offset + j] = out_value ? value : index;
+      if (out_value) {
+        outputfp16[output_offset + j] = value;
+      } else {
+        outputint[output_offset + j] = index;
+      }
       if (output_value != NULL) {
         output_value[output_offset + j] = value;
       }
@@ -90,29 +102,37 @@ void ArgMinTopK1Fp16(const float16_t *input, float16_t *output, float16_t *outpu
   }
 }
 
-void ArgMinMaxDim0Fp16(const float16_t *input, float16_t *output, float16_t *output_value, const int *in_shape,
+void ArgMinMaxDim0Fp16(const float16_t *input, void *output, float16_t *output_value, const int *in_shape,
                        const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
+  float16_t *outputfp16 = (float16_t *)output;
+  int *outputint = (int *)output;
   for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
     for (int j = 0; j < in_shape[0]; ++j) {
       size_t offset = param->in_strides_[0] * j + i;
       param->arg_elements_[j].index_ = j;
-      param->arg_elements_[j].data_.f_data_ = input[offset];
+      param->arg_elements_[j].data_.f16_data_ = input[offset];
     }
     qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), *compare_func);
    for (int j = 0; j < param->topk_; ++j) {
      size_t out_offset = j * param->out_strides_[0] + i;
-      output[out_offset] = param->out_value_ ? param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_;
+      if (param->out_value_) {
+        outputfp16[out_offset] = param->arg_elements_[j].data_.f16_data_;
+      } else {
+        outputint[out_offset] = param->arg_elements_[j].index_;
+      }
      if (output_value != NULL) {
-        output_value[out_offset] = param->arg_elements_[j].data_.f_data_;
+        output_value[out_offset] = param->arg_elements_[j].data_.f16_data_;
      }
    }
  }
  return;
 }
 
-void ArgMinMaxDim1Fp16(const float16_t *input, float16_t *output, float16_t *output_value, const int *in_shape,
+void ArgMinMaxDim1Fp16(const float16_t *input, void *output, float16_t *output_value, const int *in_shape,
                        const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
   int in_shape1 = in_shape[1];
+  float16_t *outputfp16 = (float16_t *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_dim0_offset = i * param->in_strides_[0];
     size_t out_dim0_offset = i * param->out_strides_[0];
@@ -120,14 +140,18 @@ void ArgMinMaxDim1Fp16(const float16_t *input, float16_t *output, float16_t *out
       for (int k = 0; k < in_shape1; ++k) {
         size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
         param->arg_elements_[k].index_ = k;
-        param->arg_elements_[k].data_.f_data_ = input[offset];
+        param->arg_elements_[k].data_.f16_data_ = input[offset];
       }
       qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), *compare_func);
       for (int k = 0; k < param->topk_; ++k) {
         size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
-        output[out_offset] = param->out_value_ ? param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_;
+        if (param->out_value_) {
+          outputfp16[out_offset] = param->arg_elements_[k].data_.f16_data_;
+        } else {
+          outputint[out_offset] = param->arg_elements_[k].index_;
+        }
         if (output_value != NULL) {
-          output_value[out_offset] = param->arg_elements_[k].data_.f_data_;
+          output_value[out_offset] = param->arg_elements_[k].data_.f16_data_;
         }
       }
     }
@@ -139,6 +163,8 @@ void ArgMinMaxDim2Fp16(const float16_t *input, float16_t *output, float16_t *out
                        const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
   int in_shape1 = in_shape[1];
   int in_shape2 = in_shape[2];
+  float *outputfp16 = (float *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_dim0_offset = i * param->in_strides_[0];
     size_t out_dim0_offset = i * param->out_strides_[0];
@@ -149,16 +175,18 @@ void ArgMinMaxDim2Fp16(const float16_t *input, float16_t *output, float16_t *out
         for (int l = 0; l < in_shape2; ++l) {
           size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
           param->arg_elements_[l].index_ = l;
-          param->arg_elements_[l].data_.f_data_ = input[offset];
+          param->arg_elements_[l].data_.f16_data_ = input[offset];
         }
         qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), *compare_func);
         for (int l = 0; l < param->topk_; ++l) {
           size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
-          output[out_offset] =
-            param->out_value_ ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
+          if (param->out_value_) {
+            outputfp16[out_offset] = param->arg_elements_[l].data_.f16_data_;
+          } else {
+            outputint[out_offset] = param->arg_elements_[l].index_;
+          }
           if (output_value != NULL) {
-            output_value[out_offset] = param->arg_elements_[l].data_.f_data_;
+            output_value[out_offset] = param->arg_elements_[l].data_.f16_data_;
          }
        }
      }
@@ -171,6 +199,8 @@ void ArgMinMaxDim3Fp16(const float16_t *input, float16_t *output, float16_t *out
   int in_shape1 = in_shape[1];
   int in_shape2 = in_shape[2];
   int in_shape3 = in_shape[3];
+  float *outputfp16 = (float *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_dim0_offset = i * param->in_strides_[0];
     size_t out_dim0_offset = i * param->out_strides_[0];
@@ -183,15 +213,18 @@ void ArgMinMaxDim3Fp16(const float16_t *input, float16_t *output, float16_t *out
         for (int l = 0; l < in_shape3; ++l) {
           size_t offset = l + in_dim2_offset;
           param->arg_elements_[l].index_ = l;
-          param->arg_elements_[l].data_.f_data_ = input[offset];
+          param->arg_elements_[l].data_.f16_data_ = input[offset];
         }
         qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), *compare_func);
         for (int l = 0; l < param->topk_; ++l) {
           size_t out_offset = out_dim2_offset + l;
-          output[out_offset] =
-            param->out_value_ ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
+          if (param->out_value_) {
+            outputfp16[out_offset] = param->arg_elements_[l].data_.f16_data_;
+          } else {
+            outputint[out_offset] = param->arg_elements_[l].index_;
+          }
           if (output_value != NULL) {
-            output_value[out_offset] = param->arg_elements_[l].data_.f_data_;
+            output_value[out_offset] = param->arg_elements_[l].data_.f16_data_;
          }
        }
      }
@@ -199,7 +232,7 @@ void ArgMinMaxDim3Fp16(const float16_t *input, float16_t *output, float16_t *out
   }
 }
 
-void ArgMinMaxFp16(const float16_t *input, float16_t *output, float16_t *output_value, const int *in_shape,
+void ArgMinMaxFp16(const float16_t *input, void *output, float16_t *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param) {
   if (param->topk_ == 1) {
     int pre_axis_count = 1;
@@ -23,7 +23,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-void ArgMinMaxFp16(const float16_t *input, float16_t *output, float16_t *output_value, const int *in_shape,
+void ArgMinMaxFp16(const float16_t *input, void *output, float16_t *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param);
 #ifdef __cplusplus
 }
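The new `void *output` signature means the same output buffer is written as float16 values when `out_value_` is set and as int32 indices otherwise, so the caller has to read it back with the matching pointer type. A minimal, self-contained sketch of that convention, using plain `float` to stand in for `float16_t`; the function name is illustrative only, not part of the library:

// Illustrative only: mirrors the dual-typed output convention of the patched
// ArgMax/ArgMin kernels, with float standing in for float16_t.
#include <cstdio>
#include <vector>

// Writes either the max value (as float) or its index (as int) into the same
// untyped buffer, depending on `out_value`.
void ArgMaxTopK1Sketch(const float *input, int len, void *output, bool out_value) {
  float best = input[0];
  int best_idx = 0;
  for (int k = 1; k < len; ++k) {
    if (input[k] > best) {
      best = input[k];
      best_idx = k;
    }
  }
  if (out_value) {
    static_cast<float *>(output)[0] = best;    // value output: buffer holds floats
  } else {
    static_cast<int *>(output)[0] = best_idx;  // index output: buffer holds int32
  }
}

int main() {
  std::vector<float> data = {0.1f, 2.5f, -1.0f, 0.7f};
  int index_out = 0;
  float value_out = 0.0f;
  ArgMaxTopK1Sketch(data.data(), 4, &index_out, /*out_value=*/false);
  ArgMaxTopK1Sketch(data.data(), 4, &value_out, /*out_value=*/true);
  std::printf("argmax index = %d, value = %f\n", index_out, value_out);
  return 0;
}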
@@ -43,15 +43,17 @@ int ArgCompareDescFp32(const void *a, const void *b) {
   return 0;
 }
 
-void ArgMaxTopK1(const float *input, float *output, float *output_value, const ArgMinMaxParameter *param,
+void ArgMaxTopK1(const float *input, void *output, float *output_value, const ArgMinMaxParameter *param,
                  int pre_axis_count, int axis_count, int after_axis_count) {
   bool out_value = param->out_value_;
+  float *outputfp32 = (float *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < pre_axis_count; ++i) {
     size_t output_offset = i * after_axis_count;
     size_t input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float value = -FLT_MAX;
-      float index = 0.0f;
+      int index = 0;
       for (int k = 0; k < axis_count; ++k) {
         float value_tmp = input[input_offset + k * after_axis_count + j];
         if (value_tmp > value) {
@@ -59,7 +61,11 @@ void ArgMaxTopK1(const float *input, float *output, float *output_value, const A
           index = k;
         }
       }
-      output[output_offset + j] = out_value ? value : index;
+      if (out_value) {
+        outputfp32[output_offset + j] = value;
+      } else {
+        outputint[output_offset + j] = index;
+      }
       if (output_value != NULL) {
         output_value[output_offset + j] = value;
       }
@@ -67,15 +73,17 @@ void ArgMaxTopK1(const float *input, float *output, float *output_value, const A
   }
 }
 
-void ArgMinTopK1(const float *input, float *output, float *output_value, const ArgMinMaxParameter *param,
+void ArgMinTopK1(const float *input, void *output, float *output_value, const ArgMinMaxParameter *param,
                  int pre_axis_count, int axis_count, int after_axis_count) {
   bool out_value = param->out_value_;
+  float *outputfp32 = (float *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < pre_axis_count; ++i) {
     size_t output_offset = i * after_axis_count;
     size_t input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float value = FLT_MAX;
-      float index = 0.0f;
+      int index = 0;
       for (int k = 0; k < axis_count; ++k) {
         float value_tmp = input[input_offset + k * after_axis_count + j];
         if (value_tmp < value) {
@@ -83,7 +91,11 @@ void ArgMinTopK1(const float *input, float *output, float *output_value, const A
           index = k;
         }
       }
-      output[output_offset + j] = out_value ? value : index;
+      if (out_value) {
+        outputfp32[output_offset + j] = value;
+      } else {
+        outputint[output_offset + j] = index;
+      }
       if (output_value != NULL) {
         output_value[output_offset + j] = value;
       }
@@ -91,8 +103,10 @@ void ArgMinTopK1(const float *input, float *output, float *output_value, const A
   }
 }
 
-void ArgMinMaxDim0(const float *input, float *output, float *output_value, const int *in_shape,
+void ArgMinMaxDim0(const float *input, void *output, float *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
+  float *outputfp32 = (float *)output;
+  int *outputint = (int *)output;
   for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
     for (int j = 0; j < in_shape[0]; ++j) {
       size_t offset = param->in_strides_[0] * j + i;
@@ -102,7 +116,11 @@ void ArgMinMaxDim0(const float *input, float *output, float *output_value, const
     qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), *compare_func);
     for (int j = 0; j < param->topk_; ++j) {
       size_t out_offset = j * param->out_strides_[0] + i;
-      output[out_offset] = param->out_value_ ? param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_;
+      if (param->out_value_) {
+        outputfp32[out_offset] = param->arg_elements_[j].data_.f_data_;
+      } else {
+        outputint[out_offset] = param->arg_elements_[j].index_;
+      }
       if (output_value != NULL) {
         output_value[out_offset] = param->arg_elements_[j].data_.f_data_;
       }
@@ -111,8 +129,10 @@ void ArgMinMaxDim0(const float *input, float *output, float *output_value, const
   return;
 }
 
-void ArgMinMaxDim1(const float *input, float *output, float *output_value, const int *in_shape,
+void ArgMinMaxDim1(const float *input, void *output, float *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
+  float *outputfp32 = (float *)output;
+  int *outputint = (int *)output;
   int in_shape1 = in_shape[1];
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_dim0_offset = i * param->in_strides_[0];
@@ -126,7 +146,11 @@ void ArgMinMaxDim1(const float *input, float *output, float *output_value, const
       qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), *compare_func);
       for (int k = 0; k < param->topk_; ++k) {
         size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
-        output[out_offset] = param->out_value_ ? param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_;
+        if (param->out_value_) {
+          outputfp32[out_offset] = param->arg_elements_[k].data_.f_data_;
+        } else {
+          outputint[out_offset] = param->arg_elements_[k].index_;
+        }
         if (output_value != NULL) {
           output_value[out_offset] = param->arg_elements_[k].data_.f_data_;
         }
@@ -136,10 +160,12 @@ void ArgMinMaxDim1(const float *input, float *output, float *output_value, const
   return;
 }
 
-void ArgMinMaxDim2(const float *input, float *output, float *output_value, const int *in_shape,
+void ArgMinMaxDim2(const float *input, void *output, float *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
   int in_shape1 = in_shape[1];
   int in_shape2 = in_shape[2];
+  float *outputfp32 = (float *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_dim0_offset = i * param->in_strides_[0];
     size_t out_dim0_offset = i * param->out_strides_[0];
@@ -155,9 +181,11 @@ void ArgMinMaxDim2(const float *input, float *output, float *output_value, const
         qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), *compare_func);
         for (int l = 0; l < param->topk_; ++l) {
           size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
-          output[out_offset] =
-            param->out_value_ ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
+          if (param->out_value_) {
+            outputfp32[out_offset] = param->arg_elements_[l].data_.f_data_;
+          } else {
+            outputint[out_offset] = param->arg_elements_[l].index_;
+          }
           if (output_value != NULL) {
             output_value[out_offset] = param->arg_elements_[l].data_.f_data_;
           }
@@ -167,11 +195,13 @@ void ArgMinMaxDim2(const float *input, float *output, float *output_value, const
   }
 }
 
-void ArgMinMaxDim3(const float *input, float *output, float *output_value, const int *in_shape,
+void ArgMinMaxDim3(const float *input, void *output, float *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
   int in_shape1 = in_shape[1];
   int in_shape2 = in_shape[2];
   int in_shape3 = in_shape[3];
+  float *outputfp32 = (float *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_dim0_offset = i * param->in_strides_[0];
     size_t out_dim0_offset = i * param->out_strides_[0];
@@ -189,8 +219,11 @@ void ArgMinMaxDim3(const float *input, float *output, float *output_value, const
         qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), *compare_func);
         for (int l = 0; l < param->topk_; ++l) {
           size_t out_offset = out_dim2_offset + l;
-          output[out_offset] =
-            param->out_value_ ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
+          if (param->out_value_) {
+            outputfp32[out_offset] = param->arg_elements_[l].data_.f_data_;
+          } else {
+            outputint[out_offset] = param->arg_elements_[l].index_;
+          }
           if (output_value != NULL) {
             output_value[out_offset] = param->arg_elements_[l].data_.f_data_;
           }
@@ -200,7 +233,7 @@ void ArgMinMaxDim3(const float *input, float *output, float *output_value, const
   }
 }
 
-void ArgMinMaxFp32(const float *input, float *output, float *output_value, const int *in_shape,
+void ArgMinMaxFp32(const float *input, void *output, float *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param) {
   if (param->topk_ == 1) {
     int pre_axis_count = 1;
@@ -22,7 +22,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-void ArgMinMaxFp32(const float *input, float *output, float *output_value, const int *in_shape,
+void ArgMinMaxFp32(const float *input, void *output, float *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param);
 #ifdef __cplusplus
 }
@@ -33,21 +33,26 @@ int InstanceNorm(const float *src_data, float *dst_data, const float *gamma_data
   for (int c = channel_begin; c < channel_end; c++) {
     const float *src = src_b + c * param->inner_size_;
     float *dst = dst_b + c * param->inner_size_;
-    float mean = 0.0f;
-    float square_mean = 0.0f;
+    double mean = 0.0f;
+    double square_mean = 0.0f;
     int index = 0;
 #ifdef ENABLE_NEON
-    float32x4_t sum = vdupq_n_f32(0);
-    float32x4_t square_sum = vdupq_n_f32(0);
     for (; index < param->inner_size_ - C4NUM; index += C4NUM) {
       float32x4_t srcv = vld1q_f32(src + index);
       float32x4_t squarev = vmulq_f32(srcv, srcv);
-      sum = vaddq_f32(sum, srcv);
-      square_sum = vaddq_f32(square_sum, squarev);
+#ifdef ENABLE_ARM64
+      mean += vaddvq_f32(srcv);
+      square_mean += vaddvq_f32(squarev);
+#else
+      float32x2_t src_add2 = vadd_f32(vget_low_f32(srcv), vget_high_f32(srcv));
+      float32x2_t src_add4 = vpadd_f32(src_add2, src_add2);
+      mean += vget_lane_f32(src_add4, 0);
+      float32x2_t square_add2 = vadd_f32(vget_low_f32(squarev), vget_high_f32(squarev));
+      float32x2_t square_add4 = vpadd_f32(square_add2, square_add2);
+      square_mean += vget_lane_f32(square_add4, 0);
+#endif
     }
-    mean = sum[0] + sum[1] + sum[2] + sum[3];
-    square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
 #endif
     for (; index < param->inner_size_; index++) {
       mean += src[index];
@@ -56,27 +61,11 @@ int InstanceNorm(const float *src_data, float *dst_data, const float *gamma_data
     mean /= (float)param->inner_size_;
     square_mean /= (float)param->inner_size_;
-    const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);
+    const double deno = gamma_data[c] / sqrt(square_mean - mean * mean + param->epsilon_);
     index = 0;
-#ifdef ENABLE_NEON
-    float32x4_t meanv = vdupq_n_f32(mean);
-    float32x4_t denov = vdupq_n_f32(deno);
-    for (; index < param->inner_size_ - C4NUM; index += C4NUM) {
-      float32x4_t srcv = vld1q_f32(src + index);
-      float32x4_t outv = vsubq_f32(srcv, meanv);
-      outv = vmulq_f32(outv, denov);
-      float32x4_t gammav = vdupq_n_f32(gamma_data[c]);
-      float32x4_t betav = vdupq_n_f32(beta_data[c]);
-      outv = vmulq_f32(outv, gammav);
-      outv = vaddq_f32(outv, betav);
-      vst1q_f32(dst + index, outv);
-    }
-#endif
     for (; index < param->inner_size_; index++) {
-      dst[index] = (src[index] - mean) * deno;
-      dst[index] = dst[index] * gamma_data[c] + beta_data[c];
+      dst[index] = (src[index] - mean) * deno + beta_data[c];
     }
   }
 }
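After this change the scalar loop applies dst = (src - mean) * (gamma / sqrt(E[x^2] - mean^2 + eps)) + beta, i.e. gamma is folded into the normalization factor, and the running sums are accumulated in double. A small self-contained check (illustrative values only) that the folded form matches the earlier two-step normalize-then-scale form:

// Illustrative only: verifies that folding gamma into the denominator gives
// the same result as normalizing first and then applying gamma/beta.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<float> src = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
  const float gamma = 0.5f, beta = 0.1f, eps = 1e-5f;

  // Accumulate in double, as the patch does, to reduce rounding error.
  double mean = 0.0, square_mean = 0.0;
  for (float v : src) {
    mean += v;
    square_mean += static_cast<double>(v) * v;
  }
  mean /= src.size();
  square_mean /= src.size();

  const double deno = gamma / std::sqrt(square_mean - mean * mean + eps);
  for (size_t i = 0; i < src.size(); ++i) {
    // One-step form used after the patch.
    double folded = (src[i] - mean) * deno + beta;
    // Two-step form used before the patch.
    double two_step = (src[i] - mean) / std::sqrt(square_mean - mean * mean + eps) * gamma + beta;
    std::printf("i=%zu folded=%f two_step=%f\n", i, folded, two_step);
  }
  return 0;
}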
@@ -86,9 +86,10 @@ int AddN::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs
   for (size_t i = 1; i < inputs.size(); ++i) {
     if (inputs.at(i)->shape().size() > max_dims) {
       max_dims = inputs.at(i)->shape().size();
-      max_dims_idx = 0;
+      max_dims_idx = i;
     }
   }
   output->set_shape(inputs.at(max_dims_idx)->shape());
   // make sure all elements have the same size or 1 (broadcasting) in all dimensions
@@ -95,13 +95,17 @@ int ArgMax::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outp
   MS_ASSERT(input != nullptr);
   auto output = outputs_.front();
   MS_ASSERT(output != nullptr);
-  if (inputs_.size() != kSingleNum || outputs_.size() != kSingleNum) {
+  if (inputs_.size() != kSingleNum || outputs_.size() > kDoubleNum) {
     MS_LOG(ERROR) << "tensor number is error.";
     return RET_ERROR;
   }
   output->set_format(input->format());
-  output->set_data_type(input->data_type());
+  if (GetOutMaxValue() && outputs_.size() == kSingleNum) {
+    output->set_data_type(input->data_type());
+  } else {
+    output->set_data_type(kNumberTypeInt32);
+  }
   if (!infer_flag()) {
     return RET_INFER_INVALID;
   }
@@ -119,6 +123,11 @@ int ArgMax::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outp
   }
   output->set_shape(output_shape);
+  if (outputs_.size() == kDoubleNum) {
+    outputs_.at(1)->set_format(input->format());
+    outputs_.at(1)->set_data_type(input->data_type());
+    outputs_.at(1)->set_shape(output_shape);
+  }
   return RET_OK;
 }
 }  // namespace lite
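The revised InferShape keeps the first output as int32 indices unless the op is configured to return only the max values through a single output; a second output, when present, carries the values in the input's dtype. A simplified restatement of that selection rule (stand-in enum values, not the real TypeId definitions):

// Illustrative only: the output-dtype rule the patched InferShape applies.
#include <cstdio>

enum TypeId { kNumberTypeInt32, kNumberTypeFloat32, kNumberTypeFloat16 };  // stand-ins

// Dtype of the first output tensor, given the input dtype, whether the op is
// asked to emit max values, and how many output tensors exist.
TypeId FirstOutputDtype(TypeId input_dtype, bool out_max_value, int num_outputs) {
  // Only when the single output *is* the value tensor does it inherit the
  // input dtype; otherwise the first output holds int32 indices.
  if (out_max_value && num_outputs == 1) {
    return input_dtype;
  }
  return kNumberTypeInt32;
}

int main() {
  std::printf("%d\n", FirstOutputDtype(kNumberTypeFloat16, true, 1));   // fp16 values
  std::printf("%d\n", FirstOutputDtype(kNumberTypeFloat16, true, 2));   // int32 indices
  std::printf("%d\n", FirstOutputDtype(kNumberTypeFloat32, false, 1));  // int32 indices
  return 0;
}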
@@ -101,7 +101,11 @@ int ArgMin::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Te
     MS_LOG(ERROR) << "tensor number is error.";
   }
   output->set_format(input->format());
-  output->set_data_type(input->data_type());
+  if (GetOutMaxValue() && outputs_.size() == kSingleNum) {
+    output->set_data_type(input->data_type());
+  } else {
+    output->set_data_type(kNumberTypeInt32);
+  }
   if (!infer_flag()) {
     return RET_INFER_INVALID;
   }
@@ -116,6 +116,12 @@ int Concat::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outp
       MS_LOG(ERROR) << "All inputs should have the same dim num!";
       return RET_PARAM_INVALID;
     }
+    if ((inputs_.at(i)->data_type() != output->data_type()) &&
+        !((inputs_.at(i)->data_type() == kNumberTypeFloat16 && output->data_type() == kNumberTypeFloat32) ||
+          (inputs_.at(i)->data_type() == kNumberTypeFloat32 && output->data_type() == kNumberTypeFloat16))) {
+      MS_LOG(ERROR) << "All inputs should have the same type!";
+      return RET_PARAM_INVALID;
+    }
     auto axis_tmp = shape_tmp[axis];
     shape_tmp.erase(shape_tmp.begin() + axis);
     if (input0_shape_without_axis != shape_tmp) {
@@ -627,6 +627,8 @@ std::shared_ptr<PrimitiveC> PrimitiveC::Create(const Primitive &prim, const std:
     return NewPrimitiveC<While>(prim, inputs, quantType);
   } else if (op_type == "MirrorPad") {
     return NewPrimitiveC<Pad>(prim, inputs, quantType);
+  } else if (op_type == "InstanceNorm") {
+    return NewPrimitiveC<InstanceNorm>(prim, inputs, quantType);
   } else if (op_type == "Gather") {
     return NewPrimitiveC<Gather>(prim, inputs, quantType);
   } else if (op_type == "OnesLike") {
@@ -72,11 +72,11 @@ int ArgMinMaxCPUKernel::Run() {
     }
   }
   if (input->data_type() == kNumberTypeFloat32) {
-    ArgMinMaxFp32(reinterpret_cast<float *>(input_data), reinterpret_cast<float *>(output_data),
+    ArgMinMaxFp32(reinterpret_cast<float *>(input_data), reinterpret_cast<void *>(output_data),
                   reinterpret_cast<float *>(output_value), shape.data(), arg_param_);
 #ifdef ENABLE_ARM64
   } else if (input->data_type() == kNumberTypeFloat16) {
-    ArgMinMaxFp16(reinterpret_cast<float16_t *>(input_data), reinterpret_cast<float16_t *>(output_data),
+    ArgMinMaxFp16(reinterpret_cast<float16_t *>(input_data), reinterpret_cast<void *>(output_data),
                   reinterpret_cast<float16_t *>(output_value), shape.data(), arg_param_);
 #endif
@@ -65,7 +65,6 @@ int GatherNdCPUKernel::ReSize() {
     MS_LOG(ERROR) << "GatherNd Malloc in_offset_ error!";
     return RET_ERROR;
   }
-  (void)memset(in_offset_, 0, count_ * sizeof(int));
   thread_sz_count_ = MSMIN(thread_count_, count_);
   if (thread_sz_count_ != 0) {
     thread_sz_stride_ = UP_DIV(count_, thread_sz_count_);
@@ -93,6 +92,7 @@ void GatherNdCPUKernel::InitOffset() {
   }
   int idx_stride = idx_lastshape;
+  (void)memset(in_offset_, 0, count_ * sizeof(int));
   for (int j = 0; j < count_; ++j) {
     for (int k = 0; k < idx_lastshape; ++k) {
       in_offset_[j] += indices_ptr[j * idx_stride + k] * in_stride.at(k);
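Moving the memset from ReSize() to InitOffset() matters because in_offset_ is filled with `+=`, so it must be zeroed every time the offsets are recomputed, not only when the buffer is reallocated. A minimal sketch of that accumulation with made-up shapes and strides:

// Illustrative only: zero the offset buffer immediately before the +=
// accumulation loop, as the patched InitOffset() does.
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
  const int count = 3;      // number of gathered slices
  const int idx_last = 2;   // last dimension of the indices tensor
  const std::vector<int> indices = {0, 1, 1, 0, 2, 2};  // count x idx_last
  const std::vector<int> in_stride = {4, 1};            // strides of the source tensor

  std::vector<int> in_offset(count, 123);  // pretend it holds stale data from a previous run

  std::memset(in_offset.data(), 0, count * sizeof(int));  // reset before accumulating
  for (int j = 0; j < count; ++j) {
    for (int k = 0; k < idx_last; ++k) {
      in_offset[j] += indices[j * idx_last + k] * in_stride[k];
    }
  }
  for (int j = 0; j < count; ++j) {
    std::printf("in_offset[%d] = %d\n", j, in_offset[j]);
  }
  return 0;
}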
@@ -3,6 +3,8 @@ mobilenetv2_438.mindir 1.5
 gate_u_net_small-1_110.mindir 1.5
 shufflenetv2.mindir 1.5
 #inceptionv3.mindir 1.5
+cyclegan_AtoB.mindir 0.5
+cyclegan_BtoA.mindir 0.5
 googlenet.mindir 1.5
 retinaface_732_1280_iod.mindir 1.5
 mobilefacenet_iod.mindir 1.5
@@ -16,6 +18,7 @@ mindspore_ghost-pets-8244.mindir 1.5
 mindspore_ghostnet600M-pets.mindir 1.5
 mindspore_ghostnet_1x_pets_int8.mindir 12
 mindspore_deeplab_v3_s16.mindir 6.5
+CenterNet_MultiPose_ascend.mindir 0.5
 googlenet_1202.mindir 0.5
 inceptionv3_1203.mindir 0.5
 mobilenetv2_gpu.mindir 0.5
@@ -50,4 +50,4 @@ hdc_Face_Landmark5_MTI_Aesthetic.onnx
 hdc_Image_Aesthetic_MTI_Aesthetic.onnx
 hdc_mobilenet_1w_class.onnx
 hdc_resnet_1w_class.onnx
-ml_video_edit_imitate_filter.onnx
+#ml_video_edit_imitate_filter.onnx
@@ -81,14 +81,13 @@ function Run_x86() {
         echo ${model_name}'_train' >> "${run_x86_log_file}"
         echo 'cd '${x86_path}'/mindspore-lite-'${version}'-train-linux-x64' >> "${run_x86_log_file}"
         cd ${x86_path}/mindspore-lite-${version}-train-linux-x64 || return 1
-        echo 'LD_LIBRARY_PATH='${LD_LIBRARY_PATH}':./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib ./benchmark_train/benchmark_train --epochs='${epoch_num}' --modelFile='${ms_models_path}'/'${model_name}'_train.ms --inDataFile='${train_io_path}/${model_name}_input1.bin,${train_io_path}/${model_name}_input2.bin' --expectedDataFile='${train_io_path}'/'${model_name}'_output --exportFile='${ms_models_path}'/'${model_name}'_train_exported.ms' >> "${run_x86_log_file}"
+        echo 'LD_LIBRARY_PATH='${LD_LIBRARY_PATH}':./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib ./benchmark_train/benchmark_train --epochs='${epoch_num}' --modelFile='${ms_models_path}'/'${model_name}'_train.ms --inDataFile='${train_io_path}/${model_name}_input1.bin,${train_io_path}/${model_name}_input2.bin' --expectedDataFile='${train_io_path}'/'${model_name}'_output' >> "${run_x86_log_file}"
         echo '-------------------------------------------------------------------------------' >> "${run_x86_log_file}"
         LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib:./minddata/lib:./minddata/third_party/libjpeg-turbo/lib \
         ${run_valgrind}./benchmark_train/benchmark_train \
         --modelFile=${ms_models_path}/${model_name}_train.ms \
         --inDataFile=${train_io_path}/${model_name}_input1.bin,${train_io_path}/${model_name}_input2.bin \
-        --expectedDataFile=${train_io_path}/${model_name}_output \
-        --exportFile=${ms_models_path}/${model_name}_train_exported.ms >> "${run_x86_log_file}" \
+        --expectedDataFile=${train_io_path}/${model_name}_output >> "${run_x86_log_file}" \
         --epochs=${epoch_num} --numThreads=${threads}
         if [ $? = 0 ]; then
             run_result='x86: '${model_name}'_train pass'; echo ${run_result} >> ${run_benchmark_train_result_file}
@@ -183,7 +182,6 @@ function Run_arm() {
         --modelFile=${model_name}_train.ms \
         --inDataFile=${tmp_dir}/${model_name}_input1.bin,${tmp_dir}/${model_name}_input2.bin \
         --expectedDataFile=${tmp_dir}/${model_name}_output \
-        --exportFile=${tmp_dir}/${model_name}_train_exported.ms \
         --numThreads=${threads}
 ENDM
         )
@@ -164,7 +164,31 @@ void ConvTransformFusion::GenNewConvTensor(const FuncGraphPtr &func_graph, const
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
-  CalNewWeightTensor(conv_node, weight_tensor, kernel_num, trans_scale);
+  auto temp_weight_data = new (std::nothrow) float[weight_tensor->tensor_shape_size()];
+  if (temp_weight_data == nullptr) {
+    MS_LOG(ERROR) << "new ParamValueLite failed";
+    lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_ERROR);
+    return;
+  }
+  auto new_weight_tensor = std::make_shared<ParamValueLite>();
+  if (new_weight_tensor == nullptr) {
+    delete temp_weight_data;
+    MS_LOG(ERROR) << "new ParamValueLite failed";
+    return;
+  }
+  new_weight_tensor->set_tensor_size(weight_tensor->tensor_size());
+  new_weight_tensor->set_tensor_shape(weight_tensor->tensor_shape());
+  new_weight_tensor->set_tensor_type(weight_tensor->tensor_type());
+  new_weight_tensor->set_format(weight_tensor->format());
+  auto ret = memcpy_s(temp_weight_data, weight_tensor->tensor_shape_size() * sizeof(float),
+                      weight_tensor->tensor_addr(), weight_tensor->tensor_shape_size() * sizeof(float));
+  if (ret != EOK) {
+    delete temp_weight_data;
+    MS_LOG(ERROR) << "memcpy_s error:" << ret;
+    return;
+  }
+  new_weight_tensor->set_tensor_addr(temp_weight_data);
+  CalNewWeightTensor(conv_node, new_weight_tensor, kernel_num, trans_scale);
   float *bias_data = nullptr;
   // conv has bias,bias_flag true
   bool bias_flag = false;
@@ -177,6 +201,7 @@ void ConvTransformFusion::GenNewConvTensor(const FuncGraphPtr &func_graph, const
       bias_data = new (std::nothrow) float[kernel_num];
       if (bias_data == nullptr) {
         MS_LOG(ERROR) << "tensor_data is nullptr";
+        delete temp_weight_data;
         return;
       }
     }
@@ -186,6 +211,16 @@ void ConvTransformFusion::GenNewConvTensor(const FuncGraphPtr &func_graph, const
     bias_node->set_name(conv_node->fullname_with_scope() + "_bias");
     conv_node->add_input(bias_node);
   }
+  auto new_weight_paramter = func_graph->add_parameter();
+  if (new_weight_paramter == nullptr) {
+    MS_LOG(ERROR) << "new_weight_paramter is nullptr";
+    delete temp_weight_data;
+    return;
+  }
+  new_weight_paramter->set_default_param(new_weight_tensor);
+  new_weight_paramter->set_abstract(conv_weight_node->abstract());
+  new_weight_paramter->set_name(conv_node->fullname_with_scope() + conv_weight_node->fullname_with_scope());
+  conv_node->set_input(kConvWeightIndex, new_weight_paramter);
 }
 
 void ConvTransformFusion::CalNewWeightTensor(const CNodePtr &conv_node, const ParamValueLitePtr &weight_tensor,
                                              int kernel_num, const float *trans_scale) const {
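The fusion now deep-copies the weight into a fresh ParamValueLite and attaches it to the conv node as a new parameter before scaling, presumably so the original weight tensor (which may be shared elsewhere) is left untouched. A generic copy-before-transform sketch using std::vector rather than the ParamValueLite API:

// Illustrative only: copy a shared buffer before scaling it, so the original
// stays intact for any other consumer.
#include <cstdio>
#include <memory>
#include <vector>

int main() {
  auto shared_weight = std::make_shared<std::vector<float>>(std::vector<float>{1.f, 2.f, 3.f});
  const float trans_scale = 0.5f;

  // Deep copy first (analogous to allocating temp_weight_data and memcpy_s).
  auto new_weight = std::make_shared<std::vector<float>>(*shared_weight);
  for (float &w : *new_weight) {
    w *= trans_scale;  // transform only the copy
  }

  std::printf("original[0]=%f copy[0]=%f\n", (*shared_weight)[0], (*new_weight)[0]);
  return 0;
}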
@@ -27,6 +27,44 @@
 using mindspore::lite::PrimitiveC;
 namespace mindspore {
 namespace opt {
+int MindirAdjustPass::ValueNodeInt64Convert(AnfNodePtr anf_node) {
+  if (!utils::isa<ValueNodePtr>(anf_node)) {
+    return lite::RET_NO_CHANGE;
+  }
+  auto valueNode = anf_node->cast<ValueNodePtr>();
+  if (valueNode->abstract() == nullptr) {
+    return lite::RET_NO_CHANGE;
+  }
+  auto abstractTensor = utils::cast<abstract::AbstractTensorPtr>(valueNode->abstract());
+  if (abstractTensor == nullptr) {
+    return lite::RET_NO_CHANGE;
+  }
+  auto value = abstractTensor->GetValueTrack();
+  if (value != nullptr && value->isa<tensor::Tensor>()) {
+    if (abstractTensor->element() == nullptr) {
+      MS_LOG(ERROR) << "abstractTensor->element() is nullptr.";
+      return RET_ERROR;
+    }
+    auto typePtr = abstractTensor->element()->GetTypeTrack();
+    if (typePtr->type_id() == kNumberTypeInt64) {
+      auto shape_vector = utils::cast<abstract::ShapePtr>(abstractTensor->BuildShape())->shape();
+      auto dest_tensor_info = std::make_shared<tensor::Tensor>(kNumberTypeInt32, shape_vector);
+      auto *dest_data_buf = reinterpret_cast<int32_t *>(dest_tensor_info->data_c());
+      auto src_tensor_info = value->cast<tensor::TensorPtr>();
+      auto *src_data_buf = reinterpret_cast<int64_t *>(src_tensor_info->data_c());
+      MS_ASSERT(dest_tensor_info->ElementsNum() == src_tensor_info->ElementsNum());
+      for (int i = 0; i < dest_tensor_info->ElementsNum(); i++) {
+        dest_data_buf[i] = src_data_buf[i];
+      }
+      abstractTensor->set_value(dest_tensor_info);
+      abstractTensor->set_type(TypeIdToType(kNumberTypeInt32));
+      abstractTensor->element()->set_type(TypeIdToType(kNumberTypeInt32));
+      valueNode->set_value(dest_tensor_info);
+    }
+  }
+  return lite::RET_NO_CHANGE;
+}
+
 int MindirAdjustPass::ParameterNodeConvert(AnfNodePtr anf_node) {
   if (!utils::isa<ParameterPtr>(anf_node)) {
     MS_LOG(INFO) << "only parameter node need to convert tensor.";
@@ -139,7 +177,10 @@ bool MindirAdjustPass::Run(const FuncGraphPtr &graph) {
       status = ParameterNodeConvert(node);
     } else if (utils::isa<CNodePtr>(node)) {
       status = PrimitiveConvert(node);
+    } else if (utils::isa<ValueNodePtr>(node)) {
+      status = ValueNodeInt64Convert(node);
     }
     if (status != lite::RET_OK && status != lite::RET_NO_CHANGE) {
       lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
       success_flag = false;
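The new ValueNodeInt64Convert pass narrows int64 constant tensors on value nodes to int32 element by element; values outside the int32 range would be silently truncated. A standalone sketch of the narrowing loop (plain C++, not the tensor::Tensor API):

// Illustrative only: element-wise int64 -> int32 narrowing as performed by
// ValueNodeInt64Convert.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<int64_t> src = {1, -7, 42};
  std::vector<int32_t> dest(src.size());
  for (size_t i = 0; i < src.size(); ++i) {
    dest[i] = static_cast<int32_t>(src[i]);  // truncates values outside int32 range
  }
  for (int32_t v : dest) {
    std::printf("%d\n", v);
  }
  return 0;
}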
@@ -32,6 +32,7 @@ class MindirAdjustPass : public Pass {
   ~MindirAdjustPass() override = default;
   void SetQuantType(QuantType quant_type) { quant_type_ = quant_type; }
   void SetFmkType(FmkType fmk_type) { fmk_type_ = fmk_type; }
+  int ValueNodeInt64Convert(AnfNodePtr anf_node);
   int ParameterNodeConvert(AnfNodePtr anf_node);
   int PrimitiveConvert(AnfNodePtr anf_node);
   bool Run(const FuncGraphPtr &graph) override;