From: @jiangzhiwen8 Reviewed-by: @xulei2020 Signed-off-by:tags/v1.1.0
| @@ -19,8 +19,6 @@ | |||
| #include <string.h> | |||
| #include <cmath> | |||
| #include <vector> | |||
| #include <algorithm> | |||
| #include <limits> | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -549,73 +547,39 @@ template <typename T> | |||
| static void PadWithConstant(const LiteMat &src, LiteMat &dst, const int top, const int bottom, const int left, | |||
| const int right, const PaddBorderType pad_type, uint8_t fill_b_or_gray, uint8_t fill_g, | |||
| uint8_t fill_r) { | |||
| dst.Init(src.width_ + left + right, src.height_ + top + bottom, src.channel_, src.data_type_); | |||
| const T *src_start_p = src; | |||
| T *dst_start_p = dst; | |||
| // padd top | |||
| for (int h = 0; h < top; h++) { | |||
| for (int w = 0; w < dst.width_; w++) { | |||
| uint32_t index = (h * dst.width_ + w) * dst.channel_; | |||
| if (dst.channel_ == 1) { | |||
| dst_start_p[index] = fill_b_or_gray; | |||
| } else if (dst.channel_ == 3) { | |||
| dst_start_p[index] = fill_b_or_gray; | |||
| dst_start_p[index + 1] = fill_g; | |||
| dst_start_p[index + 2] = fill_r; | |||
| } else { | |||
| } | |||
| std::vector<uint8_t> row_buffer(dst.width_ * dst.channel_); | |||
| uint8_t *const_ptr = row_buffer.data(); | |||
| int src_step = src.width_ * dst.channel_; | |||
| int dst_step = dst.width_ * dst.channel_; | |||
| if (dst.channel_ == 1) { | |||
| for (int i = 0; i < dst_step; i++) { | |||
| const_ptr[i] = fill_b_or_gray; | |||
| } | |||
| } | |||
| // padd bottom | |||
| for (int h = dst.height_ - bottom; h < dst.height_; h++) { | |||
| for (int w = 0; w < dst.width_; w++) { | |||
| uint32_t index = (h * dst.width_ + w) * dst.channel_; | |||
| if (dst.channel_ == 1) { | |||
| dst_start_p[index] = fill_b_or_gray; | |||
| } else if (dst.channel_ == 3) { | |||
| dst_start_p[index] = fill_b_or_gray; | |||
| dst_start_p[index + 1] = fill_g; | |||
| dst_start_p[index + 2] = fill_r; | |||
| } else { | |||
| } | |||
| } else if (dst.channel_ == 3) { | |||
| for (int i = 0; i < dst.width_; i++) { | |||
| const_ptr[i * dst.channel_] = fill_b_or_gray; | |||
| const_ptr[i * dst.channel_ + 1] = fill_g; | |||
| const_ptr[i * dst.channel_ + 2] = fill_r; | |||
| } | |||
| } | |||
| // padd left | |||
| for (int h = top; h < dst.height_ - bottom; h++) { | |||
| for (int w = 0; w < left; w++) { | |||
| uint32_t index = (h * dst.width_ + w) * dst.channel_; | |||
| if (dst.channel_ == 1) { | |||
| dst_start_p[index] = fill_b_or_gray; | |||
| } else if (dst.channel_ == 3) { | |||
| dst_start_p[index] = fill_b_or_gray; | |||
| dst_start_p[index + 1] = fill_g; | |||
| dst_start_p[index + 2] = fill_r; | |||
| } else { | |||
| } | |||
| } | |||
| uint8_t *dst_ptr = reinterpret_cast<uint8_t *>(dst.data_ptr_); | |||
| uint8_t *src_ptr = reinterpret_cast<uint8_t *>(src.data_ptr_); | |||
| for (int i = 0; i < top; i++) { | |||
| memcpy(dst_ptr + i * dst_step, const_ptr, dst_step); | |||
| } | |||
| // padd right | |||
| for (int h = top; h < dst.height_ - bottom; h++) { | |||
| for (int w = dst.width_ - right; w < dst.width_; w++) { | |||
| uint32_t index = (h * dst.width_ + w) * dst.channel_; | |||
| if (dst.channel_ == 1) { | |||
| dst_start_p[index] = fill_b_or_gray; | |||
| } else if (dst.channel_ == 3) { | |||
| dst_start_p[index] = fill_b_or_gray; | |||
| dst_start_p[index + 1] = fill_g; | |||
| dst_start_p[index + 2] = fill_r; | |||
| } else { | |||
| } | |||
| } | |||
| int left_size = left * dst.channel_; | |||
| int right_size = right * dst.channel_; | |||
| uint8_t *dst_raw_data = dst_ptr + top * dst_step + left_size; | |||
| for (int i = 0; i < src.width_; i++, dst_raw_data += dst_step, src_ptr += src_step) { | |||
| memcpy(dst_raw_data, src_ptr, src_step); | |||
| memcpy(dst_raw_data - left_size, const_ptr, left_size); | |||
| memcpy(dst_raw_data + src_step, const_ptr, right_size); | |||
| } | |||
| // image data | |||
| dst_start_p = dst_start_p + (top * dst.width_ + left) * dst.channel_; | |||
| for (int i_h = 0; i_h < src.height_; i_h++) { | |||
| const T *src_index_p = src_start_p + i_h * src.width_ * src.channel_; | |||
| T *dst_index_p = dst_start_p + i_h * dst.width_ * dst.channel_; | |||
| (void)memcpy(dst_index_p, src_index_p, src.width_ * src.channel_ * sizeof(T)); | |||
| for (int i = dst.height_ - bottom; i < dst.height_; i++) { | |||
| memcpy(dst_ptr + i * dst_step, const_ptr, dst_step); | |||
| } | |||
| } | |||
| @@ -758,6 +722,15 @@ bool Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int ri | |||
| if (src.IsEmpty()) { | |||
| return false; | |||
| } | |||
| int dst_width = src.width_ + left + right; | |||
| int dst_height = src.height_ + top + bottom; | |||
| if (dst.IsEmpty()) { | |||
| dst.Init(dst_width, dst_height, src.channel_, src.data_type_); | |||
| } else if (dst.width_ != dst_width || dst.height_ != dst_height || src.channel_ != dst.channel_) { | |||
| return false; | |||
| } else if (src.data_type_ != dst.data_type_) { | |||
| return false; | |||
| } | |||
| if (pad_type == PADD_BORDER_CONSTANT && src.data_type_ == LDataType::FLOAT32) { | |||
| PadWithConstant<float>(src, dst, top, bottom, left, right, pad_type, fill_b_or_gray, fill_g, fill_r); | |||
| } else if (pad_type == PADD_BORDER_CONSTANT && src.data_type_ == LDataType::UINT8) { | |||
| @@ -921,167 +894,5 @@ bool Affine(LiteMat &src, LiteMat &out_img, const double M[6], std::vector<size_ | |||
| return ImplementAffine(src, out_img, M, dsize, borderValue); | |||
| } | |||
// Element-wise difference, generic case: T's own arithmetic semantics apply
// (signed/float types may go negative; no clamping is performed here).
template <typename T>
inline void SubtractImpl(const T *src1_ptr, const T *src2_ptr, T *dst, size_t total_size) {
  std::transform(src1_ptr, src1_ptr + total_size, src2_ptr, dst,
                 [](T a, T b) { return static_cast<T>(a - b); });
}

// uint8 specialization: saturate the difference into [0, 255].
template <>
inline void SubtractImpl(const uint8_t *src1_ptr, const uint8_t *src2_ptr, uint8_t *dst, size_t total_size) {
  for (size_t i = 0; i < total_size; i++) {
    const int diff = static_cast<int>(src1_ptr[i]) - static_cast<int>(src2_ptr[i]);
    dst[i] = static_cast<uint8_t>(std::max(0, std::min(255, diff)));
  }
}

// uint16 specialization: saturate the difference into [0, 65535].
template <>
inline void SubtractImpl(const uint16_t *src1_ptr, const uint16_t *src2_ptr, uint16_t *dst, size_t total_size) {
  for (size_t i = 0; i < total_size; i++) {
    const int diff = static_cast<int>(src1_ptr[i]) - static_cast<int>(src2_ptr[i]);
    dst[i] = static_cast<uint16_t>(std::max(0, std::min(65535, diff)));
  }
}

// uint32 specialization: compute in 64-bit, saturate into [0, 4294967295].
template <>
inline void SubtractImpl(const uint32_t *src1_ptr, const uint32_t *src2_ptr, uint32_t *dst, size_t total_size) {
  for (size_t i = 0; i < total_size; i++) {
    const int64_t diff = static_cast<int64_t>(src1_ptr[i]) - static_cast<int64_t>(src2_ptr[i]);
    dst[i] = static_cast<uint32_t>(std::max<int64_t>(0, std::min<int64_t>(4294967295LL, diff)));
  }
}
| bool Subtract(const LiteMat &src1, const LiteMat &src2, LiteMat &dst) { | |||
| if (src1.width_ != src2.width_ || src1.height_ != src2.height_ || src1.channel_ != src2.channel_) { | |||
| return false; | |||
| } | |||
| if (src1.data_type_ != src2.data_type_) { | |||
| return false; | |||
| } | |||
| if (dst.IsEmpty()) { | |||
| dst.Init(src1.width_, src1.height_, src1.channel_, src1.data_type_); | |||
| } else if (src1.width_ != dst.width_ || src1.height_ != dst.height_ || src1.channel_ != dst.channel_) { | |||
| return false; | |||
| } else if (src1.data_type_ != dst.data_type_) { | |||
| return false; | |||
| } | |||
| size_t total_size = src1.height_ * src1.width_ * src1.channel_; | |||
| if (src1.data_type_ == LDataType::BOOL) { | |||
| SubtractImpl<bool>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::INT8) { | |||
| SubtractImpl<int8_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::UINT8) { | |||
| SubtractImpl<uint8_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::INT16) { | |||
| SubtractImpl<int16_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::UINT16) { | |||
| SubtractImpl<uint16_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::INT32) { | |||
| SubtractImpl<int32_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::UINT32) { | |||
| SubtractImpl<uint32_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::INT64) { | |||
| SubtractImpl<int64_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::UINT64) { | |||
| SubtractImpl<uint64_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::FLOAT32) { | |||
| SubtractImpl<float>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::FLOAT64) { | |||
| SubtractImpl<double>(src1, src2, dst, total_size); | |||
| } else { | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
// Element-wise division, generic case. A zero divisor yields 0.
//
// The previous code biased the divisor with std::numeric_limits<float>::min()
// instead of guarding zero. That (a) produced inf (then UB on conversion back
// to integer T) when the divisor was 0, and (b) forced every integer division
// through float, losing precision for wide types such as int64_t/uint32_t.
template <typename T>
inline void DivideImpl(const T *src1_ptr, const T *src2_ptr, T *dst, size_t total_size) {
  for (size_t i = 0; i < total_size; i++) {
    dst[i] = src2_ptr[i] ? src1_ptr[i] / src2_ptr[i] : T{};
  }
}

// uint8 specialization: round-to-nearest quotient, clamped to [0, 255];
// division by zero yields 0.
template <>
inline void DivideImpl(const uint8_t *src1_ptr, const uint8_t *src2_ptr, uint8_t *dst, size_t total_size) {
  for (size_t i = 0; i < total_size; i++) {
    int val = src2_ptr[i] ? static_cast<int>(std::round(static_cast<float>(src1_ptr[i]) / src2_ptr[i])) : 0;
    dst[i] =
      std::max<int>(std::numeric_limits<uint8_t>::min(), std::min<int>(std::numeric_limits<uint8_t>::max(), val));
  }
}

// uint16 specialization: round-to-nearest quotient, clamped to [0, 65535];
// division by zero yields 0.
template <>
inline void DivideImpl(const uint16_t *src1_ptr, const uint16_t *src2_ptr, uint16_t *dst, size_t total_size) {
  for (size_t i = 0; i < total_size; i++) {
    int val = src2_ptr[i] ? static_cast<int>(std::round(static_cast<float>(src1_ptr[i]) / src2_ptr[i])) : 0;
    dst[i] =
      std::max<int>(std::numeric_limits<uint16_t>::min(), std::min<int>(std::numeric_limits<uint16_t>::max(), val));
  }
}

// uint32 specialization: divide in double (exact for 32-bit operands),
// round to nearest, clamp to [0, 4294967295]; division by zero yields 0.
template <>
inline void DivideImpl(const uint32_t *src1_ptr, const uint32_t *src2_ptr, uint32_t *dst, size_t total_size) {
  for (size_t i = 0; i < total_size; i++) {
    int64_t val = src2_ptr[i] ? static_cast<int64_t>(std::round(static_cast<double>(src1_ptr[i]) / src2_ptr[i])) : 0;
    dst[i] = std::max<int64_t>(std::numeric_limits<uint32_t>::min(),
                               std::min<int64_t>(std::numeric_limits<uint32_t>::max(), val));
  }
}
| bool Divide(const LiteMat &src1, const LiteMat &src2, LiteMat &dst) { | |||
| if (src1.width_ != src2.width_ || src1.height_ != src2.height_ || src1.channel_ != src2.channel_) { | |||
| return false; | |||
| } | |||
| if (src1.data_type_ != src2.data_type_) { | |||
| return false; | |||
| } | |||
| if (dst.IsEmpty()) { | |||
| dst.Init(src1.width_, src1.height_, src1.channel_, src1.data_type_); | |||
| } else if (src1.width_ != dst.width_ || src1.height_ != dst.height_ || src1.channel_ != dst.channel_) { | |||
| return false; | |||
| } else if (src1.data_type_ != dst.data_type_) { | |||
| return false; | |||
| } | |||
| size_t total_size = src1.height_ * src1.width_ * src1.channel_; | |||
| if (src1.data_type_ == LDataType::INT8) { | |||
| DivideImpl<int8_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::UINT8) { | |||
| DivideImpl<uint8_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::INT16) { | |||
| DivideImpl<int16_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::UINT16) { | |||
| DivideImpl<uint16_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::INT32) { | |||
| DivideImpl<int32_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::UINT32) { | |||
| DivideImpl<uint32_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::INT64) { | |||
| DivideImpl<int64_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::UINT64) { | |||
| DivideImpl<uint64_t>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::FLOAT32) { | |||
| DivideImpl<float>(src1, src2, dst, total_size); | |||
| } else if (src1.data_type_ == LDataType::FLOAT64) { | |||
| DivideImpl<double>(src1, src2, dst, total_size); | |||
| } else { | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -107,12 +107,6 @@ void ConvertBoxes(std::vector<std::vector<float>> &boxes, const std::vector<std: | |||
| std::vector<int> ApplyNms(const std::vector<std::vector<float>> &all_boxes, std::vector<float> &all_scores, float thres, | |||
| int max_boxes); | |||
| /// \brief Calculates the difference between the two images for each element | |||
| bool Subtract(const LiteMat &src1, const LiteMat &src2, LiteMat &dst); | |||
| /// \brief Calculates the division between the two images for each element | |||
| bool Divide(const LiteMat &src1, const LiteMat &src2, LiteMat &dst); | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // IMAGE_PROCESS_H_ | |||
| @@ -15,6 +15,16 @@ | |||
| */ | |||
| #include "minddata/dataset/kernels/image/lite_cv/lite_mat.h" | |||
| #include <algorithm> | |||
| #include <limits> | |||
| #include <cmath> | |||
| #ifdef ENABLE_ANDROID | |||
| #if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) | |||
| #define USE_NEON | |||
| #include <arm_neon.h> | |||
| #endif | |||
| #endif | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -221,5 +231,258 @@ void LiteMat::AlignFree(void *ptr) { | |||
| inline void LiteMat::InitElemSize(LDataType data_type) { elem_size_ = data_type.SizeInBytes(); } | |||
// Element-wise difference, generic case: T's own arithmetic semantics apply
// (no clamping for signed/float types).
template <typename T>
inline void SubtractImpl(const T *src0, const T *src1, T *dst, int64_t total_size) {
  for (int64_t idx = 0; idx < total_size; ++idx) {
    dst[idx] = src0[idx] - src1[idx];
  }
}

// uint8 specialization: saturating subtraction, differences below 0 clamp to 0.
template <>
inline void SubtractImpl(const uint8_t *src0, const uint8_t *src1, uint8_t *dst, int64_t total_size) {
  int64_t x = 0;
#ifdef USE_NEON
  // Two 16-byte saturating vector subtracts per iteration (32 bytes/step).
  const int64_t step = 32;
  for (; x + step <= total_size; x += step) {
    uint8x16_t a_lo = vld1q_u8(src0 + x);
    uint8x16_t a_hi = vld1q_u8(src0 + x + 16);
    uint8x16_t b_lo = vld1q_u8(src1 + x);
    uint8x16_t b_hi = vld1q_u8(src1 + x + 16);
    vst1q_u8(dst + x, vqsubq_u8(a_lo, b_lo));
    vst1q_u8(dst + x + 16, vqsubq_u8(a_hi, b_hi));
  }
#endif
  // Scalar tail (and the full range when NEON is unavailable).
  for (; x < total_size; ++x) {
    const int32_t diff = static_cast<int32_t>(src0[x]) - static_cast<int32_t>(src1[x]);
    dst[x] = static_cast<uint8_t>(std::max<int32_t>(0, std::min<int32_t>(255, diff)));
  }
}

// uint16 specialization: saturate into [0, 65535].
template <>
inline void SubtractImpl(const uint16_t *src0, const uint16_t *src1, uint16_t *dst, int64_t total_size) {
  for (int64_t idx = 0; idx < total_size; ++idx) {
    const int32_t diff = static_cast<int32_t>(src0[idx]) - static_cast<int32_t>(src1[idx]);
    dst[idx] = static_cast<uint16_t>(std::max<int32_t>(0, std::min<int32_t>(65535, diff)));
  }
}

// uint32 specialization: compute in 64-bit, saturate into [0, 4294967295].
template <>
inline void SubtractImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *dst, int64_t total_size) {
  for (int64_t idx = 0; idx < total_size; ++idx) {
    const int64_t diff = static_cast<int64_t>(src0[idx]) - static_cast<int64_t>(src1[idx]);
    dst[idx] = static_cast<uint32_t>(std::max<int64_t>(0, std::min<int64_t>(4294967295LL, diff)));
  }
}
| bool Subtract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { | |||
| if (src_a.width_ != src_b.width_ || src_a.height_ != src_b.height_ || src_a.channel_ != src_b.channel_) { | |||
| return false; | |||
| } | |||
| if (src_a.data_type_ != src_b.data_type_) { | |||
| return false; | |||
| } | |||
| if (dst->IsEmpty()) { | |||
| dst->Init(src_a.width_, src_a.height_, src_a.channel_, src_a.data_type_); | |||
| } else if (src_a.width_ != dst->width_ || src_a.height_ != dst->height_ || src_a.channel_ != dst->channel_) { | |||
| return false; | |||
| } else if (src_a.data_type_ != dst->data_type_) { | |||
| return false; | |||
| } | |||
| int64_t total_size = src_a.height_ * src_a.width_ * src_a.channel_; | |||
| if (src_a.data_type_ == LDataType::BOOL) { | |||
| SubtractImpl<bool>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::INT8) { | |||
| SubtractImpl<int8_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::UINT8) { | |||
| SubtractImpl<uint8_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::INT16) { | |||
| SubtractImpl<int16_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::UINT16) { | |||
| SubtractImpl<uint16_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::INT32) { | |||
| SubtractImpl<int32_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::UINT32) { | |||
| SubtractImpl<uint32_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::INT64) { | |||
| SubtractImpl<int64_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::UINT64) { | |||
| SubtractImpl<uint64_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::FLOAT32) { | |||
| SubtractImpl<float>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::FLOAT64) { | |||
| SubtractImpl<double>(src_a, src_b, *dst, total_size); | |||
| } else { | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
#ifdef USE_NEON
// Approximate 1/val: initial vrecpeq_f32 estimate refined by two
// Newton-Raphson steps (each vrecpsq_f32 computes 2 - val * estimate).
inline float32x4_t reciprocal_simd(float32x4_t val) {
  // get an initial estimate of 1/val
  float32x4_t reciprocal = vrecpeq_f32(val);
  // use Newton-Raphson steps to refine the estimate
  reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
  reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
  return reciprocal;
}

// Round half away from zero: add +/-0.5 carrying v's sign bit; the caller is
// expected to truncate afterwards via a float->integer conversion.
inline float32x4_t round_simd(const float32x4_t &v) {
  // Build the 0x80000000 sign-bit mask via an unsigned splat + reinterpret;
  // vdupq_n_s32(1U << 31) relied on implementation-defined unsigned->signed
  // conversion of a value outside int32_t's range.
  const int32x4_t signMask = vreinterpretq_s32_u32(vdupq_n_u32(0x80000000U));
  const int32x4_t half = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));
  float32x4_t v_addition = vreinterpretq_f32_s32(vorrq_s32(half, vandq_s32(signMask, vreinterpretq_s32_f32(v))));
  return vaddq_f32(v, v_addition);
}
#endif
// Element-wise division, generic case: a zero divisor yields 0 instead of UB.
// Loop index is int64_t to match total_size (the old size_t counter mixed
// signedness with the signed bound).
template <typename T>
inline void DivideImpl(const T *src0, const T *src1, T *dst, int64_t total_size) {
  for (int64_t i = 0; i < total_size; i++) {
    dst[i] = src1[i] ? src0[i] / src1[i] : 0;
  }
}

// uint8 specialization: round-to-nearest quotient clamped to [0, 255];
// division by zero yields 0 (the NEON mask zeroes those lanes).
template <>
inline void DivideImpl(const uint8_t *src0, const uint8_t *src1, uint8_t *dst, int64_t total_size) {
  int64_t x = 0;
#ifdef USE_NEON
  const int64_t step = 16;
  for (; x <= total_size - step; x += step) {
    __builtin_prefetch(reinterpret_cast<const char *>(src0 + x) + 32 * 10);
    __builtin_prefetch(reinterpret_cast<const char *>(src1 + x) + 32 * 10);
    uint8x16_t v_a = vld1q_u8(src0 + x);
    uint8x16_t v_b = vld1q_u8(src1 + x);
    // All-ones where the divisor is non-zero; used to force 0 on /0 lanes.
    uint8x16_t v_mask = vtstq_u8(v_b, v_b);
    // Widen u8 -> u16 -> f32 in quarters, divide via reciprocal, round.
    uint16x8_t va_l_16x8 = vmovl_u8(vget_low_u8(v_a));
    uint16x8_t va_h_16x8 = vmovl_u8(vget_high_u8(v_a));
    uint16x8_t vb_l_16x8 = vmovl_u8(vget_low_u8(v_b));
    uint16x8_t vb_h_16x8 = vmovl_u8(vget_high_u8(v_b));
    float32x4_t va_ll_f32x4 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(va_l_16x8)));
    float32x4_t va_lh_f32x4 = vcvtq_f32_u32(vmovl_u16(vget_high_u16(va_l_16x8)));
    float32x4_t va_hl_f32x4 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(va_h_16x8)));
    float32x4_t va_hh_f32x4 = vcvtq_f32_u32(vmovl_u16(vget_high_u16(va_h_16x8)));
    float32x4_t vb_ll_f32x4 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vb_l_16x8)));
    float32x4_t vb_lh_f32x4 = vcvtq_f32_u32(vmovl_u16(vget_high_u16(vb_l_16x8)));
    float32x4_t vb_hl_f32x4 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vb_h_16x8)));
    float32x4_t vb_hh_f32x4 = vcvtq_f32_u32(vmovl_u16(vget_high_u16(vb_h_16x8)));
    float32x4_t vb_ll_re_f32x4 = reciprocal_simd(vb_ll_f32x4);
    float32x4_t vb_lh_re_f32x4 = reciprocal_simd(vb_lh_f32x4);
    float32x4_t vb_hl_re_f32x4 = reciprocal_simd(vb_hl_f32x4);
    float32x4_t vb_hh_re_f32x4 = reciprocal_simd(vb_hh_f32x4);
    float32x4_t dst_ll_f32x4 = round_simd(vmulq_f32(va_ll_f32x4, vb_ll_re_f32x4));
    float32x4_t dst_lh_f32x4 = round_simd(vmulq_f32(va_lh_f32x4, vb_lh_re_f32x4));
    float32x4_t dst_hl_f32x4 = round_simd(vmulq_f32(va_hl_f32x4, vb_hl_re_f32x4));
    float32x4_t dst_hh_f32x4 = round_simd(vmulq_f32(va_hh_f32x4, vb_hh_re_f32x4));
    uint32x4_t dst_ll_32x4 = vcvtq_u32_f32(dst_ll_f32x4);
    uint32x4_t dst_lh_32x4 = vcvtq_u32_f32(dst_lh_f32x4);
    uint32x4_t dst_hl_32x4 = vcvtq_u32_f32(dst_hl_f32x4);
    uint32x4_t dst_hh_32x4 = vcvtq_u32_f32(dst_hh_f32x4);
    uint16x4_t dst_ll_16x4 = vqmovn_u32(dst_ll_32x4);
    uint16x4_t dst_lh_16x4 = vqmovn_u32(dst_lh_32x4);
    uint16x4_t dst_hl_16x4 = vqmovn_u32(dst_hl_32x4);
    uint16x4_t dst_hh_16x4 = vqmovn_u32(dst_hh_32x4);
    uint16x8_t dst_l_16x8 = vcombine_u16(dst_ll_16x4, dst_lh_16x4);
    uint16x8_t dst_h_16x8 = vcombine_u16(dst_hl_16x4, dst_hh_16x4);
    // vqmovn_u16/vcombine_u8 produce *unsigned* vectors; the previous
    // int8x8_t/int8x16_t declarations were a type error under strict NEON
    // typing (rejected by clang).
    uint8x8_t dst_l_8x8 = vqmovn_u16(dst_l_16x8);
    uint8x8_t dst_h_8x8 = vqmovn_u16(dst_h_16x8);
    uint8x16_t dst_8x16 = vcombine_u8(dst_l_8x8, dst_h_8x8);
    dst_8x16 = vandq_u8(dst_8x16, v_mask);
    vst1q_u8(dst + x, dst_8x16);
  }
#endif
  for (; x < total_size; x++) {
    // Divide in float *then* round, matching the SIMD path; rounding an
    // already-truncated integer quotient (the old code) was a no-op and made
    // e.g. 7/2 differ between the scalar tail (3) and NEON lanes (4).
    int32_t val = src1[x] ? static_cast<int32_t>(std::round(static_cast<float>(src0[x]) / src1[x])) : 0;
    dst[x] = std::max<int32_t>(std::numeric_limits<uint8_t>::min(),
                               std::min<int32_t>(std::numeric_limits<uint8_t>::max(), val));
  }
}

// uint16 specialization: round-to-nearest quotient clamped to [0, 65535];
// division by zero yields 0.
template <>
inline void DivideImpl(const uint16_t *src0, const uint16_t *src1, uint16_t *dst, int64_t total_size) {
  for (int64_t i = 0; i < total_size; i++) {
    int32_t val = src1[i] ? static_cast<int32_t>(std::round(static_cast<float>(src0[i]) / src1[i])) : 0;
    dst[i] = std::max<int32_t>(std::numeric_limits<uint16_t>::min(),
                               std::min<int32_t>(std::numeric_limits<uint16_t>::max(), val));
  }
}

// uint32 specialization: divide in double (exact for 32-bit operands),
// round to nearest, clamp to [0, 4294967295]; division by zero yields 0.
template <>
inline void DivideImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *dst, int64_t total_size) {
  for (int64_t i = 0; i < total_size; i++) {
    int64_t val = src1[i] ? static_cast<int64_t>(std::round(static_cast<double>(src0[i]) / src1[i])) : 0;
    dst[i] = std::max<int64_t>(std::numeric_limits<uint32_t>::min(),
                               std::min<int64_t>(std::numeric_limits<uint32_t>::max(), val));
  }
}
| bool Divide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { | |||
| if (src_a.width_ != src_b.width_ || src_a.height_ != src_b.height_ || src_a.channel_ != src_b.channel_) { | |||
| return false; | |||
| } | |||
| if (src_a.data_type_ != src_b.data_type_) { | |||
| return false; | |||
| } | |||
| if (dst->IsEmpty()) { | |||
| dst->Init(src_a.width_, src_a.height_, src_a.channel_, src_a.data_type_); | |||
| } else if (src_a.width_ != dst->width_ || src_a.height_ != dst->height_ || src_a.channel_ != dst->channel_) { | |||
| return false; | |||
| } else if (src_a.data_type_ != dst->data_type_) { | |||
| return false; | |||
| } | |||
| int64_t total_size = src_a.height_ * src_a.width_ * src_a.channel_; | |||
| if (src_a.data_type_ == LDataType::BOOL) { | |||
| DivideImpl<bool>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::INT8) { | |||
| DivideImpl<int8_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::UINT8) { | |||
| DivideImpl<uint8_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::INT16) { | |||
| DivideImpl<int16_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::UINT16) { | |||
| DivideImpl<uint16_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::INT32) { | |||
| DivideImpl<int32_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::UINT32) { | |||
| DivideImpl<uint32_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::INT64) { | |||
| DivideImpl<int64_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::UINT64) { | |||
| DivideImpl<uint64_t>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::FLOAT32) { | |||
| DivideImpl<float>(src_a, src_b, *dst, total_size); | |||
| } else if (src_a.data_type_ == LDataType::FLOAT64) { | |||
| DivideImpl<double>(src_a, src_b, *dst, total_size); | |||
| } else { | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -247,6 +247,13 @@ class LiteMat { | |||
| LDataType data_type_; | |||
| int *ref_count_; | |||
| }; | |||
| /// \brief Calculates the difference between the two images for each element | |||
| bool Subtract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst); | |||
| /// \brief Calculates the division between the two images for each element | |||
| bool Divide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst); | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINI_MAT_H_ | |||
| @@ -538,7 +538,7 @@ TEST_F(MindDataImageProcess, TestSubtractUint8) { | |||
| static_cast<UINT8_C1 *>(expect_uint8.data_ptr_)[i] = 1; | |||
| } | |||
| LiteMat dst_uint8; | |||
| EXPECT_TRUE(Subtract(src1_uint8, src2_uint8, dst_uint8)); | |||
| EXPECT_TRUE(Subtract(src1_uint8, src2_uint8, &dst_uint8)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_EQ(static_cast<UINT8_C1 *>(expect_uint8.data_ptr_)[i].c1, | |||
| static_cast<UINT8_C1 *>(dst_uint8.data_ptr_)[i].c1); | |||
| @@ -557,7 +557,7 @@ TEST_F(MindDataImageProcess, TestSubtractInt8) { | |||
| static_cast<INT8_C1 *>(expect_int8.data_ptr_)[i] = -1; | |||
| } | |||
| LiteMat dst_int8; | |||
| EXPECT_TRUE(Subtract(src1_int8, src2_int8, dst_int8)); | |||
| EXPECT_TRUE(Subtract(src1_int8, src2_int8, &dst_int8)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_EQ(static_cast<INT8_C1 *>(expect_int8.data_ptr_)[i].c1, static_cast<INT8_C1 *>(dst_int8.data_ptr_)[i].c1); | |||
| } | |||
| @@ -575,7 +575,7 @@ TEST_F(MindDataImageProcess, TestSubtractUInt16) { | |||
| static_cast<UINT16_C1 *>(expect_uint16.data_ptr_)[i] = 0; | |||
| } | |||
| LiteMat dst_uint16; | |||
| EXPECT_TRUE(Subtract(src1_uint16, src2_uint16, dst_uint16)); | |||
| EXPECT_TRUE(Subtract(src1_uint16, src2_uint16, &dst_uint16)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_EQ(static_cast<UINT16_C1 *>(expect_uint16.data_ptr_)[i].c1, | |||
| static_cast<UINT16_C1 *>(dst_uint16.data_ptr_)[i].c1); | |||
| @@ -594,7 +594,7 @@ TEST_F(MindDataImageProcess, TestSubtractInt16) { | |||
| static_cast<INT16_C1 *>(expect_int16.data_ptr_)[i] = -1; | |||
| } | |||
| LiteMat dst_int16; | |||
| EXPECT_TRUE(Subtract(src1_int16, src2_int16, dst_int16)); | |||
| EXPECT_TRUE(Subtract(src1_int16, src2_int16, &dst_int16)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_EQ(static_cast<INT16_C1 *>(expect_int16.data_ptr_)[i].c1, | |||
| static_cast<INT16_C1 *>(dst_int16.data_ptr_)[i].c1); | |||
| @@ -613,7 +613,7 @@ TEST_F(MindDataImageProcess, TestSubtractUInt32) { | |||
| static_cast<UINT32_C1 *>(expect_uint32.data_ptr_)[i] = 0; | |||
| } | |||
| LiteMat dst_uint32; | |||
| EXPECT_TRUE(Subtract(src1_uint32, src2_uint32, dst_uint32)); | |||
| EXPECT_TRUE(Subtract(src1_uint32, src2_uint32, &dst_uint32)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_EQ(static_cast<UINT32_C1 *>(expect_uint32.data_ptr_)[i].c1, | |||
| static_cast<UINT32_C1 *>(dst_uint32.data_ptr_)[i].c1); | |||
| @@ -632,7 +632,7 @@ TEST_F(MindDataImageProcess, TestSubtractInt32) { | |||
| static_cast<INT32_C1 *>(expect_int32.data_ptr_)[i] = -2; | |||
| } | |||
| LiteMat dst_int32; | |||
| EXPECT_TRUE(Subtract(src1_int32, src2_int32, dst_int32)); | |||
| EXPECT_TRUE(Subtract(src1_int32, src2_int32, &dst_int32)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_EQ(static_cast<INT32_C1 *>(expect_int32.data_ptr_)[i].c1, | |||
| static_cast<INT32_C1 *>(dst_int32.data_ptr_)[i].c1); | |||
| @@ -651,7 +651,7 @@ TEST_F(MindDataImageProcess, TestSubtractFloat) { | |||
| static_cast<FLOAT32_C1 *>(expect_float.data_ptr_)[i] = -2.3; | |||
| } | |||
| LiteMat dst_float; | |||
| EXPECT_TRUE(Subtract(src1_float, src2_float, dst_float)); | |||
| EXPECT_TRUE(Subtract(src1_float, src2_float, &dst_float)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_FLOAT_EQ(static_cast<FLOAT32_C1 *>(expect_float.data_ptr_)[i].c1, | |||
| static_cast<FLOAT32_C1 *>(dst_float.data_ptr_)[i].c1); | |||
| @@ -670,7 +670,7 @@ TEST_F(MindDataImageProcess, TestDivideUint8) { | |||
| static_cast<UINT8_C1 *>(expect_uint8.data_ptr_)[i] = 2; | |||
| } | |||
| LiteMat dst_uint8; | |||
| EXPECT_TRUE(Divide(src1_uint8, src2_uint8, dst_uint8)); | |||
| EXPECT_TRUE(Divide(src1_uint8, src2_uint8, &dst_uint8)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_EQ(static_cast<UINT8_C1 *>(expect_uint8.data_ptr_)[i].c1, | |||
| static_cast<UINT8_C1 *>(dst_uint8.data_ptr_)[i].c1); | |||
| @@ -689,7 +689,7 @@ TEST_F(MindDataImageProcess, TestDivideInt8) { | |||
| static_cast<INT8_C1 *>(expect_int8.data_ptr_)[i] = -2; | |||
| } | |||
| LiteMat dst_int8; | |||
| EXPECT_TRUE(Divide(src1_int8, src2_int8, dst_int8)); | |||
| EXPECT_TRUE(Divide(src1_int8, src2_int8, &dst_int8)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_EQ(static_cast<INT8_C1 *>(expect_int8.data_ptr_)[i].c1, static_cast<INT8_C1 *>(dst_int8.data_ptr_)[i].c1); | |||
| } | |||
| @@ -707,7 +707,7 @@ TEST_F(MindDataImageProcess, TestDivideUInt16) { | |||
| static_cast<UINT16_C1 *>(expect_uint16.data_ptr_)[i] = 2; | |||
| } | |||
| LiteMat dst_uint16; | |||
| EXPECT_TRUE(Divide(src1_uint16, src2_uint16, dst_uint16)); | |||
| EXPECT_TRUE(Divide(src1_uint16, src2_uint16, &dst_uint16)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_EQ(static_cast<UINT16_C1 *>(expect_uint16.data_ptr_)[i].c1, | |||
| static_cast<UINT16_C1 *>(dst_uint16.data_ptr_)[i].c1); | |||
| @@ -726,7 +726,7 @@ TEST_F(MindDataImageProcess, TestDivideInt16) { | |||
| static_cast<INT16_C1 *>(expect_int16.data_ptr_)[i] = -10000; | |||
| } | |||
| LiteMat dst_int16; | |||
| EXPECT_TRUE(Divide(src1_int16, src2_int16, dst_int16)); | |||
| EXPECT_TRUE(Divide(src1_int16, src2_int16, &dst_int16)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_EQ(static_cast<INT16_C1 *>(expect_int16.data_ptr_)[i].c1, | |||
| static_cast<INT16_C1 *>(dst_int16.data_ptr_)[i].c1); | |||
| @@ -745,7 +745,7 @@ TEST_F(MindDataImageProcess, TestDivideUInt32) { | |||
| static_cast<UINT32_C1 *>(expect_uint32.data_ptr_)[i] = 1000000000; | |||
| } | |||
| LiteMat dst_uint32; | |||
| EXPECT_TRUE(Divide(src1_uint32, src2_uint32, dst_uint32)); | |||
| EXPECT_TRUE(Divide(src1_uint32, src2_uint32, &dst_uint32)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_EQ(static_cast<UINT32_C1 *>(expect_uint32.data_ptr_)[i].c1, | |||
| static_cast<UINT32_C1 *>(dst_uint32.data_ptr_)[i].c1); | |||
| @@ -764,7 +764,7 @@ TEST_F(MindDataImageProcess, TestDivideInt32) { | |||
| static_cast<INT32_C1 *>(expect_int32.data_ptr_)[i] = -1000000000; | |||
| } | |||
| LiteMat dst_int32; | |||
| EXPECT_TRUE(Divide(src1_int32, src2_int32, dst_int32)); | |||
| EXPECT_TRUE(Divide(src1_int32, src2_int32, &dst_int32)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_EQ(static_cast<INT32_C1 *>(expect_int32.data_ptr_)[i].c1, | |||
| static_cast<INT32_C1 *>(dst_int32.data_ptr_)[i].c1); | |||
| @@ -783,7 +783,7 @@ TEST_F(MindDataImageProcess, TestDivideFloat) { | |||
| static_cast<FLOAT32_C1 *>(expect_float.data_ptr_)[i] = -6.17f; | |||
| } | |||
| LiteMat dst_float; | |||
| EXPECT_TRUE(Divide(src1_float, src2_float, dst_float)); | |||
| EXPECT_TRUE(Divide(src1_float, src2_float, &dst_float)); | |||
| for (size_t i = 0; i < cols; i++) { | |||
| EXPECT_FLOAT_EQ(static_cast<FLOAT32_C1 *>(expect_float.data_ptr_)[i].c1, | |||
| static_cast<FLOAT32_C1 *>(dst_float.data_ptr_)[i].c1); | |||