| @@ -511,79 +511,6 @@ static bool ConvertYUV420SPToBGR(const uint8_t *data, LDataType data_type, bool | |||||
| return true; | return true; | ||||
| } | } | ||||
| #ifdef ENABLE_NEON | |||||
| static uint8x8_t RGBToGray(const uint16x8_t &r_value, const uint16x8_t &g_value, const uint16x8_t &b_value, | |||||
| const uint16x4_t &r2y_value, const uint16x4_t &g2y_value, const uint16x4_t &b2y_value) { | |||||
| uint32x4_t dst0_value = vmull_u16(vget_low_u16(g_value), g2y_value); | |||||
| uint32x4_t dst1_value = vmull_u16(vget_high_u16(g_value), g2y_value); | |||||
| dst0_value = vmlal_u16(dst0_value, vget_low_u16(r_value), r2y_value); | |||||
| dst1_value = vmlal_u16(dst1_value, vget_high_u16(r_value), r2y_value); | |||||
| dst0_value = vmlal_u16(dst0_value, vget_low_u16(b_value), b2y_value); | |||||
| dst1_value = vmlal_u16(dst1_value, vget_high_u16(b_value), b2y_value); | |||||
| uint8x8_t v_gray = vqmovn_u16(vcombine_u16(vrshrn_n_u32(dst0_value, GRAYSHIFT), vrshrn_n_u32(dst1_value, GRAYSHIFT))); | |||||
| return v_gray; | |||||
| } | |||||
| static bool ConvertRGBAToGRAY_Neon(const uint8_t *srcBase, uint8_t *dstBase, int w, int h) { | |||||
| const uint32_t r_to_gray = R2GRAY; | |||||
| const uint32_t g_to_gray = G2GRAY; | |||||
| const uint32_t b_to_gray = B2GRAY; | |||||
| uint16x4_t r2y_value = vdup_n_u16(R2GRAY); | |||||
| uint16x4_t g2y_value = vdup_n_u16(G2GRAY); | |||||
| uint16x4_t b2y_value = vdup_n_u16(B2GRAY); | |||||
| size_t w16b = w >= 15 ? w - 15 : 0; | |||||
| size_t w8b = w >= 7 ? w - 7 : 0; | |||||
| for (size_t i = 0; i < h; ++i) { | |||||
| const uint8_t *src_ptr = srcBase + w * i * 4; | |||||
| uint8_t *dst_ptr = dstBase + w * i * 4; | |||||
| size_t src_j = 0u; | |||||
| size_t dst_j = 0u; | |||||
| for (; dst_j < w16b; src_j += 64, dst_j += 16) { | |||||
| uint8x16x4_t src_value0 = vld4q_u8(src_ptr + src_j); | |||||
| // 0 | |||||
| uint16x8_t r_value = vmovl_u8(vget_low_u8(src_value0.val[0])); | |||||
| uint16x8_t g_value = vmovl_u8(vget_low_u8(src_value0.val[1])); | |||||
| uint16x8_t b_value = vmovl_u8(vget_low_u8(src_value0.val[2])); | |||||
| uint8x8_t gray_value0 = RGBToGray(r_value, g_value, b_value, r2y_value, g2y_value, b2y_value); | |||||
| r_value = vmovl_u8(vget_high_u8(src_value0.val[0])); | |||||
| g_value = vmovl_u8(vget_high_u8(src_value0.val[1])); | |||||
| b_value = vmovl_u8(vget_high_u8(src_value0.val[2])); | |||||
| uint8x8_t gray_value1 = RGBToGray(r_value, g_value, b_value, r2y_value, g2y_value, b2y_value); | |||||
| vst1q_u8(dst_ptr + dst_j, vcombine_u8(gray_value0, gray_value1)); | |||||
| } | |||||
| if (dst_j < w8b) { | |||||
| uint8x8x4_t v_src = vld4_u8(src_ptr + src_j); | |||||
| uint16x8_t r_value = vmovl_u8(v_src.val[0]); | |||||
| uint16x8_t g_value = vmovl_u8(v_src.val[1]); | |||||
| uint16x8_t b_value = vmovl_u8(v_src.val[2]); | |||||
| uint8x8_t gray_value = RGBToGray(r_value, g_value, b_value, r2y_value, g2y_value, b2y_value); | |||||
| vst1_u8(dst_ptr + dst_j, gray_value); | |||||
| src_j += 32; | |||||
| dst_j += 8; | |||||
| } | |||||
| for (; dst_j < w; src_j += 4, dst_j++) { | |||||
| uint32_t val = src_ptr[src_j] * r_to_gray + src_ptr[src_j + 1] * g_to_gray + src_ptr[src_j + 2] * b_to_gray; | |||||
| dst_ptr[dst_j] = U32TOU8CAST((val + GRAYSHIFT_DELTA) >> GRAYSHIFT); | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| #endif | |||||
| static bool ConvertRGBAToGRAY(const unsigned char *data, LDataType data_type, int w, int h, LiteMat &mat) { | static bool ConvertRGBAToGRAY(const unsigned char *data, LDataType data_type, int w, int h, LiteMat &mat) { | ||||
| if (data_type == LDataType::UINT8) { | if (data_type == LDataType::UINT8) { | ||||
| mat.Init(w, h, 1, LDataType::UINT8); | mat.Init(w, h, 1, LDataType::UINT8); | ||||
| @@ -592,9 +519,6 @@ static bool ConvertRGBAToGRAY(const unsigned char *data, LDataType data_type, in | |||||
| } | } | ||||
| unsigned char *ptr = mat; | unsigned char *ptr = mat; | ||||
| const unsigned char *data_ptr = data; | const unsigned char *data_ptr = data; | ||||
| #ifdef ENABLE_NEON | |||||
| ConvertRGBAToGRAY_Neon(data_ptr, ptr, w, h); | |||||
| #else | |||||
| for (int y = 0; y < h; y++) { | for (int y = 0; y < h; y++) { | ||||
| for (int x = 0; x < w; x++) { | for (int x = 0; x < w; x++) { | ||||
| *ptr = (data_ptr[2] * B2GRAY + data_ptr[1] * G2GRAY + data_ptr[0] * R2GRAY + GRAYSHIFT_DELTA) >> GRAYSHIFT; | *ptr = (data_ptr[2] * B2GRAY + data_ptr[1] * G2GRAY + data_ptr[0] * R2GRAY + GRAYSHIFT_DELTA) >> GRAYSHIFT; | ||||
| @@ -602,7 +526,6 @@ static bool ConvertRGBAToGRAY(const unsigned char *data, LDataType data_type, in | |||||
| data_ptr += 4; | data_ptr += 4; | ||||
| } | } | ||||
| } | } | ||||
| #endif | |||||
| } else { | } else { | ||||
| return false; | return false; | ||||
| } | } | ||||