// Copyright 2025 Tencent // SPDX-License-Identifier: BSD-3-Clause // pip install paddlepaddle==3.0.0 // pip install paddleocr==3.0.0 // paddlex --install paddle2onnx // paddleocr ocr -i test.png // paddlex --paddle2onnx --paddle_model_dir ~/.paddlex/official_models/PP-OCRv5_mobile_det --onnx_model_dir PP-OCRv5_mobile_det // paddlex --paddle2onnx --paddle_model_dir ~/.paddlex/official_models/PP-OCRv5_mobile_rec --onnx_model_dir PP-OCRv5_mobile_rec // pnnx PP-OCRv5_mobile_det.onnx inputshape=[1,3,320,320] inputshape2=[1,3,256,256] // pnnx PP-OCRv5_mobile_rec.onnx inputshape=[1,3,48,160] inputshape2=[1,3,48,256] // pnnx PP-OCRv5_server_det.onnx inputshape=[1,3,320,320] inputshape2=[1,3,256,256] fp16=0 // pnnx PP-OCRv5_server_rec.onnx inputshape=[1,3,48,160] inputshape2=[1,3,48,256] fp16=0 #include "layer.h" #include "net.h" #include #include #include #include #include #include #include "ppocrv5_dict.h" struct Character { int id; float prob; }; struct Object { cv::RotatedRect rrect; int orientation; float prob; std::vector text; }; static double contour_score(const cv::Mat& binary, const std::vector& contour) { cv::Rect rect = cv::boundingRect(contour); if (rect.x < 0) rect.x = 0; if (rect.y < 0) rect.y = 0; if (rect.x + rect.width > binary.cols) rect.width = binary.cols - rect.x; if (rect.y + rect.height > binary.rows) rect.height = binary.rows - rect.y; cv::Mat binROI = binary(rect); cv::Mat mask = cv::Mat::zeros(rect.height, rect.width, CV_8U); std::vector roiContour; for (size_t i = 0; i < contour.size(); i++) { cv::Point pt = cv::Point(contour[i].x - rect.x, contour[i].y - rect.y); roiContour.push_back(pt); } std::vector > roiContours = {roiContour}; cv::fillPoly(mask, roiContours, cv::Scalar(255)); double score = cv::mean(binROI, mask).val[0]; return score / 255.f; } static cv::Mat get_rotate_crop_image(const cv::Mat& bgr, const Object& object) { const int orientation = object.orientation; const float rw = object.rrect.size.width; const float rh = object.rrect.size.height; const int target_height = 48; const float target_width = rh * target_height / rw; // warpperspective shall be used to rotate the image // but actually they are all rectangles, so warpaffine is almost enough :P cv::Mat dst; cv::Point2f corners[4]; object.rrect.points(corners); if (orientation == 0) { // horizontal text // corner points order // 0--------1 // | |rw -> as angle=90 // 3--------2 // rh std::vector src_pts(3); src_pts[0] = corners[0]; src_pts[1] = corners[1]; src_pts[2] = corners[3]; std::vector dst_pts(3); dst_pts[0] = cv::Point2f(0, 0); dst_pts[1] = cv::Point2f(target_width, 0); dst_pts[2] = cv::Point2f(0, target_height); cv::Mat tm = cv::getAffineTransform(src_pts, dst_pts); cv::warpAffine(bgr, dst, tm, cv::Size(target_width, target_height), cv::INTER_LINEAR, cv::BORDER_REPLICATE); } else { // vertial text // corner points order // 1----2 // | | // | | // | |rh -> as angle=0 // | | // | | // 0----3 // rw std::vector src_pts(3); src_pts[0] = corners[2]; src_pts[1] = corners[3]; src_pts[2] = corners[1]; std::vector dst_pts(3); dst_pts[0] = cv::Point2f(0, 0); dst_pts[1] = cv::Point2f(target_width, 0); dst_pts[2] = cv::Point2f(0, target_height); cv::Mat tm = cv::getAffineTransform(src_pts, dst_pts); cv::warpAffine(bgr, dst, tm, cv::Size(target_width, target_height), cv::INTER_LINEAR, cv::BORDER_REPLICATE); } return dst; } class PPOCRv5 { public: void init(); void detect(const cv::Mat& bgr, std::vector