| @@ -0,0 +1,81 @@ | |||
| # MindSpore | |||
| build/ | |||
| mindspore/lib | |||
| output | |||
| *.ir | |||
| mindspore/ccsrc/schema/inner/* | |||
| # Cmake files | |||
| CMakeFiles/ | |||
| cmake_install.cmake | |||
| CMakeCache.txt | |||
| Makefile | |||
| cmake-build-debug | |||
| # Dynamic libraries | |||
| *.so | |||
| *.so.* | |||
| *.dylib | |||
| # Static libraries | |||
| *.la | |||
| *.lai | |||
| *.a | |||
| *.lib | |||
| # Protocol buffers | |||
| *_pb2.py | |||
| *.pb.h | |||
| *.pb.cc | |||
| # Object files | |||
| *.o | |||
| # Editor | |||
| .vscode | |||
| .idea/ | |||
| # Cquery | |||
| .cquery_cached_index/ | |||
| compile_commands.json | |||
| # Ctags and cscope | |||
| tags | |||
| TAGS | |||
| CTAGS | |||
| GTAGS | |||
| GRTAGS | |||
| GSYMS | |||
| GPATH | |||
| cscope.* | |||
| # Python files | |||
| *__pycache__* | |||
| .pytest_cache | |||
| # Mac files | |||
| *.DS_Store | |||
| # Test results | |||
| test_temp_summary_event_file/ | |||
| *.dot | |||
| *.dat | |||
| *.svg | |||
| *.perf | |||
| *.info | |||
| *.ckpt | |||
| *.shp | |||
| *.pkl | |||
| .clangd | |||
| mindspore/version.py | |||
| mindspore/default_config.py | |||
| mindspore/.commit_id | |||
| onnx.proto | |||
| mindspore/ccsrc/onnx.proto | |||
| # Android | |||
| local.properties | |||
| .gradle | |||
| sdk/build | |||
| sdk/.cxx | |||
| app/.cxx | |||
| @@ -0,0 +1,275 @@ | |||
| ## MindSpore Lite 端侧图像分类demo(Android) | |||
| 本示例程序演示了如何在端侧利用MindSpore Lite C++ API(Android JNI)以及MindSpore Lite 图像分类模型完成端侧推理,实现对设备摄像头捕获的内容进行分类,并在App图像预览界面中显示出最可能的分类结果。 | |||
| ### 运行依赖 | |||
| - Android Studio >= 3.2 (推荐4.0以上版本) | |||
| - NDK 21.3 | |||
| - CMake 3.10 | |||
| - Android SDK >= 26 | |||
| - OpenCV >= 4.0.0 | |||
| ### 构建与运行 | |||
| 1. 在Android Studio中加载本示例源码,并安装相应的SDK(指定SDK版本后,由Android Studio自动安装)。 | |||
|  | |||
| 启动Android Studio后,点击`File->Settings->System Settings->Android SDK`,勾选相应的SDK。如下图所示,勾选后,点击`OK`,Android Studio即可自动安装SDK。 | |||
|  | |||
| (可选)若安装时出现NDK版本问题,可手动下载相应的[NDK版本](https://developer.android.com/ndk/downloads?hl=zh-cn)(本示例代码使用的NDK版本为21.3),并在`Project Structure`的`Android NDK location`设置中指定SDK的位置。 | |||
|  | |||
| 2. 连接Android设备,运行图像分类应用程序。 | |||
| 通过USB连接Android设备调试,点击`Run 'app'`即可在您的设备上运行本示例项目。 | |||
| * 注:编译过程中Android Studio会自动下载MindSpore Lite、OpenCV、模型文件等相关依赖项,编译过程请耐心等待。 | |||
|  | |||
| Android Studio连接设备调试操作,可参考<https://developer.android.com/studio/run/device?hl=zh-cn>。 | |||
| 3. 在Android设备上,点击“继续安装”,安装完即可查看到设备摄像头捕获的内容和推理结果。 | |||
|  | |||
| 如下图所示,识别出的概率最高的物体是植物。 | |||
|  | |||
| ## 示例程序详细说明 | |||
| 本端侧图像分类Android示例程序分为JAVA层和JNI层,其中,JAVA层主要通过Android Camera 2 API实现摄像头获取图像帧,以及相应的图像处理等功能;JNI层在[Runtime](https://www.mindspore.cn/tutorial/zh-CN/master/use/lite_runtime.html)中完成模型推理的过程。 | |||
| > 此处详细说明示例程序的JNI层实现,JAVA层运用Android Camera 2 API实现开启设备摄像头以及图像帧处理等功能,需读者具备一定的Android开发基础知识。 | |||
| ### 示例程序结构 | |||
| ``` | |||
| app | |||
| | | |||
| ├── libs # 存放demo jni层依赖的库文件 | |||
| │ └── arm64-v8a | |||
| │ ├── libopencv_java4.so # opencv | |||
| │ ├── libmlkit-label-MS.so # ndk编译生成的库文件 | |||
| │ └── libmindspore-lite.so # mindspore lite | |||
| | | |||
| ├── src/main | |||
| │ ├── assets # 资源文件 | |||
| | | └── mobilenetv2.ms # 存放模型文件 | |||
| │ | | |||
| │ ├── cpp # 模型加载和预测主要逻辑封装类 | |||
| | | ├── include # 存放MindSpore调用相关的头文件 | |||
| | | | └── ... | |||
| │ | | | |||
| | | ├── MindSporeNetnative.cpp # MindSpore调用相关的JNI方法 | |||
| │ | └── MindSporeNetnative.h # 头文件 | |||
| │ | | |||
| │ ├── java # java层应用代码 | |||
| │ │ └── com.huawei.himindsporedemo | |||
| │ │ ├── gallery.classify # 图像处理及MindSpore JNI调用相关实现 | |||
| │ │ │ └── ... | |||
| │ │ └── obejctdetect # 开启摄像头及绘制相关实现 | |||
| │ │ └── ... | |||
| │ │ | |||
| │ ├── res # 存放Android相关的资源文件 | |||
| │ └── AndroidManifest.xml # Android配置文件 | |||
| │ | |||
| ├── CMakeList.txt # cmake编译入口文件 | |||
| │ | |||
| ├── build.gradle # 其他Android配置文件 | |||
| ├── download.gradle # APP构建时由gradle自动从HuaWei Server下载依赖的库文件及模型文件 | |||
| └── ... | |||
| ``` | |||
| ### 配置MindSpore Lite依赖项 | |||
| Android JNI层调用MindSpore C++ API时,需要相关库文件支持。可通过MindSpore Lite[源码编译](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html)生成`libmindspore-lite.so`库文件,或直接下载MindSpore Lite提供的已编译完成的ARM64、ARM32、x86等[软件包](#TODO)。 | |||
| 在Android Studio中将编译完成的`libmindspore-lite.so`库文件(可包含多个兼容架构),分别放置在APP工程的`app/libs/arm64-v8a`(ARM64)或`app/libs/armeabi-v7a`(ARM32)目录下,并在应用的`build.gradle`文件中配置CMake编译支持,以及`arm64-v8a`和`armeabi-v7a`的编译支持。 | |||
| 本示例中,build过程由download.gradle文件自动从华为服务器下载libmindspore-lite.so以及OpenCV的libopencv_java4.so库文件,并放置在`app/libs/arm64-v8a`目录下。 | |||
| * 注:若自动下载失败,请手动下载相关库文件并将其放在对应位置: | |||
| * libmindspore-lite.so [下载链接](https://download.mindspore.cn/model_zoo/official/lite/lib/mindspore%20version%200.7/libmindspore-lite.so) | |||
| * libopencv_java4.so [下载链接](https://download.mindspore.cn/model_zoo/official/lite/lib/opencv%204.4.0/libopencv_java4.so) | |||
| ``` | |||
| android{ | |||
| defaultConfig{ | |||
| externalNativeBuild{ | |||
| cmake{ | |||
| arguments "-DANDROID_STL=c++_shared" | |||
| } | |||
| } | |||
| ndk{ | |||
| abiFilters 'arm64-v8a' | |||
| } | |||
| } | |||
| } | |||
| ``` | |||
| 在`app/CMakeLists.txt`文件中建立`.so`库文件链接,如下所示。 | |||
| ``` | |||
| # Set MindSpore Lite Dependencies. | |||
| include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/include/MindSpore) | |||
| add_library(mindspore-lite SHARED IMPORTED ) | |||
| set_target_properties(mindspore-lite PROPERTIES | |||
| IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/libs/libmindspore-lite.so") | |||
| # Set OpenCV Dependecies. | |||
| include_directories(${CMAKE_SOURCE_DIR}/opencv/sdk/native/jni/include) | |||
| add_library(lib-opencv SHARED IMPORTED ) | |||
| set_target_properties(lib-opencv PROPERTIES | |||
| IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/libs/libopencv_java4.so") | |||
| # Link target library. | |||
| target_link_libraries( | |||
| ... | |||
| mindspore-lite | |||
| lib-opencv | |||
| ... | |||
| ) | |||
| ``` | |||
| ### 下载及部署模型文件 | |||
| 从MindSpore Model Hub中下载模型文件,本示例程序中使用的终端图像分类模型文件为`mobilenetv2.ms`,同样通过download.gradle脚本在APP构建时自动下载,并放置在`app/src/main/assets`工程目录下。 | |||
| * 注:若下载失败请手动下载模型文件,mobilenetv2.ms [下载链接](https://download.mindspore.cn/model_zoo/official/lite/mobilenetv2_openimage_lite/mobilenetv2.ms)。 | |||
| ### 编写端侧推理代码 | |||
| 在JNI层调用MindSpore Lite C++ API实现端侧推理。 | |||
| 推理代码流程如下,完整代码请参见`src/cpp/MindSporeNetnative.cpp`。 | |||
| 1. 加载MindSpore Lite模型文件,构建上下文、会话以及用于推理的计算图。 | |||
| - 加载模型文件:创建并配置用于模型推理的上下文 | |||
| ```cpp | |||
| // Buffer is the model data passed in by the Java layer | |||
| jlong bufferLen = env->GetDirectBufferCapacity(buffer); | |||
| char *modelBuffer = CreateLocalModelBuffer(env, buffer); | |||
| ``` | |||
| - 创建会话 | |||
| ```cpp | |||
| void **labelEnv = new void *; | |||
| MSNetWork *labelNet = new MSNetWork; | |||
| *labelEnv = labelNet; | |||
| // Create context. | |||
| lite::Context *context = new lite::Context; | |||
| context->thread_num_ = numThread; //Specify the number of threads to run inference | |||
| // Create the mindspore session. | |||
| labelNet->CreateSessionMS(modelBuffer, bufferLen, context); | |||
| delete(context); | |||
| ``` | |||
| - 加载模型文件并构建用于推理的计算图 | |||
| ```cpp | |||
| void MSNetWork::CreateSessionMS(char* modelBuffer, size_t bufferLen, mindspore::lite::Context* ctx) | |||
| { | |||
| CreateSession(modelBuffer, bufferLen, ctx); | |||
| session = mindspore::session::LiteSession::CreateSession(ctx); | |||
| auto model = mindspore::lite::Model::Import(modelBuffer, bufferLen); | |||
| int ret = session->CompileGraph(model); // Compile Graph | |||
| } | |||
| ``` | |||
| 2. 将输入图片转换为传入MindSpore模型的Tensor格式。 | |||
| 将待检测图片数据转换为输入MindSpore模型的Tensor。 | |||
| ```cpp | |||
| // Convert the Bitmap image passed in from the JAVA layer to Mat for OpenCV processing | |||
| BitmapToMat(env, srcBitmap, matImageSrc); | |||
| // Processing such as zooming the picture size. | |||
| matImgPreprocessed = PreProcessImageData(matImageSrc); | |||
| ImgDims inputDims; | |||
| inputDims.channel = matImgPreprocessed.channels(); | |||
| inputDims.width = matImgPreprocessed.cols; | |||
| inputDims.height = matImgPreprocessed.rows; | |||
| float *dataHWC = new float[inputDims.channel * inputDims.width * inputDims.height]; | |||
| // Copy the image data to be detected to the dataHWC array. | |||
| // The dataHWC[image_size] array here is the intermediate variable of the input MindSpore model tensor. | |||
| float *ptrTmp = reinterpret_cast<float *>(matImgPreprocessed.data); | |||
| for(int i = 0; i < inputDims.channel * inputDims.width * inputDims.height; i++){ | |||
| dataHWC[i] = ptrTmp[i]; | |||
| } | |||
| // Assign dataHWC[image_size] to the input tensor variable. | |||
| auto msInputs = mSession->GetInputs(); | |||
| auto inTensor = msInputs.front(); | |||
| memcpy(inTensor->MutableData(), dataHWC, | |||
| inputDims.channel * inputDims.width * inputDims.height * sizeof(float)); | |||
| delete[] (dataHWC); | |||
| ``` | |||
| 3. 对输入Tensor按照模型进行推理,获取输出Tensor,并进行后处理。 | |||
| - 图执行,端侧推理。 | |||
| ```cpp | |||
| // After the model and image tensor data is loaded, run inference. | |||
| auto status = mSession->RunGraph(); | |||
| ``` | |||
| - 获取输出数据。 | |||
| ```cpp | |||
| // Get the mindspore inference results. | |||
| auto msOutputs = mSession->GetOutputMapByNode(); | |||
| std::string retStr = ProcessRunnetResult(msOutputs); | |||
| ``` | |||
| - 输出数据的后续处理。 | |||
| ```cpp | |||
| std::string ProcessRunnetResult( | |||
| std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> msOutputs){ | |||
| // Get the branch of the model output. | |||
| // Use iterators to get map elements. | |||
| std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>>::iterator iter; | |||
| iter = msOutputs.begin(); | |||
| // The mobilenetv2.ms model output just one branch. | |||
| auto outputString = iter->first; | |||
| auto outputTensor = iter->second; | |||
| float *temp_scores = static_cast<float * >(outputTensor[0]->MutableData()); | |||
| float scores[RET_CATEGORY_SUM]; | |||
| for (int i = 0; i < RET_CATEGORY_SUM; ++i) { | |||
| if (temp_scores[i] > 0.5){ | |||
| MS_PRINT("MindSpore scores[%d] : [%f]", i, temp_scores[i]); | |||
| } | |||
| scores[i] = temp_scores[i]; | |||
| } | |||
| // Converted to text information that needs to be displayed in the APP. | |||
| std::string categoryScore = ""; | |||
| for (int i = 0; i < RET_CATEGORY_SUM; ++i) { | |||
| categoryScore += g_labels_name_map[i]; | |||
| categoryScore += ":"; | |||
| std::string score_str = std::to_string(scores[i]); | |||
| categoryScore += score_str; | |||
| categoryScore += ";"; | |||
| } | |||
| return categoryScore; | |||
| } | |||
| ``` | |||
| @@ -0,0 +1 @@ | |||
| /build | |||
| @@ -0,0 +1,94 @@ | |||
# For more information about using CMake with Android Studio, read the
# documentation: https://d.android.com/studio/projects/add-native-code.html

# Sets the minimum version of CMake required to build the native library.
cmake_minimum_required(VERSION 3.4.1)

set(CMAKE_VERBOSE_MAKEFILE on)

set(libs ${CMAKE_SOURCE_DIR}/libs)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/libs/${ANDROID_ABI})

# ============== Set MindSpore Dependencies. =============
include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/include)
include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/include/MindSpore)
include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/include/MindSpore/flatbuffers)
include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/include/MindSpore/ir/dtype)
include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/include/MindSpore/schema)

add_library(mindspore-lite SHARED IMPORTED)
set_target_properties(mindspore-lite PROPERTIES IMPORTED_LOCATION
        ${CMAKE_SOURCE_DIR}/libs/${ANDROID_ABI}/libmindspore-lite.so)
# --------------- MindSpore Lite set End. --------------------

# =============== Set OpenCV Dependencies ===================
include_directories(${CMAKE_SOURCE_DIR}/opencv/sdk/native/jni/include/)
add_library(lib-opencv SHARED IMPORTED)
set_target_properties(lib-opencv PROPERTIES IMPORTED_LOCATION
        ${CMAKE_SOURCE_DIR}/libs/${ANDROID_ABI}/libopencv_java4.so)
# --------------- OpenCV set End. ---------------------------

# Creates and names a library, sets it as either STATIC
# or SHARED, and provides the relative paths to its source code.
# You can define multiple libraries, and CMake builds them for you.
# Gradle automatically packages shared libraries with your APK.
file(GLOB_RECURSE cpp_src "src/main/cpp/*.cpp" "src/main/cpp/*.h")

add_library( # Sets the name of the library.
        mlkit-label-MS
        # Sets the library as a shared library.
        SHARED
        # Provides a relative path to your source file(s).
        ${cpp_src})

# Searches for a specified prebuilt library and stores the path as a
# variable. Because CMake includes system libraries in the search path by
# default, you only need to specify the name of the public NDK library
# you want to add. CMake verifies that the library exists before
# completing its build.
find_library( # Sets the name of the path variable.
        log-lib
        # Specifies the name of the NDK library that
        # you want CMake to locate.
        log)

# Fix: the library name contained a garbled character ("jnig·raphics"),
# so find_library could never locate the NDK jnigraphics library.
find_library(jnigraphics-lib jnigraphics)

add_definitions(-DMNN_USE_LOGCAT)

# Specifies libraries CMake should link to your target library. You
# can link multiple libraries, such as libraries you define in this
# build script, prebuilt third-party libraries, or system libraries.
target_link_libraries( # Specifies the target library.
        mlkit-label-MS
        # --- opencv ---
        lib-opencv
        # --- mindspore ---
        mindspore-lite
        # --- other dependencies: use the find_library result instead of a
        #     hard-coded -ljnigraphics linker flag ---
        ${jnigraphics-lib}
        android
        # Links the target library to the log library
        ${log-lib}
        )
| @@ -0,0 +1,84 @@ | |||
// Android application module configuration for the MindSpore Lite
// image-classification demo (JNI + prebuilt .so dependencies).
apply plugin: 'com.android.application'

android {
    compileSdkVersion 30
    buildToolsVersion "30.0.1"

    defaultConfig {
        applicationId "com.huawei.himindsporedemo"
        minSdkVersion 21
        targetSdkVersion 30
        versionCode 1
        versionName "1.0"

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
        externalNativeBuild {
            cmake {
                // Share the C++ STL so the prebuilt libmindspore-lite.so /
                // libopencv_java4.so and our JNI library use one runtime.
                arguments "-DANDROID_STL=c++_shared"
                cppFlags ""
            }
        }
        ndk {
            // Only arm64-v8a binaries are downloaded by download.gradle.
            abiFilters 'arm64-v8a'
        }
    }
    aaptOptions {
        // Keep native libraries and the .ms model uncompressed in the APK
        // so they can be mapped/loaded directly at runtime.
        noCompress '.so', 'ms'
    }
    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
        customDebugType {
            debuggable true
        }
    }
    externalNativeBuild {
        cmake {
            path file('CMakeLists.txt')
        }
    }
    ndkVersion '21.3.6528147'

    sourceSets{
        main {
            // Prebuilt .so files live under app/libs/<abi>/.
            jniLibs.srcDirs = ['libs']
        }
    }
    packagingOptions{
        // The same libraries can reach packaging via both jniLibs and the
        // native build output directory; keep the first copy encountered.
        pickFirst 'lib/arm64-v8a/libopencv_java4.so'
        pickFirst 'lib/arm64-v8a/libmindspore-lite.so'
        pickFirst 'lib/arm64-v8a/libmlkit-label-MS.so'
    }
}

// Before gradle build.
// To download some necessary libraries (models and prebuilt .so files).
apply from:'download.gradle'

/*if (!file("libs/arm64-v8a/libmindspore-lite.so").exists() ||
        !file("libs/arm64-v8a/libopencv_java4.so").exists()){
    apply from:'download.gradle'
}*/

dependencies {
    implementation fileTree(dir: "libs", include: ["*.jar"])
    implementation 'androidx.appcompat:appcompat:1.1.0'
    implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
//    implementation project(path: ':sdk')
    testImplementation 'junit:junit:4.12'
    androidTestImplementation 'androidx.test.ext:junit:1.1.1'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.2.0'
    implementation 'com.google.android.material:material:1.0.0'
    androidTestImplementation 'com.android.support.test:rules:1.0.2'
    androidTestImplementation 'com.google.truth:truth:1.0.1'
}
| @@ -0,0 +1,73 @@ | |||
/**
 * To download necessary libraries from the HuaWei server before the build:
 * the mindspore-lite .so file, the OpenCV .so file and the model file.
 * Each artifact can also be downloaded manually and placed at its target
 * path, in which case the corresponding download task is skipped.
 */
def targetModelFile = "src/main/assets/model/mobilenetv2.ms"
def openCVLibrary_arm64 = "libs/arm64-v8a/libopencv_java4.so"
def mindSporeLibrary_arm64 = "libs/arm64-v8a/libmindspore-lite.so"

def modelDownloadUrl = "https://download.mindspore.cn/model_zoo/official/lite/mobilenetv2_openimage_lite/mobilenetv2.ms"
def opencvDownloadUrl = "https://download.mindspore.cn/model_zoo/official/lite/lib/opencv%204.4.0/libopencv_java4.so"
def mindsporeLiteDownloadUrl = "https://download.mindspore.cn/model_zoo/official/lite/lib/mindspore%20version%200.7/libmindspore-lite.so"

task downloadModelFile(type: DownloadUrlTask) {
    doFirst {
        println "Downloading ${modelDownloadUrl}"
    }
    sourceUrl = "${modelDownloadUrl}"
    target = file("${targetModelFile}")
}

task downloadOpenCVLibrary(type: DownloadUrlTask) {
    doFirst {
        println "Downloading ${opencvDownloadUrl}"
    }
    sourceUrl = "${opencvDownloadUrl}"
    target = file("${openCVLibrary_arm64}")
}

task downloadMindSporeLibrary(type: DownloadUrlTask) {
    doFirst {
        println "Downloading ${mindsporeLiteDownloadUrl}"
    }
    sourceUrl = "${mindsporeLiteDownloadUrl}"
    target = file("${mindSporeLibrary_arm64}")
}

/*
 * Skip a download task when its artifact is already present.
 * The paths reuse the variables above so the checks cannot drift out of
 * sync with the download targets (the original checked the non-existent
 * "libopencv_java4.so.so", so the OpenCV download ran on every build).
 */
if (file(mindSporeLibrary_arm64).exists()) {
    downloadMindSporeLibrary.enabled = false
}
if (file(openCVLibrary_arm64).exists()) {
    downloadOpenCVLibrary.enabled = false
}
if (file(targetModelFile).exists()) {
    downloadModelFile.enabled = false
}

// Run all downloads before the Gradle build starts.
preBuild.dependsOn downloadMindSporeLibrary
preBuild.dependsOn downloadOpenCVLibrary
preBuild.dependsOn downloadModelFile

/**
 * Simple download task: fetches sourceUrl into the target file via Ant's
 * get task. @Input/@OutputFile enable Gradle up-to-date checking.
 */
class DownloadUrlTask extends DefaultTask {
    @Input
    String sourceUrl
    @OutputFile
    File target

    @TaskAction
    void download() {
        ant.get(src: sourceUrl, dest: target)
    }
}
| @@ -0,0 +1,21 @@ | |||
| # Add project specific ProGuard rules here. | |||
| # You can control the set of applied configuration files using the | |||
| # proguardFiles setting in build.gradle. | |||
| # | |||
| # For more details, see | |||
| # http://developer.android.com/guide/developing/tools/proguard.html | |||
| # If your project uses WebView with JS, uncomment the following | |||
| # and specify the fully qualified class name to the JavaScript interface | |||
| # class: | |||
| #-keepclassmembers class fqcn.of.javascript.interface.for.webview { | |||
| # public *; | |||
| #} | |||
| # Uncomment this to preserve the line number information for | |||
| # debugging stack traces. | |||
| #-keepattributes SourceFile,LineNumberTable | |||
| # If you keep the line number information, uncomment this to | |||
| # hide the original source file name. | |||
| #-renamesourcefileattribute SourceFile | |||
| @@ -0,0 +1,26 @@ | |||
| package com.huawei.himindsporedemo; | |||
| import android.content.Context; | |||
| import androidx.test.platform.app.InstrumentationRegistry; | |||
| import androidx.test.ext.junit.runners.AndroidJUnit4; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import static org.junit.Assert.*; | |||
| /** | |||
| * Instrumented test, which will execute on an Android device. | |||
| * | |||
| * @see <a href="http://d.android.com/tools/testing">Testing documentation</a> | |||
| */ | |||
| @RunWith(AndroidJUnit4.class) | |||
| public class ExampleInstrumentedTest { | |||
| @Test | |||
| public void useAppContext() { | |||
| // Context of the app under test. | |||
| Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext(); | |||
| assertEquals("com.huawei.himindsporedemo", appContext.getPackageName()); | |||
| } | |||
| } | |||
| @@ -0,0 +1,32 @@ | |||
| <?xml version="1.0" encoding="utf-8"?> | |||
| <manifest xmlns:android="http://schemas.android.com/apk/res/android" | |||
| package="com.huawei.himindsporedemo" | |||
| android:versionCode="1" | |||
| android:versionName="1.0"> | |||
| <uses-permission android:name="android.permission.CAMERA" /> | |||
| <!-- NOTE(review): WRITE_INTERNAL_STORAGE / READ_INTERNAL_STORAGE are not standard Android permissions; confirm they are intentional. --> | |||
| <uses-permission android:name="android.permission.WRITE_INTERNAL_STORAGE" /> | |||
| <uses-permission android:name="android.permission.READ_INTERNAL_STORAGE" /> | |||
| <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" /> | |||
| <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" /> | |||
| <uses-permission android:name="android.permission.MOUNT_UNMOUNT_FILESYSTEM" /> | |||
| <uses-permission android:name="android.permission.READ_PHONE_STATE" /> | |||
| <application | |||
| android:allowBackup="true" | |||
| android:icon="@mipmap/ic_launcher" | |||
| android:label="@string/app_name" | |||
| android:roundIcon="@mipmap/ic_launcher_round" | |||
| android:supportsRtl="true" | |||
| android:theme="@style/AppTheme"> | |||
| <activity | |||
| android:name=".widget.CameraActivity" | |||
| android:screenOrientation="portrait"> | |||
| <intent-filter> | |||
| <action android:name="android.intent.action.MAIN" /> | |||
| <category android:name="android.intent.category.LAUNCHER" /> | |||
| </intent-filter> | |||
| </activity> | |||
| </application> | |||
| </manifest> | |||
| @@ -0,0 +1,296 @@ | |||
| /* | |||
| * Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved. | |||
| */ | |||
| #include <android/bitmap.h> | |||
| #include <android/asset_manager_jni.h> | |||
| #include <android/log.h> | |||
| #include <jni.h> | |||
| #include <cstring> | |||
| #include <set> | |||
| #include <MindSpore/errorcode.h> | |||
| #include <MindSpore/ms_tensor.h> | |||
| #include "MindSporeNetnative.h" | |||
| #include "opencv2/core.hpp" | |||
| #include "opencv2/imgproc.hpp" | |||
| #include "MindSpore/MSNetWork.h" | |||
| #include "HMS/HMS_label_thres.h" | |||
| using namespace cv; | |||
| using namespace mindspore; | |||
| using namespace mindspore::tensor; | |||
| #define MS_PRINT(format, ...) __android_log_print(ANDROID_LOG_INFO, "MSJNI", format, ##__VA_ARGS__) | |||
| void BitmapToMat2(JNIEnv *env, jobject &bitmap, Mat &mat, jboolean needUnPremultiplyAlpha) { | |||
| AndroidBitmapInfo info; | |||
| void *pixels = nullptr; | |||
| Mat &dst = mat; | |||
| CV_Assert(AndroidBitmap_getInfo(env, bitmap, &info) >= 0); | |||
| CV_Assert(info.format == ANDROID_BITMAP_FORMAT_RGBA_8888 || | |||
| info.format == ANDROID_BITMAP_FORMAT_RGB_565); | |||
| CV_Assert(AndroidBitmap_lockPixels(env, bitmap, &pixels) >= 0); | |||
| CV_Assert(pixels); | |||
| dst.create(info.height, info.width, CV_8UC4); | |||
| if (info.format == ANDROID_BITMAP_FORMAT_RGBA_8888) { | |||
| Mat tmp(info.height, info.width, CV_8UC4, pixels); | |||
| if (needUnPremultiplyAlpha) { | |||
| cvtColor(tmp, dst, COLOR_RGBA2BGR); | |||
| } else { | |||
| tmp.copyTo(dst); | |||
| } | |||
| } else { | |||
| Mat tmp(info.height, info.width, CV_8UC4, pixels); | |||
| cvtColor(tmp, dst, COLOR_BGR5652RGBA); | |||
| } | |||
| AndroidBitmap_unlockPixels(env, bitmap); | |||
| return; | |||
| } | |||
// Convenience wrapper: converts a Bitmap with needUnPremultiplyAlpha=true,
// i.e. RGBA_8888 frames go through the COLOR_RGBA2BGR path in BitmapToMat2.
void BitmapToMat(JNIEnv *env, jobject &bitmap, Mat &mat) {
    BitmapToMat2(env, bitmap, mat, true);
}
| /** | |||
| * Processing image with resize and normalize. | |||
| */ | |||
| cv::Mat PreProcessImageData(cv::Mat input) { | |||
| cv::Mat imgFloatTmp, imgResized256, imgResized224; | |||
| int resizeWidth = 256; | |||
| int resizeHeight = 256; | |||
| float normalizMin = 1.0; | |||
| float normalizMax = 255.0; | |||
| cv::resize(input, imgFloatTmp, cv::Size(resizeWidth, resizeHeight)); | |||
| imgFloatTmp.convertTo(imgResized256, CV_32FC3, normalizMin / normalizMax); | |||
| int offsetX = 16; | |||
| int offsetY = 16; | |||
| int cropWidth = 224; | |||
| int cropHeight = 224; | |||
| // Standardization processing. | |||
| float meanR = 0.485; | |||
| float meanG = 0.456; | |||
| float meanB = 0.406; | |||
| float varR = 0.229; | |||
| float varG = 0.224; | |||
| float varB = 0.225; | |||
| cv::Rect roi; | |||
| roi.x = offsetX; | |||
| roi.y = offsetY; | |||
| roi.width = cropWidth; | |||
| roi.height = cropHeight; | |||
| // The final image size of the incoming model is 224*224. | |||
| imgResized256(roi).copyTo(imgResized224); | |||
| Scalar mean = Scalar(meanR, meanG, meanB); | |||
| Scalar var = Scalar(varR, varG, varB); | |||
| cv::Mat imgResized1; | |||
| cv::Mat imgResized2; | |||
| Mat imgMean(imgResized224.size(), CV_32FC3, | |||
| mean); // imgMean Each pixel channel is (0.485, 0.456, 0.406) | |||
| Mat imgVar(imgResized224.size(), CV_32FC3, | |||
| var); // imgVar Each pixel channel is (0.229, 0.224, 0.225) | |||
| imgResized1 = imgResized224 - imgMean; | |||
| imgResized2 = imgResized1 / imgVar; | |||
| return imgResized2; | |||
| } | |||
| char *CreateLocalModelBuffer(JNIEnv *env, jobject modelBuffer) { | |||
| jbyte *modelAddr = static_cast<jbyte *>(env->GetDirectBufferAddress(modelBuffer)); | |||
| int modelLen = static_cast<int>(env->GetDirectBufferCapacity(modelBuffer)); | |||
| char *buffer(new char[modelLen]); | |||
| memcpy(buffer, modelAddr, modelLen); | |||
| return buffer; | |||
| } | |||
| /** | |||
| * To process the result of mindspore inference. | |||
| * @param msOutputs | |||
| * @return | |||
| */ | |||
| std::string ProcessRunnetResult( | |||
| std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> msOutputs) { | |||
| // Get the branch of the model output. | |||
| // Use iterators to get map elements. | |||
| std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>>::iterator iter; | |||
| iter = msOutputs.begin(); | |||
| // The mobilenetv2.ms model output just one branch. | |||
| auto outputString = iter->first; | |||
| auto outputTensor = iter->second; | |||
| int tensorNum = outputTensor[0]->ElementsNum(); | |||
| MS_PRINT("Number of tensor elements:%d", tensorNum); | |||
| // Get a pointer to the first score. | |||
| float *temp_scores = static_cast<float * >(outputTensor[0]->MutableData()); | |||
| float scores[RET_CATEGORY_SUM]; | |||
| for (int i = 0; i < RET_CATEGORY_SUM; ++i) { | |||
| if (temp_scores[i] > 0.5) { | |||
| MS_PRINT("MindSpore scores[%d] : [%f]", i, temp_scores[i]); | |||
| } | |||
| scores[i] = temp_scores[i]; | |||
| } | |||
| // Score for each category. | |||
| // Converted to text information that needs to be displayed in the APP. | |||
| std::string categoryScore = ""; | |||
| for (int i = 0; i < RET_CATEGORY_SUM; ++i) { | |||
| categoryScore += g_labels_name_map[i]; | |||
| categoryScore += ":"; | |||
| std::string score_str = std::to_string(scores[i]); | |||
| categoryScore += score_str; | |||
| categoryScore += ";"; | |||
| } | |||
| return categoryScore; | |||
| } | |||
| /** | |||
| * The Java layer reads the model into MappedByteBuffer or ByteBuffer to load the model. | |||
| */ | |||
/**
 * JNI entry point: create the MindSpore Lite inference environment.
 *
 * The Java layer passes the model as a direct ByteBuffer. The bytes are
 * copied into native memory, a session is created and the graph compiled.
 *
 * @param model_buffer direct ByteBuffer holding the .ms model data
 * @param num_thread   number of threads the inference session may use
 * @return opaque handle (pointer to a void* slot holding the MSNetWork*)
 *         for use by runNet()/unloadModel(), or 0 on failure
 */
extern "C"
JNIEXPORT jlong JNICALL
Java_com_huawei_himindsporedemo_gallery_classify_TrackingMobile_loadModel(JNIEnv *env, jobject thiz,
                                                                          jobject model_buffer,
                                                                          jint num_thread) {
    if (nullptr == model_buffer) {
        MS_PRINT("error, buffer is nullptr!");
        return (jlong) nullptr;
    }
    jlong bufferLen = env->GetDirectBufferCapacity(model_buffer);
    if (0 == bufferLen) {
        MS_PRINT("error, bufferLen is 0!");
        return (jlong) nullptr;
    }
    // Copy the model bytes out of the Java-managed direct buffer.
    char *modelBuffer = CreateLocalModelBuffer(env, model_buffer);
    if (modelBuffer == nullptr) {
        MS_PRINT("modelBuffer create failed!");
        return (jlong) nullptr;
    }
    // To create a mindspore network inference environment.
    // labelEnv is a one-slot indirection so Java can hold a stable jlong handle.
    void **labelEnv = new void *;
    MSNetWork *labelNet = new MSNetWork;
    *labelEnv = labelNet;
    // The context only parameterizes session creation and is freed right after.
    lite::Context *context = new lite::Context;
    context->thread_num_ = num_thread;
    labelNet->CreateSessionMS(modelBuffer, bufferLen, context);
    delete (context);
    // NOTE(review): modelBuffer is never freed; presumably the session/model
    // keeps referencing it — confirm against MSNetWork::CreateSessionMS.
    // NOTE(review): labelNet and labelEnv leak on this failure path — confirm.
    if (labelNet->session == nullptr) {
        MS_PRINT("MindSpore create session failed!.");
        return (jlong) nullptr;
    }
    // Release the local reference to the Java buffer; the native copy suffices.
    if (model_buffer != nullptr) {
        env->DeleteLocalRef(model_buffer);
    }
    return (jlong) labelEnv;
}
| /** | |||
| * After the inference environment is successfully created, | |||
| * sending a picture to the model and run inference. | |||
| */ | |||
/**
 * JNI entry point: run one inference pass on the given camera frame.
 *
 * Converts the Bitmap to an OpenCV Mat, preprocesses it to the model's
 * input format, copies it into the input tensor, runs the graph and
 * formats the output scores via ProcessRunnetResult().
 *
 * @param netEnv    handle returned by loadModel()
 * @param srcBitmap frame as an android.graphics.Bitmap
 * @return "label:score;" result string, or NULL on any failure
 */
extern "C" JNIEXPORT jstring JNICALL
Java_com_huawei_himindsporedemo_gallery_classify_TrackingMobile_runNet(JNIEnv *env, jclass type,
                                                                       jlong netEnv,
                                                                       jobject srcBitmap) {
    Mat matImageSrc;
    BitmapToMat(env, srcBitmap, matImageSrc);
    // Resize/crop/normalize to the float format the model expects.
    Mat matImgPreprocessed = PreProcessImageData(matImageSrc);
    ImgDims inputDims;
    inputDims.channel = matImgPreprocessed.channels();
    inputDims.width = matImgPreprocessed.cols;
    inputDims.height = matImgPreprocessed.rows;
    // Get the mindsore inference environment which created in loadModel().
    void **labelEnv = reinterpret_cast<void **>(netEnv);
    if (labelEnv == nullptr) {
        MS_PRINT("MindSpore error, labelEnv is a nullptr.");
        return NULL;
    }
    MSNetWork *labelNet = static_cast<MSNetWork *>(*labelEnv);
    auto mSession = labelNet->session;
    if (mSession == nullptr) {
        MS_PRINT("MindSpore error, Session is a nullptr.");
        return NULL;
    }
    MS_PRINT("MindSpore get session.");
    auto msInputs = mSession->GetInputs();
    if (msInputs.size() == 0) {
        MS_PRINT("MindSpore error, msInputs.size() equals 0.");
        return NULL;
    }
    auto inTensor = msInputs.front();
    // dataHWC is the tensor format.
    // NOTE(review): this copies the Mat's float data verbatim, i.e. it assumes
    // the Mat layout (contiguous HWC) already matches the tensor — confirm.
    float *dataHWC = new float[inputDims.channel * inputDims.width * inputDims.height];
    float *ptrTmp = reinterpret_cast<float *>(matImgPreprocessed.data);
    for (int i = 0; i < inputDims.channel * inputDims.width * inputDims.height; ++i) {
        dataHWC[i] = ptrTmp[i];
    }
    // Copy dataHWC to the model input tensor.
    memcpy(inTensor->MutableData(), dataHWC,
           inputDims.channel * inputDims.width * inputDims.height * sizeof(float));
    // When using 'new' to allocate memory space, we need to use 'delete' to free space.
    delete[] (dataHWC);
    // After the model and image tensor data is loaded, run inference.
    auto status = mSession->RunGraph();
    if (status != lite::RET_OK) {
        MS_PRINT("MindSpore run net error.");
        return NULL;
    }
    /**
     * Get the mindspore inference results.
     * Return the map of output node name and MindSpore Lite MSTensor.
     */
    auto msOutputs = mSession->GetOutputMapByNode();
    std::string resultStr = ProcessRunnetResult(msOutputs);
    const char *resultCharData = resultStr.c_str();
    return (env)->NewStringUTF(resultCharData);
}
| extern "C" JNIEXPORT jboolean JNICALL | |||
| Java_com_huawei_himindsporedemo_gallery_classify_TrackingMobile_unloadModel(JNIEnv *env, | |||
| jclass type, | |||
| jlong netEnv) { | |||
| MS_PRINT("MindSpore release net."); | |||
| void **labelEnv = reinterpret_cast<void **>(netEnv); | |||
| if (labelEnv == nullptr) { | |||
| MS_PRINT("MindSpore error, labelEnv is a nullptr."); | |||
| } | |||
| MSNetWork *labelNet = static_cast<MSNetWork *>(*labelEnv); | |||
| labelNet->ReleaseNets(); | |||
| return (jboolean) true; | |||
| } | |||
| @@ -0,0 +1,8 @@ | |||
| /* | |||
| * Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved. | |||
| */ | |||
| #ifndef MINDSPORE_JNI_HMS_DEBUG_MINDSPORENETNATIVE_H | |||
| #define MINDSPORE_JNI_HMS_DEBUG_MINDSPORENETNATIVE_H | |||
| #endif //MINDSPORE_JNI_HMS_DEBUG_MINDSPORENETNATIVE_H | |||
| @@ -0,0 +1,616 @@ | |||
| /* | |||
| * Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved. | |||
| */ | |||
| #ifndef MNN_JNI_HMS_HMS_LABEL_THRES_H | |||
| #define MNN_JNI_HMS_HMS_LABEL_THRES_H | |||
#include <string.h>
#include <map>
#include <string>  // required: g_labels_name_map below is std::string
// Number of classification labels the model can emit.
constexpr int RET_CATEGORY_SUM = 601;

// Human-readable class names, indexed by the model's output category id.
// The label text is reproduced verbatim from the original table — including
// the trailing blank in "Organ(MusicalInstrument) ", "Boy " and
// "Adhesivetape " — so displayed results stay byte-identical; do not "clean
// up" individual entries without checking the model's label mapping.
static std::string g_labels_name_map[RET_CATEGORY_SUM] = {
    "Tortoise", "Container", "Magpie", "Seaturtle", "Football", "Ambulance", "Ladder",
    "Toothbrush", "Syringe", "Sink", "Toy", "Organ(MusicalInstrument) ", "Cassettedeck", "Apple",
    "Humaneye", "Cosmetics", "Paddle", "Snowman", "Beer", "Chopsticks", "Humanbeard",
    "Bird", "Parkingmeter", "Trafficlight", "Croissant", "Cucumber", "Radish", "Towel",
    "Doll", "Skull", "Washingmachine", "Glove", "Tick", "Belt", "Sunglasses",
    "Banjo", "Cart", "Ball", "Backpack", "Bicycle", "Homeappliance", "Centipede",
    "Boat", "Surfboard", "Boot", "Headphones", "Hotdog", "Shorts", "Fastfood",
    "Bus", "Boy ", "Screwdriver", "Bicyclewheel", "Barge", "Laptop", "Miniskirt",
    "Drill(Tool)", "Dress", "Bear", "Waffle", "Pancake", "Brownbear", "Woodpecker",
    "Bluejay", "Pretzel", "Bagel", "Tower", "Teapot", "Person", "Bowandarrow",
    "Swimwear", "Beehive", "Brassiere", "Bee", "Bat(Animal)", "Starfish", "Popcorn",
    "Burrito", "Chainsaw", "Balloon", "Wrench", "Tent", "Vehicleregistrationplate", "Lantern",
    "Toaster", "Flashlight", "Billboard", "Tiara", "Limousine", "Necklace", "Carnivore",
    "Scissors", "Stairs", "Computerkeyboard", "Printer", "Trafficsign", "Chair", "Shirt",
    "Poster", "Cheese", "Sock", "Firehydrant", "Landvehicle", "Earrings", "Tie",
    "Watercraft", "Cabinetry", "Suitcase", "Muffin", "Bidet", "Snack", "Snowmobile",
    "Clock", "Medicalequipment", "Cattle", "Cello", "Jetski", "Camel", "Coat",
    "Suit", "Desk", "Cat", "Bronzesculpture", "Juice", "Gondola", "Beetle",
    "Cannon", "Computermouse", "Cookie", "Officebuilding", "Fountain", "Coin", "Calculator",
    "Cocktail", "Computermonitor", "Box", "Stapler", "Christmastree", "Cowboyhat", "Hikingequipment",
    "Studiocouch", "Drum", "Dessert", "Winerack", "Drink", "Zucchini", "Ladle",
    "Humanmouth", "DairyProduct", "Dice", "Oven", "Dinosaur", "Ratchet(Device)", "Couch",
    "Cricketball", "Wintermelon", "Spatula", "Whiteboard", "Pencilsharpener", "Door", "Hat",
    "Shower", "Eraser", "Fedora", "Guacamole", "Dagger", "Scarf", "Dolphin",
    "Sombrero", "Tincan", "Mug", "Tap", "Harborseal", "Stretcher", "Canopener",
    "Goggles", "Humanbody", "Rollerskates", "Coffeecup", "Cuttingboard", "Blender", "Plumbingfixture",
    "Stopsign", "Officesupplies", "Volleyball(Ball)", "Vase", "Slowcooker", "Wardrobe", "Coffee",
    "Whisk", "Papertowel", "Personalcare", "Food", "Sunhat", "Treehouse", "Flyingdisc",
    "Skirt", "Gasstove", "Saltandpeppershakers", "Mechanicalfan", "Facepowder", "Fax", "Fruit",
    "Frenchfries", "Nightstand", "Barrel", "Kite", "Tart", "Treadmill", "Fox",
    "Flag", "Frenchhorn", "Windowblind", "Humanfoot", "Golfcart", "Jacket", "Egg(Food)",
    "Streetlight", "Guitar", "Pillow", "Humanleg", "Isopod", "Grape", "Humanear",
    "Powerplugsandsockets", "Panda", "Giraffe", "Woman", "Doorhandle", "Rhinoceros", "Bathtub",
    "Goldfish", "Houseplant", "Goat", "Baseballbat", "Baseballglove", "Mixingbowl", "Marineinvertebrates",
    "Kitchenutensil", "Lightswitch", "House", "Horse", "Stationarybicycle", "Hammer", "Ceilingfan",
    "Sofabed", "Adhesivetape ", "Harp", "Sandal", "Bicyclehelmet", "Saucer", "Harpsichord",
    "Humanhair", "Heater", "Harmonica", "Hamster", "Curtain", "Bed", "Kettle",
    "Fireplace", "Scale", "Drinkingstraw", "Insect", "Hairdryer", "Kitchenware", "Indoorrower",
    "Invertebrate", "Foodprocessor", "Bookcase", "Refrigerator", "Wood-burningstove", "Punchingbag", "Commonfig",
    "Cocktailshaker", "Jaguar(Animal)", "Golfball", "Fashionaccessory", "Alarmclock", "Filingcabinet", "Artichoke",
    "Table", "Tableware", "Kangaroo", "Koala", "Knife", "Bottle", "Bottleopener",
    "Lynx", "Lavender(Plant)", "Lighthouse", "Dumbbell", "Humanhead", "Bowl", "Humidifier",
    "Porch", "Lizard", "Billiardtable", "Mammal", "Mouse", "Motorcycle", "Musicalinstrument",
    "Swimcap", "Fryingpan", "Snowplow", "Bathroomcabinet", "Missile", "Bust", "Man",
    "Waffleiron", "Milk", "Ringbinder", "Plate", "Mobilephone", "Bakedgoods", "Mushroom",
    "Crutch", "Pitcher(Container)", "Mirror", "Personalflotationdevice", "Tabletennisracket", "Pencilcase", "Musicalkeyboard",
    "Scoreboard", "Briefcase", "Kitchenknife", "Nail(Construction)", "Tennisball", "Plasticbag", "Oboe",
    "Chestofdrawers", "Ostrich", "Piano", "Girl", "Plant", "Potato", "Hairspray",
    "Sportsequipment", "Pasta", "Penguin", "Pumpkin", "Pear", "Infantbed", "Polarbear",
    "Mixer", "Cupboard", "Jacuzzi", "Pizza", "Digitalclock", "Pig", "Reptile",
    "Rifle", "Lipstick", "Skateboard", "Raven", "Highheels", "Redpanda", "Rose",
    "Rabbit", "Sculpture", "Saxophone", "Shotgun", "Seafood", "Submarinesandwich", "Snowboard",
    "Sword", "Pictureframe", "Sushi", "Loveseat", "Ski", "Squirrel", "Tripod",
    "Stethoscope", "Submarine", "Scorpion", "Segway", "Trainingbench", "Snake", "Coffeetable",
    "Skyscraper", "Sheep", "Television", "Trombone", "Tea", "Tank", "Taco",
    "Telephone", "Torch", "Tiger", "Strawberry", "Trumpet", "Tree", "Tomato",
    "Train", "Tool", "Picnicbasket", "Cookingspray", "Trousers", "Bowlingequipment", "Footballhelmet",
    "Truck", "Measuringcup", "Coffeemaker", "Violin", "Vehicle", "Handbag", "Papercutter",
    "Wine", "Weapon", "Wheel", "Worm", "Wok", "Whale", "Zebra",
    "Autopart", "Jug", "Pizzacutter", "Cream", "Monkey", "Lion", "Bread",
    "Platter", "Chicken", "Eagle", "Helicopter", "Owl", "Duck", "Turtle",
    "Hippopotamus", "Crocodile", "Toilet", "Toiletpaper", "Squid", "Clothing", "Footwear",
    "Lemon", "Spider", "Deer", "Frog", "Banana", "Rocket", "Wineglass",
    "Countertop", "Tabletcomputer", "Wastecontainer", "Swimmingpool", "Dog", "Book", "Elephant",
    "Shark", "Candle", "Leopard", "Axe", "Handdryer", "Soapdispenser", "Porcupine",
    "Flower", "Canary", "Cheetah", "Palmtree", "Hamburger", "Maple", "Building",
    "Fish", "Lobster", "GardenAsparagus", "Furniture", "Hedgehog", "Airplane", "Spoon",
    "Otter", "Bull", "Oyster", "Horizontalbar", "Conveniencestore", "Bomb", "Bench",
    "Icecream", "Caterpillar", "Butterfly", "Parachute", "Orange", "Antelope", "Beaker",
    "Mothsandbutterflies", "Window", "Closet", "Castle", "Jellyfish", "Goose", "Mule",
    "Swan", "Peach", "Coconut", "Seatbelt", "Raccoon", "Chisel", "Fork",
    "Lamp", "Camera", "Squash(Plant)", "Racket", "Humanface", "Humanarm", "Vegetable",
    "Diaper", "Unicycle", "Falcon", "Chime", "Snail", "Shellfish", "Cabbage",
    "Carrot", "Mango", "Jeans", "Flowerpot", "Pineapple", "Drawer", "Stool",
    "Envelope", "Cake", "Dragonfly", "Commonsunflower", "Microwaveoven", "Honeycomb", "Marinemammal",
    "Sealion", "Ladybug", "Shelf", "Watch", "Candy", "Salad", "Parrot",
    "Handgun", "Sparrow", "Van", "Grinder", "Spicerack", "Lightbulb", "Cordedphone",
    "Sportsuniform", "Tennisracket", "Wallclock", "Servingtray", "Kitchen&diningroomtable", "Dogbed", "Cakestand",
    "Catfurniture", "Bathroomaccessory", "Facialtissueholder", "Pressurecooker", "Kitchenappliance", "Tire", "Ruler",
    "Luggageandbags", "Microphone", "Broccoli", "Umbrella", "Pastry", "Grapefruit", "Band-aid",
    "Animal", "Bellpepper", "Turkey", "Lily", "Pomegranate", "Doughnut", "Glasses",
    "Humannose", "Pen", "Ant", "Car", "Aircraft", "Humanhand", "Skunk",
    "Teddybear", "Watermelon", "Cantaloupe", "Dishwasher", "Flute", "Balancebeam", "Sandwich",
    "Shrimp", "Sewingmachine", "Binoculars", "Raysandskates", "Ipod", "Accordion", "Willow",
    "Crab", "Crown", "Seahorse", "Perfume", "Alpaca", "Taxi", "Canoe",
    "Remotecontrol", "Wheelchair", "Rugbyball", "Armadillo", "Maracas", "Helmet",
};
| #endif // MNN_JNI_HMS_HMS_LABEL_THRES_H | |||
| @@ -0,0 +1,41 @@ | |||
| #include "MSNetWork.h" | |||
| #include <iostream> | |||
| #include <android/log.h> | |||
| #include "errorcode.h" | |||
| #define MS_PRINT(format, ...) __android_log_print(ANDROID_LOG_INFO, "MSJNI", format, ##__VA_ARGS__) | |||
| MSNetWork::MSNetWork(void) : session(nullptr) {} | |||
| MSNetWork::~MSNetWork(void) {} | |||
| void MSNetWork::CreateSessionMS(char* modelBuffer, size_t bufferLen, mindspore::lite::Context* ctx) | |||
| { | |||
| session = mindspore::session::LiteSession::CreateSession(ctx); | |||
| if (session == nullptr){ | |||
| MS_PRINT("Create Session failed."); | |||
| return; | |||
| } | |||
| // Compile model. | |||
| auto model = mindspore::lite::Model::Import(modelBuffer, bufferLen); | |||
| if (model == nullptr){ | |||
| MS_PRINT("Import model failed."); | |||
| return; | |||
| } | |||
| int ret = session->CompileGraph(model); | |||
| if (ret != mindspore::lite::RET_OK){ | |||
| MS_PRINT("CompileGraph failed."); | |||
| return; | |||
| } | |||
| } | |||
| int MSNetWork::ReleaseNets(void) | |||
| { | |||
| delete session; | |||
| // delete model; | |||
| return 0; | |||
| } | |||
| @@ -0,0 +1,50 @@ | |||
| // * Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved. | |||
| #ifndef MSNETWORK_H | |||
| #define MSNETWORK_H | |||
| #include <cstdio> | |||
| #include <algorithm> | |||
| #include <fstream> | |||
| #include <functional> | |||
| #include <sstream> | |||
| #include <vector> | |||
| #include <map> | |||
| #include <string> | |||
| #include <memory> | |||
| #include <utility> | |||
| #include <context.h> | |||
| #include <lite_session.h> | |||
| #include <model.h> | |||
| #include <errorcode.h> | |||
| using namespace mindspore; | |||
// Dimensions of the image buffer fed to the model input tensor; the JNI code
// uses channel * width * height as the element count. All fields default to 0
// until filled in by the caller.
struct ImgDims {
    int channel = 0;
    int width = 0;
    int height = 0;
};
| /*struct SessIterm { | |||
| std::shared_ptr<mindspore::session::LiteSession> sess = nullptr; | |||
| };*/ | |||
| class MSNetWork { | |||
| public: | |||
| MSNetWork(); | |||
| ~MSNetWork(); | |||
| void CreateSessionMS(char* modelBuffer, size_t bufferLen, mindspore::lite::Context* ctx); | |||
| int ReleaseNets(void); | |||
| mindspore::session::LiteSession *session; | |||
| mindspore::lite::Model *model; | |||
| private: | |||
| //std::map<std::string, SessIterm> sess; | |||
| }; | |||
| #endif | |||
| @@ -0,0 +1,71 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_INCLUDE_CONTEXT_H_ | |||
| #define MINDSPORE_LITE_INCLUDE_CONTEXT_H_ | |||
| #include <string> | |||
| #include <memory> | |||
| #include "ms_tensor.h" | |||
| #include "thread_pool_config.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| /// \brief Allocator defined a memory pool for malloc memory and free memory dynamically. | |||
| /// | |||
| /// \note List public class and interface for reference. | |||
| class Allocator; | |||
/// \brief DeviceType defined for holding user's preferred backend.
///
/// NOTE(review): this header mirrors the MindSpore Lite SDK interface — keep
/// it in sync with the prebuilt library version; the C-style typedef enum is
/// part of that interface.
typedef enum {
  DT_CPU, /**< CPU device type */
  DT_GPU, /**< GPU device type */
  DT_NPU /**< NPU device type, not supported yet */
} DeviceType;
/// \brief DeviceContext defined for holding DeviceType.
///
/// NOTE(review): SDK header copy — layout must match the linked library.
typedef struct {
  DeviceType type; /**< device type */
} DeviceContext;
/// \brief Context defined for holding environment variables during runtime.
///
/// NOTE(review): this is a copy of the MindSpore Lite SDK header; members and
/// their order are part of the library ABI and must not be changed here
/// (including the inconsistent naming of `allocator`, which lacks the
/// trailing underscore the other members use).
class MS_API Context {
 public:
  /// \brief Constructor of MindSpore Lite Context using default value for parameters.
  ///
  /// \return Instance of MindSpore Lite Context.
  Context();
  /// \brief Constructor of MindSpore Lite Context using input value for parameters.
  ///
  /// \param[in] thread_num Define the work thread number during the runtime.
  /// \param[in] allocator Define the allocator for malloc.
  /// \param[in] device_ctx Define device information during the runtime.
  Context(int thread_num, std::shared_ptr<Allocator> allocator, DeviceContext device_ctx);
  /// \brief Destructor of MindSpore Lite Context.
  virtual ~Context();
 public:
  bool float16_priority = false; /**< allow priority select float16 kernel */
  DeviceContext device_ctx_{DT_CPU}; /**< preferred backend, defaults to CPU */
  int thread_num_ = 2; /**< thread number config for thread pool */
  std::shared_ptr<Allocator> allocator = nullptr; /**< optional custom allocator; default pool used when null — confirm against SDK docs */
  CpuBindMode cpu_bind_mode_ = MID_CPU; /**< core-binding policy (declared in thread_pool_config.h) */
};
| } | |||
| } // namespace mindspore::lite | |||
| #endif // MINDSPORE_LITE_INCLUDE_CONTEXT_H_ | |||
| @@ -0,0 +1,59 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_INCLUDE_ERRORCODE_H_ | |||
| #define MINDSPORE_LITE_INCLUDE_ERRORCODE_H_ | |||
| namespace mindspore { | |||
| namespace lite { | |||
/// \brief STATUS defined for holding error code in MindSpore Lite.
using STATUS = int;
/* Success */
constexpr int RET_OK = 0; /**< No error occurs. */
/* Common error code, range: [-1, -100]*/
constexpr int RET_ERROR = -1; /**< Common error code. */
constexpr int RET_NULL_PTR = -2; /**< NULL pointer returned.*/
constexpr int RET_PARAM_INVALID = -3; /**< Invalid parameter.*/
constexpr int RET_NO_CHANGE = -4; /**< No change. */
constexpr int RET_SUCCESS_EXIT = -5; /**< No error but exit. */
constexpr int RET_MEMORY_FAILED = -6; /**< Fail to create memory. */
/* Executor error code, range: [-101,-200] */
constexpr int RET_OUT_OF_TENSOR_RANGE = -101; /**< Failed to check range. */
constexpr int RET_INPUT_TENSOR_ERROR = -102; /**< Failed to check input tensor. */
constexpr int RET_REENTRANT_ERROR = -103; /**< Exist executor running. */
/* Graph error code, range: [-201,-300] */
constexpr int RET_GRAPH_FILE_ERR = -201; /**< Failed to verify graph file. */
/* Node error code, range: [-301,-400] */
constexpr int RET_NOT_FIND_OP = -301; /**< Failed to find operator. */
constexpr int RET_INVALID_OP_NAME = -302; /**< Invalid operator name. */
constexpr int RET_INVALID_OP_ATTR = -303; /**< Invalid operator attr. */
constexpr int RET_OP_EXECUTE_FAILURE = -304; /**< Failed to execute the operator. */
/* Tensor error code, range: [-401,-500] */
constexpr int RET_FORMAT_ERR = -401; /**< Failed to check the tensor format. */
/* InferShape error code, range: [-501,-600] */
constexpr int RET_INFER_ERR = -501; /**< Failed to infer shape. */
constexpr int RET_INFER_INVALID = -502; /**< Invalid infer shape before runtime. */
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_INCLUDE_ERRORCODE_H_ | |||
| @@ -0,0 +1,379 @@ | |||
| #ifndef FLATBUFFERS_BASE_H_ | |||
| #define FLATBUFFERS_BASE_H_ | |||
| // clang-format off | |||
| // If activate should be declared and included first. | |||
| #if defined(FLATBUFFERS_MEMORY_LEAK_TRACKING) && \ | |||
| defined(_MSC_VER) && defined(_DEBUG) | |||
| // The _CRTDBG_MAP_ALLOC inside <crtdbg.h> will replace | |||
| // calloc/free (etc) to its debug version using #define directives. | |||
| #define _CRTDBG_MAP_ALLOC | |||
| #include <stdlib.h> | |||
| #include <crtdbg.h> | |||
| // Replace operator new by trace-enabled version. | |||
| #define DEBUG_NEW new(_NORMAL_BLOCK, __FILE__, __LINE__) | |||
| #define new DEBUG_NEW | |||
| #endif | |||
| #if !defined(FLATBUFFERS_ASSERT) | |||
| #include <assert.h> | |||
| #define FLATBUFFERS_ASSERT assert | |||
| #elif defined(FLATBUFFERS_ASSERT_INCLUDE) | |||
| // Include file with forward declaration | |||
| #include FLATBUFFERS_ASSERT_INCLUDE | |||
| #endif | |||
| #ifndef ARDUINO | |||
| #include <cstdint> | |||
| #endif | |||
| #include <cstddef> | |||
| #include <cstdlib> | |||
| #include <cstring> | |||
| #if defined(ARDUINO) && !defined(ARDUINOSTL_M_H) | |||
| #include <utility.h> | |||
| #else | |||
| #include <utility> | |||
| #endif | |||
| #include <string> | |||
| #include <type_traits> | |||
| #include <vector> | |||
| #include <set> | |||
| #include <algorithm> | |||
| #include <iterator> | |||
| #include <memory> | |||
| #ifdef _STLPORT_VERSION | |||
| #define FLATBUFFERS_CPP98_STL | |||
| #endif | |||
| #ifndef FLATBUFFERS_CPP98_STL | |||
| #include <functional> | |||
| #endif | |||
| #include "stl_emulation.h" | |||
| // Note the __clang__ check is needed, because clang presents itself | |||
| // as an older GNUC compiler (4.2). | |||
| // Clang 3.3 and later implement all of the ISO C++ 2011 standard. | |||
| // Clang 3.4 and later implement all of the ISO C++ 2014 standard. | |||
| // http://clang.llvm.org/cxx_status.html | |||
| // Note the MSVC value '__cplusplus' may be incorrect: | |||
| // The '__cplusplus' predefined macro in the MSVC stuck at the value 199711L, | |||
| // indicating (erroneously!) that the compiler conformed to the C++98 Standard. | |||
| // This value should be correct starting from MSVC2017-15.7-Preview-3. | |||
| // The '__cplusplus' will be valid only if MSVC2017-15.7-P3 and the `/Zc:__cplusplus` switch is set. | |||
| // Workaround (for details see MSDN): | |||
| // Use the _MSC_VER and _MSVC_LANG definition instead of the __cplusplus for compatibility. | |||
| // The _MSVC_LANG macro reports the Standard version regardless of the '/Zc:__cplusplus' switch. | |||
| #if defined(__GNUC__) && !defined(__clang__) | |||
| #define FLATBUFFERS_GCC (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) | |||
| #else | |||
| #define FLATBUFFERS_GCC 0 | |||
| #endif | |||
| #if defined(__clang__) | |||
| #define FLATBUFFERS_CLANG (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) | |||
| #else | |||
| #define FLATBUFFERS_CLANG 0 | |||
| #endif | |||
| /// @cond FLATBUFFERS_INTERNAL | |||
| #if __cplusplus <= 199711L && \ | |||
| (!defined(_MSC_VER) || _MSC_VER < 1600) && \ | |||
| (!defined(__GNUC__) || \ | |||
| (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__ < 40400)) | |||
| #error A C++11 compatible compiler with support for the auto typing is \ | |||
| required for FlatBuffers. | |||
| #error __cplusplus _MSC_VER __GNUC__ __GNUC_MINOR__ __GNUC_PATCHLEVEL__ | |||
| #endif | |||
| #if !defined(__clang__) && \ | |||
| defined(__GNUC__) && \ | |||
| (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__ < 40600) | |||
| // Backwards compatability for g++ 4.4, and 4.5 which don't have the nullptr | |||
| // and constexpr keywords. Note the __clang__ check is needed, because clang | |||
| // presents itself as an older GNUC compiler. | |||
| #ifndef nullptr_t | |||
| const class nullptr_t { | |||
| public: | |||
| template<class T> inline operator T*() const { return 0; } | |||
| private: | |||
| void operator&() const; | |||
| } nullptr = {}; | |||
| #endif | |||
| #ifndef constexpr | |||
| #define constexpr const | |||
| #endif | |||
| #endif | |||
| // The wire format uses a little endian encoding (since that's efficient for | |||
| // the common platforms). | |||
| #if defined(__s390x__) | |||
| #define FLATBUFFERS_LITTLEENDIAN 0 | |||
| #endif // __s390x__ | |||
| #if !defined(FLATBUFFERS_LITTLEENDIAN) | |||
| #if defined(__GNUC__) || defined(__clang__) | |||
| #if (defined(__BIG_ENDIAN__) || \ | |||
| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) | |||
| #define FLATBUFFERS_LITTLEENDIAN 0 | |||
| #else | |||
| #define FLATBUFFERS_LITTLEENDIAN 1 | |||
| #endif // __BIG_ENDIAN__ | |||
| #elif defined(_MSC_VER) | |||
| #if defined(_M_PPC) | |||
| #define FLATBUFFERS_LITTLEENDIAN 0 | |||
| #else | |||
| #define FLATBUFFERS_LITTLEENDIAN 1 | |||
| #endif | |||
| #else | |||
| #error Unable to determine endianness, define FLATBUFFERS_LITTLEENDIAN. | |||
| #endif | |||
| #endif // !defined(FLATBUFFERS_LITTLEENDIAN) | |||
| #define FLATBUFFERS_VERSION_MAJOR 1 | |||
| #define FLATBUFFERS_VERSION_MINOR 11 | |||
| #define FLATBUFFERS_VERSION_REVISION 0 | |||
| #define FLATBUFFERS_STRING_EXPAND(X) #X | |||
| #define FLATBUFFERS_STRING(X) FLATBUFFERS_STRING_EXPAND(X) | |||
| #if (!defined(_MSC_VER) || _MSC_VER > 1600) && \ | |||
| (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 407)) || \ | |||
| defined(__clang__) | |||
| #define FLATBUFFERS_FINAL_CLASS final | |||
| #define FLATBUFFERS_OVERRIDE override | |||
| #define FLATBUFFERS_VTABLE_UNDERLYING_TYPE : flatbuffers::voffset_t | |||
| #else | |||
| #define FLATBUFFERS_FINAL_CLASS | |||
| #define FLATBUFFERS_OVERRIDE | |||
| #define FLATBUFFERS_VTABLE_UNDERLYING_TYPE | |||
| #endif | |||
| #if (!defined(_MSC_VER) || _MSC_VER >= 1900) && \ | |||
| (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 406)) || \ | |||
| (defined(__cpp_constexpr) && __cpp_constexpr >= 200704) | |||
| #define FLATBUFFERS_CONSTEXPR constexpr | |||
| #else | |||
| #define FLATBUFFERS_CONSTEXPR const | |||
| #endif | |||
| #if (defined(__cplusplus) && __cplusplus >= 201402L) || \ | |||
| (defined(__cpp_constexpr) && __cpp_constexpr >= 201304) | |||
| #define FLATBUFFERS_CONSTEXPR_CPP14 FLATBUFFERS_CONSTEXPR | |||
| #else | |||
| #define FLATBUFFERS_CONSTEXPR_CPP14 | |||
| #endif | |||
| #if (defined(__GXX_EXPERIMENTAL_CXX0X__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 406)) || \ | |||
| (defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190023026)) || \ | |||
| defined(__clang__) | |||
| #define FLATBUFFERS_NOEXCEPT noexcept | |||
| #else | |||
| #define FLATBUFFERS_NOEXCEPT | |||
| #endif | |||
| // NOTE: the FLATBUFFERS_DELETE_FUNC macro may change the access mode to | |||
| // private, so be sure to put it at the end or reset access mode explicitly. | |||
| #if (!defined(_MSC_VER) || _MSC_FULL_VER >= 180020827) && \ | |||
| (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 404)) || \ | |||
| defined(__clang__) | |||
| #define FLATBUFFERS_DELETE_FUNC(func) func = delete; | |||
| #else | |||
| #define FLATBUFFERS_DELETE_FUNC(func) private: func; | |||
| #endif | |||
| #ifndef FLATBUFFERS_HAS_STRING_VIEW | |||
| // Only provide flatbuffers::string_view if __has_include can be used | |||
| // to detect a header that provides an implementation | |||
| #if defined(__has_include) | |||
| // Check for std::string_view (in c++17) | |||
| #if __has_include(<string_view>) && (__cplusplus >= 201606 || _HAS_CXX17) | |||
| #include <string_view> | |||
| namespace flatbuffers { | |||
| typedef std::string_view string_view; | |||
| } | |||
| #define FLATBUFFERS_HAS_STRING_VIEW 1 | |||
| // Check for std::experimental::string_view (in c++14, compiler-dependent) | |||
| #elif __has_include(<experimental/string_view>) && (__cplusplus >= 201411) | |||
| #include <experimental/string_view> | |||
| namespace flatbuffers { | |||
| typedef std::experimental::string_view string_view; | |||
| } | |||
| #define FLATBUFFERS_HAS_STRING_VIEW 1 | |||
| #endif | |||
| #endif // __has_include | |||
| #endif // !FLATBUFFERS_HAS_STRING_VIEW | |||
| #ifndef FLATBUFFERS_HAS_NEW_STRTOD | |||
| // Modern (C++11) strtod and strtof functions are available for use. | |||
| // 1) nan/inf strings as argument of strtod; | |||
| // 2) hex-float as argument of strtod/strtof. | |||
| #if (defined(_MSC_VER) && _MSC_VER >= 1900) || \ | |||
| (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \ | |||
| (defined(__clang__)) | |||
| #define FLATBUFFERS_HAS_NEW_STRTOD 1 | |||
| #endif | |||
| #endif // !FLATBUFFERS_HAS_NEW_STRTOD | |||
| #ifndef FLATBUFFERS_LOCALE_INDEPENDENT | |||
| // Enable locale independent functions {strtof_l, strtod_l,strtoll_l, strtoull_l}. | |||
| // They are part of the POSIX-2008 but not part of the C/C++ standard. | |||
| // GCC/Clang have definition (_XOPEN_SOURCE>=700) if POSIX-2008. | |||
| #if ((defined(_MSC_VER) && _MSC_VER >= 1800) || \ | |||
| (defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE>=700))) | |||
| #define FLATBUFFERS_LOCALE_INDEPENDENT 1 | |||
| #else | |||
| #define FLATBUFFERS_LOCALE_INDEPENDENT 0 | |||
| #endif | |||
| #endif // !FLATBUFFERS_LOCALE_INDEPENDENT | |||
| // Suppress Undefined Behavior Sanitizer (recoverable only). Usage: | |||
| // - __supress_ubsan__("undefined") | |||
| // - __supress_ubsan__("signed-integer-overflow") | |||
| #if defined(__clang__) | |||
| #define __supress_ubsan__(type) __attribute__((no_sanitize(type))) | |||
| #elif defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409) | |||
| #define __supress_ubsan__(type) __attribute__((no_sanitize_undefined)) | |||
| #else | |||
| #define __supress_ubsan__(type) | |||
| #endif | |||
| // This is constexpr function used for checking compile-time constants. | |||
| // Avoid `#pragma warning(disable: 4127) // C4127: expression is constant`. | |||
| template<typename T> FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(T t) { | |||
| return !!t; | |||
| } | |||
| // Enable C++ attribute [[]] if std:c++17 or higher. | |||
| #if ((__cplusplus >= 201703L) \ | |||
| || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L))) | |||
| // All attributes unknown to an implementation are ignored without causing an error. | |||
| #define FLATBUFFERS_ATTRIBUTE(attr) [[attr]] | |||
| #define FLATBUFFERS_FALLTHROUGH() [[fallthrough]] | |||
| #else | |||
| #define FLATBUFFERS_ATTRIBUTE(attr) | |||
| #if FLATBUFFERS_CLANG >= 30800 | |||
| #define FLATBUFFERS_FALLTHROUGH() [[clang::fallthrough]] | |||
| #elif FLATBUFFERS_GCC >= 70300 | |||
| #define FLATBUFFERS_FALLTHROUGH() [[gnu::fallthrough]] | |||
| #else | |||
| #define FLATBUFFERS_FALLTHROUGH() | |||
| #endif | |||
| #endif | |||
| /// @endcond | |||
| /// @file | |||
| namespace flatbuffers { | |||
| /// @cond FLATBUFFERS_INTERNAL | |||
| // Our default offset / size type, 32bit on purpose on 64bit systems. | |||
| // Also, using a consistent offset type maintains compatibility of serialized | |||
| // offset values between 32bit and 64bit systems. | |||
| typedef uint32_t uoffset_t; | |||
| // Signed offsets for references that can go in both directions. | |||
| typedef int32_t soffset_t; | |||
| // Offset/index used in v-tables, can be changed to uint8_t in | |||
| // format forks to save a bit of space if desired. | |||
| typedef uint16_t voffset_t; | |||
| typedef uintmax_t largest_scalar_t; | |||
| // In 32bits, this evaluates to 2GB - 1 | |||
| #define FLATBUFFERS_MAX_BUFFER_SIZE ((1ULL << (sizeof(soffset_t) * 8 - 1)) - 1) | |||
| // We support aligning the contents of buffers up to this size. | |||
| #define FLATBUFFERS_MAX_ALIGNMENT 16 | |||
| #if defined(_MSC_VER) | |||
| #pragma warning(push) | |||
| #pragma warning(disable: 4127) // C4127: conditional expression is constant | |||
| #endif | |||
// Reverses the byte order of a scalar of size 1, 2, 4 or 8 bytes.
// The macros below map to the compiler's byte-swap intrinsics; the sizeof()
// chain is resolved at compile time, which is why the surrounding code
// suppresses MSVC's C4127 "constant conditional" warning.
template<typename T> T EndianSwap(T t) {
  #if defined(_MSC_VER)
    #define FLATBUFFERS_BYTESWAP16 _byteswap_ushort
    #define FLATBUFFERS_BYTESWAP32 _byteswap_ulong
    #define FLATBUFFERS_BYTESWAP64 _byteswap_uint64
  #else
    #if defined(__GNUC__) && __GNUC__ * 100 + __GNUC_MINOR__ < 408 && !defined(__clang__)
      // __builtin_bswap16 was missing prior to GCC 4.8.
      #define FLATBUFFERS_BYTESWAP16(x) \
        static_cast<uint16_t>(__builtin_bswap32(static_cast<uint32_t>(x) << 16))
    #else
      #define FLATBUFFERS_BYTESWAP16 __builtin_bswap16
    #endif
    #define FLATBUFFERS_BYTESWAP32 __builtin_bswap32
    #define FLATBUFFERS_BYTESWAP64 __builtin_bswap64
  #endif
  if (sizeof(T) == 1) {   // Compile-time if-then's.
    return t;
  } else if (sizeof(T) == 2) {
    // A union is used to reinterpret T's bytes as an unsigned integer for the
    // swap intrinsic, avoiding the pointer-cast aliasing a reinterpret_cast
    // of &t would introduce.
    union { T t; uint16_t i; } u;
    u.t = t;
    u.i = FLATBUFFERS_BYTESWAP16(u.i);
    return u.t;
  } else if (sizeof(T) == 4) {
    union { T t; uint32_t i; } u;
    u.t = t;
    u.i = FLATBUFFERS_BYTESWAP32(u.i);
    return u.t;
  } else if (sizeof(T) == 8) {
    union { T t; uint64_t i; } u;
    u.t = t;
    u.i = FLATBUFFERS_BYTESWAP64(u.i);
    return u.t;
  } else {
    FLATBUFFERS_ASSERT(0);  // Unsupported scalar size.
  }
}
| #if defined(_MSC_VER) | |||
| #pragma warning(pop) | |||
| #endif | |||
// Converts a scalar between in-buffer byte order and host byte order.
// FlatBuffers data is stored little-endian, so on little-endian hosts this
// is the identity; on big-endian hosts the bytes are swapped.
template<typename T> T EndianScalar(T t) {
#if FLATBUFFERS_LITTLEENDIAN
  return t;
#else
  return EndianSwap(t);
#endif
}
| template<typename T> | |||
| // UBSAN: C++ aliasing type rules, see std::bit_cast<> for details. | |||
| __supress_ubsan__("alignment") | |||
| T ReadScalar(const void *p) { | |||
| return EndianScalar(*reinterpret_cast<const T *>(p)); | |||
| } | |||
| template<typename T> | |||
| // UBSAN: C++ aliasing type rules, see std::bit_cast<> for details. | |||
| __supress_ubsan__("alignment") | |||
| void WriteScalar(void *p, T t) { | |||
| *reinterpret_cast<T *>(p) = EndianScalar(t); | |||
| } | |||
| template<typename T> struct Offset; | |||
| template<typename T> __supress_ubsan__("alignment") void WriteScalar(void *p, Offset<T> t) { | |||
| *reinterpret_cast<uoffset_t *>(p) = EndianScalar(t.o); | |||
| } | |||
// Computes how many bytes you'd have to pad to be able to write an
// "scalar_size" scalar if the buffer had grown to "buf_size" (downwards in
// memory). Meaningful when scalar_size is a power of two.
inline size_t PaddingBytes(size_t buf_size, size_t scalar_size) {
  // (0 - buf_size) is the unsigned two's-complement negation (bit-identical
  // to (~buf_size) + 1); masking keeps just the sub-alignment remainder.
  return (0 - buf_size) & (scalar_size - 1);
}
| } // namespace flatbuffers | |||
| #endif // FLATBUFFERS_BASE_H_ | |||
| @@ -0,0 +1,203 @@ | |||
| /* | |||
| * Copyright 2014 Google Inc. All rights reserved. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef FLATBUFFERS_CODE_GENERATORS_H_ | |||
| #define FLATBUFFERS_CODE_GENERATORS_H_ | |||
| #include <map> | |||
| #include <sstream> | |||
| #include "flatbuffers/idl.h" | |||
| namespace flatbuffers { | |||
// Utility class to assist in generating code through use of text templates.
//
// Example code:
//   CodeWriter code;
//   code.SetValue("NAME", "Foo");
//   code += "void {{NAME}}() { printf("%s", "{{NAME}}"); }";
//   code.SetValue("NAME", "Bar");
//   code += "void {{NAME}}() { printf("%s", "{{NAME}}"); }";
//   std::cout << code.ToString() << std::endl;
//
// Output:
//   void Foo() { printf("%s", "Foo"); }
//   void Bar() { printf("%s", "Bar"); }
class CodeWriter {
 public:
  CodeWriter() {}

  // Clears the current "written" code.
  void Clear() {
    stream_.str("");
    // Also reset any error/EOF state flags on the stream.
    stream_.clear();
  }

  // Associates a key with a value. All subsequent calls to operator+=, where
  // the specified key is contained in {{ and }} delimiters will be replaced by
  // the given value.
  void SetValue(const std::string &key, const std::string &value) {
    value_map_[key] = value;
  }

  // Appends the given text to the generated code as well as a newline
  // character. Any text within {{ and }} delimiters is replaced by values
  // previously stored in the CodeWriter by calling SetValue above. The newline
  // will be suppressed if the text ends with the \\ character.
  // (Defined out of line in the accompanying .cpp.)
  void operator+=(std::string text);

  // Returns the current contents of the CodeWriter as a std::string.
  std::string ToString() const { return stream_.str(); }

 private:
  std::map<std::string, std::string> value_map_;  // {{KEY}} -> replacement.
  std::stringstream stream_;                      // Accumulated output text.
};
| class BaseGenerator { | |||
| public: | |||
| virtual bool generate() = 0; | |||
| static std::string NamespaceDir(const Parser &parser, const std::string &path, | |||
| const Namespace &ns); | |||
| protected: | |||
| BaseGenerator(const Parser &parser, const std::string &path, | |||
| const std::string &file_name, | |||
| const std::string qualifying_start, | |||
| const std::string qualifying_separator) | |||
| : parser_(parser), | |||
| path_(path), | |||
| file_name_(file_name), | |||
| qualifying_start_(qualifying_start), | |||
| qualifying_separator_(qualifying_separator) {} | |||
| virtual ~BaseGenerator() {} | |||
| // No copy/assign. | |||
| BaseGenerator &operator=(const BaseGenerator &); | |||
| BaseGenerator(const BaseGenerator &); | |||
| std::string NamespaceDir(const Namespace &ns) const; | |||
| static const char *FlatBuffersGeneratedWarning(); | |||
| static std::string FullNamespace(const char *separator, const Namespace &ns); | |||
| static std::string LastNamespacePart(const Namespace &ns); | |||
| // tracks the current namespace for early exit in WrapInNameSpace | |||
| // c++, java and csharp returns a different namespace from | |||
| // the following default (no early exit, always fully qualify), | |||
| // which works for js and php | |||
| virtual const Namespace *CurrentNameSpace() const { return nullptr; } | |||
| // Ensure that a type is prefixed with its namespace whenever it is used | |||
| // outside of its namespace. | |||
| std::string WrapInNameSpace(const Namespace *ns, | |||
| const std::string &name) const; | |||
| std::string WrapInNameSpace(const Definition &def) const; | |||
| std::string GetNameSpace(const Definition &def) const; | |||
| const Parser &parser_; | |||
| const std::string &path_; | |||
| const std::string &file_name_; | |||
| const std::string qualifying_start_; | |||
| const std::string qualifying_separator_; | |||
| }; | |||
// Delimiters used when emitting a documentation comment in a target
// language (e.g. an opening line, a per-line prefix, and a closing line).
struct CommentConfig {
  const char *first_line;
  const char *content_line_prefix;
  const char *last_line;
};

// Emits the documentation-comment lines `dc` into `code_ptr`, using the
// delimiters from `config` and prepending `prefix` to each line.
// (Defined in the accompanying .cpp.)
extern void GenComment(const std::vector<std::string> &dc,
                       std::string *code_ptr, const CommentConfig *config,
                       const char *prefix = "");
// Strategy interface for rendering a floating-point default value as source
// text in a target language. Subclasses provide the finite/Inf/NaN spellings
// for both float and double; GenFloatConstant (defined out of line,
// presumably via GenFloatConstantImpl) selects among them.
class FloatConstantGenerator {
 public:
  virtual ~FloatConstantGenerator() {}

  // Renders the default value of the given (float or double) field as code.
  std::string GenFloatConstant(const FieldDef &field) const;

 private:
  // Finite value; `src` is the original textual form from the schema.
  virtual std::string Value(double v, const std::string &src) const = 0;
  virtual std::string Inf(double v) const = 0;
  virtual std::string NaN(double v) const = 0;

  virtual std::string Value(float v, const std::string &src) const = 0;
  virtual std::string Inf(float v) const = 0;
  virtual std::string NaN(float v) const = 0;

  // Shared implementation parameterized on float/double.
  template<typename T>
  std::string GenFloatConstantImpl(const FieldDef &field) const;
};
// FloatConstantGenerator for languages that use the same spelling for float
// and double special values (constructed with the NaN/+Inf/-Inf spellings).
class SimpleFloatConstantGenerator : public FloatConstantGenerator {
 public:
  SimpleFloatConstantGenerator(const char *nan_number,
                               const char *pos_inf_number,
                               const char *neg_inf_number);

 private:
  std::string Value(double v,
                    const std::string &src) const FLATBUFFERS_OVERRIDE;
  std::string Inf(double v) const FLATBUFFERS_OVERRIDE;
  std::string NaN(double v) const FLATBUFFERS_OVERRIDE;

  std::string Value(float v, const std::string &src) const FLATBUFFERS_OVERRIDE;
  std::string Inf(float v) const FLATBUFFERS_OVERRIDE;
  std::string NaN(float v) const FLATBUFFERS_OVERRIDE;

  const std::string nan_number_;      // Spelling for NaN.
  const std::string pos_inf_number_;  // Spelling for +infinity.
  const std::string neg_inf_number_;  // Spelling for -infinity.
};
// C++, C#, Java like generator.
// FloatConstantGenerator for languages whose float and double special values
// are spelled with type-specific prefixes (e.g. a double vs. single prefix
// combined with the NaN/Inf number text).
class TypedFloatConstantGenerator : public FloatConstantGenerator {
 public:
  TypedFloatConstantGenerator(const char *double_prefix,
                              const char *single_prefix, const char *nan_number,
                              const char *pos_inf_number,
                              const char *neg_inf_number = "");

 private:
  std::string Value(double v,
                    const std::string &src) const FLATBUFFERS_OVERRIDE;
  std::string Inf(double v) const FLATBUFFERS_OVERRIDE;
  std::string NaN(double v) const FLATBUFFERS_OVERRIDE;

  std::string Value(float v, const std::string &src) const FLATBUFFERS_OVERRIDE;
  std::string Inf(float v) const FLATBUFFERS_OVERRIDE;
  std::string NaN(float v) const FLATBUFFERS_OVERRIDE;

  // Helpers combining a type prefix with the stored number text.
  std::string MakeNaN(const std::string &prefix) const;
  std::string MakeInf(bool neg, const std::string &prefix) const;

  const std::string double_prefix_;   // Prefix for double-typed constants.
  const std::string single_prefix_;   // Prefix for float-typed constants.
  const std::string nan_number_;
  const std::string pos_inf_number_;
  const std::string neg_inf_number_;
};
}  // namespace flatbuffers
| #endif // FLATBUFFERS_CODE_GENERATORS_H_ | |||
| @@ -0,0 +1,96 @@ | |||
| /* | |||
| * Copyright 2017 Google Inc. All rights reserved. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <functional> | |||
| #include <limits> | |||
| #include <string> | |||
| #include "flatbuffers/flatbuffers.h" | |||
| #include "flatbuffers/idl.h" | |||
| #include "flatbuffers/util.h" | |||
| #ifndef FLATC_H_ | |||
| # define FLATC_H_ | |||
| namespace flatbuffers { | |||
// Driver for the flatc compiler: parses input files and dispatches to the
// registered per-language output generators.
class FlatCompiler {
 public:
  // Output generator for the various programming languages and formats we
  // support.
  struct Generator {
    typedef bool (*GenerateFn)(const flatbuffers::Parser &parser,
                               const std::string &path,
                               const std::string &file_name);
    typedef std::string (*MakeRuleFn)(const flatbuffers::Parser &parser,
                                      const std::string &path,
                                      const std::string &file_name);
    GenerateFn generate;              // Emits output for one parsed file.
    const char *generator_opt_short;  // Short command-line option text.
    const char *generator_opt_long;   // Long command-line option text.
    const char *lang_name;            // Display name of the target language.
    bool schema_only;                 // True if it only consumes schemas.
    GenerateFn generateGRPC;          // gRPC stub generator, if any.
    flatbuffers::IDLOptions::Language lang;
    const char *generator_help;       // Help text for usage output.
    MakeRuleFn make_rule;             // Produces a make dependency rule.
  };

  // Callbacks used to report warnings and errors back to the host program.
  typedef void (*WarnFn)(const FlatCompiler *flatc, const std::string &warn,
                         bool show_exe_name);
  typedef void (*ErrorFn)(const FlatCompiler *flatc, const std::string &err,
                          bool usage, bool show_exe_name);

  // Parameters required to initialize the FlatCompiler.
  struct InitParams {
    InitParams()
        : generators(nullptr),
          num_generators(0),
          warn_fn(nullptr),
          error_fn(nullptr) {}

    const Generator *generators;  // Array of available generators.
    size_t num_generators;        // Number of entries in `generators`.
    WarnFn warn_fn;
    ErrorFn error_fn;
  };

  explicit FlatCompiler(const InitParams &params) : params_(params) {}

  // Runs the compiler over the given command line; returns an exit code.
  int Compile(int argc, const char **argv);

  std::string GetUsageString(const char *program_name) const;

 private:
  // Parses `contents` of `filename` into `parser`, resolving includes
  // against `include_directories`.
  void ParseFile(flatbuffers::Parser &parser, const std::string &filename,
                 const std::string &contents,
                 std::vector<const char *> &include_directories) const;

  // Loads a pre-compiled binary schema into `parser`.
  void LoadBinarySchema(Parser &parser, const std::string &filename,
                        const std::string &contents);

  void Warn(const std::string &warn, bool show_exe_name = true) const;

  void Error(const std::string &err, bool usage = true,
             bool show_exe_name = true) const;

  InitParams params_;
};
}  // namespace flatbuffers
| #endif // FLATC_H_ | |||
| @@ -0,0 +1,328 @@ | |||
| /* | |||
| * Copyright 2014 Google Inc. All rights reserved. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef FLATBUFFERS_GRPC_H_ | |||
| #define FLATBUFFERS_GRPC_H_ | |||
| // Helper functionality to glue FlatBuffers and GRPC. | |||
| #include "flatbuffers/flatbuffers.h" | |||
| #include "grpc++/support/byte_buffer.h" | |||
| #include "grpc/byte_buffer_reader.h" | |||
| namespace flatbuffers { | |||
| namespace grpc { | |||
| // Message is a typed wrapper around a buffer that manages the underlying | |||
| // `grpc_slice` and also provides MindSpore.flatbuffers-specific helpers such as `Verify` | |||
| // and `GetRoot`. Since it is backed by a `grpc_slice`, the underlying buffer | |||
| // is refcounted and ownership is be managed automatically. | |||
| template<class T> class Message { | |||
| public: | |||
| Message() : slice_(grpc_empty_slice()) {} | |||
| Message(grpc_slice slice, bool add_ref) | |||
| : slice_(add_ref ? grpc_slice_ref(slice) : slice) {} | |||
| Message &operator=(const Message &other) = delete; | |||
| Message(Message &&other) : slice_(other.slice_) { | |||
| other.slice_ = grpc_empty_slice(); | |||
| } | |||
| Message(const Message &other) = delete; | |||
| Message &operator=(Message &&other) { | |||
| grpc_slice_unref(slice_); | |||
| slice_ = other.slice_; | |||
| other.slice_ = grpc_empty_slice(); | |||
| return *this; | |||
| } | |||
| ~Message() { grpc_slice_unref(slice_); } | |||
| const uint8_t *mutable_data() const { return GRPC_SLICE_START_PTR(slice_); } | |||
| const uint8_t *data() const { return GRPC_SLICE_START_PTR(slice_); } | |||
| size_t size() const { return GRPC_SLICE_LENGTH(slice_); } | |||
| bool Verify() const { | |||
| Verifier verifier(data(), size()); | |||
| return verifier.VerifyBuffer<T>(nullptr); | |||
| } | |||
| T *GetMutableRoot() { return flatbuffers::GetMutableRoot<T>(mutable_data()); } | |||
| const T *GetRoot() const { return flatbuffers::GetRoot<T>(data()); } | |||
| // This is only intended for serializer use, or if you know what you're doing | |||
| const grpc_slice &BorrowSlice() const { return slice_; } | |||
| private: | |||
| grpc_slice slice_; | |||
| }; | |||
| class MessageBuilder; | |||
// SliceAllocator is a gRPC-specific allocator that uses the `grpc_slice`
// refcounted slices to manage memory ownership. This makes it easy and
// efficient to transfer buffers to gRPC.
// Note: at most one allocation is outstanding at a time (allocate() asserts
// the stored slice is empty), which matches FlatBufferBuilder's usage.
class SliceAllocator : public Allocator {
 public:
  SliceAllocator() : slice_(grpc_empty_slice()) {}

  SliceAllocator(const SliceAllocator &other) = delete;
  SliceAllocator &operator=(const SliceAllocator &other) = delete;

  SliceAllocator(SliceAllocator &&other)
    : slice_(grpc_empty_slice()) {
    // default-construct and swap idiom
    swap(other);
  }

  SliceAllocator &operator=(SliceAllocator &&other) {
    // move-construct and swap idiom
    SliceAllocator temp(std::move(other));
    swap(temp);
    return *this;
  }

  void swap(SliceAllocator &other) {
    using std::swap;
    swap(slice_, other.slice_);
  }

  virtual ~SliceAllocator() { grpc_slice_unref(slice_); }

  // Allocates a fresh refcounted slice and returns its storage.
  virtual uint8_t *allocate(size_t size) override {
    FLATBUFFERS_ASSERT(GRPC_SLICE_IS_EMPTY(slice_));
    slice_ = grpc_slice_malloc(size);
    return GRPC_SLICE_START_PTR(slice_);
  }

  // Releases the slice; `p`/`size` must match the current allocation.
  virtual void deallocate(uint8_t *p, size_t size) override {
    FLATBUFFERS_ASSERT(p == GRPC_SLICE_START_PTR(slice_));
    FLATBUFFERS_ASSERT(size == GRPC_SLICE_LENGTH(slice_));
    grpc_slice_unref(slice_);
    slice_ = grpc_empty_slice();
  }

  // Grows the allocation, copying the in-use head and tail regions so the
  // buffer can keep being filled downward (see vector_downward).
  virtual uint8_t *reallocate_downward(uint8_t *old_p, size_t old_size,
                                       size_t new_size, size_t in_use_back,
                                       size_t in_use_front) override {
    FLATBUFFERS_ASSERT(old_p == GRPC_SLICE_START_PTR(slice_));
    FLATBUFFERS_ASSERT(old_size == GRPC_SLICE_LENGTH(slice_));
    FLATBUFFERS_ASSERT(new_size > old_size);
    grpc_slice old_slice = slice_;
    grpc_slice new_slice = grpc_slice_malloc(new_size);
    uint8_t *new_p = GRPC_SLICE_START_PTR(new_slice);
    memcpy_downward(old_p, old_size, new_p, new_size, in_use_back,
                    in_use_front);
    slice_ = new_slice;
    grpc_slice_unref(old_slice);
    return new_p;
  }

 private:
  // Returns the stored slice after asserting `p`/`size` match it.
  grpc_slice &get_slice(uint8_t *p, size_t size) {
    FLATBUFFERS_ASSERT(p == GRPC_SLICE_START_PTR(slice_));
    FLATBUFFERS_ASSERT(size == GRPC_SLICE_LENGTH(slice_));
    return slice_;
  }

  grpc_slice slice_;  // The single slice currently backing the allocation.

  friend class MessageBuilder;
};
// SliceAllocatorMember is a hack to ensure that the MessageBuilder's
// slice_allocator_ member is constructed before the FlatBufferBuilder, since
// the allocator is used in the FlatBufferBuilder ctor.
// (Base classes are initialized in declaration order before any members of
// the derived class, so putting the allocator in a base guarantees this.)
namespace detail {

struct SliceAllocatorMember {
  SliceAllocator slice_allocator_;
};

}  // namespace detail
// MessageBuilder is a gRPC-specific FlatBufferBuilder that uses SliceAllocator
// to allocate gRPC buffers.
class MessageBuilder : private detail::SliceAllocatorMember,
                       public FlatBufferBuilder {
 public:
  explicit MessageBuilder(uoffset_t initial_size = 1024)
      : FlatBufferBuilder(initial_size, &slice_allocator_, false) {}

  MessageBuilder(const MessageBuilder &other) = delete;
  MessageBuilder &operator=(const MessageBuilder &other) = delete;

  MessageBuilder(MessageBuilder &&other)
      : FlatBufferBuilder(1024, &slice_allocator_, false) {
    // Default construct and swap idiom.
    Swap(other);
  }

  /// Create a MessageBuilder from a FlatBufferBuilder.
  /// The source builder's buffer is adopted and wrapped in a grpc_slice that
  /// releases it with `dealloc` when the refcount drops to zero.
  explicit MessageBuilder(FlatBufferBuilder &&src, void (*dealloc)(void*, size_t) = &DefaultAllocator::dealloc)
      : FlatBufferBuilder(1024, &slice_allocator_, false) {
    src.Swap(*this);
    src.SwapBufAllocator(*this);
    if (buf_.capacity()) {
      uint8_t *buf = buf_.scratch_data();       // pointer to memory
      size_t capacity = buf_.capacity();        // size of memory
      slice_allocator_.slice_ = grpc_slice_new_with_len(buf, capacity, dealloc);
    }
    else {
      slice_allocator_.slice_ = grpc_empty_slice();
    }
  }

  /// Move-assign a FlatBufferBuilder to a MessageBuilder.
  /// Only FlatBufferBuilder with default allocator (basically, nullptr) is supported.
  MessageBuilder &operator=(FlatBufferBuilder &&src) {
    // Move construct a temporary and swap
    MessageBuilder temp(std::move(src));
    Swap(temp);
    return *this;
  }

  MessageBuilder &operator=(MessageBuilder &&other) {
    // Move construct a temporary and swap
    MessageBuilder temp(std::move(other));
    Swap(temp);
    return *this;
  }

  void Swap(MessageBuilder &other) {
    slice_allocator_.swap(other.slice_allocator_);
    FlatBufferBuilder::Swap(other);
    // After swapping the FlatBufferBuilder, we swap back the allocator, which restores
    // the original allocator back in place. This is necessary because MessageBuilder's
    // allocator is its own member (SliceAllocatorMember). The allocator passed to
    // FlatBufferBuilder::vector_downward must point to this member.
    buf_.swap_allocator(other.buf_);
  }

  // Releases the ownership of the buffer pointer.
  // Returns the size, offset, and the original grpc_slice that
  // allocated the buffer. Also see grpc_slice_unref().
  uint8_t *ReleaseRaw(size_t &size, size_t &offset, grpc_slice &slice) {
    uint8_t *buf = FlatBufferBuilder::ReleaseRaw(size, offset);
    slice = slice_allocator_.slice_;
    slice_allocator_.slice_ = grpc_empty_slice();
    return buf;
  }

  ~MessageBuilder() {}

  // GetMessage extracts the subslice of the buffer corresponding to the
  // flatbuffers-encoded region and wraps it in a `Message<T>` to handle buffer
  // ownership.
  template<class T> Message<T> GetMessage() {
    auto buf_data = buf_.scratch_data();  // pointer to memory
    auto buf_size = buf_.capacity();      // size of memory
    auto msg_data = buf_.data();          // pointer to msg
    auto msg_size = buf_.size();          // size of msg
    // Do some sanity checks on data/size
    FLATBUFFERS_ASSERT(msg_data);
    FLATBUFFERS_ASSERT(msg_size);
    FLATBUFFERS_ASSERT(msg_data >= buf_data);
    FLATBUFFERS_ASSERT(msg_data + msg_size <= buf_data + buf_size);
    // Calculate offsets from the buffer start
    auto begin = msg_data - buf_data;
    auto end = begin + msg_size;
    // Get the slice we are working with (no refcount change)
    grpc_slice slice = slice_allocator_.get_slice(buf_data, buf_size);
    // Extract a subslice of the existing slice (increment refcount)
    grpc_slice subslice = grpc_slice_sub(slice, begin, end);
    // Wrap the subslice in a `Message<T>`, but don't increment refcount
    Message<T> msg(subslice, false);
    return msg;
  }

  // Like GetMessage, but also resets the builder so it can be reused.
  template<class T> Message<T> ReleaseMessage() {
    Message<T> msg = GetMessage<T>();
    Reset();
    return msg;
  }

 private:
  // SliceAllocator slice_allocator_;  // part of SliceAllocatorMember
};
| } // namespace grpc | |||
}  // namespace flatbuffers
| namespace grpc { | |||
// Teaches gRPC how to (de)serialize flatbuffers::grpc::Message<T> payloads.
template<class T> class SerializationTraits<flatbuffers::grpc::Message<T>> {
 public:
  static grpc::Status Serialize(const flatbuffers::grpc::Message<T> &msg,
                                grpc_byte_buffer **buffer, bool *own_buffer) {
    // We are passed in a `Message<T>`, which is a wrapper around a
    // `grpc_slice`. We extract it here using `BorrowSlice()`. The const cast
    // is necessary because the `grpc_raw_byte_buffer_create` func expects
    // non-const slices in order to increment their refcounts.
    grpc_slice *slice = const_cast<grpc_slice *>(&msg.BorrowSlice());
    // Now use `grpc_raw_byte_buffer_create` to package the single slice into a
    // `grpc_byte_buffer`, incrementing the refcount in the process.
    *buffer = grpc_raw_byte_buffer_create(slice, 1);
    *own_buffer = true;
    return grpc::Status::OK;
  }

  // Deserialize by pulling the slice(s) out of the byte buffer and wrapping
  // them in a Message<T>, verifying the payload unless auto-verification is
  // disabled.
  static grpc::Status Deserialize(grpc_byte_buffer *buffer,
                                  flatbuffers::grpc::Message<T> *msg) {
    if (!buffer) {
      return ::grpc::Status(::grpc::StatusCode::INTERNAL, "No payload");
    }
    // Check if this is a single uncompressed slice.
    if ((buffer->type == GRPC_BB_RAW) &&
        (buffer->data.raw.compression == GRPC_COMPRESS_NONE) &&
        (buffer->data.raw.slice_buffer.count == 1)) {
      // If it is, then we can reference the `grpc_slice` directly.
      grpc_slice slice = buffer->data.raw.slice_buffer.slices[0];
      // We wrap a `Message<T>` around the slice, incrementing the refcount.
      *msg = flatbuffers::grpc::Message<T>(slice, true);
    } else {
      // Otherwise, we need to use `grpc_byte_buffer_reader_readall` to read
      // `buffer` into a single contiguous `grpc_slice`. The gRPC reader gives
      // us back a new slice with the refcount already incremented.
      grpc_byte_buffer_reader reader;
      grpc_byte_buffer_reader_init(&reader, buffer);
      grpc_slice slice = grpc_byte_buffer_reader_readall(&reader);
      grpc_byte_buffer_reader_destroy(&reader);
      // We wrap a `Message<T>` around the slice, but don't increment refcount
      *msg = flatbuffers::grpc::Message<T>(slice, false);
    }
    grpc_byte_buffer_destroy(buffer);
#if FLATBUFFERS_GRPC_DISABLE_AUTO_VERIFICATION
    return ::grpc::Status::OK;
#else
    if (msg->Verify()) {
      return ::grpc::Status::OK;
    } else {
      return ::grpc::Status(::grpc::StatusCode::INTERNAL,
                            "Message verification failed");
    }
#endif
  }
};
| } // namespace grpc | |||
| #endif // FLATBUFFERS_GRPC_H_ | |||
| @@ -0,0 +1,127 @@ | |||
| /* | |||
| * Copyright 2015 Google Inc. All rights reserved. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef FLATBUFFERS_HASH_H_ | |||
| #define FLATBUFFERS_HASH_H_ | |||
| #include <cstdint> | |||
| #include <cstring> | |||
| #include "flatbuffers/flatbuffers.h" | |||
| namespace flatbuffers { | |||
// FNV (Fowler-Noll-Vo) hash constants. The primary template is intentionally
// left without definitions; only the uint32_t/uint64_t specializations below
// provide the standard FNV prime and offset basis.
template<typename T> struct FnvTraits {
  static const T kFnvPrime;
  static const T kOffsetBasis;
};

template<> struct FnvTraits<uint32_t> {
  static const uint32_t kFnvPrime = 0x01000193;
  static const uint32_t kOffsetBasis = 0x811C9DC5;
};

template<> struct FnvTraits<uint64_t> {
  static const uint64_t kFnvPrime = 0x00000100000001b3ULL;
  static const uint64_t kOffsetBasis = 0xcbf29ce484222645ULL;
};

// FNV-1 hash of a NUL-terminated string: multiply by the prime, then xor in
// each byte.
template<typename T> T HashFnv1(const char *input) {
  T hash = FnvTraits<T>::kOffsetBasis;
  for (const char *c = input; *c; ++c) {
    hash *= FnvTraits<T>::kFnvPrime;
    hash ^= static_cast<unsigned char>(*c);
  }
  return hash;
}

// FNV-1a hash: like FNV-1 but xor first, then multiply (better dispersion).
template<typename T> T HashFnv1a(const char *input) {
  T hash = FnvTraits<T>::kOffsetBasis;
  for (const char *c = input; *c; ++c) {
    hash ^= static_cast<unsigned char>(*c);
    hash *= FnvTraits<T>::kFnvPrime;
  }
  return hash;
}

// 16-bit variants: computed by xor-folding the 32-bit hash, since there is
// no standard 16-bit FNV prime.
template <> inline uint16_t HashFnv1<uint16_t>(const char *input) {
  uint32_t hash = HashFnv1<uint32_t>(input);
  return (hash >> 16) ^ (hash & 0xffff);
}

template <> inline uint16_t HashFnv1a<uint16_t>(const char *input) {
  uint32_t hash = HashFnv1a<uint32_t>(input);
  return (hash >> 16) ^ (hash & 0xffff);
}

// Associates a user-facing name (as used in schema attributes / flags) with
// a hash function of width T.
template <typename T> struct NamedHashFunction {
  const char *name;
  typedef T (*HashFunction)(const char *);
  HashFunction function;
};

const NamedHashFunction<uint16_t> kHashFunctions16[] = {
  { "fnv1_16", HashFnv1<uint16_t> },
  { "fnv1a_16", HashFnv1a<uint16_t> },
};

const NamedHashFunction<uint32_t> kHashFunctions32[] = {
  { "fnv1_32", HashFnv1<uint32_t> },
  { "fnv1a_32", HashFnv1a<uint32_t> },
};

const NamedHashFunction<uint64_t> kHashFunctions64[] = {
  { "fnv1_64", HashFnv1<uint64_t> },
  { "fnv1a_64", HashFnv1a<uint64_t> },
};

// Shared linear lookup over one of the tables above; returns nullptr when
// `name` matches no entry. Replaces three previously duplicated loops.
template <typename T, std::size_t N>
inline typename NamedHashFunction<T>::HashFunction FindHashFunctionImpl(
    const NamedHashFunction<T> (&functions)[N], const char *name) {
  for (std::size_t i = 0; i < N; ++i) {
    if (std::strcmp(name, functions[i].name) == 0) {
      return functions[i].function;
    }
  }
  return nullptr;
}

// Looks up a 16-bit hash function by name; nullptr if unknown.
inline NamedHashFunction<uint16_t>::HashFunction FindHashFunction16(
    const char *name) {
  return FindHashFunctionImpl(kHashFunctions16, name);
}

// Looks up a 32-bit hash function by name; nullptr if unknown.
inline NamedHashFunction<uint32_t>::HashFunction FindHashFunction32(
    const char *name) {
  return FindHashFunctionImpl(kHashFunctions32, name);
}

// Looks up a 64-bit hash function by name; nullptr if unknown.
inline NamedHashFunction<uint64_t>::HashFunction FindHashFunction64(
    const char *name) {
  return FindHashFunctionImpl(kHashFunctions64, name);
}
| } // namespace MindSpore.flatbuffers | |||
| #endif // FLATBUFFERS_HASH_H_ | |||
| @@ -0,0 +1,995 @@ | |||
| /* | |||
| * Copyright 2014 Google Inc. All rights reserved. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef FLATBUFFERS_IDL_H_ | |||
| #define FLATBUFFERS_IDL_H_ | |||
| #include <map> | |||
| #include <memory> | |||
| #include <stack> | |||
| #include "base.h" | |||
| #include "flatbuffers/flatbuffers.h" | |||
| #include "flexbuffers.h" | |||
| #include "hash.h" | |||
| #include "flatbuffers/reflection.h" | |||
| #if !defined(FLATBUFFERS_CPP98_STL) | |||
| # include <functional> | |||
| #endif // !defined(FLATBUFFERS_CPP98_STL) | |||
| // This file defines the data types representing a parsed IDL (Interface | |||
| // Definition Language) / MindSpore.schema file. | |||
| // Limits maximum depth of nested objects. | |||
| // Prevents stack overflow while parse MindSpore.flatbuffers or json. | |||
| #if !defined(FLATBUFFERS_MAX_PARSING_DEPTH) | |||
| # define FLATBUFFERS_MAX_PARSING_DEPTH 64 | |||
| #endif | |||
| namespace flatbuffers { | |||
// The order of these matters for Is*() functions below.
// Additionally, Parser::ParseType assumes bool..string is a contiguous range
// of type tokens.
// clang-format off
// X-macro table of every scalar base type. Each TD() row carries the type's
// name in every target language the generators support (see field list
// below the table).
#define FLATBUFFERS_GEN_TYPES_SCALAR(TD) \
  TD(NONE, "", uint8_t, byte, byte, byte, uint8, u8) \
  TD(UTYPE, "", uint8_t, byte, byte, byte, uint8, u8) /* begin scalar/int */ \
  TD(BOOL, "bool", uint8_t, boolean,bool, bool, bool, bool) \
  TD(CHAR, "byte", int8_t, byte, int8, sbyte, int8, i8) \
  TD(UCHAR, "ubyte", uint8_t, byte, byte, byte, uint8, u8) \
  TD(SHORT, "short", int16_t, short, int16, short, int16, i16) \
  TD(USHORT, "ushort", uint16_t, short, uint16, ushort, uint16, u16) \
  TD(INT, "int", int32_t, int, int32, int, int32, i32) \
  TD(UINT, "uint", uint32_t, int, uint32, uint, uint32, u32) \
  TD(LONG, "long", int64_t, long, int64, long, int64, i64) \
  TD(ULONG, "ulong", uint64_t, long, uint64, ulong, uint64, u64) /* end int */ \
  TD(FLOAT, "float", float, float, float32, float, float32, f32) /* begin float */ \
  TD(DOUBLE, "double", double, double, float64, double, float64, f64) /* end float/scalar */
// X-macro table of the offset-based (non-inline) base types.
#define FLATBUFFERS_GEN_TYPES_POINTER(TD) \
  TD(STRING, "string", Offset<void>, int, int, StringOffset, int, unused) \
  TD(VECTOR, "", Offset<void>, int, int, VectorOffset, int, unused) \
  TD(STRUCT, "", Offset<void>, int, int, int, int, unused) \
  TD(UNION, "", Offset<void>, int, int, int, int, unused)
// The fields are:
// - enum
// - FlatBuffers MindSpore.schema type.
// - C++ type.
// - Java type.
// - Go type.
// - C# / .Net type.
// - Python type.
// - Rust type.
// using these macros, we can now write code dealing with types just once, e.g.
/*
switch (type) {
  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, \
                         RTYPE) \
    case BASE_TYPE_ ## ENUM: \
      // do something specific to CTYPE here
    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
  #undef FLATBUFFERS_TD
}
*/
// Applies TD to every base type: scalars first, then pointer types.
#define FLATBUFFERS_GEN_TYPES(TD) \
        FLATBUFFERS_GEN_TYPES_SCALAR(TD) \
        FLATBUFFERS_GEN_TYPES_POINTER(TD)
// Create an enum for all the types above.
#ifdef __GNUC__
__extension__  // Stop GCC complaining about trailing comma with -Wpedantic.
#endif
enum BaseType {
  // Expands to BASE_TYPE_NONE, BASE_TYPE_UTYPE, ..., BASE_TYPE_UNION in the
  // exact order of the TD() rows; the Is*() predicates below depend on this
  // ordering, so do not reorder the tables.
  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, \
                         RTYPE) \
      BASE_TYPE_ ## ENUM,
    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
  #undef FLATBUFFERS_TD
};
// Compile-time check that every base type's C++ representation fits in
// largest_scalar_t; fails with a message naming the offending CTYPE.
#define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, \
                       RTYPE) \
    static_assert(sizeof(CTYPE) <= sizeof(largest_scalar_t), \
                  "define largest_scalar_t as " #CTYPE);
  FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
#undef FLATBUFFERS_TD
| inline bool IsScalar (BaseType t) { return t >= BASE_TYPE_UTYPE && | |||
| t <= BASE_TYPE_DOUBLE; } | |||
| inline bool IsInteger(BaseType t) { return t >= BASE_TYPE_UTYPE && | |||
| t <= BASE_TYPE_ULONG; } | |||
| inline bool IsFloat (BaseType t) { return t == BASE_TYPE_FLOAT || | |||
| t == BASE_TYPE_DOUBLE; } | |||
| inline bool IsLong (BaseType t) { return t == BASE_TYPE_LONG || | |||
| t == BASE_TYPE_ULONG; } | |||
| inline bool IsBool (BaseType t) { return t == BASE_TYPE_BOOL; } | |||
| inline bool IsOneByte(BaseType t) { return t >= BASE_TYPE_UTYPE && | |||
| t <= BASE_TYPE_UCHAR; } | |||
| // clang-format on | |||
// Per-type name and byte-size tables, indexed by BaseType (defined in the
// implementation file).
extern const char *const kTypeNames[];
extern const char kTypeSizes[];
// Inline size in bytes of a value of the given base type.
inline size_t SizeOf(BaseType t) { return kTypeSizes[t]; }
| struct StructDef; | |||
| struct EnumDef; | |||
| class Parser; | |||
| // Represents any type in the IDL, which is a combination of the BaseType | |||
| // and additional information for vectors/structs_. | |||
| struct Type { | |||
| explicit Type(BaseType _base_type = BASE_TYPE_NONE, StructDef *_sd = nullptr, | |||
| EnumDef *_ed = nullptr) | |||
| : base_type(_base_type), | |||
| element(BASE_TYPE_NONE), | |||
| struct_def(_sd), | |||
| enum_def(_ed) {} | |||
| bool operator==(const Type &o) { | |||
| return base_type == o.base_type && element == o.element && | |||
| struct_def == o.struct_def && enum_def == o.enum_def; | |||
| } | |||
| Type VectorType() const { return Type(element, struct_def, enum_def); } | |||
| Offset<reflection::Type> Serialize(FlatBufferBuilder *builder) const; | |||
| bool Deserialize(const Parser &parser, const reflection::Type *type); | |||
| BaseType base_type; | |||
| BaseType element; // only set if t == BASE_TYPE_VECTOR | |||
| StructDef *struct_def; // only set if t or element == BASE_TYPE_STRUCT | |||
| EnumDef *enum_def; // set if t == BASE_TYPE_UNION / BASE_TYPE_UTYPE, | |||
| // or for an integral type derived from an enum. | |||
| }; | |||
// Represents a parsed scalar value, its type, and field offset.
struct Value {
  Value()
      : constant("0"),
        // All-ones voffset_t is the "unset" sentinel for offset.
        offset(static_cast<voffset_t>(~(static_cast<voffset_t>(0U)))) {}
  Type type;
  std::string constant;  // Textual representation of the value; defaults to "0".
  voffset_t offset;      // Field offset; ~0 when not (yet) assigned.
};
// Helper class that retains the original order of a set of identifiers and
// also provides quick lookup.
template<typename T> class SymbolTable {
 public:
  // Owns its entries: every pointer passed to Add() is deleted here.
  ~SymbolTable() {
    for (auto it = vec.begin(); it != vec.end(); ++it) { delete *it; }
  }
  // Takes ownership of e (it is always appended to vec). Returns true if
  // name was already present — in that case the old dict mapping is kept
  // and e is reachable only through vec.
  bool Add(const std::string &name, T *e) {
    vector_emplace_back(&vec, e);
    auto it = dict.find(name);
    if (it != dict.end()) return true;
    dict[name] = e;
    return false;
  }
  // Re-keys an existing entry from oldname to newname; asserts if oldname
  // is absent.
  void Move(const std::string &oldname, const std::string &newname) {
    auto it = dict.find(oldname);
    if (it != dict.end()) {
      auto obj = it->second;
      dict.erase(it);
      dict[newname] = obj;
    } else {
      FLATBUFFERS_ASSERT(false);
    }
  }
  // Returns the entry registered under name, or nullptr if none.
  T *Lookup(const std::string &name) const {
    auto it = dict.find(name);
    return it == dict.end() ? nullptr : it->second;
  }
 public:
  std::map<std::string, T *> dict;  // quick lookup
  std::vector<T *> vec;  // Used to iterate in order of insertion
};
// A name space, as set in the MindSpore.schema.
struct Namespace {
  Namespace() : from_table(0) {}
  // Given a (potentially unqualified) name, return the "fully qualified" name
  // which has a full namespaced descriptor.
  // With max_components you can request less than the number of components
  // the current namespace has.
  std::string GetFullyQualifiedName(const std::string &name,
                                    size_t max_components = 1000) const;
  std::vector<std::string> components;
  size_t from_table;  // Part of the namespace corresponds to a message/table.
};
// Base class for all definition types (fields, structs_, enums_).
struct Definition {
  Definition()
      : generated(false),
        defined_namespace(nullptr),
        serialized_location(0),
        index(-1),
        refcount(1) {}
  // Serializes this definition's attribute list into builder.
  flatbuffers::Offset<
      flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>>
      SerializeAttributes(FlatBufferBuilder *builder, const Parser &parser) const;
  bool DeserializeAttributes(Parser &parser,
                             const Vector<Offset<reflection::KeyValue>> *attrs);
  std::string name;
  std::string file;  // File the definition came from.
  std::vector<std::string> doc_comment;  // Doc comments attached in the schema.
  SymbolTable<Value> attributes;  // Attributes attached to this definition.
  bool generated;  // did we already output code for this definition?
  Namespace *defined_namespace;  // Where it was defined.
  // For use with Serialize()
  uoffset_t serialized_location;
  int index;  // Inside the vector it is stored.
  int refcount;  // Reference count; starts at 1.
};
| struct FieldDef : public Definition { | |||
| FieldDef() | |||
| : deprecated(false), | |||
| required(false), | |||
| key(false), | |||
| shared(false), | |||
| native_inline(false), | |||
| flexbuffer(false), | |||
| nested_flatbuffer(NULL), | |||
| padding(0) {} | |||
| Offset<reflection::Field> Serialize(FlatBufferBuilder *builder, uint16_t id, | |||
| const Parser &parser) const; | |||
| bool Deserialize(Parser &parser, const reflection::Field *field); | |||
| Value value; | |||
| bool deprecated; // Field is allowed to be present in old data, but can't be. | |||
| // written in new data nor accessed in new code. | |||
| bool required; // Field must always be present. | |||
| bool key; // Field functions as a key for creating sorted vectors. | |||
| bool shared; // Field will be using string pooling (i.e. CreateSharedString) | |||
| // as default serialization behavior if field is a string. | |||
| bool native_inline; // Field will be defined inline (instead of as a pointer) | |||
| // for native tables if field is a struct. | |||
| bool flexbuffer; // This field contains FlexBuffer data. | |||
| StructDef *nested_flatbuffer; // This field contains nested FlatBuffer data. | |||
| size_t padding; // Bytes to always pad after this field. | |||
| }; | |||
// A table or (fixed-size) struct definition, with its ordered fields.
struct StructDef : public Definition {
  StructDef()
      : fixed(false),
        predecl(true),
        sortbysize(true),
        has_key(false),
        minalign(1),
        bytesize(0) {}
  // Rounds bytesize up to a multiple of min_align and records the added
  // padding on the last field (if any).
  void PadLastField(size_t min_align) {
    auto padding = PaddingBytes(bytesize, min_align);
    bytesize += padding;
    if (fields.vec.size()) fields.vec.back()->padding = padding;
  }
  Offset<reflection::Object> Serialize(FlatBufferBuilder *builder,
                                       const Parser &parser) const;
  bool Deserialize(Parser &parser, const reflection::Object *object);
  SymbolTable<FieldDef> fields;  // Owns the FieldDef instances.
  bool fixed;       // If it's struct, not a table.
  bool predecl;     // If it's used before it was defined.
  bool sortbysize;  // Whether fields come in the declaration or size order.
  bool has_key;     // It has a key field.
  size_t minalign;  // What the whole object needs to be aligned to.
  size_t bytesize;  // Size if fixed.
  flatbuffers::unique_ptr<std::string> original_location;
};
| inline bool IsStruct(const Type &type) { | |||
| return type.base_type == BASE_TYPE_STRUCT && type.struct_def->fixed; | |||
| } | |||
| inline size_t InlineSize(const Type &type) { | |||
| return IsStruct(type) ? type.struct_def->bytesize : SizeOf(type.base_type); | |||
| } | |||
| inline size_t InlineAlignment(const Type &type) { | |||
| return IsStruct(type) ? type.struct_def->minalign : SizeOf(type.base_type); | |||
| } | |||
// A single enumerator: a name/value pair, plus the member type when the
// enclosing enum is a union.
struct EnumVal {
  EnumVal(const std::string &_name, int64_t _val) : name(_name), value(_val) {}
  EnumVal() : value(0) {}
  Offset<reflection::EnumVal> Serialize(FlatBufferBuilder *builder, const Parser &parser) const;
  bool Deserialize(const Parser &parser, const reflection::EnumVal *val);
  bool IsZero() const { return 0 == value; }
  bool IsNonZero() const { return !IsZero(); }
  std::string name;
  std::vector<std::string> doc_comment;  // Doc comments attached in the schema.
  int64_t value;
  Type union_type;  // For union members: the member's type.
};
// An enum (or union) definition, with its ordered values.
struct EnumDef : public Definition {
  EnumDef() : is_union(false), uses_multiple_type_instances(false) {}
  // Finds the EnumVal with the given numeric value, or nullptr. For unions
  // the first entry is skipped unless skip_union_default is false.
  EnumVal *ReverseLookup(int64_t enum_idx, bool skip_union_default = true) {
    for (auto it = Vals().begin() +
                   static_cast<int>(is_union && skip_union_default);
         it != Vals().end(); ++it) {
      if ((*it)->value == enum_idx) { return *it; }
    }
    return nullptr;
  }
  Offset<reflection::Enum> Serialize(FlatBufferBuilder *builder, const Parser &parser) const;
  bool Deserialize(Parser &parser, const reflection::Enum *values);
  // Number of enumerators.
  size_t size() const { return vals.vec.size(); }
  // Enumerators in declaration order.
  const std::vector<EnumVal *> &Vals() const {
    return vals.vec;
  }
  SymbolTable<EnumVal> vals;  // Owns the EnumVal instances.
  bool is_union;
  // Type is a union which uses type aliases where at least one type is
  // available under two different names.
  bool uses_multiple_type_instances;
  Type underlying_type;
};
| inline bool EqualByName(const Type &a, const Type &b) { | |||
| return a.base_type == b.base_type && a.element == b.element && | |||
| (a.struct_def == b.struct_def || | |||
| a.struct_def->name == b.struct_def->name) && | |||
| (a.enum_def == b.enum_def || a.enum_def->name == b.enum_def->name); | |||
| } | |||
| struct RPCCall : public Definition { | |||
| Offset<reflection::RPCCall> Serialize(FlatBufferBuilder *builder, const Parser &parser) const; | |||
| bool Deserialize(Parser &parser, const reflection::RPCCall *call); | |||
| StructDef *request, *response; | |||
| }; | |||
// An RPC service definition: a named, ordered collection of RPC calls.
struct ServiceDef : public Definition {
  Offset<reflection::Service> Serialize(FlatBufferBuilder *builder, const Parser &parser) const;
  bool Deserialize(Parser &parser, const reflection::Service *service);
  SymbolTable<RPCCall> calls;  // Owns the RPCCall instances.
};
// Container of options that may apply to any of the source/text generators.
struct IDLOptions {
  bool strict_json;
  bool skip_js_exports;
  bool use_goog_js_export_format;
  bool use_ES6_js_export_format;
  bool output_default_scalars_in_json;
  int indent_step;
  bool output_enum_identifiers;
  bool prefixed_enums;
  bool scoped_enums;
  bool include_dependence_headers;
  bool mutable_buffer;
  bool one_file;
  bool proto_mode;
  bool proto_oneof_union;
  bool generate_all;
  bool skip_unexpected_fields_in_json;
  bool generate_name_strings;
  bool generate_object_based_api;
  bool gen_compare;
  std::string cpp_object_api_pointer_type;
  std::string cpp_object_api_string_type;
  bool cpp_object_api_string_flexible_constructor;
  bool gen_nullable;
  bool gen_generated;
  std::string object_prefix;
  std::string object_suffix;
  bool union_value_namespacing;
  bool allow_non_utf8;
  bool natural_utf8;
  std::string include_prefix;
  bool keep_include_path;
  bool binary_schema_comments;
  bool binary_schema_builtins;
  bool skip_flatbuffers_import;
  std::string go_import;
  std::string go_namespace;
  bool reexport_ts_modules;
  bool js_ts_short_names;
  bool protobuf_ascii_alike;
  bool size_prefixed;
  std::string root_type;
  bool force_defaults;
  // Possible options for the more general generator below.
  enum Language {
    kJava = 1 << 0,
    kCSharp = 1 << 1,
    kGo = 1 << 2,
    kCpp = 1 << 3,
    kJs = 1 << 4,
    kPython = 1 << 5,
    kPhp = 1 << 6,
    kJson = 1 << 7,
    kBinary = 1 << 8,
    kTs = 1 << 9,
    kJsonSchema = 1 << 10,
    kDart = 1 << 11,
    kLua = 1 << 12,
    kLobster = 1 << 13,
    kRust = 1 << 14,
    kMAX
  };
  Language lang;
  enum MiniReflect { kNone, kTypes, kTypesAndNames };
  MiniReflect mini_reflect;
  // The corresponding language bit will be set if a language is included
  // for code generation.
  unsigned long lang_to_generate;
  // If set (default behavior), empty string and vector fields will be set to
  // nullptr to make the flatbuffer more compact.
  bool set_empty_to_null;
  // NOTE: initializers must stay in member declaration order; the
  // std::string members not listed here default-construct to "".
  IDLOptions()
      : strict_json(false),
        skip_js_exports(false),
        use_goog_js_export_format(false),
        use_ES6_js_export_format(false),
        output_default_scalars_in_json(false),
        indent_step(2),
        output_enum_identifiers(true),
        prefixed_enums(true),
        scoped_enums(false),
        include_dependence_headers(true),
        mutable_buffer(false),
        one_file(false),
        proto_mode(false),
        proto_oneof_union(false),
        generate_all(false),
        skip_unexpected_fields_in_json(false),
        generate_name_strings(false),
        generate_object_based_api(false),
        gen_compare(false),
        cpp_object_api_pointer_type("std::unique_ptr"),
        cpp_object_api_string_flexible_constructor(false),
        gen_nullable(false),
        gen_generated(false),
        object_suffix("T"),
        union_value_namespacing(true),
        allow_non_utf8(false),
        natural_utf8(false),
        keep_include_path(false),
        binary_schema_comments(false),
        binary_schema_builtins(false),
        skip_flatbuffers_import(false),
        reexport_ts_modules(true),
        js_ts_short_names(false),
        protobuf_ascii_alike(false),
        size_prefixed(false),
        force_defaults(false),
        lang(IDLOptions::kJava),
        mini_reflect(IDLOptions::kNone),
        lang_to_generate(0),
        set_empty_to_null(true) {}
};
// This encapsulates where the parser is in the current source file.
struct ParserState {
  ParserState()
      : cursor_(nullptr),
        line_start_(nullptr),
        line_(0),
        token_(-1),
        attr_is_trivial_ascii_string_(true) {}
 protected:
  // Rewinds the lexer to the beginning of source (line counter restarts).
  void ResetState(const char *source) {
    cursor_ = source;
    line_ = 0;
    MarkNewLine();
  }
  // Records the current cursor as the start of a new line and bumps line_.
  void MarkNewLine() {
    line_start_ = cursor_;
    line_ += 1;
  }
  // Offset of the cursor within the current line (i.e. the column).
  int64_t CursorPosition() const {
    FLATBUFFERS_ASSERT(cursor_ && line_start_ && cursor_ >= line_start_);
    return static_cast<int64_t>(cursor_ - line_start_);
  }
  const char *cursor_;      // Next character to be consumed.
  const char *line_start_;  // Start of the line cursor_ is on.
  int line_;  // the current line being parsed
  int token_;  // Most recently lexed token.
  // Flag: text in attribute_ is true ASCII string without escape
  // sequences. Only printable ASCII (without [\t\r\n]).
  // Used for number-in-string (and base64 string in future).
  bool attr_is_trivial_ascii_string_;
  std::string attribute_;  // Text associated with the current token.
  std::vector<std::string> doc_comment_;  // Accumulated doc comments.
};
// A way to make error propagation less error prone by requiring values to be
// checked.
// Once you create a value of this type you must either:
// - Call Check() on it.
// - Copy or assign it to another value.
// Failure to do so leads to an assert.
// This guarantees that this as return value cannot be ignored.
class CheckedError {
 public:
  explicit CheckedError(bool error)
      : is_error_(error), has_been_checked_(false) {}
  // Assignment transfers the must-be-checked obligation to *this and marks
  // the source as checked (which is why has_been_checked_ is mutable).
  CheckedError &operator=(const CheckedError &other) {
    is_error_ = other.is_error_;
    has_been_checked_ = false;
    other.has_been_checked_ = true;
    return *this;
  }
  CheckedError(const CheckedError &other) {
    *this = other;  // Use assignment operator.
  }
  // Asserts if this error was never checked before destruction.
  ~CheckedError() { FLATBUFFERS_ASSERT(has_been_checked_); }
  // Marks the error as checked; returns whether an error occurred.
  bool Check() {
    has_been_checked_ = true;
    return is_error_;
  }
 private:
  bool is_error_;
  // mutable so assignment can mark a const source object as checked.
  mutable bool has_been_checked_;
};
// Additionally, in GCC we can get these errors statically, for additional
// assurance:
// clang-format off
#ifdef __GNUC__
// warn_unused_result makes the compiler warn on any ignored return value.
#define FLATBUFFERS_CHECKED_ERROR CheckedError \
          __attribute__((warn_unused_result))
#else
#define FLATBUFFERS_CHECKED_ERROR CheckedError
#endif
// clang-format on
// The schema/JSON parser: turns schema (or JSON conforming to a parsed
// schema) text into the symbol tables and FlatBufferBuilder below.
class Parser : public ParserState {
 public:
  explicit Parser(const IDLOptions &options = IDLOptions())
      : current_namespace_(nullptr),
        empty_namespace_(nullptr),
        root_struct_def_(nullptr),
        opts(options),
        uses_flexbuffers_(false),
        source_(nullptr),
        anonymous_counter(0),
        recurse_protection_counter(0) {
    if (opts.force_defaults) {
      builder_.ForceDefaults(true);
    }
    // Start out with the empty namespace being current.
    empty_namespace_ = new Namespace();
    namespaces_.push_back(empty_namespace_);
    current_namespace_ = empty_namespace_;
    // Attribute names the parser accepts without an explicit declaration.
    known_attributes_["deprecated"] = true;
    known_attributes_["required"] = true;
    known_attributes_["key"] = true;
    known_attributes_["shared"] = true;
    known_attributes_["hash"] = true;
    known_attributes_["id"] = true;
    known_attributes_["force_align"] = true;
    known_attributes_["bit_flags"] = true;
    known_attributes_["original_order"] = true;
    known_attributes_["nested_flatbuffer"] = true;
    known_attributes_["csharp_partial"] = true;
    known_attributes_["streaming"] = true;
    known_attributes_["idempotent"] = true;
    known_attributes_["cpp_type"] = true;
    known_attributes_["cpp_ptr_type"] = true;
    known_attributes_["cpp_ptr_type_get"] = true;
    known_attributes_["cpp_str_type"] = true;
    known_attributes_["cpp_str_flex_ctor"] = true;
    known_attributes_["native_inline"] = true;
    known_attributes_["native_custom_alloc"] = true;
    known_attributes_["native_type"] = true;
    known_attributes_["native_default"] = true;
    known_attributes_["flexbuffer"] = true;
    known_attributes_["private"] = true;
  }
  // Owns every Namespace created during parsing.
  ~Parser() {
    for (auto it = namespaces_.begin(); it != namespaces_.end(); ++it) {
      delete *it;
    }
  }
  // Parse the string containing either MindSpore.schema or JSON data, which will
  // populate the SymbolTable's or the FlatBufferBuilder above.
  // include_paths is used to resolve any include statements, and typically
  // should at least include the project path (where you loaded source_ from).
  // include_paths must be nullptr terminated if specified.
  // If include_paths is nullptr, it will attempt to load from the current
  // directory.
  // If the source was loaded from a file and isn't an include file,
  // supply its name in source_filename.
  // All paths specified in this call must be in posix format, if you accept
  // paths from user input, please call PosixPath on them first.
  bool Parse(const char *_source, const char **include_paths = nullptr,
             const char *source_filename = nullptr);
  // Set the root type. May override the one set in the MindSpore.schema.
  bool SetRootType(const char *name);
  // Mark all definitions as already having code generated.
  void MarkGenerated();
  // Get the files recursively included by the given file. The returned
  // container will have at least the given file.
  std::set<std::string> GetIncludedFilesRecursive(
      const std::string &file_name) const;
  // Fills builder_ with a binary version of the MindSpore.schema parsed.
  // See reflection/reflection.fbs
  void Serialize();
  // Deserialize a MindSpore.schema buffer
  bool Deserialize(const uint8_t *buf, const size_t size);
  // Fills internal structure as if the MindSpore.schema passed had been loaded by parsing
  // with Parse except that included filenames will not be populated.
  bool Deserialize(const reflection::Schema* schema);
  Type* DeserializeType(const reflection::Type* type);
  // Checks that the MindSpore.schema represented by this parser is a safe evolution
  // of the MindSpore.schema provided. Returns non-empty error on any problems.
  std::string ConformTo(const Parser &base);
  // Similar to Parse(), but now only accepts JSON to be parsed into a
  // FlexBuffer.
  bool ParseFlexBuffer(const char *source, const char *source_filename,
                       flexbuffers::Builder *builder);
  StructDef *LookupStruct(const std::string &id) const;
  std::string UnqualifiedName(std::string fullQualifiedName);
  FLATBUFFERS_CHECKED_ERROR Error(const std::string &msg);
 private:
  void Message(const std::string &msg);
  void Warning(const std::string &msg);
  // --- Lexer ---
  FLATBUFFERS_CHECKED_ERROR ParseHexNum(int nibbles, uint64_t *val);
  FLATBUFFERS_CHECKED_ERROR Next();
  FLATBUFFERS_CHECKED_ERROR SkipByteOrderMark();
  bool Is(int t) const;
  bool IsIdent(const char *id) const;
  FLATBUFFERS_CHECKED_ERROR Expect(int t);
  std::string TokenToStringId(int t) const;
  // --- Schema / JSON parsing ---
  EnumDef *LookupEnum(const std::string &id);
  FLATBUFFERS_CHECKED_ERROR ParseNamespacing(std::string *id,
                                             std::string *last);
  FLATBUFFERS_CHECKED_ERROR ParseTypeIdent(Type &type);
  FLATBUFFERS_CHECKED_ERROR ParseType(Type &type);
  FLATBUFFERS_CHECKED_ERROR AddField(StructDef &struct_def,
                                     const std::string &name, const Type &type,
                                     FieldDef **dest);
  FLATBUFFERS_CHECKED_ERROR ParseField(StructDef &struct_def);
  FLATBUFFERS_CHECKED_ERROR ParseString(Value &val);
  FLATBUFFERS_CHECKED_ERROR ParseComma();
  FLATBUFFERS_CHECKED_ERROR ParseAnyValue(Value &val, FieldDef *field,
                                          size_t parent_fieldn,
                                          const StructDef *parent_struct_def,
                                          uoffset_t count,
                                          bool inside_vector = false);
  template<typename F>
  FLATBUFFERS_CHECKED_ERROR ParseTableDelimiters(size_t &fieldn,
                                                 const StructDef *struct_def,
                                                 F body);
  FLATBUFFERS_CHECKED_ERROR ParseTable(const StructDef &struct_def,
                                       std::string *value, uoffset_t *ovalue);
  void SerializeStruct(const StructDef &struct_def, const Value &val);
  template<typename F>
  FLATBUFFERS_CHECKED_ERROR ParseVectorDelimiters(uoffset_t &count, F body);
  FLATBUFFERS_CHECKED_ERROR ParseVector(const Type &type, uoffset_t *ovalue,
                                        FieldDef *field, size_t fieldn);
  FLATBUFFERS_CHECKED_ERROR ParseNestedFlatbuffer(Value &val, FieldDef *field,
                                                  size_t fieldn,
                                                  const StructDef *parent_struct_def);
  FLATBUFFERS_CHECKED_ERROR ParseMetaData(SymbolTable<Value> *attributes);
  FLATBUFFERS_CHECKED_ERROR TryTypedValue(const std::string *name, int dtoken, bool check, Value &e,
                                          BaseType req, bool *destmatch);
  FLATBUFFERS_CHECKED_ERROR ParseHash(Value &e, FieldDef* field);
  FLATBUFFERS_CHECKED_ERROR TokenError();
  FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e, bool check_now);
  FLATBUFFERS_CHECKED_ERROR ParseEnumFromString(const Type &type, std::string *result);
  StructDef *LookupCreateStruct(const std::string &name,
                                bool create_if_new = true,
                                bool definition = false);
  FLATBUFFERS_CHECKED_ERROR ParseEnum(bool is_union, EnumDef **dest);
  FLATBUFFERS_CHECKED_ERROR ParseNamespace();
  FLATBUFFERS_CHECKED_ERROR StartStruct(const std::string &name,
                                        StructDef **dest);
  FLATBUFFERS_CHECKED_ERROR StartEnum(const std::string &name,
                                      bool is_union,
                                      EnumDef **dest);
  FLATBUFFERS_CHECKED_ERROR ParseDecl();
  FLATBUFFERS_CHECKED_ERROR ParseService();
  // --- .proto import mode ---
  FLATBUFFERS_CHECKED_ERROR ParseProtoFields(StructDef *struct_def,
                                             bool isextend, bool inside_oneof);
  FLATBUFFERS_CHECKED_ERROR ParseProtoOption();
  FLATBUFFERS_CHECKED_ERROR ParseProtoKey();
  FLATBUFFERS_CHECKED_ERROR ParseProtoDecl();
  FLATBUFFERS_CHECKED_ERROR ParseProtoCurliesOrIdent();
  FLATBUFFERS_CHECKED_ERROR ParseTypeFromProtoType(Type *type);
  FLATBUFFERS_CHECKED_ERROR SkipAnyJsonValue();
  FLATBUFFERS_CHECKED_ERROR ParseFlexBufferValue(flexbuffers::Builder *builder);
  FLATBUFFERS_CHECKED_ERROR StartParseFile(const char *source,
                                           const char *source_filename);
  FLATBUFFERS_CHECKED_ERROR ParseRoot(const char *_source,
                                      const char **include_paths,
                                      const char *source_filename);
  FLATBUFFERS_CHECKED_ERROR DoParse(const char *_source,
                                    const char **include_paths,
                                    const char *source_filename,
                                    const char *include_filename);
  FLATBUFFERS_CHECKED_ERROR CheckClash(std::vector<FieldDef*> &fields,
                                       StructDef *struct_def,
                                       const char *suffix,
                                       BaseType baseType);
  bool SupportsAdvancedUnionFeatures() const;
  Namespace *UniqueNamespace(Namespace *ns);
  FLATBUFFERS_CHECKED_ERROR RecurseError();
  template<typename F> CheckedError Recurse(F f);
 public:
  // Results of parsing: the symbol tables below own their contents.
  SymbolTable<Type> types_;
  SymbolTable<StructDef> structs_;
  SymbolTable<EnumDef> enums_;
  SymbolTable<ServiceDef> services_;
  std::vector<Namespace *> namespaces_;
  Namespace *current_namespace_;
  Namespace *empty_namespace_;
  std::string error_;  // User readable error_ if Parse() == false
  FlatBufferBuilder builder_;  // any data contained in the file
  StructDef *root_struct_def_;
  std::string file_identifier_;
  std::string file_extension_;
  std::map<std::string, std::string> included_files_;
  std::map<std::string, std::set<std::string>> files_included_per_file_;
  std::vector<std::string> native_included_files_;
  std::map<std::string, bool> known_attributes_;
  IDLOptions opts;
  bool uses_flexbuffers_;
 private:
  const char *source_;  // Start of the text being parsed.
  std::string file_being_parsed_;
  std::vector<std::pair<Value, FieldDef *>> field_stack_;
  // Counter used when generating names for anonymous definitions
  // (presumably; confirm against the implementation file).
  int anonymous_counter;
  // Used with Recurse()/RecurseError() to bound nesting depth
  // (see FLATBUFFERS_MAX_PARSING_DEPTH above).
  int recurse_protection_counter;
};
| // Utility functions for multiple generators: | |||
| extern std::string MakeCamel(const std::string &in, bool first = true); | |||
| // Generate text (JSON) from a given FlatBuffer, and a given Parser | |||
// object that has been populated with the corresponding schema.
| // If ident_step is 0, no indentation will be generated. Additionally, | |||
| // if it is less than 0, no linefeeds will be generated either. | |||
| // See idl_gen_text.cpp. | |||
| // strict_json adds "quotes" around field names if true. | |||
| // If the flatbuffer cannot be encoded in JSON (e.g., it contains non-UTF-8 | |||
| // byte arrays in String values), returns false. | |||
| extern bool GenerateTextFromTable(const Parser &parser, | |||
| const void *table, | |||
| const std::string &tablename, | |||
| std::string *text); | |||
| extern bool GenerateText(const Parser &parser, | |||
| const void *flatbuffer, | |||
| std::string *text); | |||
| extern bool GenerateTextFile(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate binary files from a given FlatBuffer, and a given Parser | |||
// object that has been populated with the corresponding schema.
| // See idl_gen_general.cpp. | |||
| extern bool GenerateBinary(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate a C++ header from the definitions in the Parser object. | |||
| // See idl_gen_cpp. | |||
| extern bool GenerateCPP(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| extern bool GenerateDart(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate JavaScript or TypeScript code from the definitions in the Parser object. | |||
| // See idl_gen_js. | |||
| extern bool GenerateJSTS(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate Go files from the definitions in the Parser object. | |||
| // See idl_gen_go.cpp. | |||
| extern bool GenerateGo(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate Php code from the definitions in the Parser object. | |||
| // See idl_gen_php. | |||
| extern bool GeneratePhp(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate Python files from the definitions in the Parser object. | |||
| // See idl_gen_python.cpp. | |||
| extern bool GeneratePython(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate Lobster files from the definitions in the Parser object. | |||
| // See idl_gen_lobster.cpp. | |||
| extern bool GenerateLobster(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate Lua files from the definitions in the Parser object. | |||
| // See idl_gen_lua.cpp. | |||
| extern bool GenerateLua(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate Rust files from the definitions in the Parser object. | |||
| // See idl_gen_rust.cpp. | |||
| extern bool GenerateRust(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
// Generate a Json schema file
| // See idl_gen_json_schema.cpp. | |||
| extern bool GenerateJsonSchema(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate Java/C#/.. files from the definitions in the Parser object. | |||
| // See idl_gen_general.cpp. | |||
| extern bool GenerateGeneral(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
// Generate a schema file from the internal representation, useful after
// parsing a .proto schema.
| extern std::string GenerateFBS(const Parser &parser, | |||
| const std::string &file_name); | |||
| extern bool GenerateFBS(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate a make rule for the generated JavaScript or TypeScript code. | |||
| // See idl_gen_js.cpp. | |||
| extern std::string JSTSMakeRule(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate a make rule for the generated C++ header. | |||
| // See idl_gen_cpp.cpp. | |||
| extern std::string CPPMakeRule(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate a make rule for the generated Dart code | |||
| // see idl_gen_dart.cpp | |||
| extern std::string DartMakeRule(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate a make rule for the generated Rust code. | |||
| // See idl_gen_rust.cpp. | |||
| extern std::string RustMakeRule(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate a make rule for the generated Java/C#/... files. | |||
| // See idl_gen_general.cpp. | |||
| extern std::string GeneralMakeRule(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate a make rule for the generated text (JSON) files. | |||
| // See idl_gen_text.cpp. | |||
| extern std::string TextMakeRule(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_names); | |||
| // Generate a make rule for the generated binary files. | |||
| // See idl_gen_general.cpp. | |||
| extern std::string BinaryMakeRule(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate GRPC Cpp interfaces. | |||
| // See idl_gen_grpc.cpp. | |||
| bool GenerateCppGRPC(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate GRPC Go interfaces. | |||
| // See idl_gen_grpc.cpp. | |||
| bool GenerateGoGRPC(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
| // Generate GRPC Java classes. | |||
| // See idl_gen_grpc.cpp | |||
| bool GenerateJavaGRPC(const Parser &parser, | |||
| const std::string &path, | |||
| const std::string &file_name); | |||
}  // namespace flatbuffers
| #endif // FLATBUFFERS_IDL_H_ | |||
| @@ -0,0 +1,407 @@ | |||
| /* | |||
| * Copyright 2017 Google Inc. All rights reserved. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef FLATBUFFERS_MINIREFLECT_H_ | |||
| #define FLATBUFFERS_MINIREFLECT_H_ | |||
| #include "flatbuffers/flatbuffers.h" | |||
| #include "flatbuffers/util.h" | |||
| namespace flatbuffers { | |||
| // Utilities that can be used with the "mini reflection" tables present | |||
| // in generated code with --reflect-types (only types) or --reflect-names | |||
| // (also names). | |||
| // This allows basic reflection functionality such as pretty-printing | |||
// that does not require the use of the schema parser or loading of binary
// schema files at runtime (reflection.h).
| // For any of the functions below that take `const TypeTable *`, you pass | |||
| // `FooTypeTable()` if the type of the root is `Foo`. | |||
| // First, a generic iterator that can be used by multiple algorithms. | |||
// Generic visitor interface used by the iteration algorithms below.
// Subclass it and override only the callbacks you need; every default
// implementation is a no-op.
struct IterationVisitor {
  // These mark the scope of a table or struct.
  virtual void StartSequence() {}
  virtual void EndSequence() {}
  // Called for each field regardless of whether it is present or not.
  // If not present, val == nullptr. set_idx is the index of all set fields.
  virtual void Field(size_t /*field_idx*/, size_t /*set_idx*/,
                     ElementaryType /*type*/, bool /*is_vector*/,
                     const TypeTable * /*type_table*/, const char * /*name*/,
                     const uint8_t * /*val*/) {}
  // Called for a value that is actually present, after a field, or as part
  // of a vector. The const char * argument is the value's enum name when one
  // is known (see EnumName), else nullptr.
  virtual void UType(uint8_t, const char *) {}
  virtual void Bool(bool) {}
  virtual void Char(int8_t, const char *) {}
  virtual void UChar(uint8_t, const char *) {}
  virtual void Short(int16_t, const char *) {}
  virtual void UShort(uint16_t, const char *) {}
  virtual void Int(int32_t, const char *) {}
  virtual void UInt(uint32_t, const char *) {}
  virtual void Long(int64_t) {}
  virtual void ULong(uint64_t) {}
  virtual void Float(float) {}
  virtual void Double(double) {}
  virtual void String(const String *) {}
  virtual void Unknown(const uint8_t *) {}  // From a future version.
  // These mark the scope of a vector.
  virtual void StartVector() {}
  virtual void EndVector() {}
  virtual void Element(size_t /*i*/, ElementaryType /*type*/,
                       const TypeTable * /*type_table*/,
                       const uint8_t * /*val*/) {}
  virtual ~IterationVisitor() {}
};
// Returns the inline (in-buffer) byte size of one value of `type`.
// Tables, unions and strings are stored inline as a 4-byte offset; structs
// occupy their full byte size, which the generated TypeTable stores at
// values[num_elems].
inline size_t InlineSize(ElementaryType type, const TypeTable *type_table) {
  switch (type) {
    case ET_UTYPE:
    case ET_BOOL:
    case ET_CHAR:
    case ET_UCHAR: return 1;
    case ET_SHORT:
    case ET_USHORT: return 2;
    case ET_INT:
    case ET_UINT:
    case ET_FLOAT:
    case ET_STRING: return 4;  // Strings are referenced by a 4-byte offset.
    case ET_LONG:
    case ET_ULONG:
    case ET_DOUBLE: return 8;
    case ET_SEQUENCE:
      switch (type_table->st) {
        case ST_TABLE:
        case ST_UNION: return 4;  // Also offsets.
        case ST_STRUCT:
          // Struct byte size is stored after the per-element values.
          return static_cast<size_t>(type_table->values[type_table->num_elems]);
        default: FLATBUFFERS_ASSERT(false); return 1;
      }
    default: FLATBUFFERS_ASSERT(false); return 1;
  }
}
// Map a raw enum value to its index in `values`. With no table (values ==
// nullptr) the value is its own index; returns -1 when the value is unknown.
inline int64_t LookupEnum(int64_t enum_val, const int64_t *values,
                          size_t num_values) {
  if (!values) return enum_val;
  size_t idx = 0;
  while (idx < num_values) {
    if (values[idx] == enum_val) return static_cast<int64_t>(idx);
    ++idx;
  }
  return -1;  // Unknown enum value.
}
| template<typename T> const char *EnumName(T tval, const TypeTable *type_table) { | |||
| if (!type_table || !type_table->names) return nullptr; | |||
| auto i = LookupEnum(static_cast<int64_t>(tval), type_table->values, | |||
| type_table->num_elems); | |||
| if (i >= 0 && i < static_cast<int64_t>(type_table->num_elems)) { | |||
| return type_table->names[i]; | |||
| } | |||
| return nullptr; | |||
| } | |||
| void IterateObject(const uint8_t *obj, const TypeTable *type_table, | |||
| IterationVisitor *visitor); | |||
// Visit a single value of elementary type `type` located at `val`.
// For unions, `prev_val` must point at the preceding union-type field (or at
// the type vector when `vector_index` >= 0) so the concrete member type can
// be resolved before dispatching.
inline void IterateValue(ElementaryType type, const uint8_t *val,
                         const TypeTable *type_table, const uint8_t *prev_val,
                         soffset_t vector_index, IterationVisitor *visitor) {
  switch (type) {
    case ET_UTYPE: {
      auto tval = ReadScalar<uint8_t>(val);
      visitor->UType(tval, EnumName(tval, type_table));
      break;
    }
    case ET_BOOL: {
      visitor->Bool(ReadScalar<uint8_t>(val) != 0);
      break;
    }
    case ET_CHAR: {
      auto tval = ReadScalar<int8_t>(val);
      visitor->Char(tval, EnumName(tval, type_table));
      break;
    }
    case ET_UCHAR: {
      auto tval = ReadScalar<uint8_t>(val);
      visitor->UChar(tval, EnumName(tval, type_table));
      break;
    }
    case ET_SHORT: {
      auto tval = ReadScalar<int16_t>(val);
      visitor->Short(tval, EnumName(tval, type_table));
      break;
    }
    case ET_USHORT: {
      auto tval = ReadScalar<uint16_t>(val);
      visitor->UShort(tval, EnumName(tval, type_table));
      break;
    }
    case ET_INT: {
      auto tval = ReadScalar<int32_t>(val);
      visitor->Int(tval, EnumName(tval, type_table));
      break;
    }
    case ET_UINT: {
      auto tval = ReadScalar<uint32_t>(val);
      visitor->UInt(tval, EnumName(tval, type_table));
      break;
    }
    case ET_LONG: {
      visitor->Long(ReadScalar<int64_t>(val));
      break;
    }
    case ET_ULONG: {
      visitor->ULong(ReadScalar<uint64_t>(val));
      break;
    }
    case ET_FLOAT: {
      visitor->Float(ReadScalar<float>(val));
      break;
    }
    case ET_DOUBLE: {
      visitor->Double(ReadScalar<double>(val));
      break;
    }
    case ET_STRING: {
      val += ReadScalar<uoffset_t>(val);  // Strings are stored by offset.
      visitor->String(reinterpret_cast<const String *>(val));
      break;
    }
    case ET_SEQUENCE: {
      switch (type_table->st) {
        case ST_TABLE:
          val += ReadScalar<uoffset_t>(val);  // Tables are stored by offset.
          IterateObject(val, type_table, visitor);
          break;
        case ST_STRUCT: IterateObject(val, type_table, visitor); break;
        case ST_UNION: {
          val += ReadScalar<uoffset_t>(val);
          FLATBUFFERS_ASSERT(prev_val);
          auto union_type = *prev_val;  // Always a uint8_t.
          if (vector_index >= 0) {
            // Inside a vector of unions: read the type from the type vector.
            auto type_vec = reinterpret_cast<const Vector<uint8_t> *>(prev_val);
            union_type = type_vec->Get(static_cast<uoffset_t>(vector_index));
          }
          auto type_code_idx =
              LookupEnum(union_type, type_table->values, type_table->num_elems);
          if (type_code_idx >= 0 &&
              type_code_idx < static_cast<int32_t>(type_table->num_elems)) {
            auto type_code = type_table->type_codes[type_code_idx];
            switch (type_code.base_type) {
              case ET_SEQUENCE: {
                // Resolve the member's TypeTable and recurse into it.
                auto ref = type_table->type_refs[type_code.sequence_ref]();
                IterateObject(val, ref, visitor);
                break;
              }
              case ET_STRING:
                visitor->String(reinterpret_cast<const String *>(val));
                break;
              default: visitor->Unknown(val);
            }
          } else {
            visitor->Unknown(val);  // Union type not in this TypeTable.
          }
          break;
        }
        case ST_ENUM: FLATBUFFERS_ASSERT(false); break;
      }
      break;
    }
    default: {
      visitor->Unknown(val);
      break;
    }
  }
}
// Walk every field of a table or struct described by `type_table`, invoking
// the visitor callbacks for the sequence, each field, and each value.
inline void IterateObject(const uint8_t *obj, const TypeTable *type_table,
                          IterationVisitor *visitor) {
  visitor->StartSequence();
  const uint8_t *prev_val = nullptr;  // Needed to resolve union member types.
  size_t set_idx = 0;                 // Counts only fields actually present.
  for (size_t i = 0; i < type_table->num_elems; i++) {
    auto type_code = type_table->type_codes[i];
    auto type = static_cast<ElementaryType>(type_code.base_type);
    auto is_vector = type_code.is_vector != 0;
    auto ref_idx = type_code.sequence_ref;
    const TypeTable *ref = nullptr;
    if (ref_idx >= 0) { ref = type_table->type_refs[ref_idx](); }
    auto name = type_table->names ? type_table->names[i] : nullptr;
    const uint8_t *val = nullptr;
    if (type_table->st == ST_TABLE) {
      // Table fields may be absent; val stays nullptr in that case.
      val = reinterpret_cast<const Table *>(obj)->GetAddressOf(
          FieldIndexToOffset(static_cast<voffset_t>(i)));
    } else {
      // Struct fields live at fixed offsets recorded in values[].
      val = obj + type_table->values[i];
    }
    visitor->Field(i, set_idx, type, is_vector, ref, name, val);
    if (val) {
      set_idx++;
      if (is_vector) {
        val += ReadScalar<uoffset_t>(val);  // Follow offset to the vector.
        auto vec = reinterpret_cast<const Vector<uint8_t> *>(val);
        visitor->StartVector();
        auto elem_ptr = vec->Data();
        for (size_t j = 0; j < vec->size(); j++) {
          visitor->Element(j, type, ref, elem_ptr);
          IterateValue(type, elem_ptr, ref, prev_val, static_cast<soffset_t>(j),
                       visitor);
          elem_ptr += InlineSize(type, ref);  // Advance by one element.
        }
        visitor->EndVector();
      } else {
        IterateValue(type, val, ref, prev_val, -1, visitor);
      }
    }
    prev_val = val;
  }
  visitor->EndSequence();
}
| inline void IterateFlatBuffer(const uint8_t *buffer, | |||
| const TypeTable *type_table, | |||
| IterationVisitor *callback) { | |||
| IterateObject(GetRoot<uint8_t>(buffer), type_table, callback); | |||
| } | |||
| // Outputting a Flatbuffer to a string. Tries to conform as close to JSON / | |||
| // the output generated by idl_gen_text.cpp. | |||
| struct ToStringVisitor : public IterationVisitor { | |||
| std::string s; | |||
| std::string d; | |||
| bool q; | |||
| std::string in; | |||
| size_t indent_level; | |||
| bool vector_delimited; | |||
| ToStringVisitor(std::string delimiter, bool quotes, std::string indent, | |||
| bool vdelimited = true) | |||
| : d(delimiter), | |||
| q(quotes), | |||
| in(indent), | |||
| indent_level(0), | |||
| vector_delimited(vdelimited) {} | |||
| ToStringVisitor(std::string delimiter) | |||
| : d(delimiter), | |||
| q(false), | |||
| in(""), | |||
| indent_level(0), | |||
| vector_delimited(true) {} | |||
| void append_indent() { | |||
| for (size_t i = 0; i < indent_level; i++) { s += in; } | |||
| } | |||
| void StartSequence() { | |||
| s += "{"; | |||
| s += d; | |||
| indent_level++; | |||
| } | |||
| void EndSequence() { | |||
| s += d; | |||
| indent_level--; | |||
| append_indent(); | |||
| s += "}"; | |||
| } | |||
| void Field(size_t /*field_idx*/, size_t set_idx, ElementaryType /*type*/, | |||
| bool /*is_vector*/, const TypeTable * /*type_table*/, | |||
| const char *name, const uint8_t *val) { | |||
| if (!val) return; | |||
| if (set_idx) { | |||
| s += ","; | |||
| s += d; | |||
| } | |||
| append_indent(); | |||
| if (name) { | |||
| if (q) s += "\""; | |||
| s += name; | |||
| if (q) s += "\""; | |||
| s += ": "; | |||
| } | |||
| } | |||
| template<typename T> void Named(T x, const char *name) { | |||
| if (name) { | |||
| if (q) s += "\""; | |||
| s += name; | |||
| if (q) s += "\""; | |||
| } else { | |||
| s += NumToString(x); | |||
| } | |||
| } | |||
| void UType(uint8_t x, const char *name) { Named(x, name); } | |||
| void Bool(bool x) { s += x ? "true" : "false"; } | |||
| void Char(int8_t x, const char *name) { Named(x, name); } | |||
| void UChar(uint8_t x, const char *name) { Named(x, name); } | |||
| void Short(int16_t x, const char *name) { Named(x, name); } | |||
| void UShort(uint16_t x, const char *name) { Named(x, name); } | |||
| void Int(int32_t x, const char *name) { Named(x, name); } | |||
| void UInt(uint32_t x, const char *name) { Named(x, name); } | |||
| void Long(int64_t x) { s += NumToString(x); } | |||
| void ULong(uint64_t x) { s += NumToString(x); } | |||
| void Float(float x) { s += NumToString(x); } | |||
| void Double(double x) { s += NumToString(x); } | |||
| void String(const struct String *str) { | |||
| EscapeString(str->c_str(), str->size(), &s, true, false); | |||
| } | |||
| void Unknown(const uint8_t *) { s += "(?)"; } | |||
| void StartVector() { | |||
| s += "["; | |||
| if (vector_delimited) { | |||
| s += d; | |||
| indent_level++; | |||
| append_indent(); | |||
| } else { | |||
| s += " "; | |||
| } | |||
| } | |||
| void EndVector() { | |||
| if (vector_delimited) { | |||
| s += d; | |||
| indent_level--; | |||
| append_indent(); | |||
| } else { | |||
| s += " "; | |||
| } | |||
| s += "]"; | |||
| } | |||
| void Element(size_t i, ElementaryType /*type*/, | |||
| const TypeTable * /*type_table*/, const uint8_t * /*val*/) { | |||
| if (i) { | |||
| s += ","; | |||
| if (vector_delimited) { | |||
| s += d; | |||
| append_indent(); | |||
| } else { | |||
| s += " "; | |||
| } | |||
| } | |||
| } | |||
| }; | |||
| inline std::string FlatBufferToString(const uint8_t *buffer, | |||
| const TypeTable *type_table, | |||
| bool multi_line = false, | |||
| bool vector_delimited = true) { | |||
| ToStringVisitor tostring_visitor(multi_line ? "\n" : " ", false, "", | |||
| vector_delimited); | |||
| IterateFlatBuffer(buffer, type_table, &tostring_visitor); | |||
| return tostring_visitor.s; | |||
| } | |||
}  // namespace flatbuffers
| #endif // FLATBUFFERS_MINIREFLECT_H_ | |||
| @@ -0,0 +1,477 @@ | |||
| /* | |||
| * Copyright 2015 Google Inc. All rights reserved. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef FLATBUFFERS_REFLECTION_H_ | |||
| #define FLATBUFFERS_REFLECTION_H_ | |||
| // This is somewhat of a circular dependency because flatc (and thus this | |||
| // file) is needed to generate this header in the first place. | |||
| // Should normally not be a problem since it can be generated by the | |||
| // previous version of flatc whenever this code needs to change. | |||
| // See reflection/generate_code.sh | |||
| #include "flatbuffers/reflection_generated.h" | |||
| // Helper functionality for reflection. | |||
| namespace flatbuffers { | |||
// ------------------------- GETTERS -------------------------
// True for all scalar types: BaseType values in [UType, Double].
inline bool IsScalar(reflection::BaseType t) {
  return t >= reflection::UType && t <= reflection::Double;
}
// True for integral scalar types: BaseType values in [UType, ULong].
inline bool IsInteger(reflection::BaseType t) {
  return t >= reflection::UType && t <= reflection::ULong;
}
// True for floating-point scalar types (Float or Double).
inline bool IsFloat(reflection::BaseType t) {
  return t == reflection::Float || t == reflection::Double;
}
// True for 64-bit integer types (Long or ULong).
inline bool IsLong(reflection::BaseType t) {
  return t == reflection::Long || t == reflection::ULong;
}
| // Size of a basic type, don't use with structs. | |||
| inline size_t GetTypeSize(reflection::BaseType base_type) { | |||
| // This needs to correspond to the BaseType enum. | |||
| static size_t sizes[] = { 0, 1, 1, 1, 1, 2, 2, 4, 4, 8, 8, 4, 8, 4, 4, 4, 4 }; | |||
| return sizes[base_type]; | |||
| } | |||
| // Same as above, but now correctly returns the size of a struct if | |||
| // the field (or vector element) is a struct. | |||
| inline size_t GetTypeSizeInline(reflection::BaseType base_type, int type_index, | |||
| const reflection::Schema &schema) { | |||
| if (base_type == reflection::Obj && | |||
| schema.objects()->Get(type_index)->is_struct()) { | |||
| return schema.objects()->Get(type_index)->bytesize(); | |||
| } else { | |||
| return GetTypeSize(base_type); | |||
| } | |||
| } | |||
// Get the root table, regardless of what type it is (mutable overload).
inline Table *GetAnyRoot(uint8_t *flatbuf) {
  return GetMutableRoot<Table>(flatbuf);
}
// Const overload of the above.
inline const Table *GetAnyRoot(const uint8_t *flatbuf) {
  return GetRoot<Table>(flatbuf);
}
// Get a field's default, if you know it's an integer, and its exact type.
template<typename T> T GetFieldDefaultI(const reflection::Field &field) {
  // T must exactly match the field's declared scalar width.
  FLATBUFFERS_ASSERT(sizeof(T) == GetTypeSize(field.type()->base_type()));
  return static_cast<T>(field.default_integer());
}
// Get a field's default, if you know it's floating point and its exact type.
template<typename T> T GetFieldDefaultF(const reflection::Field &field) {
  FLATBUFFERS_ASSERT(sizeof(T) == GetTypeSize(field.type()->base_type()));
  return static_cast<T>(field.default_real());
}
// Get a field, if you know it's an integer, and its exact type.
// Returns the field's declared default when it is absent from the buffer.
template<typename T>
T GetFieldI(const Table &table, const reflection::Field &field) {
  FLATBUFFERS_ASSERT(sizeof(T) == GetTypeSize(field.type()->base_type()));
  return table.GetField<T>(field.offset(),
                           static_cast<T>(field.default_integer()));
}
// Get a field, if you know it's floating point and its exact type.
template<typename T>
T GetFieldF(const Table &table, const reflection::Field &field) {
  FLATBUFFERS_ASSERT(sizeof(T) == GetTypeSize(field.type()->base_type()));
  return table.GetField<T>(field.offset(),
                           static_cast<T>(field.default_real()));
}
// Get a field, if you know it's a string.
inline const String *GetFieldS(const Table &table,
                               const reflection::Field &field) {
  FLATBUFFERS_ASSERT(field.type()->base_type() == reflection::String);
  return table.GetPointer<const String *>(field.offset());
}
// Get a field, if you know it's a vector.
template<typename T>
Vector<T> *GetFieldV(const Table &table, const reflection::Field &field) {
  // sizeof(T) must match the vector's declared element size.
  FLATBUFFERS_ASSERT(field.type()->base_type() == reflection::Vector &&
                     sizeof(T) == GetTypeSize(field.type()->element()));
  return table.GetPointer<Vector<T> *>(field.offset());
}
// Get a field, if you know it's a vector, generically.
// To actually access elements, use the return value together with
// field.type()->element() in any of GetAnyVectorElemI below etc.
inline VectorOfAny *GetFieldAnyV(const Table &table,
                                 const reflection::Field &field) {
  return table.GetPointer<VectorOfAny *>(field.offset());
}
// Get a field, if you know it's a table (or a union, which is also stored
// as a table).
inline Table *GetFieldT(const Table &table, const reflection::Field &field) {
  FLATBUFFERS_ASSERT(field.type()->base_type() == reflection::Obj ||
                     field.type()->base_type() == reflection::Union);
  return table.GetPointer<Table *>(field.offset());
}
// Get a table's field, if you know it's a struct.
inline const Struct *GetFieldStruct(const Table &table,
                                    const reflection::Field &field) {
  // TODO: This does NOT check if the field is a table or struct, but we'd need
  // access to the schema to check the is_struct flag.
  FLATBUFFERS_ASSERT(field.type()->base_type() == reflection::Obj);
  return table.GetStruct<const Struct *>(field.offset());
}
// Get a structure's field, if you know it's a struct.
inline const Struct *GetFieldStruct(const Struct &structure,
                                    const reflection::Field &field) {
  FLATBUFFERS_ASSERT(field.type()->base_type() == reflection::Obj);
  return structure.GetStruct<const Struct *>(field.offset());
}
| // Raw helper functions used below: get any value in memory as a 64bit int, a | |||
| // double or a string. | |||
| // All scalars get static_cast to an int64_t, strings use strtoull, every other | |||
| // data type returns 0. | |||
| int64_t GetAnyValueI(reflection::BaseType type, const uint8_t *data); | |||
| // All scalars static cast to double, strings use strtod, every other data | |||
| // type is 0.0. | |||
| double GetAnyValueF(reflection::BaseType type, const uint8_t *data); | |||
| // All scalars converted using stringstream, strings as-is, and all other | |||
| // data types provide some level of debug-pretty-printing. | |||
| std::string GetAnyValueS(reflection::BaseType type, const uint8_t *data, | |||
| const reflection::Schema *schema, int type_index); | |||
// Get any table field as a 64bit int, regardless of what type it is.
// Falls back to the field's declared default when it is absent.
inline int64_t GetAnyFieldI(const Table &table,
                            const reflection::Field &field) {
  auto field_ptr = table.GetAddressOf(field.offset());
  return field_ptr ? GetAnyValueI(field.type()->base_type(), field_ptr)
                   : field.default_integer();
}
// Get any table field as a double, regardless of what type it is.
inline double GetAnyFieldF(const Table &table, const reflection::Field &field) {
  auto field_ptr = table.GetAddressOf(field.offset());
  return field_ptr ? GetAnyValueF(field.type()->base_type(), field_ptr)
                   : field.default_real();
}
// Get any table field as a string, regardless of what type it is.
// You may pass nullptr for the schema if you don't care to have fields that
// are of table type pretty-printed. Returns "" for absent fields.
inline std::string GetAnyFieldS(const Table &table,
                                const reflection::Field &field,
                                const reflection::Schema *schema) {
  auto field_ptr = table.GetAddressOf(field.offset());
  return field_ptr ? GetAnyValueS(field.type()->base_type(), field_ptr, schema,
                                  field.type()->index())
                   : "";
}
// Get any struct field as a 64bit int, regardless of what type it is.
// No default fallback here, in contrast with the Table overloads above.
inline int64_t GetAnyFieldI(const Struct &st, const reflection::Field &field) {
  return GetAnyValueI(field.type()->base_type(),
                      st.GetAddressOf(field.offset()));
}
// Get any struct field as a double, regardless of what type it is.
inline double GetAnyFieldF(const Struct &st, const reflection::Field &field) {
  return GetAnyValueF(field.type()->base_type(),
                      st.GetAddressOf(field.offset()));
}
// Get any struct field as a string, regardless of what type it is.
inline std::string GetAnyFieldS(const Struct &st,
                                const reflection::Field &field) {
  return GetAnyValueS(field.type()->base_type(),
                      st.GetAddressOf(field.offset()), nullptr, -1);
}
// Get any vector element as a 64bit int, regardless of what type it is.
// Elements are addressed as Data() + GetTypeSize(elem_type) * i.
inline int64_t GetAnyVectorElemI(const VectorOfAny *vec,
                                 reflection::BaseType elem_type, size_t i) {
  return GetAnyValueI(elem_type, vec->Data() + GetTypeSize(elem_type) * i);
}
// Get any vector element as a double, regardless of what type it is.
inline double GetAnyVectorElemF(const VectorOfAny *vec,
                                reflection::BaseType elem_type, size_t i) {
  return GetAnyValueF(elem_type, vec->Data() + GetTypeSize(elem_type) * i);
}
// Get any vector element as a string, regardless of what type it is.
inline std::string GetAnyVectorElemS(const VectorOfAny *vec,
                                     reflection::BaseType elem_type, size_t i) {
  return GetAnyValueS(elem_type, vec->Data() + GetTypeSize(elem_type) * i,
                      nullptr, -1);
}
// Get a vector element that's a table/string/vector from a generic vector.
// Pass Table/String/VectorOfAny as template parameter.
// Warning: does no typechecking.
template<typename T>
T *GetAnyVectorElemPointer(const VectorOfAny *vec, size_t i) {
  // Elements are stored as uoffset_t offsets relative to their own location.
  auto elem_ptr = vec->Data() + sizeof(uoffset_t) * i;
  return reinterpret_cast<T*>(elem_ptr + ReadScalar<uoffset_t>(elem_ptr));
}
// Get the inline-address of a vector element. Useful for Structs (pass Struct
// as template arg), or being able to address a range of scalars in-line.
// Get elem_size from GetTypeSizeInline().
// Note: little-endian data on all platforms, use EndianScalar() instead of
// raw pointer access with scalars).
template<typename T>
T *GetAnyVectorElemAddressOf(const VectorOfAny *vec, size_t i,
                             size_t elem_size) {
  // No bounds or type checking: the caller supplies the element size.
  return reinterpret_cast<T *>(vec->Data() + elem_size * i);
}
// Similarly, for elements of tables.
template<typename T>
T *GetAnyFieldAddressOf(const Table &table, const reflection::Field &field) {
  // NOTE(review): Table::GetAddressOf can return null for absent optional
  // fields (see SetAnyFieldI below) — callers should check the result.
  return reinterpret_cast<T *>(table.GetAddressOf(field.offset()));
}
// Similarly, for elements of structs.
template<typename T>
T *GetAnyFieldAddressOf(const Struct &st, const reflection::Field &field) {
  // Struct fields are stored inline at a fixed offset, so no presence check
  // is performed here (unlike the Table overload above).
  return reinterpret_cast<T *>(st.GetAddressOf(field.offset()));
}
| // ------------------------- SETTERS ------------------------- | |||
// Set any scalar field, if you know its exact type.
// Returns false if the field is not a scalar; T must match the field's
// declared size exactly (asserted below).
template<typename T>
bool SetField(Table *table, const reflection::Field &field, T val) {
  reflection::BaseType type = field.type()->base_type();
  if (!IsScalar(type)) { return false; }
  FLATBUFFERS_ASSERT(sizeof(T) == GetTypeSize(type));
  // Fetch the schema default — presumably Table::SetField uses it to decide
  // whether the value can be stored in place (fields holding their default
  // may not be physically present). TODO confirm against Table::SetField.
  T def;
  if (IsInteger(type)) {
    def = GetFieldDefaultI<T>(field);
  } else {
    FLATBUFFERS_ASSERT(IsFloat(type));
    def = GetFieldDefaultF<T>(field);
  }
  return table->SetField(field.offset(), val, def);
}
| // Raw helper functions used below: set any value in memory as a 64bit int, a | |||
| // double or a string. | |||
| // These work for all scalar values, but do nothing for other data types. | |||
| // To set a string, see SetString below. | |||
| void SetAnyValueI(reflection::BaseType type, uint8_t *data, int64_t val); | |||
| void SetAnyValueF(reflection::BaseType type, uint8_t *data, double val); | |||
| void SetAnyValueS(reflection::BaseType type, uint8_t *data, const char *val); | |||
| // Set any table field as a 64bit int, regardless of type what it is. | |||
| inline bool SetAnyFieldI(Table *table, const reflection::Field &field, | |||
| int64_t val) { | |||
| auto field_ptr = table->GetAddressOf(field.offset()); | |||
| if (!field_ptr) return val == GetFieldDefaultI<int64_t>(field); | |||
| SetAnyValueI(field.type()->base_type(), field_ptr, val); | |||
| return true; | |||
| } | |||
| // Set any table field as a double, regardless of what type it is. | |||
| inline bool SetAnyFieldF(Table *table, const reflection::Field &field, | |||
| double val) { | |||
| auto field_ptr = table->GetAddressOf(field.offset()); | |||
| if (!field_ptr) return val == GetFieldDefaultF<double>(field); | |||
| SetAnyValueF(field.type()->base_type(), field_ptr, val); | |||
| return true; | |||
| } | |||
| // Set any table field as a string, regardless of what type it is. | |||
| inline bool SetAnyFieldS(Table *table, const reflection::Field &field, | |||
| const char *val) { | |||
| auto field_ptr = table->GetAddressOf(field.offset()); | |||
| if (!field_ptr) return false; | |||
| SetAnyValueS(field.type()->base_type(), field_ptr, val); | |||
| return true; | |||
| } | |||
| // Set any struct field as a 64bit int, regardless of type what it is. | |||
| inline void SetAnyFieldI(Struct *st, const reflection::Field &field, | |||
| int64_t val) { | |||
| SetAnyValueI(field.type()->base_type(), st->GetAddressOf(field.offset()), | |||
| val); | |||
| } | |||
| // Set any struct field as a double, regardless of type what it is. | |||
| inline void SetAnyFieldF(Struct *st, const reflection::Field &field, | |||
| double val) { | |||
| SetAnyValueF(field.type()->base_type(), st->GetAddressOf(field.offset()), | |||
| val); | |||
| } | |||
| // Set any struct field as a string, regardless of type what it is. | |||
| inline void SetAnyFieldS(Struct *st, const reflection::Field &field, | |||
| const char *val) { | |||
| SetAnyValueS(field.type()->base_type(), st->GetAddressOf(field.offset()), | |||
| val); | |||
| } | |||
| // Set any vector element as a 64bit int, regardless of type what it is. | |||
| inline void SetAnyVectorElemI(VectorOfAny *vec, reflection::BaseType elem_type, | |||
| size_t i, int64_t val) { | |||
| SetAnyValueI(elem_type, vec->Data() + GetTypeSize(elem_type) * i, val); | |||
| } | |||
| // Set any vector element as a double, regardless of type what it is. | |||
| inline void SetAnyVectorElemF(VectorOfAny *vec, reflection::BaseType elem_type, | |||
| size_t i, double val) { | |||
| SetAnyValueF(elem_type, vec->Data() + GetTypeSize(elem_type) * i, val); | |||
| } | |||
| // Set any vector element as a string, regardless of type what it is. | |||
| inline void SetAnyVectorElemS(VectorOfAny *vec, reflection::BaseType elem_type, | |||
| size_t i, const char *val) { | |||
| SetAnyValueS(elem_type, vec->Data() + GetTypeSize(elem_type) * i, val); | |||
| } | |||
| // ------------------------- RESIZING SETTERS ------------------------- | |||
| // "smart" pointer for use with resizing vectors: turns a pointer inside | |||
| // a vector into a relative offset, such that it is not affected by resizes. | |||
| template<typename T, typename U> class pointer_inside_vector { | |||
| public: | |||
| pointer_inside_vector(T *ptr, std::vector<U> &vec) | |||
| : offset_(reinterpret_cast<uint8_t *>(ptr) - | |||
| reinterpret_cast<uint8_t *>(flatbuffers::vector_data(vec))), | |||
| vec_(vec) {} | |||
| T *operator*() const { | |||
| return reinterpret_cast<T *>( | |||
| reinterpret_cast<uint8_t *>(flatbuffers::vector_data(vec_)) + offset_); | |||
| } | |||
| T *operator->() const { return operator*(); } | |||
| void operator=(const pointer_inside_vector &piv); | |||
| private: | |||
| size_t offset_; | |||
| std::vector<U> &vec_; | |||
| }; | |||
// Helper to create the above easily without specifying template args.
template<typename T, typename U>
pointer_inside_vector<T, U> piv(T *ptr, std::vector<U> &vec) {
  // Function-template argument deduction supplies T and U for the caller.
  return pointer_inside_vector<T, U>(ptr, vec);
}
| inline const char *UnionTypeFieldSuffix() { return "_type"; } | |||
// Helper to figure out the actual table type a union refers to.
inline const reflection::Object &GetUnionType(
    const reflection::Schema &schema, const reflection::Object &parent,
    const reflection::Field &unionfield, const Table &table) {
  auto enumdef = schema.enums()->Get(unionfield.type()->index());
  // TODO: this is clumsy and slow, but no other way to find it?
  // The companion "<name>_type" scalar field holds the enum value selecting
  // the active union member.
  auto type_field = parent.fields()->LookupByKey(
      (unionfield.name()->str() + UnionTypeFieldSuffix()).c_str());
  FLATBUFFERS_ASSERT(type_field);
  auto union_type = GetFieldI<uint8_t>(table, *type_field);
  // NOTE(review): no null check on the enum-value lookup; an out-of-range
  // union_type would dereference null — verify buffers upstream.
  auto enumval = enumdef->values()->LookupByKey(union_type);
  return *enumval->object();
}
| // Changes the contents of a string inside a FlatBuffer. FlatBuffer must | |||
| // live inside a std::vector so we can resize the buffer if needed. | |||
| // "str" must live inside "flatbuf" and may be invalidated after this call. | |||
// If your FlatBuffer's root table is not the schema's root table, you should
// pass in your root_table type as well.
| void SetString(const reflection::Schema &schema, const std::string &val, | |||
| const String *str, std::vector<uint8_t> *flatbuf, | |||
| const reflection::Object *root_table = nullptr); | |||
// Resizes a flatbuffers::Vector inside a FlatBuffer. FlatBuffer must
// live inside a std::vector so we can resize the buffer if needed.
// "vec" must live inside "flatbuf" and may be invalidated after this call.
// If your FlatBuffer's root table is not the schema's root table, you should
// pass in your root_table type as well.
| uint8_t *ResizeAnyVector(const reflection::Schema &schema, uoffset_t newsize, | |||
| const VectorOfAny *vec, uoffset_t num_elems, | |||
| uoffset_t elem_size, std::vector<uint8_t> *flatbuf, | |||
| const reflection::Object *root_table = nullptr); | |||
// Typed wrapper around ResizeAnyVector: grows or shrinks "vec" in place
// inside "flatbuf", then fills any newly added slots with "val".
// "vec" must live inside "flatbuf" and may be invalidated after this call.
template<typename T>
void ResizeVector(const reflection::Schema &schema, uoffset_t newsize, T val,
                  const Vector<T> *vec, std::vector<uint8_t> *flatbuf,
                  const reflection::Object *root_table = nullptr) {
  // Signed so a shrink yields a negative delta and the fill loop is skipped.
  auto delta_elem = static_cast<int>(newsize) - static_cast<int>(vec->size());
  auto newelems = ResizeAnyVector(
      schema, newsize, reinterpret_cast<const VectorOfAny *>(vec), vec->size(),
      static_cast<uoffset_t>(sizeof(T)), flatbuf, root_table);
  // Set new elements to "val".
  for (int i = 0; i < delta_elem; i++) {
    auto loc = newelems + i * sizeof(T);
    auto is_scalar = flatbuffers::is_scalar<T>::value;
    if (is_scalar) {
      // Scalars go through WriteScalar so they are stored little-endian on
      // every platform.
      WriteScalar(loc, val);
    } else {  // struct
      // Structs are stored inline; a plain copy preserves their layout.
      *reinterpret_cast<T *>(loc) = val;
    }
  }
}
| // Adds any new data (in the form of a new FlatBuffer) to an existing | |||
| // FlatBuffer. This can be used when any of the above methods are not | |||
| // sufficient, in particular for adding new tables and new fields. | |||
| // This is potentially slightly less efficient than a FlatBuffer constructed | |||
| // in one piece, since the new FlatBuffer doesn't share any vtables with the | |||
| // existing one. | |||
| // The return value can now be set using Vector::MutateOffset or SetFieldT | |||
| // below. | |||
| const uint8_t *AddFlatBuffer(std::vector<uint8_t> &flatbuf, | |||
| const uint8_t *newbuf, size_t newlen); | |||
// Repoint an offset-typed field (table/string/vector) at new data, e.g. data
// appended with AddFlatBuffer above. Returns Table::SetPointer's result.
inline bool SetFieldT(Table *table, const reflection::Field &field,
                      const uint8_t *val) {
  // Only offset-sized fields can be repointed; scalar fields differ in size.
  FLATBUFFERS_ASSERT(sizeof(uoffset_t) ==
                     GetTypeSize(field.type()->base_type()));
  return table->SetPointer(field.offset(), val);
}
| // ------------------------- COPYING ------------------------- | |||
| // Generic copying of tables from a FlatBuffer into a FlatBuffer builder. | |||
| // Can be used to do any kind of merging/selecting you may want to do out | |||
| // of existing buffers. Also useful to reconstruct a whole buffer if the | |||
| // above resizing functionality has introduced garbage in a buffer you want | |||
| // to remove. | |||
| // Note: this does not deal with DAGs correctly. If the table passed forms a | |||
| // DAG, the copy will be a tree instead (with duplicates). Strings can be | |||
| // shared however, by passing true for use_string_pooling. | |||
| Offset<const Table *> CopyTable(FlatBufferBuilder &fbb, | |||
| const reflection::Schema &schema, | |||
| const reflection::Object &objectdef, | |||
| const Table &table, | |||
| bool use_string_pooling = false); | |||
| // Verifies the provided flatbuffer using reflection. | |||
| // root should point to the root type for this flatbuffer. | |||
| // buf should point to the start of flatbuffer data. | |||
| // length specifies the size of the flatbuffer data. | |||
| bool Verify(const reflection::Schema &schema, const reflection::Object &root, | |||
| const uint8_t *buf, size_t length); | |||
}  // namespace flatbuffers
| #endif // FLATBUFFERS_REFLECTION_H_ | |||
| @@ -0,0 +1,127 @@ | |||
| /* | |||
| * Copyright 2017 Google Inc. All rights reserved. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef FLATBUFFERS_REGISTRY_H_ | |||
| #define FLATBUFFERS_REGISTRY_H_ | |||
| #include "idl.h" | |||
| namespace flatbuffers { | |||
// Convenience class to easily parse or generate text for arbitrary
// FlatBuffers. Simply pre-populate it with all schema filenames that may be
// in use, and this class will look them up using the file_identifier
// declared in the schema.
class Registry {
 public:
  // Call this for all schemas that may be in use. The identifier has
  // a function in the generated code, e.g. MonsterIdentifier().
  void Register(const char *file_identifier, const char *schema_path) {
    Schema schema;
    schema.path_ = schema_path;
    schemas_[file_identifier] = schema;
  }
  // Generate text from an arbitrary FlatBuffer by looking up its
  // file_identifier in the registry. Returns false (with GetLastError()
  // set) on failure.
  bool FlatBufferToText(const uint8_t *flatbuf, size_t len, std::string *dest) {
    // Get the identifier out of the buffer.
    // If the buffer is truncated, exit.
    if (len < sizeof(uoffset_t) + FlatBufferBuilder::kFileIdentifierLength) {
      lasterror_ = "buffer truncated";
      return false;
    }
    // The file_identifier is stored directly after the root uoffset.
    std::string ident(
        reinterpret_cast<const char *>(flatbuf) + sizeof(uoffset_t),
        FlatBufferBuilder::kFileIdentifierLength);
    // Load and parse the schema.
    Parser parser;
    if (!LoadSchema(ident, &parser)) return false;
    // Now we're ready to generate text.
    if (!GenerateText(parser, flatbuf, dest)) {
      lasterror_ = "unable to generate text for FlatBuffer binary";
      return false;
    }
    return true;
  }
  // Parses text into a binary FlatBuffer, using the schema registered under
  // file_identifier.
  // If DetachedBuffer::data() is null then parsing failed.
  DetachedBuffer TextToFlatBuffer(const char *text,
                                  const char *file_identifier) {
    // Load and parse the schema.
    Parser parser;
    if (!LoadSchema(file_identifier, &parser)) return DetachedBuffer();
    // Parse the text.
    if (!parser.Parse(text)) {
      lasterror_ = parser.error_;
      return DetachedBuffer();
    }
    // We have a valid FlatBuffer. Detach it from the builder and return.
    return parser.builder_.Release();
  }
  // Modify any parsing / output options used by the other functions.
  void SetOptions(const IDLOptions &opts) { opts_ = opts; }
  // If schemas used contain include statements, call this function for every
  // directory the parser should search them for.
  void AddIncludeDirectory(const char *path) { include_paths_.push_back(path); }
  // Returns a human readable error if any of the above functions fail.
  const std::string &GetLastError() { return lasterror_; }
 private:
  // Looks up the schema registered under "ident", loads it from disk and
  // parses it into "parser". Returns false (with lasterror_ set) on failure.
  bool LoadSchema(const std::string &ident, Parser *parser) {
    // Find the schema, if not, exit.
    auto it = schemas_.find(ident);
    if (it == schemas_.end()) {
      // Don't attach the identifier, since it may not be human readable.
      lasterror_ = "identifier for this buffer not in the registry";
      return false;
    }
    auto &schema = it->second;
    // Load the schema from disk. If not, exit.
    std::string schematext;
    if (!LoadFile(schema.path_.c_str(), false, &schematext)) {
      lasterror_ = "could not load MindSpore.schema: " + schema.path_;
      return false;
    }
    // Parse schema.
    parser->opts = opts_;
    if (!parser->Parse(schematext.c_str(), vector_data(include_paths_),
                       schema.path_.c_str())) {
      lasterror_ = parser->error_;
      return false;
    }
    return true;
  }
  // Per-schema bookkeeping; currently just its path on disk.
  struct Schema {
    std::string path_;
    // TODO(wvo) optionally cache schema file or parsed schema here.
  };
  std::string lasterror_;                    // description of the last failure
  IDLOptions opts_;                          // options applied to every parse
  std::vector<const char *> include_paths_;  // schema include search dirs
  std::map<std::string, Schema> schemas_;    // file_identifier -> schema
};
}  // namespace flatbuffers
| #endif // FLATBUFFERS_REGISTRY_H_ | |||
| @@ -0,0 +1,275 @@ | |||
| /* | |||
| * Copyright 2017 Google Inc. All rights reserved. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef FLATBUFFERS_STL_EMULATION_H_ | |||
| #define FLATBUFFERS_STL_EMULATION_H_ | |||
| // clang-format off | |||
| #include <string> | |||
| #include <type_traits> | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <limits> | |||
| #if defined(_STLPORT_VERSION) && !defined(FLATBUFFERS_CPP98_STL) | |||
| #define FLATBUFFERS_CPP98_STL | |||
| #endif // defined(_STLPORT_VERSION) && !defined(FLATBUFFERS_CPP98_STL) | |||
| #if defined(FLATBUFFERS_CPP98_STL) | |||
| #include <cctype> | |||
| #endif // defined(FLATBUFFERS_CPP98_STL) | |||
| // Check if we can use template aliases | |||
| // Not possible if Microsoft Compiler before 2012 | |||
| // Possible is the language feature __cpp_alias_templates is defined well | |||
| // Or possible if the C++ std is C+11 or newer | |||
| #if (defined(_MSC_VER) && _MSC_VER > 1700 /* MSVC2012 */) \ | |||
| || (defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \ | |||
| || (defined(__cplusplus) && __cplusplus >= 201103L) | |||
| #define FLATBUFFERS_TEMPLATES_ALIASES | |||
| #endif | |||
| // This header provides backwards compatibility for C++98 STLs like stlport. | |||
| namespace flatbuffers { | |||
// Retrieve ::back() from a string in a way that is compatible with pre C++11
// STLs (e.g stlport).
inline char& string_back(std::string &value) {
  // Precondition: value is non-empty (same as std::string::back()).
  return value[value.size() - 1];
}
// Read-only overload: returns the last character by value.
inline char string_back(const std::string &value) {
  // Precondition: value is non-empty (same as std::string::back()).
  return value[value.size() - 1];
}
// Helper method that retrieves ::data() from a vector in a way that is
// compatible with pre C++11 STLs (e.g stlport).
template <typename T> inline T *vector_data(std::vector<T> &vector) {
  // Debug STLs bounds-check operator[], so &vector[0] would trip on an
  // empty vector; guard explicitly and use front() instead.
  if (vector.empty()) return nullptr;
  return &vector.front();
}
// Read-only overload of vector_data(); nullptr for an empty vector.
template <typename T> inline const T *vector_data(
    const std::vector<T> &vector) {
  if (vector.empty()) return nullptr;
  return &vector.front();
}
// emplace_back() shim: forwards into the vector on modern STLs, copies via
// push_back() on C++98 STLs that lack emplace.
template <typename T, typename V>
inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
  #if defined(FLATBUFFERS_CPP98_STL)
    // C++98: no emplace_back (and no std::forward); copy instead.
    vector->push_back(data);
  #else
    vector->emplace_back(std::forward<V>(data));
  #endif  // defined(FLATBUFFERS_CPP98_STL)
}
#ifndef FLATBUFFERS_CPP98_STL
  // Modern STL: std::numeric_limits is complete, just re-export it under the
  // flatbuffers namespace.
  #if defined(FLATBUFFERS_TEMPLATES_ALIASES)
    template <typename T>
    using numeric_limits = std::numeric_limits<T>;
  #else
    template <typename T> class numeric_limits :
      public std::numeric_limits<T> {};
  #endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)
#else
  // C++98 STLs lack lowest() (a C++11 addition) and some 64-bit
  // specializations; fill in just what flatbuffers needs.
  template <typename T> class numeric_limits :
      public std::numeric_limits<T> {
    public:
    // Android NDK fix.
    static T lowest() {
      return std::numeric_limits<T>::min();
    }
  };
  template <> class numeric_limits<float> :
      public std::numeric_limits<float> {
    public:
    // For floating point, min() is the smallest positive value, so lowest()
    // must be -max, not min.
    static float lowest() { return -FLT_MAX; }
  };
  template <> class numeric_limits<double> :
      public std::numeric_limits<double> {
    public:
    static double lowest() { return -DBL_MAX; }
  };
  template <> class numeric_limits<unsigned long long> {
    public:
    static unsigned long long min() { return 0ULL; }
    static unsigned long long max() { return ~0ULL; }
    static unsigned long long lowest() {
      return numeric_limits<unsigned long long>::min();
    }
  };
  template <> class numeric_limits<long long> {
    public:
    // Two's-complement bounds built by shifting into the sign bit.
    static long long min() {
      return static_cast<long long>(1ULL << ((sizeof(long long) << 3) - 1));
    }
    static long long max() {
      return static_cast<long long>(
        (1ULL << ((sizeof(long long) << 3) - 1)) - 1);
    }
    static long long lowest() {
      return numeric_limits<long long>::min();
    }
  };
#endif  // FLATBUFFERS_CPP98_STL
#if defined(FLATBUFFERS_TEMPLATES_ALIASES)
  #ifndef FLATBUFFERS_CPP98_STL
    // Modern STL: re-export the <type_traits> templates under the
    // flatbuffers namespace.
    template <typename T> using is_scalar = std::is_scalar<T>;
    template <typename T, typename U> using is_same = std::is_same<T,U>;
    template <typename T> using is_floating_point = std::is_floating_point<T>;
    template <typename T> using is_unsigned = std::is_unsigned<T>;
    template <typename T> using make_unsigned = std::make_unsigned<T>;
  #else
    // Map C++ TR1 templates defined by stlport.
    template <typename T> using is_scalar = std::tr1::is_scalar<T>;
    template <typename T, typename U> using is_same = std::tr1::is_same<T,U>;
    template <typename T> using is_floating_point =
        std::tr1::is_floating_point<T>;
    template <typename T> using is_unsigned = std::tr1::is_unsigned<T>;
    // Android NDK doesn't have std::make_unsigned or std::tr1::make_unsigned.
    // Hand-rolled subset: signed types get explicit specializations below;
    // unsigned types map to themselves via this primary template.
    template<typename T> struct make_unsigned {
      static_assert(is_unsigned<T>::value, "Specialization not implemented!");
      using type = T;
    };
    template<> struct make_unsigned<char> { using type = unsigned char; };
    template<> struct make_unsigned<short> { using type = unsigned short; };
    template<> struct make_unsigned<int> { using type = unsigned int; };
    template<> struct make_unsigned<long> { using type = unsigned long; };
    template<>
    struct make_unsigned<long long> { using type = unsigned long long; };
  #endif  // !FLATBUFFERS_CPP98_STL
#else
  // MSVC 2010 doesn't support C++11 aliases.
  template <typename T> struct is_scalar : public std::is_scalar<T> {};
  template <typename T, typename U> struct is_same : public std::is_same<T,U> {};
  template <typename T> struct is_floating_point :
      public std::is_floating_point<T> {};
  template <typename T> struct is_unsigned : public std::is_unsigned<T> {};
  template <typename T> struct make_unsigned : public std::make_unsigned<T> {};
#endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)
#ifndef FLATBUFFERS_CPP98_STL
  #if defined(FLATBUFFERS_TEMPLATES_ALIASES)
    template <class T> using unique_ptr = std::unique_ptr<T>;
  #else
    // MSVC 2010 doesn't support C++11 aliases.
    // We're manually "aliasing" the class here as we want to bring unique_ptr
    // into the flatbuffers namespace. We have unique_ptr in the flatbuffers
    // namespace we have a completely independent implementation (see below)
    // for C++98 STL implementations.
    template <class T> class unique_ptr : public std::unique_ptr<T> {
     public:
      unique_ptr() {}
      explicit unique_ptr(T* p) : std::unique_ptr<T>(p) {}
      // Implicit conversions from the real std::unique_ptr so the two can
      // be mixed freely; ownership is transferred via release()/reset().
      unique_ptr(std::unique_ptr<T>&& u) { *this = std::move(u); }
      unique_ptr(unique_ptr&& u) { *this = std::move(u); }
      unique_ptr& operator=(std::unique_ptr<T>&& u) {
        std::unique_ptr<T>::reset(u.release());
        return *this;
      }
      unique_ptr& operator=(unique_ptr&& u) {
        std::unique_ptr<T>::reset(u.release());
        return *this;
      }
      unique_ptr& operator=(T* p) {
        return std::unique_ptr<T>::operator=(p);
      }
    };
  #endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)
#else
  // Very limited implementation of unique_ptr.
  // This is provided simply to allow the C++ code generated from the default
  // settings to function in C++98 environments with no modifications.
  template <class T> class unique_ptr {
   public:
    typedef T element_type;
    unique_ptr() : ptr_(nullptr) {}
    explicit unique_ptr(T* p) : ptr_(p) {}
    unique_ptr(unique_ptr&& u) : ptr_(nullptr) { reset(u.release()); }
    // NOTE(review): "copying" steals ownership from the (const!) source,
    // auto_ptr style — intentional here, but surprising to callers.
    unique_ptr(const unique_ptr& u) : ptr_(nullptr) {
      reset(const_cast<unique_ptr*>(&u)->release());
    }
    ~unique_ptr() { reset(); }
    unique_ptr& operator=(const unique_ptr& u) {
      reset(const_cast<unique_ptr*>(&u)->release());
      return *this;
    }
    unique_ptr& operator=(unique_ptr&& u) {
      reset(u.release());
      return *this;
    }
    unique_ptr& operator=(T* p) {
      reset(p);
      return *this;
    }
    const T& operator*() const { return *ptr_; }
    T* operator->() const { return ptr_; }
    T* get() const noexcept { return ptr_; }
    // NOTE(review): explicit conversion operators are a C++11 feature;
    // presumably accepted by the targeted compilers — verify on strict
    // C++98 toolchains.
    explicit operator bool() const { return ptr_ != nullptr; }
    // modifiers
    T* release() {
      T* value = ptr_;
      ptr_ = nullptr;
      return value;
    }
    void reset(T* p = nullptr) {
      T* value = ptr_;
      ptr_ = p;
      if (value) delete value;
    }
    void swap(unique_ptr& u) {
      T* temp_ptr = ptr_;
      ptr_ = u.ptr_;
      u.ptr_ = temp_ptr;
    }
   private:
    T* ptr_;  // sole owner of the pointee; deleted on reset()/destruction
  };
  // Pointer-identity comparisons mirroring std::unique_ptr's.
  template <class T> bool operator==(const unique_ptr<T>& x,
                                     const unique_ptr<T>& y) {
    return x.get() == y.get();
  }
  template <class T, class D> bool operator==(const unique_ptr<T>& x,
                                              const D* y) {
    return static_cast<D*>(x.get()) == y;
  }
  template <class T> bool operator==(const unique_ptr<T>& x, intptr_t y) {
    return reinterpret_cast<intptr_t>(x.get()) == y;
  }
#endif  // !FLATBUFFERS_CPP98_STL
}  // namespace flatbuffers
| #endif // FLATBUFFERS_STL_EMULATION_H_ | |||
| @@ -0,0 +1,654 @@ | |||
| /* | |||
| * Copyright 2014 Google Inc. All rights reserved. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef FLATBUFFERS_UTIL_H_ | |||
| #define FLATBUFFERS_UTIL_H_ | |||
| #include "base.h" | |||
| #include <errno.h> | |||
| #ifndef FLATBUFFERS_PREFER_PRINTF | |||
| # include <sstream> | |||
| #else // FLATBUFFERS_PREFER_PRINTF | |||
| # include <float.h> | |||
| # include <stdio.h> | |||
| #endif // FLATBUFFERS_PREFER_PRINTF | |||
| #include <iomanip> | |||
| #include <string> | |||
| namespace flatbuffers { | |||
| // @locale-independent functions for ASCII characters set. | |||
| // Fast checking that character lies in closed range: [a <= x <= b] | |||
| // using one compare (conditional branch) operator. | |||
| inline bool check_ascii_range(char x, char a, char b) { | |||
| FLATBUFFERS_ASSERT(a <= b); | |||
| // (Hacker's Delight): `a <= x <= b` <=> `(x-a) <={u} (b-a)`. | |||
| // The x, a, b will be promoted to int and subtracted without overflow. | |||
| return static_cast<unsigned int>(x - a) <= static_cast<unsigned int>(b - a); | |||
| } | |||
| // Case-insensitive isalpha | |||
| inline bool is_alpha(char c) { | |||
| // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF). | |||
| return check_ascii_range(c & 0xDF, 'a' & 0xDF, 'z' & 0xDF); | |||
| } | |||
| // Check (case-insensitive) that `c` is equal to alpha. | |||
| inline bool is_alpha_char(char c, char alpha) { | |||
| FLATBUFFERS_ASSERT(is_alpha(alpha)); | |||
| // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF). | |||
| return ((c & 0xDF) == (alpha & 0xDF)); | |||
| } | |||
// https://en.cppreference.com/w/cpp/string/byte/isxdigit
// isdigit and isxdigit are the only standard narrow character classification
// functions that are not affected by the currently installed C locale. although
// some implementations (e.g. Microsoft in 1252 codepage) may classify
// additional single-byte characters as digits.
// Locale-independent isdigit: accepts exactly '0'..'9'.
inline bool is_digit(char c) { return check_ascii_range(c, '0', '9'); }
| inline bool is_xdigit(char c) { | |||
| // Replace by look-up table. | |||
| return is_digit(c) || check_ascii_range(c & 0xDF, 'a' & 0xDF, 'f' & 0xDF); | |||
| } | |||
// Case-insensitive isalnum
// ASCII letters (either case) or decimal digits only.
inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); }
| // @end-locale-independent functions for ASCII character set | |||
#ifdef FLATBUFFERS_PREFER_PRINTF
// Estimates how many characters the integral part of `t` (plus any sign)
// needs when printed in decimal.
template<typename T> size_t IntToDigitCount(T t) {
  size_t digit_count = 0;
  // Count the sign for negative numbers
  if (t < 0) digit_count++;
  // Count a single 0 left of the dot for fractional numbers
  if (-1 < t && t < 1) digit_count++;
  // Count digits until fractional part
  // NOTE(review): float epsilon is used even when T is double — presumably
  // good enough for a width estimate; confirm for high-precision doubles.
  T eps = std::numeric_limits<float>::epsilon();
  while (t <= (-1 + eps) || (1 - eps) <= t) {
    t /= 10;
    digit_count++;
  }
  return digit_count;
}
// Total snprintf output width for `t`: integral digits plus an optional
// dot and `precision` fractional digits.
template<typename T> size_t NumToStringWidth(T t, int precision = 0) {
  size_t string_width = IntToDigitCount(t);
  // Count the dot for floating point numbers
  if (precision) string_width += (precision + 1);
  return string_width;
}
// Formats `t` with printf format `fmt` (which consumes `precision` via "*")
// into an exactly pre-sized std::string.
template<typename T>
std::string NumToStringImplWrapper(T t, const char *fmt, int precision = 0) {
  size_t string_width = NumToStringWidth(t, precision);
  std::string s(string_width, 0x00);
  // Allow snprintf to use std::string trailing null to detect buffer overflow
  snprintf(const_cast<char *>(s.data()), (s.size() + 1), fmt, precision, t);
  return s;
}
#endif  // FLATBUFFERS_PREFER_PRINTF
// Convert an integer or floating point value to a string.
// In contrast to std::stringstream, "char" values are
// converted to a string of digits, and we don't use scientific notation.
template<typename T> std::string NumToString(T t) {
  // clang-format off
  #ifndef FLATBUFFERS_PREFER_PRINTF
    // Default path: stream insertion handles all arithmetic types.
    std::stringstream out;
    out << t;
    return out.str();
  #else  // FLATBUFFERS_PREFER_PRINTF
    // printf path: widen to long long and format through the sized helper.
    auto v = static_cast<long long>(t);
    return NumToStringImplWrapper(v, "%.*lld");
  #endif  // FLATBUFFERS_PREFER_PRINTF
  // clang-format on
}
// Avoid char types used as character data.
// Promote to int so the value prints as digits, not as a character.
template<> inline std::string NumToString<signed char>(signed char t) {
  return NumToString(static_cast<int>(t));
}
template<> inline std::string NumToString<unsigned char>(unsigned char t) {
  return NumToString(static_cast<int>(t));
}
template<> inline std::string NumToString<char>(char t) {
  return NumToString(static_cast<int>(t));
}
#if defined(FLATBUFFERS_CPP98_STL)
// Pre-C++11 stringstreams lack (unsigned) long long support, so format the
// 64-bit types with snprintf directly.
template<> inline std::string NumToString<long long>(long long t) {
  char buf[21];  // 19 digits for 2^63-1, optional '-', and the trailing '\0'.
  snprintf(buf, sizeof(buf), "%lld", t);
  return std::string(buf);
}
template<>
inline std::string NumToString<unsigned long long>(unsigned long long t) {
  char buf[22];  // 20 digits for 2^64-1 plus the trailing '\0' (with slack).
  snprintf(buf, sizeof(buf), "%llu", t);
  return std::string(buf);
}
#endif  // defined(FLATBUFFERS_CPP98_STL)
// Special versions for floats/doubles.
// Prints t with fixed notation at the given precision, then strips the
// trailing zeroes that std::fixed produces (keeping one after the dot).
template<typename T> std::string FloatToString(T t, int precision) {
  // clang-format off
  #ifndef FLATBUFFERS_PREFER_PRINTF
    // to_string() prints different numbers of digits for floats depending on
    // platform and isn't available on Android, so we use stringstream
    std::stringstream ss;
    // Use std::fixed to suppress scientific notation.
    ss << std::fixed;
    // Default precision is 6, we want that to be higher for doubles.
    ss << std::setprecision(precision);
    ss << t;
    auto s = ss.str();
  #else // FLATBUFFERS_PREFER_PRINTF
    auto v = static_cast<double>(t);
    auto s = NumToStringImplWrapper(v, "%0.*f", precision);
  #endif // FLATBUFFERS_PREFER_PRINTF
  // clang-format on
  // Sadly, std::fixed turns "1" into "1.00000", so here we undo that.
  auto p = s.find_last_not_of('0');
  if (p != std::string::npos) {
    // Strip trailing zeroes. If it is a whole number, keep one zero.
    s.resize(p + (s[p] == '.' ? 2 : 1));
  }
  return s;
}
// Floating-point overloads of NumToString: doubles get 12 decimals of
// precision, floats only 6 (before trailing-zero stripping).
template<> inline std::string NumToString<double>(double t) {
  return FloatToString(t, 12);
}
template<> inline std::string NumToString<float>(float t) {
  return FloatToString(t, 6);
}
// Convert an integer value to a hexadecimal string.
// The returned string length is always xdigits long, prefixed by 0 digits.
// For example, IntToStringHex(0x23, 8) returns the string "00000023".
inline std::string IntToStringHex(int i, int xdigits) {
  FLATBUFFERS_ASSERT(i >= 0);  // Negative values are not representable here.
  // clang-format off
  #ifndef FLATBUFFERS_PREFER_PRINTF
    std::stringstream ss;
    ss << std::setw(xdigits) << std::setfill('0') << std::hex << std::uppercase
       << i;
    return ss.str();
  #else // FLATBUFFERS_PREFER_PRINTF
    // "%.*X" zero-pads to the requested number of digits.
    return NumToStringImplWrapper(i, "%.*X", xdigits);
  #endif // FLATBUFFERS_PREFER_PRINTF
  // clang-format on
}
| // clang-format off | |||
| // Use locale independent functions {strtod_l, strtof_l, strtoll_l, strtoull_l}. | |||
| #if defined(FLATBUFFERS_LOCALE_INDEPENDENT) && (FLATBUFFERS_LOCALE_INDEPENDENT > 0) | |||
// Singleton holding a native handle to the classic "C" locale, used by the
// locale-independent strto* wrappers below. Constructor/destructor are
// defined out of line (they create and free the native locale object).
class ClassicLocale {
  #ifdef _MSC_VER
    typedef _locale_t locale_type;
  #else
    typedef locale_t locale_type;  // POSIX.1-2008 locale_t type
  #endif
  ClassicLocale();
  ~ClassicLocale();
  locale_type locale_;
  static ClassicLocale instance_;
 public:
  // Returns the shared "C" locale handle; valid for the process lifetime.
  static locale_type Get() { return instance_.locale_; }
};
// Route the __strto*_impl helpers to the per-call-locale variants so parsing
// never depends on the process-global locale (e.g. decimal comma locales).
#ifdef _MSC_VER
  #define __strtoull_impl(s, pe, b) _strtoui64_l(s, pe, b, ClassicLocale::Get())
  #define __strtoll_impl(s, pe, b) _strtoi64_l(s, pe, b, ClassicLocale::Get())
  #define __strtod_impl(s, pe) _strtod_l(s, pe, ClassicLocale::Get())
  #define __strtof_impl(s, pe) _strtof_l(s, pe, ClassicLocale::Get())
#else
  #define __strtoull_impl(s, pe, b) strtoull_l(s, pe, b, ClassicLocale::Get())
  #define __strtoll_impl(s, pe, b) strtoll_l(s, pe, b, ClassicLocale::Get())
  #define __strtod_impl(s, pe) strtod_l(s, pe, ClassicLocale::Get())
  #define __strtof_impl(s, pe) strtof_l(s, pe, ClassicLocale::Get())
#endif
#else
// Fallback when locale independence is not requested: plain C library calls.
// Note: strtof is unavailable on some toolchains, hence strtod + cast.
  #define __strtod_impl(s, pe) strtod(s, pe)
  #define __strtof_impl(s, pe) static_cast<float>(strtod(s, pe))
  #ifdef _MSC_VER
    #define __strtoull_impl(s, pe, b) _strtoui64(s, pe, b)
    #define __strtoll_impl(s, pe, b) _strtoi64(s, pe, b)
  #else
    #define __strtoull_impl(s, pe, b) strtoull(s, pe, b)
    #define __strtoll_impl(s, pe, b) strtoll(s, pe, b)
  #endif
#endif
// Overload set so templated callers can parse into int64/uint64/double/float
// without naming the underlying strto* function themselves.
inline void strtoval_impl(int64_t *val, const char *str, char **endptr,
                          int base) {
  *val = __strtoll_impl(str, endptr, base);
}
inline void strtoval_impl(uint64_t *val, const char *str, char **endptr,
                          int base) {
  *val = __strtoull_impl(str, endptr, base);
}
inline void strtoval_impl(double *val, const char *str, char **endptr) {
  *val = __strtod_impl(str, endptr);
}
// UBSAN: double to float is safe if numeric_limits<float>::is_iec559 is true.
__supress_ubsan__("float-cast-overflow")
inline void strtoval_impl(float *val, const char *str, char **endptr) {
  *val = __strtof_impl(str, endptr);
}
// The helper macros are implementation detail; keep them out of client code.
#undef __strtoull_impl
#undef __strtoll_impl
#undef __strtod_impl
#undef __strtof_impl
// clang-format on
// Adaptor for strtoull()/strtoll().
// Flatbuffers accepts numbers with any count of leading zeros (-009 is -9),
// while strtoll with base=0 interprets the first leading zero as an octal
// prefix. In future, it is possible to add prefixed 0b0101.
// 1) Checks errno code for overflow condition (out of range).
// 2) If base <= 0, the function tries to detect the base from the prefix.
//
// Return value (like strtoull and strtoll, but reject partial result):
// - If successful, an integer value corresponding to the str is returned.
// - If full string conversion can't be performed, 0 is returned.
// - If the converted value falls out of range of corresponding return type, a
//   range error occurs. In this case value MAX(T)/MIN(T) is returned.
template<typename T>
inline bool StringToIntegerImpl(T *val, const char *const str,
                                const int base = 0,
                                const bool check_errno = true) {
  // T is int64_t or uint64_t.
  FLATBUFFERS_ASSERT(str);
  if (base <= 0) {
    // Skip past any sign characters to the first digit, then check for a
    // "0x"/"0X" hexadecimal prefix.
    auto s = str;
    while (*s && !is_digit(*s)) s++;
    if (s[0] == '0' && is_alpha_char(s[1], 'X'))
      return StringToIntegerImpl(val, str, 16, check_errno);
    // If the prefix doesn't match, parse as base 10. This deliberately
    // bypasses strtoll's base-0 octal interpretation of leading zeros.
    return StringToIntegerImpl(val, str, 10, check_errno);
  } else {
    if (check_errno) errno = 0;  // clear thread-local errno
    auto endptr = str;
    strtoval_impl(val, str, const_cast<char **>(&endptr), base);
    // Reject both "nothing consumed" and "trailing garbage after the number".
    if ((*endptr != '\0') || (endptr == str)) {
      *val = 0;      // erase partial result
      return false;  // invalid string
    }
    // Non-zero errno means out of range; *val keeps strto*'s clamped MAX/MIN.
    if (check_errno && errno) return false;
    return true;
  }
}
// Parses str into *val via the locale-independent strtod/strtof wrappers.
// Returns true only when the entire string is a valid number; otherwise
// *val is reset to 0.
template<typename T>
inline bool StringToFloatImpl(T *val, const char *const str) {
  // Type T must be either float or double.
  FLATBUFFERS_ASSERT(str && val);
  const char *stop = str;
  strtoval_impl(val, str, const_cast<char **>(&stop));
  // Reject both "no characters consumed" and "trailing garbage".
  const bool fully_parsed = (stop != str) && (*stop == '\0');
  if (!fully_parsed) *val = 0;  // erase partial result
  return fully_parsed;
}
| // Convert a string to an instance of T. | |||
| // Return value (matched with StringToInteger64Impl and strtod): | |||
| // - If successful, a numeric value corresponding to the str is returned. | |||
| // - If full string conversion can't be performed, 0 is returned. | |||
| // - If the converted value falls out of range of corresponding return type, a | |||
| // range error occurs. In this case value MAX(T)/MIN(T) is returned. | |||
| template<typename T> inline bool StringToNumber(const char *s, T *val) { | |||
| FLATBUFFERS_ASSERT(s && val); | |||
| int64_t i64; | |||
| // The errno check isn't needed, will return MAX/MIN on overflow. | |||
| if (StringToIntegerImpl(&i64, s, 0, false)) { | |||
| const int64_t max = flatbuffers::numeric_limits<T>::max(); | |||
| const int64_t min = flatbuffers::numeric_limits<T>::lowest(); | |||
| if (i64 > max) { | |||
| *val = static_cast<T>(max); | |||
| return false; | |||
| } | |||
| if (i64 < min) { | |||
| // For unsigned types return max to distinguish from | |||
| // "no conversion can be performed" when 0 is returned. | |||
| *val = static_cast<T>(flatbuffers::is_unsigned<T>::value ? max : min); | |||
| return false; | |||
| } | |||
| *val = static_cast<T>(i64); | |||
| return true; | |||
| } | |||
| *val = 0; | |||
| return false; | |||
| } | |||
// int64_t covers strtoll's full range, so no extra clamping is required.
template<> inline bool StringToNumber<int64_t>(const char *str, int64_t *val) {
  return StringToIntegerImpl(val, str);
}
// uint64_t needs a special case: strtoull silently accepts "-N" and wraps it
// around, which this specialization detects and reports as out-of-range.
template<>
inline bool StringToNumber<uint64_t>(const char *str, uint64_t *val) {
  if (!StringToIntegerImpl(val, str)) return false;
  // The strtoull accepts negative numbers:
  // If the minus sign was part of the input sequence, the numeric value
  // calculated from the sequence of digits is negated as if by unary minus
  // in the result type, which applies unsigned integer wraparound rules.
  // Fix this behaviour (except -0, which parses to 0 and is skipped here).
  if (*val) {
    auto s = str;
    while (*s && !is_digit(*s)) s++;
    s = (s > str) ? (s - 1) : s;  // step back to the symbol before the digits
    if (*s == '-') {
      // For unsigned types return the max to distinguish from
      // "no conversion can be performed".
      *val = flatbuffers::numeric_limits<uint64_t>::max();
      return false;
    }
  }
  return true;
}
// Floating-point overloads delegate to the locale-independent strtof/strtod
// wrappers; on failure *val is set to 0 and false is returned.
template<> inline bool StringToNumber(const char *s, float *val) {
  return StringToFloatImpl(val, s);
}
template<> inline bool StringToNumber(const char *s, double *val) {
  return StringToFloatImpl(val, s);
}
// Parse a signed 64-bit integer. Returns 0 when the string is not fully
// numeric — indistinguishable from a genuine parsed 0.
inline int64_t StringToInt(const char *s, int base = 10) {
  int64_t val;
  return StringToIntegerImpl(&val, s, base) ? val : 0;
}
// Unsigned counterpart of StringToInt; same 0-on-failure caveat applies.
inline uint64_t StringToUInt(const char *s, int base = 10) {
  uint64_t val;
  return StringToIntegerImpl(&val, s, base) ? val : 0;
}
| typedef bool (*LoadFileFunction)(const char *filename, bool binary, | |||
| std::string *dest); | |||
| typedef bool (*FileExistsFunction)(const char *filename); | |||
| LoadFileFunction SetLoadFileFunction(LoadFileFunction load_file_function); | |||
| FileExistsFunction SetFileExistsFunction( | |||
| FileExistsFunction file_exists_function); | |||
| // Check if file "name" exists. | |||
| bool FileExists(const char *name); | |||
| // Check if "name" exists and it is also a directory. | |||
| bool DirExists(const char *name); | |||
| // Load file "name" into "buf" returning true if successful | |||
| // false otherwise. If "binary" is false data is read | |||
| // using ifstream's text mode, otherwise data is read with | |||
| // no transcoding. | |||
| bool LoadFile(const char *name, bool binary, std::string *buf); | |||
| // Save data "buf" of length "len" bytes into a file | |||
| // "name" returning true if successful, false otherwise. | |||
| // If "binary" is false data is written using ifstream's | |||
| // text mode, otherwise data is written with no | |||
| // transcoding. | |||
| bool SaveFile(const char *name, const char *buf, size_t len, bool binary); | |||
| // Save data "buf" into file "name" returning true if | |||
| // successful, false otherwise. If "binary" is false | |||
| // data is written using ifstream's text mode, otherwise | |||
| // data is written with no transcoding. | |||
| inline bool SaveFile(const char *name, const std::string &buf, bool binary) { | |||
| return SaveFile(name, buf.c_str(), buf.size(), binary); | |||
| } | |||
| // Functionality for minimalistic portable path handling. | |||
| // The functions below behave correctly regardless of whether posix ('/') or | |||
| // Windows ('/' or '\\') separators are used. | |||
| // Any new separators inserted are always posix. | |||
| FLATBUFFERS_CONSTEXPR char kPathSeparator = '/'; | |||
| // Returns the path with the extension, if any, removed. | |||
| std::string StripExtension(const std::string &filepath); | |||
| // Returns the extension, if any. | |||
| std::string GetExtension(const std::string &filepath); | |||
| // Return the last component of the path, after the last separator. | |||
| std::string StripPath(const std::string &filepath); | |||
| // Strip the last component of the path + separator. | |||
| std::string StripFileName(const std::string &filepath); | |||
// Concatenates a path with a filename, regardless of whether the path
// ends in a separator or not.
| std::string ConCatPathFileName(const std::string &path, | |||
| const std::string &filename); | |||
| // Replaces any '\\' separators with '/' | |||
| std::string PosixPath(const char *path); | |||
| // This function ensure a directory exists, by recursively | |||
| // creating dirs for any parts of the path that don't exist yet. | |||
| void EnsureDirExists(const std::string &filepath); | |||
| // Obtains the absolute path from any other path. | |||
| // Returns the input path if the absolute path couldn't be resolved. | |||
| std::string AbsolutePath(const std::string &filepath); | |||
// To and from UTF-8 unicode conversion functions
// Convert a unicode code point into a UTF-8 representation by appending it
// to a string. Returns the number of bytes generated.
inline int ToUTF8(uint32_t ucc, std::string *out) {
  FLATBUFFERS_ASSERT(!(ucc & 0x80000000));  // Top bit can't be set.
  // 6 possible encodings: http://en.wikipedia.org/wiki/UTF-8
  for (int i = 0; i < 6; i++) {
    // Max bits this encoding can represent.
    uint32_t max_bits = 6 + i * 5 + static_cast<int>(!i);
    if (ucc < (1u << max_bits)) {  // does it fit?
      // Remaining bits not encoded in the first byte, store 6 bits each
      uint32_t remain_bits = i * 6;
      // Store first byte: leading-ones length marker followed by the code
      // point's top bits (0xFE shifted left produces the 1...10 pattern).
      (*out) += static_cast<char>((0xFE << (max_bits - remain_bits)) |
                                  (ucc >> remain_bits));
      // Store remaining bytes: 10xxxxxx continuation bytes, 6 bits apiece.
      for (int j = i - 1; j >= 0; j--) {
        (*out) += static_cast<char>(((ucc >> (j * 6)) & 0x3F) | 0x80);
      }
      return i + 1;  // Return the number of bytes added.
    }
  }
  FLATBUFFERS_ASSERT(0);  // Impossible to arrive here.
  return -1;
}
// Converts whatever prefix of the incoming string corresponds to a valid
// UTF-8 sequence into a unicode code. The incoming pointer will have been
// advanced past all bytes parsed.
// returns -1 upon corrupt UTF-8 encoding (ignore the incoming pointer in
// this case).
inline int FromUTF8(const char **in) {
  int len = 0;
  // Count leading 1 bits: they encode the total byte length of the sequence.
  for (int mask = 0x80; mask >= 0x04; mask >>= 1) {
    if (**in & mask) {
      len++;
    } else {
      break;
    }
  }
  if ((static_cast<unsigned char>(**in) << len) & 0x80)
    return -1;  // Bit after leading 1's must be 0.
  if (!len) return *(*in)++;  // Plain ASCII byte: return it directly.
  // UTF-8 encoded values with a length are between 2 and 4 bytes.
  if (len < 2 || len > 4) { return -1; }
  // Grab initial bits of the code from the lead byte.
  int ucc = *(*in)++ & ((1 << (7 - len)) - 1);
  for (int i = 0; i < len - 1; i++) {
    if ((**in & 0xC0) != 0x80) return -1;  // Continuation bytes must be 10xxxxxx.
    ucc <<= 6;
    ucc |= *(*in)++ & 0x3F;  // Grab 6 more bits of the code.
  }
  // UTF-8 cannot encode values between 0xD800 and 0xDFFF (reserved for
  // UTF-16 surrogate pairs).
  if (ucc >= 0xD800 && ucc <= 0xDFFF) { return -1; }
  // UTF-8 must represent code points in their shortest possible encoding
  // (overlong encodings are a classic security hole, hence the checks).
  switch (len) {
    case 2:
      // Two bytes of UTF-8 can represent code points from U+0080 to U+07FF.
      if (ucc < 0x0080 || ucc > 0x07FF) { return -1; }
      break;
    case 3:
      // Three bytes of UTF-8 can represent code points from U+0800 to U+FFFF.
      if (ucc < 0x0800 || ucc > 0xFFFF) { return -1; }
      break;
    case 4:
      // Four bytes of UTF-8 can represent code points from U+10000 to U+10FFFF.
      if (ucc < 0x10000 || ucc > 0x10FFFF) { return -1; }
      break;
  }
  return ucc;
}
| #ifndef FLATBUFFERS_PREFER_PRINTF | |||
// Wraps a string to a maximum length, inserting new lines where necessary.
// Any existing whitespace is collapsed down to a single space. A prefix or
// suffix can be provided; they are inserted before/after each wrapped line.
inline std::string WordWrap(const std::string in, size_t max_length,
                            const std::string wrapped_line_prefix,
                            const std::string wrapped_line_suffix) {
  std::istringstream token_stream(in);
  std::string result;
  std::string current_line;
  std::string token;
  // Seed the first line with the first whitespace-delimited token (empty
  // input leaves both strings empty and skips the loop entirely).
  token_stream >> token;
  current_line = token;
  while (token_stream >> token) {
    // Would the line still fit with a separating space, the next token,
    // and the wrap suffix appended?
    const size_t projected = current_line.length() + 1 + token.length() +
                             wrapped_line_suffix.length();
    if (projected < max_length) {
      current_line.append(" ").append(token);
    } else {
      // Flush the full line and start a fresh one with the prefix.
      result += current_line;
      result += wrapped_line_suffix;
      result += "\n";
      current_line = wrapped_line_prefix + token;
    }
  }
  result += current_line;
  return result;
}
| #endif // !FLATBUFFERS_PREFER_PRINTF | |||
// Escapes the byte range [s, s+length) as a double-quoted JSON string and
// appends it to *_text. Non-printable bytes are emitted as \uXXXX escapes
// (or surrogate pairs), raw UTF-8 (natural_utf8), or \xNN (allow_non_utf8).
// Returns false when invalid UTF-8 is found and allow_non_utf8 is false.
inline bool EscapeString(const char *s, size_t length, std::string *_text,
                         bool allow_non_utf8, bool natural_utf8) {
  std::string &text = *_text;
  text += "\"";
  for (uoffset_t i = 0; i < length; i++) {
    char c = s[i];
    switch (c) {
      // Standard JSON two-character escapes.
      case '\n': text += "\\n"; break;
      case '\t': text += "\\t"; break;
      case '\r': text += "\\r"; break;
      case '\b': text += "\\b"; break;
      case '\f': text += "\\f"; break;
      case '\"': text += "\\\""; break;
      case '\\': text += "\\\\"; break;
      default:
        if (c >= ' ' && c <= '~') {
          // Printable ASCII passes through unchanged.
          text += c;
        } else {
          // Not printable ASCII data. Let's see if it's valid UTF-8 first:
          const char *utf8 = s + i;
          int ucc = FromUTF8(&utf8);
          if (ucc < 0) {
            if (allow_non_utf8) {
              text += "\\x";
              text += IntToStringHex(static_cast<uint8_t>(c), 2);
            } else {
              // There are two cases here:
              //
              // 1) We reached here by parsing an IDL file. In that case,
              // we previously checked for non-UTF-8, so we shouldn't reach
              // here.
              //
              // 2) We reached here by someone calling GenerateText()
              // on a previously-serialized flatbuffer. The data might have
              // non-UTF-8 Strings, or might be corrupt.
              //
              // In both cases, we have to give up and inform the caller
              // they have no JSON.
              return false;
            }
          } else {
            if (natural_utf8) {
              // utf8 points to past all utf-8 bytes parsed
              text.append(s + i, static_cast<size_t>(utf8 - s - i));
            } else if (ucc <= 0xFFFF) {
              // Parses as Unicode within JSON's \uXXXX range, so use that.
              text += "\\u";
              text += IntToStringHex(ucc, 4);
            } else if (ucc <= 0x10FFFF) {
              // Encode Unicode SMP values to a surrogate pair using two \u
              // escapes.
              uint32_t base = ucc - 0x10000;
              auto high_surrogate = (base >> 10) + 0xD800;
              auto low_surrogate = (base & 0x03FF) + 0xDC00;
              text += "\\u";
              text += IntToStringHex(high_surrogate, 4);
              text += "\\u";
              text += IntToStringHex(low_surrogate, 4);
            }
            // Skip past characters recognized (the loop's i++ adds one more).
            i = static_cast<uoffset_t>(utf8 - s - 1);
          }
        }
        break;
    }
  }
  text += "\"";
  return true;
}
| // Remove paired quotes in a string: "text"|'text' -> text. | |||
| std::string RemoveStringQuotes(const std::string &s); | |||
// Change the global C-locale to a locale with name <locale_name>.
// Returns the actual locale name in <_value>, useful if locale_name is "" or
// null.
| bool SetGlobalTestLocale(const char *locale_name, | |||
| std::string *_value = nullptr); | |||
| // Read (or test) a value of environment variable. | |||
| bool ReadEnvironmentVariable(const char *var_name, | |||
| std::string *_value = nullptr); | |||
| } // namespace MindSpore.flatbuffers | |||
| #endif // FLATBUFFERS_UTIL_H_ | |||
| @@ -0,0 +1,87 @@ | |||
| /** | |||
| * This is the C++ adaptation and derivative work of Myia (https://github.com/mila-iqia/myia/). | |||
| * | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CORE_IR_DTYPE_TYPE_ID_H_ | |||
| #define MINDSPORE_CORE_IR_DTYPE_TYPE_ID_H_ | |||
| #include <unordered_map> | |||
| #include <string> | |||
| namespace mindspore { | |||
| // | |||
| // Supported meta type | |||
| // | |||
// Identifier for every type MindSpore can describe, partitioned into three
// contiguous ranges delimited by *Begin/*End markers: meta types, object
// types, and number types. Each range's Begin aliases the previous End, so
// a value's category can be tested with simple comparisons.
// NOTE(review): these values look like they may appear in serialized models;
// avoid inserting or reordering enumerators — confirm before changing.
enum TypeId : int {
  kTypeUnknown = 0,
  kMetaTypeBegin = kTypeUnknown,
  kMetaTypeType,  // Type
  kMetaTypeAnything,
  kMetaTypeObject,
  kMetaTypeTypeType,  // TypeType
  kMetaTypeProblem,
  kMetaTypeExternal,
  kMetaTypeNone,
  kMetaTypeNull,
  kMetaTypeEllipsis,
  kMetaTypeEnd,
  //
  // Object types
  //
  kObjectTypeBegin = kMetaTypeEnd,
  kObjectTypeNumber,
  kObjectTypeString,
  kObjectTypeList,
  kObjectTypeTuple,
  kObjectTypeSlice,
  kObjectTypeKeyword,
  kObjectTypeTensorType,
  kObjectTypeIndexedSlicesType,
  kObjectTypeSparseTensorType,
  kObjectTypeUndeterminedType,
  kObjectTypeClass,
  kObjectTypeDictionary,
  kObjectTypeFunction,
  kObjectTypeJTagged,
  kObjectTypeSymbolicKeyType,
  kObjectTypeEnvType,
  kObjectTypeRefKey,
  kObjectTypeRef,
  kObjectTypeEnd,
  //
  // Number types: plain kNumberTypeInt/UInt/Float carry no explicit width.
  //
  kNumberTypeBegin = kObjectTypeEnd,
  kNumberTypeBool,
  kNumberTypeInt,
  kNumberTypeInt8,
  kNumberTypeInt16,
  kNumberTypeInt32,
  kNumberTypeInt64,
  kNumberTypeUInt,
  kNumberTypeUInt8,
  kNumberTypeUInt16,
  kNumberTypeUInt32,
  kNumberTypeUInt64,
  kNumberTypeFloat,
  kNumberTypeFloat16,
  kNumberTypeFloat32,
  kNumberTypeFloat64,
  kNumberTypeEnd
};
| } // namespace mindspore | |||
| #endif // MINDSPORE_CORE_IR_DTYPE_TYPE_ID_H_ | |||
| @@ -0,0 +1,127 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_INCLUDE_LITE_SESSION_H | |||
| #define MINDSPORE_LITE_INCLUDE_LITE_SESSION_H | |||
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "context.h"
#include "model.h"
#include "ms_tensor.h"
| namespace mindspore { | |||
| namespace session { | |||
/// \brief CallBackParam defined input arguments for callBack function.
///
/// Passed to a KernelCallBack before/after each node executes so the
/// callback can identify which node is running.
struct CallBackParam {
  std::string name_callback_param; /**< node name argument */
  std::string type_callback_param; /**< node type argument */
};
/// \brief KernelCallBack defined the function pointer for callBack.
///
/// Receives the node's input/output tensors and its CallBackParam; returning
/// false presumably aborts the run — confirm against the RunGraph impl.
/// NOTE(review): std::function requires <functional>, which this header does
/// not include directly — verify it arrives transitively.
using KernelCallBack = std::function<bool(std::vector<tensor::MSTensor *> inputs,
  std::vector<tensor::MSTensor *> outputs, const CallBackParam &opInfo)>;
/// \brief LiteSession defined session in MindSpore Lite for compiling Model and forwarding model.
class MS_API LiteSession {
 public:
  /// \brief Static method to create a LiteSession pointer.
  ///
  /// \param[in] context Define the context of session to be created.
  ///
  /// \return Pointer of MindSpore Lite LiteSession.
  ///
  /// \note NOTE(review): ownership of the returned pointer appears to rest
  /// with the caller — confirm against the implementation.
  static LiteSession *CreateSession(lite::Context *context);

  /// \brief Destructor of MindSpore Lite LiteSession.
  virtual ~LiteSession() = default;

  /// \brief Attempt to bind or unbind threads in the thread pool to or from the specified cpu core.
  ///
  /// \param[in] if_bind Define whether to bind or unbind threads.
  virtual void BindThread(bool if_bind) = 0;

  /// \brief Compile MindSpore Lite model.
  ///
  /// \note CompileGraph should be called before RunGraph.
  ///
  /// \param[in] model Define the model to be compiled.
  ///
  /// \return STATUS as an error code of compiling graph, STATUS is defined in errorcode.h.
  virtual int CompileGraph(lite::Model *model) = 0;

  /// \brief Get input MindSpore Lite MSTensors of model.
  ///
  /// \return The vector of MindSpore Lite MSTensor.
  virtual std::vector<tensor::MSTensor *> GetInputs() const = 0;

  /// \brief Get input MindSpore Lite MSTensors of model by node name.
  ///
  /// \param[in] node_name Define node name.
  ///
  /// \return The vector of MindSpore Lite MSTensor.
  virtual std::vector<tensor::MSTensor *> GetInputsByName(const std::string &node_name) const = 0;

  /// \brief Run session with callback.
  ///
  /// \param[in] before Define a callback function to be called before running each node.
  /// \param[in] after Define a callback function called after running each node.
  ///
  /// \note RunGraph should be called after CompileGraph.
  ///
  /// \return STATUS as an error code of running graph, STATUS is defined in errorcode.h.
  virtual int RunGraph(const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr) = 0;

  /// \brief Get output MindSpore Lite MSTensors of model mapped by node name.
  ///
  /// \return The map of output node name and MindSpore Lite MSTensor.
  virtual std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> GetOutputMapByNode() const = 0;

  /// \brief Get output MindSpore Lite MSTensors of model by node name.
  ///
  /// \param[in] node_name Define node name.
  ///
  /// \return The vector of MindSpore Lite MSTensor.
  virtual std::vector<tensor::MSTensor *> GetOutputsByNodeName(const std::string &node_name) const = 0;

  /// \brief Get output MindSpore Lite MSTensors of model mapped by tensor name.
  ///
  /// \return The map of output tensor name and MindSpore Lite MSTensor.
  virtual std::unordered_map<std::string, mindspore::tensor::MSTensor *> GetOutputMapByTensor() const = 0;

  /// \brief Get name of output tensors of model compiled by this session.
  ///
  /// \return The vector of string as output tensor names in order.
  virtual std::vector<std::string> GetOutputTensorNames() const = 0;

  /// \brief Get output MindSpore Lite MSTensors of model by tensor name.
  ///
  /// \param[in] tensor_name Define tensor name.
  ///
  /// \return Pointer of MindSpore Lite MSTensor.
  virtual mindspore::tensor::MSTensor *GetOutputByTensorName(const std::string &tensor_name) const = 0;

  /// \brief Resize inputs shape.
  ///
  /// \param[in] inputs Define the new inputs shape.
  ///
  /// \return STATUS as an error code of resize inputs, STATUS is defined in errorcode.h.
  virtual int Resize(const std::vector<tensor::MSTensor *> &inputs) = 0;
};
| } // namespace session | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_INCLUDE_LITE_SESSION_H | |||
| @@ -0,0 +1,110 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_INCLUDE_MODEL_H | |||
| #define MINDSPORE_LITE_INCLUDE_MODEL_H | |||
| #include <string> | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "schema/model_generated.h" | |||
| namespace mindspore { | |||
| #define MS_API __attribute__((visibility("default"))) | |||
| namespace lite { | |||
| /// \brief ModelImpl defined the implement class of Model in MindSpore Lite. | |||
| /// | |||
| /// \note List public class and interface for reference. | |||
| class ModelImpl; | |||
| /// \brief Primitive defined as prototype of operator. | |||
| /// | |||
| /// \note List public class and interface for reference. | |||
| class PrimitiveC; | |||
/// \brief Model defined model in MindSpore Lite for managing graph.
class MS_API Model {
 public:
  /// \brief Static method to create a Model pointer.
  ///
  /// \param[in] model_buf Define the buffer read from a model file.
  /// \param[in] size Define bytes number of model buffer.
  ///
  /// \return Pointer of MindSpore Lite Model.
  ///
  /// \note NOTE(review): the returned pointer appears to be caller-owned —
  /// confirm against the implementation.
  static Model *Import(const char *model_buf, size_t size);

  /// \brief Constructor of MindSpore Lite Model using default value for parameters.
  ///
  /// \return Instance of MindSpore Lite Model.
  Model() = default;

  /// \brief Destructor of MindSpore Lite Model.
  virtual ~Model();

  /// \brief Get MindSpore Lite Primitive by name.
  ///
  /// \param[in] name Define name of primitive to be returned.
  ///
  /// \return the pointer of MindSpore Lite Primitive.
  PrimitiveC *GetOp(const std::string &name) const;

  /// \brief Get graph defined in flatbuffers.
  ///
  /// \return the pointer of graph defined in flatbuffers.
  const schema::MetaGraph *GetMetaGraph() const;

  /// \brief Free MetaGraph in MindSpore Lite Model.
  ///
  /// \note NOTE(review): looks intended to release the graph buffer once the
  /// model has been compiled — confirm when it is safe to call.
  void FreeMetaGraph();

 protected:
  // Opaque implementation object (pimpl); defined in the .cc file.
  ModelImpl *model_impl_ = nullptr;
};
| /// \brief ModelBuilder defined by MindSpore Lite. | |||
| class MS_API ModelBuilder { | |||
| public: | |||
| /// \brief OutEdge defined by MindSpore Lite. | |||
| struct OutEdge { | |||
| std::string nodeId; /**< ID of a node linked by this edge */ | |||
| size_t outEdgeIndex; /**< Index of this edge */ | |||
| }; | |||
| /// \brief Constructor of MindSpore Lite Model using default value for parameters. | |||
| /// | |||
| /// \return Instance of MindSpore Lite ModelBuilder. | |||
| ModelBuilder() = default; | |||
| /// \brief Destructor of MindSpore Lite ModelBuilder. | |||
| virtual ~ModelBuilder() = default; | |||
| /// \brief Add primitive into model builder for model building. | |||
| /// | |||
| /// \param[in] op Define the primitive to be added. | |||
| /// \param[in] inputs Define input edge of primitive to be added. | |||
| /// | |||
| /// \return ID of the added primitive. | |||
| virtual std::string AddOp(const PrimitiveC &op, const std::vector<OutEdge> &inputs) = 0; | |||
| /// \brief Finish constructing the model. | |||
| /// | |||
| /// \return the pointer of MindSpore Lite Model. | |||
| virtual Model *Construct(); | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_INCLUDE_MODEL_H | |||
| @@ -0,0 +1,108 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_INCLUDE_MS_TENSOR_H_ | |||
| #define MINDSPORE_LITE_INCLUDE_MS_TENSOR_H_ | |||
| #include <utility> | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "ir/dtype/type_id.h" | |||
| namespace mindspore { | |||
| #define MS_API __attribute__((visibility("default"))) | |||
| namespace tensor { | |||
| /// \brief MSTensor defined tensor in MindSpore Lite. | |||
| class MS_API MSTensor { | |||
| public: | |||
| /// \brief Constructor of MindSpore Lite MSTensor. | |||
| /// | |||
| /// \return Instance of MindSpore Lite MSTensor. | |||
| MSTensor() = default; | |||
| /// \brief Static method to create a MSTensor pointer. | |||
| /// | |||
| /// \param[in] data_type Define data type of tensor to be created. | |||
| /// \param[in] shape Define Shape of tensor to be created. | |||
| /// | |||
| /// \note TypeId is defined in mindspore/mindspore/core/ir/dtype/type_id.h. Only number types in TypeId enum are | |||
| /// suitable for MSTensor. | |||
| /// | |||
| /// \return the pointer of MSTensor. | |||
| static MSTensor *CreateTensor(TypeId data_type, const std::vector<int> &shape); | |||
| /// \brief Destructor of MindSpore Lite Model. | |||
| virtual ~MSTensor() = default; | |||
| /// \brief Get data type of the MindSpore Lite MSTensor. | |||
| /// | |||
| /// \note TypeId is defined in mindspore/mindspore/core/ir/dtype/type_id.h. Only number types in TypeId enum are | |||
| /// suitable for MSTensor. | |||
| /// | |||
| /// \return MindSpore Lite TypeId of the MindSpore Lite MSTensor. | |||
| virtual TypeId data_type() const = 0; | |||
| /// \brief Set data type for the MindSpore Lite MSTensor. | |||
| /// | |||
| /// \param[in] data_type Define MindSpore Lite TypeId to be set in the MindSpore Lite MSTensor. | |||
| /// | |||
| /// \return MindSpore Lite TypeId of the MindSpore Lite MSTensor after set. | |||
| virtual TypeId set_data_type(TypeId data_type) = 0; | |||
| /// \brief Get shape of the MindSpore Lite MSTensor. | |||
| /// | |||
| /// \return A vector of int as the shape of the MindSpore Lite MSTensor. | |||
| virtual std::vector<int> shape() const = 0; | |||
| /// \brief Set shape for the MindSpore Lite MSTensor. | |||
| /// | |||
| /// \param[in] shape Define a vector of int as shape to be set into the MindSpore Lite MSTensor. | |||
| /// | |||
| /// \return size of shape of the MindSpore Lite MSTensor after set. | |||
| virtual size_t set_shape(const std::vector<int> &shape) = 0; | |||
| /// \brief Get size of the dimension of the MindSpore Lite MSTensor index by the parameter index. | |||
| /// | |||
| /// \param[in] index Define index of dimension returned. | |||
| /// | |||
| /// \return Size of dimension of the MindSpore Lite MSTensor. | |||
| virtual int DimensionSize(size_t index) const = 0; | |||
| /// \brief Get number of element in MSTensor. | |||
| /// | |||
| /// \return Number of element in MSTensor. | |||
| virtual int ElementsNum() const = 0; | |||
| /// \brief Get hash of the MindSpore Lite MSTensor. | |||
| /// | |||
| /// \return Hash of the MindSpore Lite MSTensor. | |||
| virtual std::size_t hash() const = 0; | |||
| /// \brief Get byte size of data in MSTensor. | |||
| /// | |||
| /// \return Byte size of data in MSTensor. | |||
| virtual size_t Size() const = 0; | |||
| /// \brief Get the pointer of data in MSTensor. | |||
| /// | |||
| /// \note The data pointer can be used to both write and read data in MSTensor. | |||
| /// | |||
| /// \return the pointer points to data in MSTensor. | |||
| virtual void *MutableData() const = 0; | |||
| }; | |||
| } // namespace tensor | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_INCLUDE_MS_TENSOR_H_ | |||
| @@ -0,0 +1,35 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
#ifndef MINDSPORE_LITE_INCLUDE_THREAD_POOL_CONFIG_H_
#define MINDSPORE_LITE_INCLUDE_THREAD_POOL_CONFIG_H_

/// \brief CpuBindMode defined for holding bind cpu strategy argument.
typedef enum Mode {
  MID_CPU = -1,   /**< bind middle cpu first */
  HIGHER_CPU = 1, /**< bind higher cpu first */
  NO_BIND = 0     /**< no bind */
} CpuBindMode;

/// \brief ThreadPoolId defined for specifying which thread pool to use.
typedef enum Id {
  THREAD_POOL_DEFAULT = 0, /**< default thread pool id */
  THREAD_POOL_SECOND = 1,  /**< the second thread pool id */
  THREAD_POOL_THIRD = 2,   /**< the third thread pool id */
  THREAD_POOL_FOURTH = 3   /**< the fourth thread pool id */
} ThreadPoolId;

// Fixed: trailing comment previously said LITE_MINDSPORE_LITE_INCLUDE_...,
// which did not match the guard macro defined above.
#endif  // MINDSPORE_LITE_INCLUDE_THREAD_POOL_CONFIG_H_
| @@ -0,0 +1,48 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
// Compatibility shim: legacy include path that simply forwards to the modern
// "opencv2/calib3d.hpp" header. Intended for client code only; OpenCV's own
// build (which defines __OPENCV_BUILD) must not include it.
#ifdef __OPENCV_BUILD
#error this is a compatibility header which should not be used inside the OpenCV library
#endif

#include "opencv2/calib3d.hpp"
| @@ -0,0 +1,150 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
// Legacy C API constants and helpers for the OpenCV calib3d module
// (vendored third-party header; kept byte-compatible with upstream).
#ifndef OPENCV_CALIB3D_C_H
#define OPENCV_CALIB3D_C_H

#include "opencv2/core/types_c.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Calculates fundamental matrix given a set of corresponding points */
// Method flags for fundamental-matrix estimation.
#define CV_FM_7POINT 1
#define CV_FM_8POINT 2

#define CV_LMEDS 4
#define CV_RANSAC 8

// Aliases kept for backward compatibility with older API spellings.
#define CV_FM_LMEDS_ONLY CV_LMEDS
#define CV_FM_RANSAC_ONLY CV_RANSAC
#define CV_FM_LMEDS CV_LMEDS
#define CV_FM_RANSAC CV_RANSAC

// PnP (pose estimation) solver selection; references cite the originating papers.
enum
{
    CV_ITERATIVE = 0,
    CV_EPNP = 1, // F.Moreno-Noguer, V.Lepetit and P.Fua "EPnP: Efficient Perspective-n-Point Camera Pose Estimation"
    CV_P3P = 2, // X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang; "Complete Solution Classification for the Perspective-Three-Point Problem"
    CV_DLS = 3 // Joel A. Hesch and Stergios I. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP"
};

// Chessboard-corner detection flags.
#define CV_CALIB_CB_ADAPTIVE_THRESH 1
#define CV_CALIB_CB_NORMALIZE_IMAGE 2
#define CV_CALIB_CB_FILTER_QUADS 4
#define CV_CALIB_CB_FAST_CHECK 8

// Camera-calibration bit flags (values are bit masks, hence powers of two).
#define CV_CALIB_USE_INTRINSIC_GUESS 1
#define CV_CALIB_FIX_ASPECT_RATIO 2
#define CV_CALIB_FIX_PRINCIPAL_POINT 4
#define CV_CALIB_ZERO_TANGENT_DIST 8
#define CV_CALIB_FIX_FOCAL_LENGTH 16
#define CV_CALIB_FIX_K1 32
#define CV_CALIB_FIX_K2 64
#define CV_CALIB_FIX_K3 128
#define CV_CALIB_FIX_K4 2048
#define CV_CALIB_FIX_K5 4096
#define CV_CALIB_FIX_K6 8192
#define CV_CALIB_RATIONAL_MODEL 16384
#define CV_CALIB_THIN_PRISM_MODEL 32768
#define CV_CALIB_FIX_S1_S2_S3_S4 65536
#define CV_CALIB_TILTED_MODEL 262144
#define CV_CALIB_FIX_TAUX_TAUY 524288
#define CV_CALIB_FIX_TANGENT_DIST 2097152

#define CV_CALIB_NINTRINSIC 18

// Stereo-calibration flags.
#define CV_CALIB_FIX_INTRINSIC 256
#define CV_CALIB_SAME_FOCAL_LENGTH 512

#define CV_CALIB_ZERO_DISPARITY 1024

/* stereo correspondence parameters and functions */
#define CV_STEREO_BM_NORMALIZED_RESPONSE 0
#define CV_STEREO_BM_XSOBEL 1

#ifdef __cplusplus
} // extern "C"

//////////////////////////////////////////////////////////////////////////////////////////
// Levenberg-Marquardt nonlinear least-squares solver (legacy C++ helper of the
// calib3d module). State matrices are held via cv::Ptr<CvMat> so their lifetime
// is reference-counted.
class CV_EXPORTS CvLevMarq
{
public:
    CvLevMarq();
    // nparams/nerrs size the parameter and error vectors; `criteria` bounds the
    // iterations (default: 30 iterations or epsilon DBL_EPSILON).
    CvLevMarq( int nparams, int nerrs, CvTermCriteria criteria=
              cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
              bool completeSymmFlag=false );
    ~CvLevMarq();
    void init( int nparams, int nerrs, CvTermCriteria criteria=
              cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
              bool completeSymmFlag=false );
    bool update( const CvMat*& param, CvMat*& J, CvMat*& err );
    bool updateAlt( const CvMat*& param, CvMat*& JtJ, CvMat*& JtErr, double*& errNorm );

    void clear();
    void step();
    // NOTE(review): these appear to be states of the update()/updateAlt() driver
    // loop (caller supplies J or the error depending on state) — confirm against
    // the implementation before relying on exact semantics.
    enum { DONE=0, STARTED=1, CALC_J=2, CHECK_ERR=3 };

    cv::Ptr<CvMat> mask;
    cv::Ptr<CvMat> prevParam;
    cv::Ptr<CvMat> param;
    cv::Ptr<CvMat> J;
    cv::Ptr<CvMat> err;
    cv::Ptr<CvMat> JtJ;
    cv::Ptr<CvMat> JtJN;
    cv::Ptr<CvMat> JtErr;
    cv::Ptr<CvMat> JtJV;
    cv::Ptr<CvMat> JtJW;
    double prevErrNorm, errNorm;
    int lambdaLg10;
    CvTermCriteria criteria;
    int state;
    int iters;
    bool completeSymmFlag;
    int solveMethod;
};

#endif

#endif /* OPENCV_CALIB3D_C_H */
| @@ -0,0 +1,678 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CORE_AFFINE3_HPP | |||
| #define OPENCV_CORE_AFFINE3_HPP | |||
| #ifdef __cplusplus | |||
| #include <opencv2/core.hpp> | |||
| namespace cv | |||
| { | |||
| //! @addtogroup core | |||
| //! @{ | |||
| /** @brief Affine transform | |||
| * | |||
| * It represents a 4x4 homogeneous transformation matrix \f$T\f$ | |||
| * | |||
| * \f[T = | |||
| * \begin{bmatrix} | |||
| * R & t\\ | |||
| * 0 & 1\\ | |||
| * \end{bmatrix} | |||
| * \f] | |||
| * | |||
| * where \f$R\f$ is a 3x3 rotation matrix and \f$t\f$ is a 3x1 translation vector. | |||
| * | |||
| * You can specify \f$R\f$ either by a 3x3 rotation matrix or by a 3x1 rotation vector, | |||
| * which is converted to a 3x3 rotation matrix by the Rodrigues formula. | |||
| * | |||
| * To construct a matrix \f$T\f$ representing first rotation around the axis \f$r\f$ with rotation | |||
| * angle \f$|r|\f$ in radian (right hand rule) and then translation by the vector \f$t\f$, you can use | |||
| * | |||
| * @code | |||
| * cv::Vec3f r, t; | |||
| * cv::Affine3f T(r, t); | |||
| * @endcode | |||
| * | |||
| * If you already have the rotation matrix \f$R\f$, then you can use | |||
| * | |||
| * @code | |||
| * cv::Matx33f R; | |||
| * cv::Affine3f T(R, t); | |||
| * @endcode | |||
| * | |||
| * To extract the rotation matrix \f$R\f$ from \f$T\f$, use | |||
| * | |||
| * @code | |||
| * cv::Matx33f R = T.rotation(); | |||
| * @endcode | |||
| * | |||
| * To extract the translation vector \f$t\f$ from \f$T\f$, use | |||
| * | |||
| * @code | |||
| * cv::Vec3f t = T.translation(); | |||
| * @endcode | |||
| * | |||
| * To extract the rotation vector \f$r\f$ from \f$T\f$, use | |||
| * | |||
| * @code | |||
| * cv::Vec3f r = T.rvec(); | |||
| * @endcode | |||
| * | |||
| * Note that since the mapping from rotation vectors to rotation matrices | |||
| * is many to one. The returned rotation vector is not necessarily the one | |||
| * you used before to set the matrix. | |||
| * | |||
| * If you have two transformations \f$T = T_1 * T_2\f$, use | |||
| * | |||
| * @code | |||
| * cv::Affine3f T, T1, T2; | |||
| * T = T2.concatenate(T1); | |||
| * @endcode | |||
| * | |||
| * To get the inverse transform of \f$T\f$, use | |||
| * | |||
| * @code | |||
| * cv::Affine3f T, T_inv; | |||
| * T_inv = T.inv(); | |||
| * @endcode | |||
| * | |||
| */ | |||
template<typename T>
class Affine3
{
public:
    typedef T float_type;
    typedef Matx<float_type, 3, 3> Mat3;
    typedef Matx<float_type, 4, 4> Mat4;
    typedef Vec<float_type, 3> Vec3;

    //! Default constructor. It represents a 4x4 identity matrix.
    Affine3();

    //! Augmented affine matrix
    Affine3(const Mat4& affine);

    /**
     * The resulting 4x4 matrix is
     *
     * \f[
     * \begin{bmatrix}
     * R & t\\
     * 0 & 1\\
     * \end{bmatrix}
     * \f]
     *
     * @param R 3x3 rotation matrix.
     * @param t 3x1 translation vector.
     */
    Affine3(const Mat3& R, const Vec3& t = Vec3::all(0));

    /**
     * Rodrigues vector.
     *
     * The last row of the current matrix is set to [0,0,0,1].
     *
     * @param rvec 3x1 rotation vector. Its direction indicates the rotation axis and its length
     *             indicates the rotation angle in radian (using right hand rule).
     * @param t 3x1 translation vector.
     */
    Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0));

    /**
     * Combines all constructors above. Supports 4x4, 3x4, 3x3, 1x3, 3x1 sizes of data matrix.
     *
     * The last row of the current matrix is set to [0,0,0,1] when data is not 4x4.
     *
     * @param data 1-channel matrix.
     *             when it is 4x4, it is copied to the current matrix and t is not used.
     *             When it is 3x4, it is copied to the upper part 3x4 of the current matrix and t is not used.
     *             When it is 3x3, it is copied to the upper left 3x3 part of the current matrix.
     *             When it is 3x1 or 1x3, it is treated as a rotation vector and the Rodrigues formula is used
     *             to compute a 3x3 rotation matrix.
     * @param t 3x1 translation vector. It is used only when data is neither 4x4 nor 3x4.
     */
    explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0));

    //! From 16-element array
    explicit Affine3(const float_type* vals);

    //! Create an 4x4 identity transform
    static Affine3 Identity();

    /**
     * Rotation matrix.
     *
     * Copy the rotation matrix to the upper left 3x3 part of the current matrix.
     * The remaining elements of the current matrix are not changed.
     *
     * @param R 3x3 rotation matrix.
     *
     */
    void rotation(const Mat3& R);

    /**
     * Rodrigues vector.
     *
     * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
     *
     * @param rvec 3x1 rotation vector. The direction indicates the rotation axis and
     *             its length indicates the rotation angle in radian (using the right thumb convention).
     */
    void rotation(const Vec3& rvec);

    /**
     * Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix.
     *
     * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
     *
     * @param data 1-channel matrix.
     *             When it is a 3x3 matrix, it sets the upper left 3x3 part of the current matrix.
     *             When it is a 1x3 or 3x1 matrix, it is used as a rotation vector. The Rodrigues formula
     *             is used to compute the rotation matrix and sets the upper left 3x3 part of the current matrix.
     */
    void rotation(const Mat& data);

    /**
     * Copy the 3x3 matrix L to the upper left part of the current matrix
     *
     * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
     *
     * @param L 3x3 matrix.
     */
    void linear(const Mat3& L);

    /**
     * Copy t to the first three elements of the last column of the current matrix
     *
     * It sets the upper right 3x1 part of the matrix. The remaining part is unaffected.
     *
     * @param t 3x1 translation vector.
     */
    void translation(const Vec3& t);

    //! @return the upper left 3x3 part
    Mat3 rotation() const;

    //! @return the upper left 3x3 part
    Mat3 linear() const;

    //! @return the upper right 3x1 part
    Vec3 translation() const;

    //! Rodrigues vector.
    //! @return a vector representing the upper left 3x3 rotation matrix of the current matrix.
    //! @warning Since the mapping between rotation vectors and rotation matrices is many to one,
    //! this function returns only one rotation vector that represents the current rotation matrix,
    //! which is not necessarily the same one set by `rotation(const Vec3& rvec)`.
    Vec3 rvec() const;

    //! @return the inverse of the current matrix.
    Affine3 inv(int method = cv::DECOMP_SVD) const;

    //! a.rotate(R) is equivalent to Affine(R, 0) * a;
    Affine3 rotate(const Mat3& R) const;

    //! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a;
    Affine3 rotate(const Vec3& rvec) const;

    //! a.translate(t) is equivalent to Affine(E, t) * a, where E is an identity matrix
    Affine3 translate(const Vec3& t) const;

    //! a.concatenate(affine) is equivalent to affine * a;
    Affine3 concatenate(const Affine3& affine) const;

    //! Element-type conversion (e.g. Affine3f -> Affine3d).
    template <typename Y> operator Affine3<Y>() const;

    template <typename Y> Affine3<Y> cast() const;

    //! The underlying 4x4 homogeneous transform, stored row-major in a Matx.
    Mat4 matrix;

// Optional Eigen interoperability, only compiled when Eigen's Geometry module
// has already been included by the user.
#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
    Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine);
    Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine);
    operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const;
    operator Eigen::Transform<T, 3, Eigen::Affine>() const;
#endif
};
//! Composition of two affine transforms.
template<typename T> static
Affine3<T> operator*(const Affine3<T>& affine1, const Affine3<T>& affine2);

//! V is a 3-element vector with member fields x, y and z
template<typename T, typename V> static
V operator*(const Affine3<T>& affine, const V& vector);

typedef Affine3<float> Affine3f;
typedef Affine3<double> Affine3d;

static Vec3f operator*(const Affine3f& affine, const Vec3f& vector);
static Vec3d operator*(const Affine3d& affine, const Vec3d& vector);

// Traits specialization describing Affine3<_Tp> to OpenCV's type system as
// 16 channels of _Tp (the 4x4 matrix laid out as one vector).
template<typename _Tp> class DataType< Affine3<_Tp> >
{
public:
    typedef Affine3<_Tp>                               value_type;
    typedef Affine3<typename DataType<_Tp>::work_type> work_type;
    typedef _Tp                                        channel_type;

    enum { generic_type = 0,
           channels     = 16,
           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
           ,depth        = DataType<channel_type>::depth
           ,type         = CV_MAKETYPE(depth, channels)
#endif
         };

    typedef Vec<channel_type, channels> vec_type;
};

namespace traits {
template<typename _Tp>
struct Depth< Affine3<_Tp> > { enum { value = Depth<_Tp>::value }; };
template<typename _Tp>
struct Type< Affine3<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 16) }; };
} // namespace
| //! @} core | |||
| } | |||
| //! @cond IGNORED | |||
| /////////////////////////////////////////////////////////////////////////////////// | |||
| // Implementation | |||
// Default: 4x4 identity transform.
template<typename T> inline
cv::Affine3<T>::Affine3()
    : matrix(Mat4::eye())
{}

// Copy an already-augmented 4x4 matrix verbatim.
template<typename T> inline
cv::Affine3<T>::Affine3(const Mat4& affine)
    : matrix(affine)
{}

// Build from rotation matrix + translation; the bottom row is forced to
// [0,0,0,1] (indices 12..15 of the row-major 4x4).
template<typename T> inline
cv::Affine3<T>::Affine3(const Mat3& R, const Vec3& t)
{
    rotation(R);
    translation(t);
    matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
    matrix.val[15] = 1;
}

// Build from a Rodrigues rotation vector + translation (rotation(Vec3)
// performs the Rodrigues conversion); bottom row forced to [0,0,0,1].
template<typename T> inline
cv::Affine3<T>::Affine3(const Vec3& _rvec, const Vec3& t)
{
    rotation(_rvec);
    translation(t);
    matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
    matrix.val[15] = 1;
}

// Dispatch on the size of `data`: 4x4 copies everything, 3x4 copies R|t,
// otherwise `data` is treated as a rotation (matrix or Rodrigues vector)
// and `t` supplies the translation. Bottom row forced to [0,0,0,1] except
// in the verbatim 4x4 case.
template<typename T> inline
cv::Affine3<T>::Affine3(const cv::Mat& data, const Vec3& t)
{
    CV_Assert(data.type() == cv::traits::Type<T>::value);
    CV_Assert(data.channels() == 1);

    if (data.cols == 4 && data.rows == 4)
    {
        data.copyTo(matrix);
        return;
    }
    else if (data.cols == 4 && data.rows == 3)
    {
        rotation(data(Rect(0, 0, 3, 3)));
        translation(data(Rect(3, 0, 1, 3)));
    }
    else
    {
        rotation(data);
        translation(t);
    }

    matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
    matrix.val[15] = 1;
}

// From a 16-element row-major array.
template<typename T> inline
cv::Affine3<T>::Affine3(const float_type* vals) : matrix(vals)
{}

template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::Identity()
{
    return Affine3<T>(cv::Affine3<T>::Mat4::eye());
}
// Setting the rotation block is identical to setting the linear block.
template<typename T> inline
void cv::Affine3<T>::rotation(const Mat3& R)
{
    linear(R);
}

// Rodrigues vector -> rotation matrix. Angles below DBL_EPSILON are treated
// as zero rotation (identity) to avoid dividing by a near-zero norm.
template<typename T> inline
void cv::Affine3<T>::rotation(const Vec3& _rvec)
{
    double theta = norm(_rvec);

    if (theta < DBL_EPSILON)
        rotation(Mat3::eye());
    else
    {
        double c = std::cos(theta);
        double s = std::sin(theta);
        double c1 = 1. - c;
        double itheta = (theta != 0) ? 1./theta : 0.;

        Point3_<T> r = _rvec*itheta;  // unit-length rotation axis

        Mat3 rrt( r.x*r.x, r.x*r.y, r.x*r.z, r.x*r.y, r.y*r.y, r.y*r.z, r.x*r.z, r.y*r.z, r.z*r.z );
        Mat3 r_x( 0, -r.z, r.y, r.z, 0, -r.x, -r.y, r.x, 0 );

        // R = cos(theta)*I + (1 - cos(theta))*r*rT + sin(theta)*[r_x]
        // where [r_x] is [0 -rz ry; rz 0 -rx; -ry rx 0]
        Mat3 R = c*Mat3::eye() + c1*rrt + s*r_x;

        rotation(R);
    }
}

//Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix;
template<typename T> inline
void cv::Affine3<T>::rotation(const cv::Mat& data)
{
    CV_Assert(data.type() == cv::traits::Type<T>::value);
    CV_Assert(data.channels() == 1);

    if (data.cols == 3 && data.rows == 3)
    {
        // 3x3: use as a rotation matrix directly.
        Mat3 R;
        data.copyTo(R);
        rotation(R);
    }
    else if ((data.cols == 3 && data.rows == 1) || (data.cols == 1 && data.rows == 3))
    {
        // 1x3 / 3x1: treat as a Rodrigues rotation vector.
        Vec3 _rvec;
        data.reshape(1, 3).copyTo(_rvec);
        rotation(_rvec);
    }
    else
        CV_Error(Error::StsError, "Input matrix can only be 3x3, 1x3 or 3x1");
}
| template<typename T> inline | |||
| void cv::Affine3<T>::linear(const Mat3& L) | |||
| { | |||
| matrix.val[0] = L.val[0]; matrix.val[1] = L.val[1]; matrix.val[ 2] = L.val[2]; | |||
| matrix.val[4] = L.val[3]; matrix.val[5] = L.val[4]; matrix.val[ 6] = L.val[5]; | |||
| matrix.val[8] = L.val[6]; matrix.val[9] = L.val[7]; matrix.val[10] = L.val[8]; | |||
| } | |||
| template<typename T> inline | |||
| void cv::Affine3<T>::translation(const Vec3& t) | |||
| { | |||
| matrix.val[3] = t[0]; matrix.val[7] = t[1]; matrix.val[11] = t[2]; | |||
| } | |||
//! Get the 3x3 rotation part; alias of linear().
template<typename T> inline
typename cv::Affine3<T>::Mat3 cv::Affine3<T>::rotation() const
{
return linear();
}
| template<typename T> inline | |||
| typename cv::Affine3<T>::Mat3 cv::Affine3<T>::linear() const | |||
| { | |||
| typename cv::Affine3<T>::Mat3 R; | |||
| R.val[0] = matrix.val[0]; R.val[1] = matrix.val[1]; R.val[2] = matrix.val[ 2]; | |||
| R.val[3] = matrix.val[4]; R.val[4] = matrix.val[5]; R.val[5] = matrix.val[ 6]; | |||
| R.val[6] = matrix.val[8]; R.val[7] = matrix.val[9]; R.val[8] = matrix.val[10]; | |||
| return R; | |||
| } | |||
//! Extract the translation vector (last column of the 4x4 matrix).
template<typename T> inline
typename cv::Affine3<T>::Vec3 cv::Affine3<T>::translation() const
{
return Vec3(matrix.val[3], matrix.val[7], matrix.val[11]);
}
//! Convert the rotation part to a Rodrigues rotation vector (axis * angle).
//! The rotation is first projected onto the nearest orthogonal matrix via
//! SVD (R <- U*Vt), so a slightly drifted matrix still yields a sane result.
template<typename T> inline
typename cv::Affine3<T>::Vec3 cv::Affine3<T>::rvec() const
{
cv::Vec3d w;
cv::Matx33d u, vt, R = rotation();
cv::SVD::compute(R, w, u, vt, cv::SVD::FULL_UV + cv::SVD::MODIFY_A);
R = u * vt; // nearest orthogonal matrix to the stored rotation part
// The skew-symmetric part of R encodes 2*sin(theta)*axis.
double rx = R.val[7] - R.val[5];
double ry = R.val[2] - R.val[6];
double rz = R.val[3] - R.val[1];
double s = std::sqrt((rx*rx + ry*ry + rz*rz)*0.25); // sin(theta)
double c = (R.val[0] + R.val[4] + R.val[8] - 1) * 0.5; // cos(theta), from the trace
c = c > 1.0 ? 1.0 : c < -1.0 ? -1.0 : c; // clamp against rounding error
double theta = acos(c);
if( s < 1e-5 )
{
// Near-degenerate: theta is close to 0 or to pi.
if( c > 0 )
rx = ry = rz = 0; // theta ~ 0: zero rotation vector
else
{
// theta ~ pi: the skew part vanishes, so recover the axis magnitudes
// from the diagonal of R and fix relative signs from off-diagonals.
double t;
t = (R.val[0] + 1) * 0.5;
rx = std::sqrt(std::max(t, 0.0));
t = (R.val[4] + 1) * 0.5;
ry = std::sqrt(std::max(t, 0.0)) * (R.val[1] < 0 ? -1.0 : 1.0);
t = (R.val[8] + 1) * 0.5;
rz = std::sqrt(std::max(t, 0.0)) * (R.val[2] < 0 ? -1.0 : 1.0);
if( fabs(rx) < fabs(ry) && fabs(rx) < fabs(rz) && (R.val[5] > 0) != (ry*rz > 0) )
rz = -rz;
theta /= std::sqrt(rx*rx + ry*ry + rz*rz); // normalize axis, then scale by theta
rx *= theta;
ry *= theta;
rz *= theta;
}
}
else
{
// General case: rescale the skew part so its length equals theta.
double vth = 1/(2*s);
vth *= theta;
rx *= vth; ry *= vth; rz *= vth;
}
return cv::Vec3d(rx, ry, rz);
}
//! Inverse transform, computed by inverting the full 4x4 matrix with the
//! given decomposition method (a cv::DecompTypes value).
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::inv(int method) const
{
return matrix.inv(method);
}
//! Rotate the whole transform by R: the result has linear part R*linear()
//! and translation R*translation() (equivalent to composing R on the left).
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::rotate(const Mat3& R) const
{
Mat3 Lc = linear();
Vec3 tc = translation();
Mat4 result;
result.val[12] = result.val[13] = result.val[14] = 0; // bottom row = (0, 0, 0, 1)
result.val[15] = 1;
for(int j = 0; j < 3; ++j)
{
for(int i = 0; i < 3; ++i)
{
// Upper-left 3x3 block: (R * Lc)(j, i)
float_type value = 0;
for(int k = 0; k < 3; ++k)
value += R(j, k) * Lc(k, i);
result(j, i) = value;
}
result(j, 3) = R.row(j).dot(tc.t()); // translation column: (R * tc)(j)
}
return result;
}
| template<typename T> inline | |||
| cv::Affine3<T> cv::Affine3<T>::rotate(const Vec3& _rvec) const | |||
| { | |||
| return rotate(Affine3f(_rvec).rotation()); | |||
| } | |||
| template<typename T> inline | |||
| cv::Affine3<T> cv::Affine3<T>::translate(const Vec3& t) const | |||
| { | |||
| Mat4 m = matrix; | |||
| m.val[ 3] += t[0]; | |||
| m.val[ 7] += t[1]; | |||
| m.val[11] += t[2]; | |||
| return m; | |||
| } | |||
//! Composition: returns affine * (*this), i.e. the transform that applies
//! *this first and then affine (rotate by its rotation, then add its translation).
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::concatenate(const Affine3<T>& affine) const
{
return (*this).rotate(affine.rotation()).translate(affine.translation());
}
//! Implicit element-type conversion (e.g. between Affine3f and Affine3d).
template<typename T> template <typename Y> inline
cv::Affine3<T>::operator Affine3<Y>() const
{
return Affine3<Y>(matrix);
}
//! Explicit element-type conversion; same effect as the conversion operator above.
template<typename T> template <typename Y> inline
cv::Affine3<Y> cv::Affine3<T>::cast() const
{
return Affine3<Y>(matrix);
}
//! V = A * B: the combined transform applies B first, then A
//! (implemented as B.concatenate(A)).
template<typename T> inline
cv::Affine3<T> cv::operator*(const cv::Affine3<T>& affine1, const cv::Affine3<T>& affine2)
{
return affine2.concatenate(affine1);
}
| template<typename T, typename V> inline | |||
| V cv::operator*(const cv::Affine3<T>& affine, const V& v) | |||
| { | |||
| const typename Affine3<T>::Mat4& m = affine.matrix; | |||
| V r; | |||
| r.x = m.val[0] * v.x + m.val[1] * v.y + m.val[ 2] * v.z + m.val[ 3]; | |||
| r.y = m.val[4] * v.x + m.val[5] * v.y + m.val[ 6] * v.z + m.val[ 7]; | |||
| r.z = m.val[8] * v.x + m.val[9] * v.y + m.val[10] * v.z + m.val[11]; | |||
| return r; | |||
| } | |||
| static inline | |||
| cv::Vec3f cv::operator*(const cv::Affine3f& affine, const cv::Vec3f& v) | |||
| { | |||
| const cv::Matx44f& m = affine.matrix; | |||
| cv::Vec3f r; | |||
| r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3]; | |||
| r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7]; | |||
| r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11]; | |||
| return r; | |||
| } | |||
| static inline | |||
| cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v) | |||
| { | |||
| const cv::Matx44d& m = affine.matrix; | |||
| cv::Vec3d r; | |||
| r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3]; | |||
| r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7]; | |||
| r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11]; | |||
| return r; | |||
| } | |||
| #if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H | |||
//! Construct from a row-major Eigen affine transform. The row-major storage
//! matches the Matx44 layout, so the 4x4 data is copied directly.
template<typename T> inline
cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine)
{
cv::Mat(4, 4, cv::traits::Type<T>::value, affine.matrix().data()).copyTo(matrix);
}
//! Construct from a default (column-major) Eigen affine transform by first
//! converting it to row-major storage, then copying as above.
template<typename T> inline
cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine)
{
Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> a = affine;
cv::Mat(4, 4, cv::traits::Type<T>::value, a.matrix().data()).copyTo(matrix);
}
//! Convert to a row-major Eigen affine transform.
template<typename T> inline
cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const
{
Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> r;
// Wrap the Eigen storage in a Mat header and copy our matrix into it.
cv::Mat hdr(4, 4, cv::traits::Type<T>::value, r.matrix().data());
cv::Mat(matrix, false).copyTo(hdr);
return r;
}
//! Convert to a default (column-major) Eigen affine transform; delegates to
//! the row-major conversion and lets Eigen re-layout the storage.
template<typename T> inline
cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine>() const
{
return this->operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>();
}
| #endif /* defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H */ | |||
| //! @endcond | |||
| #endif /* __cplusplus */ | |||
| #endif /* OPENCV_CORE_AFFINE3_HPP */ | |||
| @@ -0,0 +1,105 @@ | |||
| // This file is part of OpenCV project. | |||
| // It is subject to the license terms in the LICENSE file found in the top-level directory | |||
| // of this distribution and at http://opencv.org/license.html. | |||
| #ifndef OPENCV_CORE_ASYNC_HPP | |||
| #define OPENCV_CORE_ASYNC_HPP | |||
| #include <opencv2/core/mat.hpp> | |||
| #ifdef CV_CXX11 | |||
| //#include <future> | |||
| #include <chrono> | |||
| #endif | |||
| namespace cv { | |||
| /** @addtogroup core_async | |||
| @{ | |||
| */ | |||
| /** @brief Returns result of asynchronous operations | |||
| Object has attached asynchronous state. | |||
| Assignment operator doesn't clone asynchronous state (it is shared between all instances). | |||
| Result can be fetched via get() method only once. | |||
| */ | |||
| class CV_EXPORTS_W AsyncArray | |||
| { | |||
| public: | |||
| ~AsyncArray() CV_NOEXCEPT; | |||
| CV_WRAP AsyncArray() CV_NOEXCEPT; | |||
| AsyncArray(const AsyncArray& o) CV_NOEXCEPT; | |||
| AsyncArray& operator=(const AsyncArray& o) CV_NOEXCEPT; | |||
| CV_WRAP void release() CV_NOEXCEPT; | |||
| /** Fetch the result. | |||
| @param[out] dst destination array | |||
| Waits for result until container has valid result. | |||
| Throws exception if exception was stored as a result. | |||
| Throws exception on invalid container state. | |||
| @note Result or stored exception can be fetched only once. | |||
| */ | |||
| CV_WRAP void get(OutputArray dst) const; | |||
| /** Retrieving the result with timeout | |||
| @param[out] dst destination array | |||
| @param[in] timeoutNs timeout in nanoseconds, -1 for infinite wait | |||
| @returns true if result is ready, false if the timeout has expired | |||
| @note Result or stored exception can be fetched only once. | |||
| */ | |||
| bool get(OutputArray dst, int64 timeoutNs) const; | |||
| CV_WRAP inline | |||
| bool get(OutputArray dst, double timeoutNs) const { return get(dst, (int64)timeoutNs); } | |||
| bool wait_for(int64 timeoutNs) const; | |||
| CV_WRAP inline | |||
| bool wait_for(double timeoutNs) const { return wait_for((int64)timeoutNs); } | |||
| CV_WRAP bool valid() const CV_NOEXCEPT; | |||
| #ifdef CV_CXX11 | |||
| inline AsyncArray(AsyncArray&& o) { p = o.p; o.p = NULL; } | |||
| inline AsyncArray& operator=(AsyncArray&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; } | |||
| template<typename _Rep, typename _Period> | |||
| inline bool get(OutputArray dst, const std::chrono::duration<_Rep, _Period>& timeout) | |||
| { | |||
| return get(dst, (int64)(std::chrono::nanoseconds(timeout).count())); | |||
| } | |||
| template<typename _Rep, typename _Period> | |||
| inline bool wait_for(const std::chrono::duration<_Rep, _Period>& timeout) | |||
| { | |||
| return wait_for((int64)(std::chrono::nanoseconds(timeout).count())); | |||
| } | |||
| #if 0 | |||
| std::future<Mat> getFutureMat() const; | |||
| std::future<UMat> getFutureUMat() const; | |||
| #endif | |||
| #endif | |||
| // PImpl | |||
| struct Impl; friend struct Impl; | |||
| inline void* _getImpl() const CV_NOEXCEPT { return p; } | |||
| protected: | |||
| Impl* p; | |||
| }; | |||
| //! @} | |||
| } // namespace | |||
| #endif // OPENCV_CORE_ASYNC_HPP | |||
| @@ -0,0 +1,654 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Copyright (C) 2014, Itseez Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CORE_BASE_HPP | |||
| #define OPENCV_CORE_BASE_HPP | |||
| #ifndef __cplusplus | |||
| # error base.hpp header must be compiled as C++ | |||
| #endif | |||
| #include "opencv2/opencv_modules.hpp" | |||
| #include <climits> | |||
| #include <algorithm> | |||
| #include "opencv2/core/cvdef.h" | |||
| #include "opencv2/core/cvstd.hpp" | |||
| namespace cv | |||
| { | |||
| //! @addtogroup core_utils | |||
| //! @{ | |||
namespace Error {
//! error codes
//! StsOk (0) means success; all failure codes are negative. The values fall
//! in two historical ranges, -1..-31 and -201..-223 (the numbering gap is
//! intentional and must be preserved for backward compatibility).
enum Code {
StsOk= 0, //!< everything is ok
StsBackTrace= -1, //!< pseudo error for back trace
StsError= -2, //!< unknown /unspecified error
StsInternal= -3, //!< internal error (bad state)
StsNoMem= -4, //!< insufficient memory
StsBadArg= -5, //!< function arg/param is bad
StsBadFunc= -6, //!< unsupported function
StsNoConv= -7, //!< iteration didn't converge
StsAutoTrace= -8, //!< tracing
HeaderIsNull= -9, //!< image header is NULL
BadImageSize= -10, //!< image size is invalid
BadOffset= -11, //!< offset is invalid
BadDataPtr= -12, //!<
BadStep= -13, //!< image step is wrong, this may happen for a non-continuous matrix.
BadModelOrChSeq= -14, //!<
BadNumChannels= -15, //!< bad number of channels, for example, some functions accept only single channel matrices.
BadNumChannel1U= -16, //!<
BadDepth= -17, //!< input image depth is not supported by the function
BadAlphaChannel= -18, //!<
BadOrder= -19, //!< number of dimensions is out of range
BadOrigin= -20, //!< incorrect input origin
BadAlign= -21, //!< incorrect input align
BadCallBack= -22, //!<
BadTileSize= -23, //!<
BadCOI= -24, //!< input COI is not supported
BadROISize= -25, //!< incorrect input roi
MaskIsTiled= -26, //!<
StsNullPtr= -27, //!< null pointer
StsVecLengthErr= -28, //!< incorrect vector length
StsFilterStructContentErr= -29, //!< incorrect filter structure content
StsKernelStructContentErr= -30, //!< incorrect transform kernel content
StsFilterOffsetErr= -31, //!< incorrect filter offset value
StsBadSize= -201, //!< the input/output structure size is incorrect
StsDivByZero= -202, //!< division by zero
StsInplaceNotSupported= -203, //!< in-place operation is not supported
StsObjectNotFound= -204, //!< request can't be completed
StsUnmatchedFormats= -205, //!< formats of input/output arrays differ
StsBadFlag= -206, //!< flag is wrong or not supported
StsBadPoint= -207, //!< bad CvPoint
StsBadMask= -208, //!< bad format of mask (neither 8uC1 nor 8sC1)
StsUnmatchedSizes= -209, //!< sizes of input/output structures do not match
StsUnsupportedFormat= -210, //!< the data format/type is not supported by the function
StsOutOfRange= -211, //!< some of parameters are out of range
StsParseError= -212, //!< invalid syntax/structure of the parsed file
StsNotImplemented= -213, //!< the requested function/feature is not implemented
StsBadMemBlock= -214, //!< an allocated block has been corrupted
StsAssert= -215, //!< assertion failed
GpuNotSupported= -216, //!< no CUDA support
GpuApiCallError= -217, //!< GPU API call error
OpenGlNotSupported= -218, //!< no OpenGL support
OpenGlApiCallError= -219, //!< OpenGL API call error
OpenCLApiCallError= -220, //!< OpenCL API call error
OpenCLDoubleNotSupported= -221, //!< OpenCL double precision is not supported
OpenCLInitError= -222, //!< OpenCL initialization error
OpenCLNoAMDBlasFft= -223 //!< AMD BLAS/FFT OpenCL support is unavailable
};
} //Error
| //! @} core_utils | |||
| //! @addtogroup core_array | |||
| //! @{ | |||
//! matrix decomposition types
//! All values except DECOMP_NORMAL are mutually exclusive; DECOMP_NORMAL is a
//! modifier bit that can be combined with any of them (see its description).
enum DecompTypes {
/** Gaussian elimination with the optimal pivot element chosen. */
DECOMP_LU = 0,
/** singular value decomposition (SVD) method; the system can be over-defined and/or the matrix
src1 can be singular */
DECOMP_SVD = 1,
/** eigenvalue decomposition; the matrix src1 must be symmetrical */
DECOMP_EIG = 2,
/** Cholesky \f$LL^T\f$ factorization; the matrix src1 must be symmetrical and positively
defined */
DECOMP_CHOLESKY = 3,
/** QR factorization; the system can be over-defined and/or the matrix src1 can be singular */
DECOMP_QR = 4,
/** while all the previous flags are mutually exclusive, this flag can be used together with
any of the previous; it means that the normal equations
\f$\texttt{src1}^T\cdot\texttt{src1}\cdot\texttt{dst}=\texttt{src1}^T\texttt{src2}\f$ are
solved instead of the original system
\f$\texttt{src1}\cdot\texttt{dst}=\texttt{src2}\f$ */
DECOMP_NORMAL = 16
};
/** norm types
src1 and src2 denote input arrays.
The norm kinds (NORM_INF .. NORM_HAMMING2) are mutually exclusive and can be
extracted from a combined value with NORM_TYPE_MASK; NORM_RELATIVE and
NORM_MINMAX are flag bits layered on top.
*/
enum NormTypes {
/**
\f[
norm = \forkthree
{\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) }
{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) }
{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_INF}\) }
\f]
*/
NORM_INF = 1,
/**
\f[
norm = \forkthree
{\| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\)}
{ \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\) }
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L1}\) }
\f]*/
NORM_L1 = 2,
/**
\f[
norm = \forkthree
{ \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) }
{ \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) }
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) }
\f]
*/
NORM_L2 = 4,
/**
\f[
norm = \forkthree
{ \| \texttt{src1} \| _{L_2} ^{2} = \sum_I \texttt{src1}(I)^2} {if \(\texttt{normType} = \texttt{NORM_L2SQR}\)}
{ \| \texttt{src1} - \texttt{src2} \| _{L_2} ^{2} = \sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2 }{if \(\texttt{normType} = \texttt{NORM_L2SQR}\) }
{ \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2SQR}\) }
\f]
*/
NORM_L2SQR = 5,
/**
In the case of one input array, calculates the Hamming distance of the array from zero,
In the case of two input arrays, calculates the Hamming distance between the arrays.
*/
NORM_HAMMING = 6,
/**
Similar to NORM_HAMMING, but in the calculation, each two bits of the input sequence will
be added and treated as a single bit to be used in the same calculation as NORM_HAMMING.
*/
NORM_HAMMING2 = 7,
NORM_TYPE_MASK = 7, //!< bit-mask which can be used to separate norm type from norm flags
NORM_RELATIVE = 8, //!< flag: divide by the norm of src2 (see the formulas above)
NORM_MINMAX = 32 //!< flag
};
//! comparison types
//! Each value names the element-wise predicate applied between src1 and src2.
enum CmpTypes { CMP_EQ = 0, //!< src1 is equal to src2.
CMP_GT = 1, //!< src1 is greater than src2.
CMP_GE = 2, //!< src1 is greater than or equal to src2.
CMP_LT = 3, //!< src1 is less than src2.
CMP_LE = 4, //!< src1 is less than or equal to src2.
CMP_NE = 5 //!< src1 is unequal to src2.
};
//! generalized matrix multiplication flags
//! The values are distinct bits, so they can be OR-ed to transpose several operands.
enum GemmFlags { GEMM_1_T = 1, //!< transposes src1
GEMM_2_T = 2, //!< transposes src2
GEMM_3_T = 4 //!< transposes src3
};
// DFT/DCT transformation flags. The values are distinct bits and can be
// combined; the DCT_* names are aliases reusing the corresponding DFT bits.
enum DftFlags {
/** performs an inverse 1D or 2D transform instead of the default forward
transform. */
DFT_INVERSE = 1,
/** scales the result: divide it by the number of array elements. Normally, it is
combined with DFT_INVERSE. */
DFT_SCALE = 2,
/** performs a forward or inverse transform of every individual row of the input
matrix; this flag enables you to transform multiple vectors simultaneously and can be used to
decrease the overhead (which is sometimes several times larger than the processing itself) to
perform 3D and higher-dimensional transformations and so forth.*/
DFT_ROWS = 4,
/** performs a forward transformation of 1D or 2D real array; the result,
though being a complex array, has complex-conjugate symmetry (*CCS*, see the function
description below for details), and such an array can be packed into a real array of the same
size as input, which is the fastest option and which is what the function does by default;
however, you may wish to get a full complex array (for simpler spectrum analysis, and so on) -
pass the flag to enable the function to produce a full-size complex output array. */
DFT_COMPLEX_OUTPUT = 16,
/** performs an inverse transformation of a 1D or 2D complex array; the
result is normally a complex array of the same size, however, if the input array has
conjugate-complex symmetry (for example, it is a result of forward transformation with
DFT_COMPLEX_OUTPUT flag), the output is a real array; while the function itself does not
check whether the input is symmetrical or not, you can pass the flag and then the function
will assume the symmetry and produce the real output array (note that when the input is packed
into a real array and inverse transformation is executed, the function treats the input as a
packed complex-conjugate symmetrical array, and the output will also be a real array). */
DFT_REAL_OUTPUT = 32,
/** specifies that input is complex input. If this flag is set, the input must have 2 channels.
On the other hand, for backwards compatibility reason, if input has 2 channels, input is
already considered complex. */
DFT_COMPLEX_INPUT = 64,
/** performs an inverse 1D or 2D transform instead of the default forward transform. */
DCT_INVERSE = DFT_INVERSE,
/** performs a forward or inverse transform of every individual row of the input
matrix. This flag enables you to transform multiple vectors simultaneously and can be used to
decrease the overhead (which is sometimes several times larger than the processing itself) to
perform 3D and higher-dimensional transforms and so forth.*/
DCT_ROWS = DFT_ROWS
};
//! Various border types, image boundaries are denoted with `|`
//! @see borderInterpolate, copyMakeBorder
//! Values 0..5 select an extrapolation mode; BORDER_ISOLATED (16) is an
//! out-of-band flag bit rather than a mode of its own.
enum BorderTypes {
BORDER_CONSTANT = 0, //!< `iiiiii|abcdefgh|iiiiiii` with some specified `i`
BORDER_REPLICATE = 1, //!< `aaaaaa|abcdefgh|hhhhhhh`
BORDER_REFLECT = 2, //!< `fedcba|abcdefgh|hgfedcb`
BORDER_WRAP = 3, //!< `cdefgh|abcdefgh|abcdefg`
BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno`
BORDER_REFLECT101 = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
BORDER_DEFAULT = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
BORDER_ISOLATED = 16 //!< do not look outside of ROI
};
| //! @} core_array | |||
| //! @addtogroup core_utils | |||
| //! @{ | |||
| /*! @brief Signals an error and raises the exception. | |||
| By default the function prints information about the error to stderr, | |||
| then it either stops if setBreakOnError() had been called before or raises the exception. | |||
| It is possible to alternate error processing by using redirectError(). | |||
| @param _code - error code (Error::Code) | |||
| @param _err - error description | |||
| @param _func - function name. Available only when the compiler supports getting it | |||
| @param _file - source file name where the error has occurred | |||
| @param _line - line number in the source file where the error has occurred | |||
| @see CV_Error, CV_Error_, CV_Assert, CV_DbgAssert | |||
| */ | |||
| CV_EXPORTS CV_NORETURN void error(int _code, const String& _err, const char* _func, const char* _file, int _line); | |||
#ifdef CV_STATIC_ANALYSIS
// In practice, some macro are not processed correctly (noreturn is not detected).
// We need to use simplified definition for them.
#define CV_Error(code, msg) do { (void)(code); (void)(msg); abort(); } while (0)
#define CV_Error_(code, args) do { (void)(code); (void)(cv::format args); abort(); } while (0)
#define CV_Assert( expr ) do { if (!(expr)) abort(); } while (0)
#else // CV_STATIC_ANALYSIS
/** @brief Call the error handler.
Currently, the error handler prints the error code and the error message to the standard
error stream `stderr`. In the Debug configuration, it then provokes memory access violation, so that
the execution stack and all the parameters can be analyzed by the debugger. In the Release
configuration, the exception is thrown.
@param code one of Error::Code
@param msg error message
*/
#define CV_Error( code, msg ) cv::error( code, msg, CV_Func, __FILE__, __LINE__ )
/** @brief Call the error handler.
This macro can be used to construct an error message on-fly to include some dynamic information,
for example:
@code
// note the extra parentheses around the formatted text message
CV_Error_(Error::StsOutOfRange,
("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
@endcode
@param code one of Error::Code
@param args printf-like formatted error message in parentheses
*/
#define CV_Error_( code, args ) cv::error( code, cv::format args, CV_Func, __FILE__, __LINE__ )
/** @brief Checks a condition at runtime and throws exception if it fails
The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. If it is 0, the macros
raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release
configurations while CV_DbgAssert is only retained in the Debug configuration.
*/
// The do { ... } while(0) wrapper makes the macro a single statement, so it
// composes safely with unbraced if/else.
#define CV_Assert( expr ) do { if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ); } while(0)
#endif // CV_STATIC_ANALYSIS
//! @cond IGNORED
#if !defined(__OPENCV_BUILD) // TODO: backward compatibility only
#ifndef CV_ErrorNoReturn
#define CV_ErrorNoReturn CV_Error
#endif
#ifndef CV_ErrorNoReturn_
#define CV_ErrorNoReturn_ CV_Error_
#endif
#endif
// CV_Assert_N(e1, e2, ...) expands to one CV_Assert per argument:
// __CV_VA_NUM_ARGS counts the arguments and __CV_CAT pastes the name of the
// matching CV_Assert_<count> dispatcher below (up to 10 expressions).
#define CV_Assert_1 CV_Assert
#define CV_Assert_2( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_1( __VA_ARGS__ ))
#define CV_Assert_3( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_2( __VA_ARGS__ ))
#define CV_Assert_4( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_3( __VA_ARGS__ ))
#define CV_Assert_5( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_4( __VA_ARGS__ ))
#define CV_Assert_6( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_5( __VA_ARGS__ ))
#define CV_Assert_7( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_6( __VA_ARGS__ ))
#define CV_Assert_8( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_7( __VA_ARGS__ ))
#define CV_Assert_9( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_8( __VA_ARGS__ ))
#define CV_Assert_10( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_9( __VA_ARGS__ ))
#define CV_Assert_N(...) do { __CV_EXPAND(__CV_CAT(CV_Assert_, __CV_VA_NUM_ARGS(__VA_ARGS__)) (__VA_ARGS__)); } while(0)
//! @endcond
#if defined _DEBUG || defined CV_STATIC_ANALYSIS
# define CV_DbgAssert(expr) CV_Assert(expr)
#else
/** replaced with CV_Assert(expr) in Debug configuration */
# define CV_DbgAssert(expr)
#endif
| /* | |||
| * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor | |||
| * bit count of A exclusive XOR'ed with B | |||
| */ | |||
// Functor computing the Hamming distance between two equal-length byte
// strings: the number of differing bits, i.e. the bit count of a XOR b.
struct CV_EXPORTS Hamming
{
static const NormTypes normType = NORM_HAMMING; // associated norm type
typedef unsigned char ValueType; // element type of the compared buffers
typedef int ResultType; // type of the returned distance
/** this will count the bits in a ^ b
*/
ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const;
};
typedef Hamming HammingLUT; // backward-compatibility alias
| /////////////////////////////////// inline norms //////////////////////////////////// | |||
// Absolute-value helpers used by the norm functions below. The narrow
// integer overloads return int (the promoted type), and the unsigned
// overloads simply return the value unchanged.
template<typename _Tp> inline _Tp cv_abs(_Tp x) { return std::abs(x); }
inline int cv_abs(uchar x) { return x; }
inline int cv_abs(schar x) { return std::abs(x); }
inline int cv_abs(ushort x) { return x; }
inline int cv_abs(short x) { return std::abs(x); }
//! Squared L2 norm of an n-element array: sum of a[i]^2, accumulated in _AccTp.
template<typename _Tp, typename _AccTp> static inline
_AccTp normL2Sqr(const _Tp* a, int n)
{
    _AccTp acc = 0;
    int idx = 0;
#if CV_ENABLE_UNROLLED
    // Manually unrolled by 4 to expose instruction-level parallelism.
    for (; idx + 4 <= n; idx += 4)
    {
        _AccTp e0 = a[idx], e1 = a[idx+1], e2 = a[idx+2], e3 = a[idx+3];
        acc += e0*e0 + e1*e1 + e2*e2 + e3*e3;
    }
#endif
    while (idx < n)
    {
        _AccTp e = a[idx++];
        acc += e*e;
    }
    return acc;
}
//! L1 norm of an n-element array: sum of |a[i]|, accumulated in _AccTp.
template<typename _Tp, typename _AccTp> static inline
_AccTp normL1(const _Tp* a, int n)
{
    _AccTp acc = 0;
    int idx = 0;
#if CV_ENABLE_UNROLLED
    // Manually unrolled by 4.
    for (; idx + 4 <= n; idx += 4)
    {
        acc += (_AccTp)cv_abs(a[idx]) + (_AccTp)cv_abs(a[idx+1]) +
               (_AccTp)cv_abs(a[idx+2]) + (_AccTp)cv_abs(a[idx+3]);
    }
#endif
    while (idx < n)
        acc += cv_abs(a[idx++]);
    return acc;
}
/** @brief Infinity (Chebyshev) norm of an array: max(|a[i]|) over [0, n).
Returns 0 for n <= 0. Uses cv_abs so small integer types are widened first.
*/
template<typename _Tp, typename _AccTp> static inline
_AccTp normInf(const _Tp* a, int n)
{
    _AccTp peak = 0;
    for( int k = 0; k < n; k++ )
    {
        // Equivalent to peak = std::max(peak, |a[k]|).
        const _AccTp mag = (_AccTp)cv_abs(a[k]);
        if( peak < mag )
            peak = mag;
    }
    return peak;
}
/** @brief Squared L2 distance between two arrays.
Accumulates sum((a[i]-b[i])^2) for i in [0, n); differences are converted to
the (typically wider) accumulator type _AccTp before squaring. Returns 0 for n <= 0.
*/
template<typename _Tp, typename _AccTp> static inline
_AccTp normL2Sqr(const _Tp* a, const _Tp* b, int n)
{
    _AccTp acc = 0;
    int k = 0;
#if CV_ENABLE_UNROLLED
    // 4-way unrolled body; the grouped sum keeps the original accumulation order.
    for( ; k <= n - 4; k += 4 )
    {
        _AccTp d0 = _AccTp(a[k] - b[k]);
        _AccTp d1 = _AccTp(a[k+1] - b[k+1]);
        _AccTp d2 = _AccTp(a[k+2] - b[k+2]);
        _AccTp d3 = _AccTp(a[k+3] - b[k+3]);
        acc += d0*d0 + d1*d1 + d2*d2 + d3*d3;
    }
#endif
    // Tail elements.
    while( k < n )
    {
        const _AccTp d = _AccTp(a[k] - b[k]);
        acc += d*d;
        k++;
    }
    return acc;
}
//! float specialization of the squared L2 distance: sum((a[i]-b[i])^2) in
//! single precision, with no unrolling. Returns 0.f for n <= 0.
static inline float normL2Sqr(const float* a, const float* b, int n)
{
    float acc = 0.f;
    for( int k = 0; k < n; k++ )
    {
        const float d = a[k] - b[k];
        acc += d*d;
    }
    return acc;
}
/** @brief L1 distance between two arrays.
Accumulates sum(|a[i]-b[i]|) for i in [0, n); each difference is converted to
_AccTp before std::abs, so the widening happens prior to taking the magnitude.
Returns 0 for n <= 0.
*/
template<typename _Tp, typename _AccTp> static inline
_AccTp normL1(const _Tp* a, const _Tp* b, int n)
{
    _AccTp acc = 0;
    int k = 0;
#if CV_ENABLE_UNROLLED
    // 4-way unrolled body; one grouped += preserves accumulation order.
    for( ; k <= n - 4; k += 4 )
    {
        _AccTp d0 = _AccTp(a[k] - b[k]), d1 = _AccTp(a[k+1] - b[k+1]);
        _AccTp d2 = _AccTp(a[k+2] - b[k+2]), d3 = _AccTp(a[k+3] - b[k+3]);
        acc += std::abs(d0) + std::abs(d1) + std::abs(d2) + std::abs(d3);
    }
#endif
    // Tail elements.
    while( k < n )
    {
        acc += std::abs(_AccTp(a[k] - b[k]));
        k++;
    }
    return acc;
}
//! float specialization of the L1 distance: sum(|a[i]-b[i]|) in single
//! precision, no unrolling. Returns 0.f for n <= 0.
inline float normL1(const float* a, const float* b, int n)
{
    float acc = 0.f;
    for( int k = 0; k < n; k++ )
        acc += std::abs(a[k] - b[k]);
    return acc;
}
| inline int normL1(const uchar* a, const uchar* b, int n) | |||
| { | |||
| int s = 0; | |||
| for( int i = 0; i < n; i++ ) | |||
| { | |||
| s += std::abs(a[i] - b[i]); | |||
| } | |||
| return s; | |||
| } | |||
/** @brief Infinity (Chebyshev) distance between two arrays: max(|a[i]-b[i]|)
over [0, n). The difference is converted to _AccTp before std::abs.
Returns 0 for n <= 0.
*/
template<typename _Tp, typename _AccTp> static inline
_AccTp normInf(const _Tp* a, const _Tp* b, int n)
{
    _AccTp peak = 0;
    for( int k = 0; k < n; k++ )
    {
        const _AccTp mag = std::abs(_AccTp(a[k] - b[k]));
        peak = std::max(peak, mag);
    }
    return peak;
}
| /** @brief Computes the cube root of an argument. | |||
| The function cubeRoot computes \f$\sqrt[3]{\texttt{val}}\f$. Negative arguments are handled correctly. | |||
| NaN and Inf are not handled. The accuracy approaches the maximum possible accuracy for | |||
| single-precision data. | |||
| @param val A function argument. | |||
| */ | |||
| CV_EXPORTS_W float cubeRoot(float val); | |||
| /** @brief Calculates the angle of a 2D vector in degrees. | |||
| The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured | |||
| in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees. | |||
| @param x x-coordinate of the vector. | |||
| @param y y-coordinate of the vector. | |||
| */ | |||
| CV_EXPORTS_W float fastAtan2(float y, float x); | |||
| /** proxy for hal::LU */ | |||
| CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n); | |||
| /** proxy for hal::LU */ | |||
| CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n); | |||
| /** proxy for hal::Cholesky */ | |||
| CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n); | |||
| /** proxy for hal::Cholesky */ | |||
| CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n); | |||
| ////////////////// forward declarations for important OpenCV types ////////////////// | |||
| //! @cond IGNORED | |||
| template<typename _Tp, int cn> class Vec; | |||
| template<typename _Tp, int m, int n> class Matx; | |||
| template<typename _Tp> class Complex; | |||
| template<typename _Tp> class Point_; | |||
| template<typename _Tp> class Point3_; | |||
| template<typename _Tp> class Size_; | |||
| template<typename _Tp> class Rect_; | |||
| template<typename _Tp> class Scalar_; | |||
| class CV_EXPORTS RotatedRect; | |||
| class CV_EXPORTS Range; | |||
| class CV_EXPORTS TermCriteria; | |||
| class CV_EXPORTS KeyPoint; | |||
| class CV_EXPORTS DMatch; | |||
| class CV_EXPORTS RNG; | |||
| class CV_EXPORTS Mat; | |||
| class CV_EXPORTS MatExpr; | |||
| class CV_EXPORTS UMat; | |||
| class CV_EXPORTS SparseMat; | |||
| typedef Mat MatND; | |||
| template<typename _Tp> class Mat_; | |||
| template<typename _Tp> class SparseMat_; | |||
| class CV_EXPORTS MatConstIterator; | |||
| class CV_EXPORTS SparseMatIterator; | |||
| class CV_EXPORTS SparseMatConstIterator; | |||
| template<typename _Tp> class MatIterator_; | |||
| template<typename _Tp> class MatConstIterator_; | |||
| template<typename _Tp> class SparseMatIterator_; | |||
| template<typename _Tp> class SparseMatConstIterator_; | |||
| namespace ogl | |||
| { | |||
| class CV_EXPORTS Buffer; | |||
| class CV_EXPORTS Texture2D; | |||
| class CV_EXPORTS Arrays; | |||
| } | |||
| namespace cuda | |||
| { | |||
| class CV_EXPORTS GpuMat; | |||
| class CV_EXPORTS HostMem; | |||
| class CV_EXPORTS Stream; | |||
| class CV_EXPORTS Event; | |||
| } | |||
| namespace cudev | |||
| { | |||
| template <typename _Tp> class GpuMat_; | |||
| } | |||
//! Runtime control of the Intel IPP acceleration layer.
namespace ipp
{
//! Feature set reported by the IPP runtime (presumably a bitmask — see the IPP docs).
CV_EXPORTS unsigned long long getIppFeatures();
//! Records the last IPP call status together with its call site.
CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
                       int line = 0);
//! Returns the status last recorded by setIppStatus().
CV_EXPORTS int getIppStatus();
//! Returns a textual description of where the last IPP status was recorded.
CV_EXPORTS String getIppErrorLocation();
//! Whether IPP acceleration is currently enabled.
CV_EXPORTS_W bool useIPP();
//! Enables or disables IPP acceleration at runtime.
CV_EXPORTS_W void setUseIPP(bool flag);
//! Version string of the linked IPP library.
CV_EXPORTS_W String getIppVersion();
// IPP Not-Exact mode. This function may force use of IPP then both IPP and OpenCV provide proper results
// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests.
CV_EXPORTS_W bool useIPP_NotExact();
CV_EXPORTS_W void setUseIPP_NotExact(bool flag);
#ifndef DISABLE_OPENCV_3_COMPATIBILITY
//! Aliases kept for OpenCV 3.x source compatibility; forward to the *_NotExact names.
static inline bool useIPP_NE() { return useIPP_NotExact(); }
static inline void setUseIPP_NE(bool flag) { setUseIPP_NotExact(flag); }
#endif
} // ipp
| //! @endcond | |||
| //! @} core_utils | |||
| } // cv | |||
| #include "opencv2/core/neon_utils.hpp" | |||
| #include "opencv2/core/vsx_utils.hpp" | |||
| #include "opencv2/core/check.hpp" | |||
| #endif //OPENCV_CORE_BASE_HPP | |||
| @@ -0,0 +1,87 @@ | |||
| // This file is part of OpenCV project. | |||
| // It is subject to the license terms in the LICENSE file found in the top-level directory | |||
| // of this distribution and at http://opencv.org/license.html. | |||
| #ifndef OPENCV_CORE_BINDINGS_UTILS_HPP | |||
| #define OPENCV_CORE_BINDINGS_UTILS_HPP | |||
| #include <opencv2/core/async.hpp> | |||
| #include <opencv2/core/detail/async_promise.hpp> | |||
| namespace cv { namespace utils { | |||
| //! @addtogroup core_utils | |||
| //! @{ | |||
| CV_EXPORTS_W String dumpInputArray(InputArray argument); | |||
| CV_EXPORTS_W String dumpInputArrayOfArrays(InputArrayOfArrays argument); | |||
| CV_EXPORTS_W String dumpInputOutputArray(InputOutputArray argument); | |||
| CV_EXPORTS_W String dumpInputOutputArrayOfArrays(InputOutputArrayOfArrays argument); | |||
| CV_WRAP static inline | |||
| String dumpBool(bool argument) | |||
| { | |||
| return (argument) ? String("Bool: True") : String("Bool: False"); | |||
| } | |||
//! Returns "Int: <value>" — echo helper for binding argument-conversion checks.
CV_WRAP static inline
String dumpInt(int argument)
{
    return cv::format("Int: %d", argument);
}
//! Returns "size_t: <value>". Formats through an ostringstream (opened in
//! append mode via std::ios::ate) since printf-style size_t formatting is not
//! portable across platforms.
CV_WRAP static inline
String dumpSizeT(size_t argument)
{
    std::ostringstream oss("size_t: ", std::ios::ate);
    oss << argument;
    return oss.str();
}
//! Returns "Float: <value>" rounded to two decimals.
CV_WRAP static inline
String dumpFloat(float argument)
{
    return cv::format("Float: %.2f", argument);
}
//! Returns "Double: <value>" rounded to two decimals.
CV_WRAP static inline
String dumpDouble(double argument)
{
    return cv::format("Double: %.2f", argument);
}
//! Returns "String: <value>". NOTE(review): %s with a NULL argument is
//! undefined — callers presumably always pass a valid C string; confirm.
CV_WRAP static inline
String dumpCString(const char* argument)
{
    return cv::format("String: %s", argument);
}
//! Fulfils a promise immediately with `argument` and returns the already-ready
//! AsyncArray, letting callers exercise the async result path synchronously.
CV_WRAP static inline
AsyncArray testAsyncArray(InputArray argument)
{
    AsyncPromise p;
    p.setValue(argument);
    return p.getArrayResult();
}
//! Returns an AsyncArray whose stored result is an exception: CV_Error throws
//! a cv::Exception, which is caught and placed into the promise so the caller
//! observes the failure when it fetches the result.
CV_WRAP static inline
AsyncArray testAsyncException()
{
    AsyncPromise p;
    try
    {
        // Deliberately raised; the status code StsOk is irrelevant to the test.
        CV_Error(Error::StsOk, "Test: Generated async error");
    }
    catch (const cv::Exception& e)
    {
        p.setException(e);
    }
    return p.getArrayResult();
}
| //! @} | |||
| }} // namespace | |||
| #endif // OPENCV_CORE_BINDINGS_UTILS_HPP | |||
| @@ -0,0 +1,40 @@ | |||
| // This file is part of OpenCV project. | |||
| // It is subject to the license terms in the LICENSE file found in the top-level directory | |||
| // of this distribution and at http://opencv.org/license.html. | |||
| // | |||
| // Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. | |||
| #ifndef OPENCV_CORE_BUFFER_POOL_HPP | |||
| #define OPENCV_CORE_BUFFER_POOL_HPP | |||
| #ifdef _MSC_VER | |||
| #pragma warning(push) | |||
| #pragma warning(disable: 4265) | |||
| #endif | |||
| namespace cv | |||
| { | |||
| //! @addtogroup core | |||
| //! @{ | |||
//! Abstract control interface over a buffer pool: query and cap the amount of
//! memory the pool keeps reserved, and release it on demand.
class BufferPoolController
{
protected:
    //! Protected non-virtual destructor: implementations are never destroyed
    //! through a BufferPoolController pointer.
    ~BufferPoolController() { }
public:
    //! Number of bytes the pool currently keeps reserved.
    virtual size_t getReservedSize() const = 0;
    //! Current upper bound (in bytes) on the reserved size.
    virtual size_t getMaxReservedSize() const = 0;
    //! Sets the upper bound (in bytes) on the reserved size.
    virtual void setMaxReservedSize(size_t size) = 0;
    //! Releases every buffer the pool currently holds.
    virtual void freeAllReservedBuffers() = 0;
};
| //! @} | |||
| } | |||
| #ifdef _MSC_VER | |||
| #pragma warning(pop) | |||
| #endif | |||
| #endif // OPENCV_CORE_BUFFER_POOL_HPP | |||
| @@ -0,0 +1,160 @@ | |||
| // This file is part of OpenCV project. | |||
| // It is subject to the license terms in the LICENSE file found in the top-level directory | |||
| // of this distribution and at http://opencv.org/license.html. | |||
| #ifndef OPENCV_CORE_CHECK_HPP | |||
| #define OPENCV_CORE_CHECK_HPP | |||
| #include <opencv2/core/base.hpp> | |||
| namespace cv { | |||
| /** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "<invalid depth>" */ | |||
| CV_EXPORTS const char* depthToString(int depth); | |||
| /** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or "<invalid type>" */ | |||
| CV_EXPORTS const String typeToString(int type); | |||
| //! @cond IGNORED | |||
| namespace detail { | |||
| /** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */ | |||
| CV_EXPORTS const char* depthToString_(int depth); | |||
| /** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or cv::String() */ | |||
| CV_EXPORTS const cv::String typeToString_(int type); | |||
//! Comparison kind recorded for a CV_Check* assertion; used when formatting
//! the failure report (TEST_CUSTOM marks a free-form predicate check).
enum TestOp {
  TEST_CUSTOM = 0,
  TEST_EQ = 1,
  TEST_NE = 2,
  TEST_LE = 3,
  TEST_LT = 4,
  TEST_GE = 5,
  TEST_GT = 6,
  CV__LAST_TEST_OP   // sentinel — keep last
};
//! Static description of one CV_Check* call site. Instances are built once
//! per site by CV__DEFINE_CHECK_CONTEXT and handed to the check_failed_*
//! reporters on failure.
struct CheckContext {
    const char* func;      // function name (CV__CHECK_FUNCTION)
    const char* file;      // source file (CV__CHECK_FILENAME)
    int line;              // source line (__LINE__)
    enum TestOp testOp;    // which comparison failed
    const char* message;   // user-supplied message string
    const char* p1_str;    // stringified first operand
    const char* p2_str;    // stringified second operand / test expression
};
| #ifndef CV__CHECK_FILENAME | |||
| # define CV__CHECK_FILENAME __FILE__ | |||
| #endif | |||
| #ifndef CV__CHECK_FUNCTION | |||
| # if defined _MSC_VER | |||
| # define CV__CHECK_FUNCTION __FUNCSIG__ | |||
| # elif defined __GNUC__ | |||
| # define CV__CHECK_FUNCTION __PRETTY_FUNCTION__ | |||
| # else | |||
| # define CV__CHECK_FUNCTION "<unknown>" | |||
| # endif | |||
| #endif | |||
| #define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__) | |||
| #define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \ | |||
| static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \ | |||
| { CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, message, p1_str, p2_str } | |||
| CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v1, const size_t v2, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v1, const Size_<int> v2, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_auto(const std::string& v1, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx); | |||
| CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx); | |||
| #define CV__TEST_EQ(v1, v2) ((v1) == (v2)) | |||
| #define CV__TEST_NE(v1, v2) ((v1) != (v2)) | |||
| #define CV__TEST_LE(v1, v2) ((v1) <= (v2)) | |||
| #define CV__TEST_LT(v1, v2) ((v1) < (v2)) | |||
| #define CV__TEST_GE(v1, v2) ((v1) >= (v2)) | |||
| #define CV__TEST_GT(v1, v2) ((v1) > (v2)) | |||
| #define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \ | |||
| if(CV__TEST_##op((v1), (v2))) ; else { \ | |||
| CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \ | |||
| cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \ | |||
| } \ | |||
| } while (0) | |||
| #define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \ | |||
| if(!!(test_expr)) ; else { \ | |||
| CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \ | |||
| cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \ | |||
| } \ | |||
| } while (0) | |||
| } // namespace | |||
| //! @endcond | |||
| /// Supported values of these types: int, float, double | |||
| #define CV_CheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg) | |||
| #define CV_CheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg) | |||
| #define CV_CheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg) | |||
| #define CV_CheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg) | |||
| #define CV_CheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg) | |||
| #define CV_CheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg) | |||
| /// Check with additional "decoding" of type values in error message | |||
| #define CV_CheckTypeEQ(t1, t2, msg) CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg) | |||
| /// Check with additional "decoding" of depth values in error message | |||
| #define CV_CheckDepthEQ(d1, d2, msg) CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg) | |||
| #define CV_CheckChannelsEQ(c1, c2, msg) CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg) | |||
| /// Example: type == CV_8UC1 || type == CV_8UC3 | |||
| #define CV_CheckType(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg) | |||
| /// Example: depth == CV_32F || depth == CV_64F | |||
| #define CV_CheckDepth(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg) | |||
| /// Example: v == A || v == B | |||
| #define CV_Check(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg) | |||
| /// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1") | |||
| // TODO define pretty-printers | |||
| #ifndef NDEBUG | |||
| #define CV_DbgCheck(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg) | |||
| #define CV_DbgCheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg) | |||
| #define CV_DbgCheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg) | |||
| #define CV_DbgCheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg) | |||
| #define CV_DbgCheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg) | |||
| #define CV_DbgCheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg) | |||
| #define CV_DbgCheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg) | |||
| #else | |||
| #define CV_DbgCheck(v, test_expr, msg) do { } while (0) | |||
| #define CV_DbgCheckEQ(v1, v2, msg) do { } while (0) | |||
| #define CV_DbgCheckNE(v1, v2, msg) do { } while (0) | |||
| #define CV_DbgCheckLE(v1, v2, msg) do { } while (0) | |||
| #define CV_DbgCheckLT(v1, v2, msg) do { } while (0) | |||
| #define CV_DbgCheckGE(v1, v2, msg) do { } while (0) | |||
| #define CV_DbgCheckGT(v1, v2, msg) do { } while (0) | |||
| #endif | |||
| } // namespace | |||
| #endif // OPENCV_CORE_CHECK_HPP | |||
| @@ -0,0 +1,48 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifdef __OPENCV_BUILD | |||
| #error this is a compatibility header which should not be used inside the OpenCV library | |||
| #endif | |||
| #include "opencv2/core.hpp" | |||
| @@ -0,0 +1,637 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CORE_CUDAINL_HPP | |||
| #define OPENCV_CORE_CUDAINL_HPP | |||
| #include "opencv2/core/cuda.hpp" | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { | |||
| //=================================================================================== | |||
| // GpuMat | |||
| //=================================================================================== | |||
//! Constructs an empty GpuMat that will allocate through the given allocator.
inline
GpuMat::GpuMat(Allocator* allocator_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{}
//! Constructs a rows_ x cols_ matrix of the given type; stays empty when
//! either dimension is non-positive.
inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
    if (rows_ > 0 && cols_ > 0)
        create(rows_, cols_, type_);
}
//! Same as above but with the dimensions packed into a Size.
inline
GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
    if (size_.height > 0 && size_.width > 0)
        create(size_.height, size_.width, type_);
}
//! Constructs and fills the matrix with the scalar s_; stays empty (and
//! unfilled) when either dimension is non-positive.
inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
    if (rows_ > 0 && cols_ > 0)
    {
        create(rows_, cols_, type_);
        setTo(s_);
    }
}
//! Size-based variant of the fill constructor above.
inline
GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
    if (size_.height > 0 && size_.width > 0)
    {
        create(size_.height, size_.width, type_);
        setTo(s_);
    }
}
//! Copy constructor: shares m's buffer and bumps the reference count
//! (atomically, via CV_XADD) when the buffer is reference-counted.
inline
GpuMat::GpuMat(const GpuMat& m)
    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator)
{
    if (refcount)
        CV_XADD(refcount, 1);
}
//! Constructs from any InputArray by uploading its contents.
inline
GpuMat::GpuMat(InputArray arr, Allocator* allocator_) :
    flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
    upload(arr);
}
//! Destructor drops this object's reference via release().
inline
GpuMat::~GpuMat()
{
    release();
}
| inline | |||
| GpuMat& GpuMat::operator =(const GpuMat& m) | |||
| { | |||
| if (this != &m) | |||
| { | |||
| GpuMat temp(m); | |||
| swap(temp); | |||
| } | |||
| return *this; | |||
| } | |||
//! Size-based convenience overload; forwards to create(rows, cols, type).
inline
void GpuMat::create(Size size_, int type_)
{
    create(size_.height, size_.width, type_);
}
//! Member-wise swap of the two headers; no device data is touched, so this is
//! O(1) and never throws.
inline
void GpuMat::swap(GpuMat& b)
{
    std::swap(flags, b.flags);
    std::swap(rows, b.rows);
    std::swap(cols, b.cols);
    std::swap(step, b.step);
    std::swap(data, b.data);
    std::swap(datastart, b.datastart);
    std::swap(dataend, b.dataend);
    std::swap(refcount, b.refcount);
    std::swap(allocator, b.allocator);
}
| inline | |||
| GpuMat GpuMat::clone() const | |||
| { | |||
| GpuMat m; | |||
| copyTo(m); | |||
| return m; | |||
| } | |||
//! Synchronous copy: forwards to the stream overload with the default (null) stream.
inline
void GpuMat::copyTo(OutputArray dst, InputArray mask) const
{
    copyTo(dst, mask, Stream::Null());
}
//! Synchronous fill: forwards to the stream overload with the null stream.
inline
GpuMat& GpuMat::setTo(Scalar s)
{
    return setTo(s, Stream::Null());
}
//! Synchronous masked fill: forwards to the stream overload with the null stream.
inline
GpuMat& GpuMat::setTo(Scalar s, InputArray mask)
{
    return setTo(s, mask, Stream::Null());
}
//! Synchronous type conversion: forwards to the stream overload with the null stream.
inline
void GpuMat::convertTo(OutputArray dst, int rtype) const
{
    convertTo(dst, rtype, Stream::Null());
}
//! Synchronous scaled conversion (dst = this*alpha + beta) on the null stream.
inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const
{
    convertTo(dst, rtype, alpha, beta, Stream::Null());
}
//! Asynchronous scaled conversion with an implicit beta of 0.0.
inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const
{
    convertTo(dst, rtype, alpha, 0.0, stream);
}
| inline | |||
| void GpuMat::assignTo(GpuMat& m, int _type) const | |||
| { | |||
| if (_type < 0) | |||
| m = *this; | |||
| else | |||
| convertTo(m, _type); | |||
| } | |||
//! Pointer to the start of row y. The unsigned comparison bounds-checks
//! 0 <= y < rows in one test (debug builds only, via CV_DbgAssert).
inline
uchar* GpuMat::ptr(int y)
{
    CV_DbgAssert( (unsigned)y < (unsigned)rows );
    return data + step * y;
}
//! Const variant of the row-pointer accessor above.
inline
const uchar* GpuMat::ptr(int y) const
{
    CV_DbgAssert( (unsigned)y < (unsigned)rows );
    return data + step * y;
}
//! Typed row pointer; simply casts the untyped accessor's result.
template<typename _Tp> inline
_Tp* GpuMat::ptr(int y)
{
    return (_Tp*)ptr(y);
}
//! Const typed row pointer.
template<typename _Tp> inline
const _Tp* GpuMat::ptr(int y) const
{
    return (const _Tp*)ptr(y);
}
//! Conversion to the device-side view type carrying data, step and size.
template <class T> inline
GpuMat::operator PtrStepSz<T>() const
{
    return PtrStepSz<T>(rows, cols, (T*)data, step);
}
//! Conversion to the device-side view type carrying only data and step.
template <class T> inline
GpuMat::operator PtrStep<T>() const
{
    return PtrStep<T>((T*)data, step);
}
//! Header for the single row y (no data is copied; the ROI shares the buffer).
inline
GpuMat GpuMat::row(int y) const
{
    return GpuMat(*this, Range(y, y+1), Range::all());
}
//! Header for the single column x (shares the buffer).
inline
GpuMat GpuMat::col(int x) const
{
    return GpuMat(*this, Range::all(), Range(x, x+1));
}
//! Header for rows [startrow, endrow) — half-open, like Range itself.
inline
GpuMat GpuMat::rowRange(int startrow, int endrow) const
{
    return GpuMat(*this, Range(startrow, endrow), Range::all());
}
//! Range-object variant of rowRange.
inline
GpuMat GpuMat::rowRange(Range r) const
{
    return GpuMat(*this, r, Range::all());
}
//! Header for columns [startcol, endcol) — half-open.
inline
GpuMat GpuMat::colRange(int startcol, int endcol) const
{
    return GpuMat(*this, Range::all(), Range(startcol, endcol));
}
//! Range-object variant of colRange.
inline
GpuMat GpuMat::colRange(Range r) const
{
    return GpuMat(*this, Range::all(), r);
}
//! Rectangular ROI selected by row and column ranges (shares the buffer).
inline
GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
{
    return GpuMat(*this, rowRange_, colRange_);
}
//! Rectangular ROI selected by a Rect (shares the buffer).
inline
GpuMat GpuMat::operator ()(Rect roi) const
{
    return GpuMat(*this, roi);
}
//! True when rows are stored back-to-back with no padding (tracked in flags).
inline
bool GpuMat::isContinuous() const
{
    return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
//! Size of one element in bytes (all channels).
inline
size_t GpuMat::elemSize() const
{
    return CV_ELEM_SIZE(flags);
}
//! Size of one channel of one element in bytes.
inline
size_t GpuMat::elemSize1() const
{
    return CV_ELEM_SIZE1(flags);
}
//! Full type id (depth + channels), e.g. CV_8UC3, decoded from flags.
inline
int GpuMat::type() const
{
    return CV_MAT_TYPE(flags);
}
//! Depth id only, e.g. CV_8U.
inline
int GpuMat::depth() const
{
    return CV_MAT_DEPTH(flags);
}
//! Number of channels.
inline
int GpuMat::channels() const
{
    return CV_MAT_CN(flags);
}
//! Row stride measured in single-channel elements rather than bytes.
inline
size_t GpuMat::step1() const
{
    return step / elemSize1();
}
//! Matrix size as (width, height) — note Size takes cols first.
inline
Size GpuMat::size() const
{
    return Size(cols, rows);
}
//! True when no buffer is attached.
inline
bool GpuMat::empty() const
{
    return data == 0;
}
//! Raw data pointer exposed as void* (named for CUDA interop).
inline
void* GpuMat::cudaPtr() const
{
    return data;
}
//! Returns a freshly allocated continuous (no row padding) rows x cols matrix.
static inline
GpuMat createContinuous(int rows, int cols, int type)
{
    GpuMat m;
    createContinuous(rows, cols, type, m);
    return m;
}
//! Size-based overload writing into a caller-provided array.
static inline
void createContinuous(Size size, int type, OutputArray arr)
{
    createContinuous(size.height, size.width, type, arr);
}
//! Size-based overload returning the new continuous matrix.
static inline
GpuMat createContinuous(Size size, int type)
{
    GpuMat m;
    createContinuous(size, type, m);
    return m;
}
//! Size-based overload of ensureSizeIsEnough: reallocates arr only when it is
//! too small for the requested size/type.
static inline
void ensureSizeIsEnough(Size size, int type, OutputArray arr)
{
    ensureSizeIsEnough(size.height, size.width, type, arr);
}
//! ADL-friendly free swap delegating to the O(1) member swap.
static inline
void swap(GpuMat& a, GpuMat& b)
{
    a.swap(b);
}
| //=================================================================================== | |||
| // HostMem | |||
| //=================================================================================== | |||
// Constructs an empty page-locked buffer; memory is allocated later by create().
inline
HostMem::HostMem(AllocType alloc_type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
}
// Shallow copy: shares the buffer and bumps the atomic reference count.
inline
HostMem::HostMem(const HostMem& m)
    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
{
    if( refcount )
        CV_XADD(refcount, 1);
}
// Allocates rows_ x cols_ of type_ only when both dimensions are positive.
inline
HostMem::HostMem(int rows_, int cols_, int type_, AllocType alloc_type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
    if (rows_ > 0 && cols_ > 0)
        create(rows_, cols_, type_);
}
inline
HostMem::HostMem(Size size_, int type_, AllocType alloc_type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
    if (size_.height > 0 && size_.width > 0)
        create(size_.height, size_.width, type_);
}
// Deep-copies the contents of arr into freshly allocated pinned memory.
inline
HostMem::HostMem(InputArray arr, AllocType alloc_type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
    arr.getMat().copyTo(*this);
}
// Drops this reference; the buffer is freed when the last owner releases it.
inline
HostMem::~HostMem()
{
    release();
}
// Copy-and-swap assignment: exception-safe and handles self-assignment.
inline
HostMem& HostMem::operator =(const HostMem& m)
{
    if (this != &m)
    {
        HostMem temp(m);
        swap(temp);
    }
    return *this;
}
// O(1) member-wise swap of headers; no buffer data is moved.
inline
void HostMem::swap(HostMem& b)
{
    std::swap(flags, b.flags);
    std::swap(rows, b.rows);
    std::swap(cols, b.cols);
    std::swap(step, b.step);
    std::swap(data, b.data);
    std::swap(datastart, b.datastart);
    std::swap(dataend, b.dataend);
    std::swap(refcount, b.refcount);
    std::swap(alloc_type, b.alloc_type);
}
// Deep copy preserving size, type and allocation kind.
inline
HostMem HostMem::clone() const
{
    HostMem m(size(), type(), alloc_type);
    createMatHeader().copyTo(m);
    return m;
}
inline
void HostMem::create(Size size_, int type_)
{
    create(size_.height, size_.width, type_);
}
// Wraps the pinned buffer in a cv::Mat header WITHOUT copying or taking ownership;
// the returned Mat is only valid while this HostMem is alive.
inline
Mat HostMem::createMatHeader() const
{
    return Mat(size(), type(), data, step);
}
// The accessors below mirror GpuMat's: thin decoders of the flags word.
inline
bool HostMem::isContinuous() const
{
    return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
inline
size_t HostMem::elemSize() const
{
    return CV_ELEM_SIZE(flags);
}
inline
size_t HostMem::elemSize1() const
{
    return CV_ELEM_SIZE1(flags);
}
inline
int HostMem::type() const
{
    return CV_MAT_TYPE(flags);
}
inline
int HostMem::depth() const
{
    return CV_MAT_DEPTH(flags);
}
inline
int HostMem::channels() const
{
    return CV_MAT_CN(flags);
}
// Row stride in elemSize1() units rather than bytes.
inline
size_t HostMem::step1() const
{
    return step / elemSize1();
}
// (cols, rows) order: cv::Size is (width, height).
inline
Size HostMem::size() const
{
    return Size(cols, rows);
}
inline
bool HostMem::empty() const
{
    return data == 0;
}
// ADL-visible swap for HostMem; delegates to the O(1) member swap.
static inline
void swap(HostMem& a, HostMem& b)
{
    a.swap(b);
}
| //=================================================================================== | |||
| // Stream | |||
| //=================================================================================== | |||
// Wraps an existing implementation handle (pimpl); used internally by the CUDA module.
inline
Stream::Stream(const Ptr<Impl>& impl)
    : impl_(impl)
{
}
//===================================================================================
// Event
//===================================================================================
// Wraps an existing implementation handle (pimpl); used internally by the CUDA module.
inline
Event::Event(const Ptr<Impl>& impl)
    : impl_(impl)
{
}
//===================================================================================
// Initialization & Info
//===================================================================================
// True if the binary was built with PTX or cubin code for exactly this arch.
inline
bool TargetArchs::has(int major, int minor)
{
    return hasPtx(major, minor) || hasBin(major, minor);
}
// True if the binary carries code usable on this arch or any newer one.
inline
bool TargetArchs::hasEqualOrGreater(int major, int minor)
{
    return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
}
// Default constructor binds to the device currently selected on this thread.
inline
DeviceInfo::DeviceInfo()
{
    device_id_ = getDevice();
}
// Binds to an explicit device; asserts the id refers to an existing CUDA device.
inline
DeviceInfo::DeviceInfo(int device_id)
{
    CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
    device_id_ = device_id;
}
inline
int DeviceInfo::deviceID() const
{
    return device_id_;
}
// queryMemory() fills both totals; each accessor discards the half it doesn't need.
inline
size_t DeviceInfo::freeMemory() const
{
    size_t _totalMemory = 0, _freeMemory = 0;
    queryMemory(_totalMemory, _freeMemory);
    return _freeMemory;
}
inline
size_t DeviceInfo::totalMemory() const
{
    size_t _totalMemory = 0, _freeMemory = 0;
    queryMemory(_totalMemory, _freeMemory);
    return _totalMemory;
}
// Feature support check: encodes compute capability as major*10+minor and
// compares against the FeatureSet enum, whose values use the same encoding.
inline
bool DeviceInfo::supports(FeatureSet feature_set) const
{
    int version = majorVersion() * 10 + minorVersion();
    return version >= feature_set;
}
| }} // namespace cv { namespace cuda { | |||
| //=================================================================================== | |||
| // Mat | |||
| //=================================================================================== | |||
namespace cv {
// Constructs a host Mat by downloading the GpuMat's contents (synchronous
// device-to-host copy); the new Mat owns its own host buffer.
inline
Mat::Mat(const cuda::GpuMat& m)
    : flags(0), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows)
{
    m.download(*this);
}
}
| //! @endcond | |||
| #endif // OPENCV_CORE_CUDAINL_HPP | |||
| @@ -0,0 +1,211 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_DEVICE_BLOCK_HPP | |||
| #define OPENCV_CUDA_DEVICE_BLOCK_HPP | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
// Block-level cooperative primitives: every member below must be called by all
// threads of the CUDA block (they use flattened thread ids and __syncthreads).
struct Block
{
    // Linear block id along x only; callers are expected to launch 1-D grids.
    static __device__ __forceinline__ unsigned int id()
    {
        return blockIdx.x;
    }
    // Total number of threads in the block = the stride for block-wide loops.
    static __device__ __forceinline__ unsigned int stride()
    {
        return blockDim.x * blockDim.y * blockDim.z;
    }
    static __device__ __forceinline__ void sync()
    {
        __syncthreads();
    }
    // Flattens a 3-D thread index into a single linear id in [0, stride()).
    static __device__ __forceinline__ int flattenedThreadId()
    {
        return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
    }
    // Cooperative fill of [beg, end): each thread writes every stride()-th slot.
    template<typename It, typename T>
    static __device__ __forceinline__ void fill(It beg, It end, const T& value)
    {
        int STRIDE = stride();
        It t = beg + flattenedThreadId();
        for(; t < end; t += STRIDE)
            *t = value;
    }
    // Cooperative iota ("yota" kept for API compatibility): writes value,
    // value+1, ... into [beg, end).
    template<typename OutIt, typename T>
    static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
    {
        int STRIDE = stride();
        int tid = flattenedThreadId();
        value += tid;
        for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
            *t = value;
    }
    // Cooperative element-wise copy of [beg, end) to out.
    template<typename InIt, typename OutIt>
    static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out)
    {
        int STRIDE = stride();
        InIt t = beg + flattenedThreadId();
        OutIt o = out + (t - beg);
        for(; t < end; t += STRIDE, o += STRIDE)
            *o = *t;
    }
    // Cooperative unary transform: out[i] = op(in[i]).
    template<typename InIt, typename OutIt, class UnOp>
    static __device__ __forceinline__ void transform(InIt beg, InIt end, OutIt out, UnOp op)
    {
        int STRIDE = stride();
        InIt t = beg + flattenedThreadId();
        OutIt o = out + (t - beg);
        for(; t < end; t += STRIDE, o += STRIDE)
            *o = op(*t);
    }
    // Cooperative binary transform: out[i] = op(in1[i], in2[i]).
    template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
    static __device__ __forceinline__ void transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
    {
        int STRIDE = stride();
        InIt1 t1 = beg1 + flattenedThreadId();
        InIt2 t2 = beg2 + flattenedThreadId();
        OutIt o = out + (t1 - beg1);
        for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
            *o = op(*t1, *t2);
    }
    // Tree reduction over a shared buffer of CTA_SIZE elements; result lands in
    // buffer[0]. CTA_SIZE must equal the (power-of-two) number of participating
    // threads. NOTE(review): the tid < 32 tail performs no explicit sync and so
    // appears to rely on pre-Volta implicit warp-synchronous execution — confirm
    // before using on Volta or newer architectures.
    template<int CTA_SIZE, typename T, class BinOp>
    static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
    {
        int tid = flattenedThreadId();
        T val = buffer[tid];
        if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
        if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
        if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
        if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
        if (tid < 32)
        {
            if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
            if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
            if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
            if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
            if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
            if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
        }
    }
    // Same reduction, but seeds buffer[tid] with init first and broadcasts the
    // final buffer[0] to every caller. Same warp-sync caveat as above.
    template<int CTA_SIZE, typename T, class BinOp>
    static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
    {
        int tid = flattenedThreadId();
        T val = buffer[tid] = init;
        __syncthreads();
        if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
        if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
        if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
        if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
        if (tid < 32)
        {
            if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
            if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
            if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
            if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
            if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
            if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
        }
        __syncthreads();
        return buffer[0];
    }
    // Reduction of n elements for arbitrary (non-power-of-two) n: first folds
    // the tail beyond stride() into the leading slots, then halves n each pass,
    // combining symmetric pairs data[ftid] and data[n - ftid - 1].
    template <typename T, class BinOp>
    static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op)
    {
        int ftid = flattenedThreadId();
        int sft = stride();
        if (sft < n)
        {
            for (unsigned int i = sft + ftid; i < n; i += sft)
                data[ftid] = op(data[ftid], data[i]);
            __syncthreads();
            n = sft;
        }
        while (n > 1)
        {
            unsigned int half = n/2;
            if (ftid < half)
                data[ftid] = op(data[ftid], data[n - ftid - 1]);
            __syncthreads();
            n = n - half;
        }
    }
};
| }}} | |||
| //! @endcond | |||
| #endif /* OPENCV_CUDA_DEVICE_BLOCK_HPP */ | |||
| @@ -0,0 +1,722 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_BORDER_INTERPOLATE_HPP | |||
| #define OPENCV_CUDA_BORDER_INTERPOLATE_HPP | |||
| #include "saturate_cast.hpp" | |||
| #include "vec_traits.hpp" | |||
| #include "vec_math.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| ////////////////////////////////////////////////////////////// | |||
| // BrdConstant | |||
// Horizontal BORDER_CONSTANT: out-of-range columns read as the fixed value val.
// at_low checks only the left edge, at_high only the right, at checks both.
template <typename D> struct BrdRowConstant
{
    typedef D result_type;
    explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}
    template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
    {
        return x >= 0 ? saturate_cast<D>(data[x]) : val;
    }
    template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
    {
        return x < width ? saturate_cast<D>(data[x]) : val;
    }
    template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
    {
        return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;
    }
    int width;
    D val;
};
// Vertical BORDER_CONSTANT: out-of-range rows read as val. step is the row
// pitch in bytes, hence the char* arithmetic before the element cast.
template <typename D> struct BrdColConstant
{
    typedef D result_type;
    explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}
    template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
    {
        return y >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
    }
    template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
    {
        return y < height ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
    }
    template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
    {
        return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
    }
    int height;
    D val;
};
// 2-D BORDER_CONSTANT: any access outside the height x width rectangle yields val.
template <typename D> struct BrdConstant
{
    typedef D result_type;
    __host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_)
    {
    }
    template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
    {
        return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(((const T*)((const uchar*)data + y * step))[x]) : val;
    }
    // Overload for 2-D accessor objects (e.g. texture/PtrStep wrappers).
    template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
    {
        return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
    }
    int height;
    int width;
    D val;
};
| ////////////////////////////////////////////////////////////// | |||
| // BrdReplicate | |||
// Horizontal BORDER_REPLICATE (aaaaaa|abcdefgh|hhhhhhh): out-of-range columns
// are clamped to the nearest valid column.
template <typename D> struct BrdRowReplicate
{
    typedef D result_type;
    explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}
    // Extra-parameter constructor so all border functors share one construction
    // signature (the value argument is ignored for replicate borders).
    template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {}
    // Clamp a possibly-negative index up to 0.
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        return ::max(x, 0);
    }
    // Clamp a possibly-too-large index down to last_col.
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        return ::min(x, last_col);
    }
    // Clamp from both sides.
    __device__ __forceinline__ int idx_col(int x) const
    {
        return idx_col_low(idx_col_high(x));
    }
    template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
    {
        return saturate_cast<D>(data[idx_col_low(x)]);
    }
    template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
    {
        return saturate_cast<D>(data[idx_col_high(x)]);
    }
    template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
    {
        return saturate_cast<D>(data[idx_col(x)]);
    }
    int last_col;
};
// Vertical BORDER_REPLICATE: out-of-range rows clamp to the nearest valid row.
// step is the row pitch in bytes.
template <typename D> struct BrdColReplicate
{
    typedef D result_type;
    explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}
    template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {}
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        return ::max(y, 0);
    }
    __device__ __forceinline__ int idx_row_high(int y) const
    {
        return ::min(y, last_row);
    }
    __device__ __forceinline__ int idx_row(int y) const
    {
        return idx_row_low(idx_row_high(y));
    }
    template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
    {
        return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
    }
    template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
    {
        return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
    }
    template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
    {
        return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
    }
    int last_row;
};
// 2-D BORDER_REPLICATE: clamps both coordinates independently.
template <typename D> struct BrdReplicate
{
    typedef D result_type;
    __host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {}
    template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        return ::max(y, 0);
    }
    __device__ __forceinline__ int idx_row_high(int y) const
    {
        return ::min(y, last_row);
    }
    __device__ __forceinline__ int idx_row(int y) const
    {
        return idx_row_low(idx_row_high(y));
    }
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        return ::max(x, 0);
    }
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        return ::min(x, last_col);
    }
    __device__ __forceinline__ int idx_col(int x) const
    {
        return idx_col_low(idx_col_high(x));
    }
    template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
    {
        return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
    }
    // Overload for 2-D accessor objects (e.g. texture/PtrStep wrappers).
    template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
    {
        return saturate_cast<D>(src(idx_row(y), idx_col(x)));
    }
    int last_row;
    int last_col;
};
| ////////////////////////////////////////////////////////////// | |||
| // BrdReflect101 | |||
// Horizontal BORDER_REFLECT_101 (gfedcb|abcdefgh|gfedcba): mirrors about the
// edge pixel itself, so the border pixel is not duplicated.
template <typename D> struct BrdRowReflect101
{
    typedef D result_type;
    explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {}
    // Uniform-construction overload; the extra (border value) argument is ignored.
    template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {}
    // Mirror an index left of the image back into [0, last_col].
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        return ::abs(x) % (last_col + 1);
    }
    // Mirror an index right of the image back into [0, last_col].
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
    }
    __device__ __forceinline__ int idx_col(int x) const
    {
        return idx_col_low(idx_col_high(x));
    }
    template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
    {
        return saturate_cast<D>(data[idx_col_low(x)]);
    }
    template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
    {
        return saturate_cast<D>(data[idx_col_high(x)]);
    }
    template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
    {
        return saturate_cast<D>(data[idx_col(x)]);
    }
    int last_col;
};
| template <typename D> struct BrdColReflect101 | |||
| { | |||
| typedef D result_type; | |||
| explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {} | |||
| template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {} | |||
| __device__ __forceinline__ int idx_row_low(int y) const | |||
| { | |||
| return ::abs(y) % (last_row + 1); | |||
| } | |||
| __device__ __forceinline__ int idx_row_high(int y) const | |||
| { | |||
| return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1); | |||
| } | |||
| __device__ __forceinline__ int idx_row(int y) const | |||
| { | |||
| return idx_row_low(idx_row_high(y)); | |||
| } | |||
| template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const | |||
| { | |||
| return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step)); | |||
| } | |||
| template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const | |||
| { | |||
| return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step)); | |||
| } | |||
| template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const | |||
| { | |||
| return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step)); | |||
| } | |||
| int last_row; | |||
| }; | |||
// 2-D BORDER_REFLECT_101: mirrors both coordinates about the edge pixels
// (edge pixel not duplicated).
template <typename D> struct BrdReflect101
{
    typedef D result_type;
    __host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {}
    // Uniform-construction overload; the extra (border value) argument is ignored.
    template <typename U> __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        return ::abs(y) % (last_row + 1);
    }
    __device__ __forceinline__ int idx_row_high(int y) const
    {
        return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
    }
    __device__ __forceinline__ int idx_row(int y) const
    {
        return idx_row_low(idx_row_high(y));
    }
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        return ::abs(x) % (last_col + 1);
    }
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
    }
    __device__ __forceinline__ int idx_col(int x) const
    {
        return idx_col_low(idx_col_high(x));
    }
    template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
    {
        return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
    }
    // Overload for 2-D accessor objects (e.g. texture/PtrStep wrappers).
    template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
    {
        return saturate_cast<D>(src(idx_row(y), idx_col(x)));
    }
    int last_row;
    int last_col;
};
| ////////////////////////////////////////////////////////////// | |||
| // BrdReflect | |||
// Horizontal BORDER_REFLECT (fedcba|abcdefgh|hgfedcb): mirrors including the
// edge pixel, so the border pixel IS duplicated. The "- (x < 0)" / "+ (x >
// last_col)" terms shift the reflection by one relative to REFLECT_101.
template <typename D> struct BrdRowReflect
{
    typedef D result_type;
    explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {}
    // Uniform-construction overload; the extra (border value) argument is ignored.
    template <typename U> __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {}
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        return (::abs(x) - (x < 0)) % (last_col + 1);
    }
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        return ::abs(last_col - ::abs(last_col - x) + (x > last_col)) % (last_col + 1);
    }
    // Handles both edges: first folds negative x, then the right edge.
    __device__ __forceinline__ int idx_col(int x) const
    {
        return idx_col_high(::abs(x) - (x < 0));
    }
    template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
    {
        return saturate_cast<D>(data[idx_col_low(x)]);
    }
    template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
    {
        return saturate_cast<D>(data[idx_col_high(x)]);
    }
    template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
    {
        return saturate_cast<D>(data[idx_col(x)]);
    }
    int last_col;
};
| template <typename D> struct BrdColReflect | |||
| { | |||
| typedef D result_type; | |||
| explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {} | |||
| template <typename U> __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {} | |||
| __device__ __forceinline__ int idx_row_low(int y) const | |||
| { | |||
| return (::abs(y) - (y < 0)) % (last_row + 1); | |||
| } | |||
| __device__ __forceinline__ int idx_row_high(int y) const | |||
| { | |||
| return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1); | |||
| } | |||
| __device__ __forceinline__ int idx_row(int y) const | |||
| { | |||
| return idx_row_high(::abs(y) - (y < 0)); | |||
| } | |||
| template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const | |||
| { | |||
| return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step)); | |||
| } | |||
| template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const | |||
| { | |||
| return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step)); | |||
| } | |||
| template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const | |||
| { | |||
| return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step)); | |||
| } | |||
| int last_row; | |||
| }; | |||
// 2-D BORDER_REFLECT (edge pixel duplicated). NOTE(review): unlike the row/col
// variants, idx_row_high / idx_col_high deliberately omit the outer abs/modulo
// (left commented out upstream), so results are only valid while the sample
// index stays within one image height/width of the border — confirm callers
// respect this before widening kernel apertures.
template <typename D> struct BrdReflect
{
    typedef D result_type;
    __host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {}
    // Uniform-construction overload; the extra (border value) argument is ignored.
    template <typename U> __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        return (::abs(y) - (y < 0)) % (last_row + 1);
    }
    // Single reflection only; see the struct-level note about the commented-out wrap.
    __device__ __forceinline__ int idx_row_high(int y) const
    {
        return /*::abs*/(last_row - ::abs(last_row - y) + (y > last_row)) /*% (last_row + 1)*/;
    }
    __device__ __forceinline__ int idx_row(int y) const
    {
        return idx_row_low(idx_row_high(y));
    }
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        return (::abs(x) - (x < 0)) % (last_col + 1);
    }
    // Single reflection only; see the struct-level note.
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        return (last_col - ::abs(last_col - x) + (x > last_col));
    }
    __device__ __forceinline__ int idx_col(int x) const
    {
        return idx_col_low(idx_col_high(x));
    }
    template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
    {
        return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
    }
    // Overload for 2-D accessor objects (e.g. texture/PtrStep wrappers).
    template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
    {
        return saturate_cast<D>(src(idx_row(y), idx_col(x)));
    }
    int last_row;
    int last_col;
};
| ////////////////////////////////////////////////////////////// | |||
| // BrdWrap | |||
| template <typename D> struct BrdRowWrap | |||
| { | |||
| typedef D result_type; | |||
| explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {} | |||
| template <typename U> __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {} | |||
| __device__ __forceinline__ int idx_col_low(int x) const | |||
| { | |||
| return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width); | |||
| } | |||
| __device__ __forceinline__ int idx_col_high(int x) const | |||
| { | |||
| return (x < width) * x + (x >= width) * (x % width); | |||
| } | |||
| __device__ __forceinline__ int idx_col(int x) const | |||
| { | |||
| return idx_col_high(idx_col_low(x)); | |||
| } | |||
| template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const | |||
| { | |||
| return saturate_cast<D>(data[idx_col_low(x)]); | |||
| } | |||
| template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const | |||
| { | |||
| return saturate_cast<D>(data[idx_col_high(x)]); | |||
| } | |||
| template <typename T> __device__ __forceinline__ D at(int x, const T* data) const | |||
| { | |||
| return saturate_cast<D>(data[idx_col(x)]); | |||
| } | |||
| int width; | |||
| }; | |||
| template <typename D> struct BrdColWrap | |||
| { | |||
| typedef D result_type; | |||
| explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {} | |||
| template <typename U> __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {} | |||
| __device__ __forceinline__ int idx_row_low(int y) const | |||
| { | |||
| return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height); | |||
| } | |||
| __device__ __forceinline__ int idx_row_high(int y) const | |||
| { | |||
| return (y < height) * y + (y >= height) * (y % height); | |||
| } | |||
| __device__ __forceinline__ int idx_row(int y) const | |||
| { | |||
| return idx_row_high(idx_row_low(y)); | |||
| } | |||
| template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const | |||
| { | |||
| return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step)); | |||
| } | |||
| template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const | |||
| { | |||
| return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step)); | |||
| } | |||
| template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const | |||
| { | |||
| return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step)); | |||
| } | |||
| int height; | |||
| }; | |||
// Two-dimensional wrap border: both coordinates are wrapped periodically, so
// the image tiles the whole plane.
template <typename D> struct BrdWrap
{
    typedef D result_type;

    __host__ __device__ __forceinline__ BrdWrap(int height_, int width_) :
        height(height_), width(width_)
    {
    }
    // Uniform (height, width, border_value) constructor; the value is unused.
    template <typename U>
    __host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U) :
        height(height_), width(width_)
    {
    }

    // Shift a negative row index up by whole multiples of height.
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        return (y >= 0) ? y : (y - ((y - height + 1) / height) * height);
    }
    // Reduce a row index at or beyond height with a plain modulo.
    __device__ __forceinline__ int idx_row_high(int y) const
    {
        return (y < height) ? y : (y % height);
    }
    // Full row wrap: negative side first, then the high side.
    __device__ __forceinline__ int idx_row(int y) const
    {
        return idx_row_high(idx_row_low(y));
    }

    // Shift a negative column index up by whole multiples of width.
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        return (x >= 0) ? x : (x - ((x - width + 1) / width) * width);
    }
    // Reduce a column index at or beyond width with a plain modulo.
    __device__ __forceinline__ int idx_col_high(int x) const
    {
        return (x < width) ? x : (x % width);
    }
    // Full column wrap: negative side first, then the high side.
    __device__ __forceinline__ int idx_col(int x) const
    {
        return idx_col_high(idx_col_low(x));
    }

    // Fetch the wrapped pixel from a raw pointer with a byte pitch 'step'.
    template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
    {
        return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
    }
    // Fetch the wrapped pixel through a 2D accessor object.
    template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
    {
        return saturate_cast<D>(src(idx_row(y), idx_col(x)));
    }

    int height; // period of the vertical wrap (number of valid rows)
    int width;  // period of the horizontal wrap (number of valid columns)
};
| ////////////////////////////////////////////////////////////// | |||
| // BorderReader | |||
// Adapter that composes a 2D pixel accessor with a border policy: every
// access is routed through the policy object 'b', which remaps out-of-range
// coordinates (or substitutes a value) before reading from 'ptr'.
template <typename Ptr2D, typename B> struct BorderReader
{
    typedef typename B::result_type elem_type;
    typedef typename Ptr2D::index_type index_type;

    __host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {}

    // Delegate the (possibly out-of-range) access to the border policy.
    __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const
    {
        return b.at(y, x, ptr);
    }

    Ptr2D ptr; // underlying pixel accessor
    B b;       // border-handling policy
};
| // under win32 there is some bug with templated types that passed as kernel parameters | |||
| // with this specialization all works fine | |||
// under win32 there is some bug with templated types that passed as kernel parameters
// with this specialization all works fine
// Specialization for the constant border: instead of storing a BrdConstant
// member, the bounds and fill value are copied into the reader itself and the
// bounds check is done inline.
template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
{
    typedef typename BrdConstant<D>::result_type elem_type;
    typedef typename Ptr2D::index_type index_type;

    __host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b) :
        src(src_), height(b.height), width(b.width), val(b.val)
    {
    }

    // In-range reads come from 'src'; everything else yields the constant.
    __device__ __forceinline__ D operator ()(index_type y, index_type x) const
    {
        return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
    }

    Ptr2D src;  // underlying pixel accessor
    int height; // number of valid rows
    int width;  // number of valid columns
    D val;      // fill value returned outside the image
};
| }}} // namespace cv { namespace cuda { namespace cudev | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_BORDER_INTERPOLATE_HPP | |||
| @@ -0,0 +1,309 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_COLOR_HPP | |||
| #define OPENCV_CUDA_COLOR_HPP | |||
| #include "detail/color_detail.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
namespace cv { namespace cuda { namespace device
{
    // All OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements
    // template <typename T> class ColorSpace1_to_ColorSpace2_traits
    // {
    //     typedef ... functor_type;
    //     static __host__ __device__ functor_type create_functor();
    // };
    //
    // i.e. each invocation below instantiates a traits class that hands out
    // the per-pixel conversion functor for the named color conversion.  The
    // numeric arguments select channel counts / channel order / bit layout;
    // their exact meaning is defined by the corresponding macro in
    // detail/color_detail.hpp.

    // RGB <-> BGR channel reordering and alpha add/drop.
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS

    // RGB/BGR(A) -> packed 16-bit BGR555/BGR565.
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS

    // Packed 16-bit BGR555/BGR565 -> RGB/BGR(A).
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)

    #undef OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS

    // Grayscale -> multi-channel / packed formats.
    OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
    OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)

    #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS

    OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
    OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)

    #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS

    // Packed formats / RGB(A) -> grayscale.
    OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)

    #undef OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS

    // RGB(A)/BGR(A) <-> YUV.
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS

    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS

    // RGB(A)/BGR(A) <-> YCrCb.
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS

    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS

    // RGB(A)/BGR(A) <-> CIE XYZ.
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS

    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS

    // RGB(A)/BGR(A) <-> HSV.
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS

    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS

    // RGB(A)/BGR(A) <-> HLS.
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS

    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS

    // RGB(A)/BGR(A) <-> CIE Lab; the boolean argument distinguishes the
    // sRGB ("true") from the linear-RGB ("l"-prefixed, "false") variants.
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab, 4, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab4, 3, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab4, 4, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab, 3, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab, 4, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab4, 3, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab4, 4, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab, 3, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab, 4, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab4, 3, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab4, 4, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab, 3, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab, 4, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab4, 3, 4, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab4, 4, 4, false, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS

    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgb, 3, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgb, 4, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgba, 3, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgba, 4, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgr, 3, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgr, 4, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgra, 3, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgra, 4, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgb, 3, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgb, 4, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgba, 3, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgba, 4, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgr, 3, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgr, 4, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgra, 3, 4, false, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgra, 4, 4, false, 0)

    #undef OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS

    // RGB(A)/BGR(A) <-> CIE Luv (same sRGB/linear-RGB flag as for Lab).
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv, 3, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv, 4, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv4, 3, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv4, 4, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv, 3, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv, 4, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv4, 3, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv4, 4, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv, 3, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv, 4, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv4, 3, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv4, 4, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv, 3, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv, 4, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv4, 3, 4, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv4, 4, 4, false, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS

    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgb, 3, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgb, 4, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgba, 3, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgba, 4, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgr, 3, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgr, 4, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgra, 3, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgra, 4, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgb, 3, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgb, 4, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgba, 3, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgba, 4, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgr, 3, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgr, 4, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgra, 3, 4, false, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0)

    #undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS
}}} // namespace cv { namespace cuda { namespace device
| //! @endcond | |||
| #endif // OPENCV_CUDA_COLOR_HPP | |||
| @@ -0,0 +1,123 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_COMMON_HPP | |||
| #define OPENCV_CUDA_COMMON_HPP | |||
| #include <cuda_runtime.h> | |||
| #include "opencv2/core/cuda_types.hpp" | |||
| #include "opencv2/core/cvdef.h" | |||
| #include "opencv2/core/base.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
// Single-precision pi for device code: reuse CV_PI from the core headers
// when available, otherwise fall back to a literal.
#ifndef CV_PI_F
    #ifndef CV_PI
        #define CV_PI_F 3.14159265f
    #else
        #define CV_PI_F ((float)CV_PI)
    #endif
#endif
| namespace cv { namespace cuda { | |||
| static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func) | |||
| { | |||
| if (cudaSuccess != err) | |||
| cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line); | |||
| } | |||
| }} | |||
// Wrap a CUDA runtime call: any non-success status is turned into a
// cv::Exception via checkCudaError, tagged with file/line/function.
#ifndef cudaSafeCall
    #define cudaSafeCall(expr)  cv::cuda::checkCudaError(expr, __FILE__, __LINE__, CV_Func)
#endif
namespace cv { namespace cuda
{
    // True when the address held by 'ptr' is a multiple of 'size' bytes.
    template <typename T> static inline bool isAligned(const T* ptr, size_t size)
    {
        const size_t address = reinterpret_cast<size_t>(ptr);
        return (address % size) == 0;
    }

    // True when a row pitch 'step' (in bytes) is a multiple of 'size' bytes.
    static inline bool isAligned(size_t step, size_t size)
    {
        return (step % size) == 0;
    }
}}
namespace cv { namespace cuda
{
    namespace device
    {
        // Integer ceiling division, typically used to size CUDA launch grids:
        // divUp(total, grain) == ceil(total / grain) for positive inputs.
        __host__ __device__ __forceinline__ int divUp(int total, int grain)
        {
            return (total + grain - 1) / grain;
        }

        // Bind a pitched 2D image to a legacy texture reference.
        // NOTE(review): texture references are deprecated in recent CUDA
        // toolkits; createTextureObjectPitch2D below is the object-based path.
        template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
        {
            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
        }

        // Create a CUDA texture object over the pitched 2D device memory
        // described by 'img', sampled according to the caller-supplied
        // 'texDesc'.  Errors are converted to cv::Exception by cudaSafeCall.
        template<class T> inline void createTextureObjectPitch2D(cudaTextureObject_t* tex, PtrStepSz<T>& img, const cudaTextureDesc& texDesc)
        {
            cudaResourceDesc resDesc;
            // Zero the whole descriptor so unused union members are defined.
            memset(&resDesc, 0, sizeof(resDesc));
            resDesc.resType = cudaResourceTypePitch2D;
            resDesc.res.pitch2D.devPtr = static_cast<void*>(img.ptr());
            resDesc.res.pitch2D.height = img.rows;
            resDesc.res.pitch2D.width = img.cols;
            resDesc.res.pitch2D.pitchInBytes = img.step;
            resDesc.res.pitch2D.desc = cudaCreateChannelDesc<T>();

            cudaSafeCall( cudaCreateTextureObject(tex, &resDesc, &texDesc, NULL) );
        }
    }
}}
| //! @endcond | |||
| #endif // OPENCV_CUDA_COMMON_HPP | |||
| @@ -0,0 +1,113 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP | |||
| #define OPENCV_CUDA_DATAMOV_UTILS_HPP | |||
| #include "common.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
namespace cv { namespace cuda { namespace device
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
    // for Fermi memory space is detected automatically: a plain dereference
    // compiles to the correct load, so no inline PTX is needed.
    template <typename T> struct ForceGlob
    {
        // Load ptr[offset] into val.
        __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
    };
#else // __CUDA_ARCH__ >= 200
    // Pre-Fermi path: emit an explicit ld.global via inline PTX so the load
    // is forced to come from global memory.
#if defined(_WIN64) || defined(__LP64__)
    // 64-bit register modifier for inlined asm
    #define OPENCV_CUDA_ASM_PTR "l"
#else
    // 32-bit register modifier for inlined asm
    #define OPENCV_CUDA_ASM_PTR "r"
#endif
    template<class T> struct ForceGlob;

    // Defines ForceGlob<base_type>::Load as "ld.global.<ptx_type>" with the
    // destination held in a register of constraint class reg_mod.
    #define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
        template <> struct ForceGlob<base_type> \
        { \
            __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
            { \
                asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
            } \
        };

    // Variant for 8-bit types: PTX has no byte register class for this
    // constraint, so the destination is reinterpreted as uint and loaded
    // through a 32-bit "r" register.
    #define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
        template <> struct ForceGlob<base_type> \
        { \
            __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
            { \
                asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
            } \
        };

    OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar, u8)
    OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar, s8)
    OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char, b8)   // plain char: untyped 8-bit load (b8)
    OPENCV_CUDA_DEFINE_FORCE_GLOB (ushort, u16, h)
    OPENCV_CUDA_DEFINE_FORCE_GLOB (short, s16, h)
    OPENCV_CUDA_DEFINE_FORCE_GLOB (uint, u32, r)
    OPENCV_CUDA_DEFINE_FORCE_GLOB (int, s32, r)
    OPENCV_CUDA_DEFINE_FORCE_GLOB (float, f32, f)
    OPENCV_CUDA_DEFINE_FORCE_GLOB (double, f64, d)

    #undef OPENCV_CUDA_DEFINE_FORCE_GLOB
    #undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
    #undef OPENCV_CUDA_ASM_PTR
#endif // __CUDA_ARCH__ >= 200
}}} // namespace cv { namespace cuda { namespace device
| //! @endcond | |||
| #endif // OPENCV_CUDA_DATAMOV_UTILS_HPP | |||
| @@ -0,0 +1,365 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_REDUCE_DETAIL_HPP | |||
| #define OPENCV_CUDA_REDUCE_DETAIL_HPP | |||
| #include <thrust/tuple.h> | |||
| #include "../warp.hpp" | |||
| #include "../warp_shuffle.hpp" | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| namespace reduce_detail | |||
| { | |||
        // GetType<P>::type names the underlying element type behind one level
        // of pointer (possibly volatile-qualified pointee) or reference, so
        // the tuple-based helpers below can declare a plain temporary.
        template <typename T> struct GetType;
        template <typename T> struct GetType<T*>
        {
            typedef T type;
        };
        template <typename T> struct GetType<volatile T*>
        {
            typedef T type;
        };
        template <typename T> struct GetType<T&>
        {
            typedef T type;
        };
        // Compile-time loop over tuple elements [I, N): each static method
        // processes element I and recurses on I + 1.  The For<N, N>
        // specialization terminates the recursion.
        template <unsigned int I, unsigned int N>
        struct For
        {
            // Store element I of val into element I of the smem pointer tuple
            // at index tid, then recurse.
            template <class PointerTuple, class ValTuple>
            static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
            {
                thrust::get<I>(smem)[tid] = thrust::get<I>(val);
                For<I + 1, N>::loadToSmem(smem, val, tid);
            }
            // Read element I back from the smem pointer tuple into val.
            template <class PointerTuple, class ValTuple>
            static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
            {
                thrust::get<I>(val) = thrust::get<I>(smem)[tid];
                For<I + 1, N>::loadFromSmem(smem, val, tid);
            }
            // Combine this thread's element I with the partner's copy at
            // tid + delta using op I; the result is written both to smem and
            // back into val.
            template <class PointerTuple, class ValTuple, class OpTuple>
            static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
            {
                typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta];
                thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
                For<I + 1, N>::merge(smem, val, tid, delta, op);
            }
            // Same combine step, but the partner's value is fetched with
            // shfl_down (warp shuffle) instead of going through shared memory.
            template <class ValTuple, class OpTuple>
            static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
            {
                typename GetType<typename thrust::tuple_element<I, ValTuple>::type>::type reg = shfl_down(thrust::get<I>(val), delta, width);
                thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
                For<I + 1, N>::mergeShfl(val, delta, width, op);
            }
        };
        // Recursion terminator (I == N): every tuple element has been
        // processed, so all methods are empty no-ops.
        template <unsigned int N>
        struct For<N, N>
        {
            template <class PointerTuple, class ValTuple>
            static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int)
            {
            }
            template <class PointerTuple, class ValTuple>
            static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int)
            {
            }
            template <class PointerTuple, class ValTuple, class OpTuple>
            static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&)
            {
            }
            template <class ValTuple, class OpTuple>
            static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&)
            {
            }
        };
        // Scalar overload: publish this thread's value to shared memory.
        template <typename T>
        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
        {
            smem[tid] = val;
        }
        // Scalar overload: read this thread's value back from shared memory.
        template <typename T>
        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
        {
            val = smem[tid];
        }
        // Tuple overload: forward element-by-element through the For loop.
        // thrust::tuple always exposes 10 slots (unused ones are null_type),
        // hence the fixed 10 template parameters.
        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
        __device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                                   const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                                   unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid);
        }
        // Tuple overload of loadFromSmem, same element-wise expansion.
        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                                     const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                                     unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid);
        }
        // Scalar merge through shared memory: combine this thread's value
        // with the partner value at tid + delta and publish the result.
        template <typename T, class Op>
        __device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
        {
            T reg = smem[tid + delta];
            smem[tid] = val = op(val, reg);
        }
        // Scalar merge via warp shuffle: the partner's value arrives through
        // shfl_down, no shared memory traffic.
        template <typename T, class Op>
        __device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
        {
            T reg = shfl_down(val, delta, width);
            val = op(val, reg);
        }
        // Tuple overload: apply each element's own operator via the For loop.
        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
                  class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
        __device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                              const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                              unsigned int tid,
                                              unsigned int delta,
                                              const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
        {
            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op);
        }
        // Tuple overload of the shuffle-based merge.
        template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
                  class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
        __device__ __forceinline__ void mergeShfl(const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                                  unsigned int delta,
                                                  unsigned int width,
                                                  const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
        {
            For<0, thrust::tuple_size<thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> >::value>::mergeShfl(val, delta, width, op);
        }
        // General shared-memory tree reduction for an arbitrary thread count
        // N.  Each halving step lets only the lower half of the active
        // threads merge with their partner, followed by a block-wide barrier.
        // After the problem fits in half a warp (tid < 16), no further
        // __syncthreads() is issued — this relies on the threads of a warp
        // running in lockstep (pre-Volta warp-synchronous assumption; note
        // the smem pointers are volatile-qualified in the scalar helpers).
        template <unsigned int N> struct Generic
        {
            template <typename Pointer, typename Reference, class Op>
            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
            {
                loadToSmem(smem, val, tid);
                // Barrier only needed once more than one warp participates.
                if (N >= 32)
                    __syncthreads();
                if (N >= 2048)
                {
                    if (tid < 1024)
                        merge(smem, val, tid, 1024, op);
                    __syncthreads();
                }
                if (N >= 1024)
                {
                    if (tid < 512)
                        merge(smem, val, tid, 512, op);
                    __syncthreads();
                }
                if (N >= 512)
                {
                    if (tid < 256)
                        merge(smem, val, tid, 256, op);
                    __syncthreads();
                }
                if (N >= 256)
                {
                    if (tid < 128)
                        merge(smem, val, tid, 128, op);
                    __syncthreads();
                }
                if (N >= 128)
                {
                    if (tid < 64)
                        merge(smem, val, tid, 64, op);
                    __syncthreads();
                }
                if (N >= 64)
                {
                    if (tid < 32)
                        merge(smem, val, tid, 32, op);
                }
                // Final intra-warp steps, unrolled without barriers.
                if (tid < 16)
                {
                    merge(smem, val, tid, 16, op);
                    merge(smem, val, tid, 8, op);
                    merge(smem, val, tid, 4, op);
                    merge(smem, val, tid, 2, op);
                    merge(smem, val, tid, 1, op);
                }
            }
        };
        // Fully unrolled merge sequence with strides I, I/2, ..., 1, realized
        // by template recursion; Unroll<0, ...> terminates it.
        template <unsigned int I, typename Pointer, typename Reference, class Op>
        struct Unroll
        {
            // Shuffle-based variant (no shared memory).
            static __device__ void loopShfl(Reference val, Op op, unsigned int N)
            {
                mergeShfl(val, I, N, op);
                Unroll<I / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
            }
            // Shared-memory variant.
            static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
            {
                merge(smem, val, tid, I, op);
                Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
            }
        };
        // Stride reached 0: recursion ends.
        template <typename Pointer, typename Reference, class Op>
        struct Unroll<0, Pointer, Reference, Op>
        {
            static __device__ void loopShfl(Reference, Op, unsigned int)
            {
            }
            static __device__ void loop(Pointer, Reference, unsigned int, Op)
            {
            }
        };
        // Reduction for N <= warp size (N a power of two): on sm_30+ the
        // whole reduction runs in registers via warp shuffles; otherwise it
        // falls back to a barrier-free shared-memory unrolled loop (all
        // participants are within one warp).
        template <unsigned int N> struct WarpOptimized
        {
            template <typename Pointer, typename Reference, class Op>
            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
            {
            #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
                CV_UNUSED(smem);
                CV_UNUSED(tid);
                Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
            #else
                loadToSmem(smem, val, tid);
                if (tid < N / 2)
                    Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
            #endif
            }
        };
        // Two-phase reduction for N a multiple of 32: first each warp reduces
        // its own 32 values, the leader lane stores one partial per warp,
        // then the first M threads reduce the M warp partials.
        template <unsigned int N> struct GenericOptimized32
        {
            enum { M = N / 32 };  // number of warps == number of partials
            template <typename Pointer, typename Reference, class Op>
            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
            {
                const unsigned int laneId = Warp::laneId();
            #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
                // Intra-warp phase in registers via shuffles.
                Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
                if (laneId == 0)
                    loadToSmem(smem, val, tid / 32);  // one slot per warp
            #else
                // Intra-warp phase through shared memory (warp-synchronous,
                // hence no barrier inside the warp).
                loadToSmem(smem, val, tid);
                if (laneId < 16)
                    Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
                __syncthreads();
                if (laneId == 0)
                    loadToSmem(smem, val, tid / 32);
            #endif
                __syncthreads();
                loadFromSmem(smem, val, tid);
                // Inter-warp phase: the first warp reduces the M partials.
                if (tid < 32)
                {
            #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
                    Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
            #else
                    Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
            #endif
                }
            }
        };
| template <bool val, class T1, class T2> struct StaticIf; | |||
| template <class T1, class T2> struct StaticIf<true, T1, T2> | |||
| { | |||
| typedef T1 type; | |||
| }; | |||
| template <class T1, class T2> struct StaticIf<false, T1, T2> | |||
| { | |||
| typedef T2 type; | |||
| }; | |||
| template <unsigned int N> struct IsPowerOf2 | |||
| { | |||
| enum { value = ((N != 0) && !(N & (N - 1))) }; | |||
| }; | |||
        // Compile-time strategy selection for a reduction over N threads:
        //   - N power of two and <= 32   -> WarpOptimized (single warp)
        //   - N power of two and <= 1024 -> GenericOptimized32 (per-warp
        //                                   phase + warp-of-partials phase)
        //   - anything else              -> Generic (plain tree reduction)
        template <unsigned int N> struct Dispatcher
        {
            typedef typename StaticIf<
                (N <= 32) && IsPowerOf2<N>::value,
                WarpOptimized<N>,
                typename StaticIf<
                    (N <= 1024) && IsPowerOf2<N>::value,
                    GenericOptimized32<N>,
                    Generic<N>
                >::type
            >::type reductor;
        };
| } | |||
| }}} | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_REDUCE_DETAIL_HPP | |||
| @@ -0,0 +1,502 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP | |||
| #define OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP | |||
| #include <thrust/tuple.h> | |||
| #include "../warp.hpp" | |||
| #include "../warp_shuffle.hpp" | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| namespace reduce_key_val_detail | |||
| { | |||
        // GetType<P>::type names the underlying element type behind one level
        // of pointer (possibly volatile-qualified pointee) or reference; used
        // below to declare temporaries for keys fetched from smem or lanes.
        template <typename T> struct GetType;
        template <typename T> struct GetType<T*>
        {
            typedef T type;
        };
        template <typename T> struct GetType<volatile T*>
        {
            typedef T type;
        };
        template <typename T> struct GetType<T&>
        {
            typedef T type;
        };
        // Compile-time loop over tuple elements [I, N) for the key/value
        // reduction; For<N, N> below terminates the recursion.
        template <unsigned int I, unsigned int N>
        struct For
        {
            // Store element I of data into element I of the smem tuple.
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
            {
                thrust::get<I>(smem)[tid] = thrust::get<I>(data);
                For<I + 1, N>::loadToSmem(smem, data, tid);
            }
            // Read element I back from shared memory into data.
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
            {
                thrust::get<I>(data) = thrust::get<I>(smem)[tid];
                For<I + 1, N>::loadFromSmem(smem, data, tid);
            }
            // Replace each value element with the partner lane's copy,
            // fetched via warp shuffle.
            template <class ReferenceTuple>
            static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
            {
                thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
                For<I + 1, N>::copyShfl(val, delta, width);
            }
            // Replace each value element with the partner's copy at
            // tid + delta, going through shared memory.
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
            {
                thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
                For<I + 1, N>::copy(svals, val, tid, delta);
            }
            // Shuffle-based merge: if the partner's key I wins under cmp I,
            // take both its key and its value.
            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
            {
                typename GetType<typename thrust::tuple_element<I, KeyReferenceTuple>::type>::type reg = shfl_down(thrust::get<I>(key), delta, width);
                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
                {
                    thrust::get<I>(key) = reg;
                    thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
                }
                For<I + 1, N>::mergeShfl(key, val, cmp, delta, width);
            }
            // Shared-memory merge: compare the partner's key at tid + delta;
            // on a win, adopt its key and value and publish them to smem.
            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
                                         const ValPointerTuple& svals, const ValReferenceTuple& val,
                                         const CmpTuple& cmp,
                                         unsigned int tid, unsigned int delta)
            {
                typename GetType<typename thrust::tuple_element<I, KeyPointerTuple>::type>::type reg = thrust::get<I>(skeys)[tid + delta];
                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
                {
                    thrust::get<I>(skeys)[tid] = thrust::get<I>(key) = reg;
                    thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
                }
                For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
            }
        };
        // Recursion terminator (I == N): all tuple elements processed, every
        // method is an empty no-op.
        template <unsigned int N>
        struct For<N, N>
        {
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
            {
            }
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
            {
            }
            template <class ReferenceTuple>
            static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int)
            {
            }
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int)
            {
            }
            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int)
            {
            }
            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
                                         const ValPointerTuple&, const ValReferenceTuple&,
                                         const CmpTuple&,
                                         unsigned int, unsigned int)
            {
            }
        };
| ////////////////////////////////////////////////////// | |||
| // loadToSmem | |||
        // Scalar overload: publish this thread's datum to shared memory.
        template <typename T>
        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
        {
            smem[tid] = data;
        }
        // Scalar overload: read this thread's datum back from shared memory.
        template <typename T>
        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
        {
            data = smem[tid];
        }
        // Tuple overloads: expand element-by-element through the For loop
        // (thrust tuples are padded to a fixed 10 slots).
        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                                   const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                                   unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
        }
        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                                     const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                                     unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid);
        }
| ////////////////////////////////////////////////////// | |||
| // copyVals | |||
        // Take over the partner lane's value via warp shuffle (used once the
        // partner's key has won the comparison).
        template <typename V>
        __device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
        {
            val = shfl_down(val, delta, width);
        }
        // Same, but through shared memory: adopt the value at tid + delta and
        // publish it at tid.
        template <typename V>
        __device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
        {
            svals[tid] = val = svals[tid + delta];
        }
        // Tuple overloads: expand element-by-element through the For loop.
        template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                     unsigned int delta,
                                                     int width)
        {
            For<0, thrust::tuple_size<thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> >::value>::copyShfl(val, delta, width);
        }
        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                 unsigned int tid, unsigned int delta)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
        }
| ////////////////////////////////////////////////////// | |||
| // merge | |||
        // Scalar key/value merge via warp shuffle: fetch the partner's key;
        // if it wins under cmp, adopt the partner's key and value.
        template <typename K, typename V, class Cmp>
        __device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
        {
            K reg = shfl_down(key, delta, width);
            if (cmp(reg, key))
            {
                key = reg;
                copyValsShfl(val, delta, width);
            }
        }
        // Scalar key/value merge through shared memory: compare against the
        // key at tid + delta; on a win, adopt and publish key and value.
        template <typename K, typename V, class Cmp>
        __device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
        {
            K reg = skeys[tid + delta];
            if (cmp(reg, key))
            {
                skeys[tid] = key = reg;
                copyVals(svals, val, tid, delta);
            }
        }
        // Scalar key with a tuple of values, shuffle variant: one comparison
        // on the key decides whether the whole value tuple is taken over.
        template <typename K,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp>
        __device__ __forceinline__ void mergeShfl(K& key,
                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                  const Cmp& cmp,
                                                  unsigned int delta, int width)
        {
            K reg = shfl_down(key, delta, width);
            if (cmp(reg, key))
            {
                key = reg;
                copyValsShfl(val, delta, width);
            }
        }
        // Scalar key with a tuple of values, shared-memory variant.
        template <typename K,
                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp>
        __device__ __forceinline__ void merge(volatile K* skeys, K& key,
                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                              const Cmp& cmp, unsigned int tid, unsigned int delta)
        {
            K reg = skeys[tid + delta];
            if (cmp(reg, key))
            {
                skeys[tid] = key = reg;
                copyVals(svals, val, tid, delta);
            }
        }
        // Tuple keys, values and comparators: each key/value pair I is merged
        // independently with its own comparator via the For loop.
        template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
        __device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                  const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                                  unsigned int delta, int width)
        {
            For<0, thrust::tuple_size<thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> >::value>::mergeShfl(key, val, cmp, delta, width);
        }
        template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
                  typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
        __device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                              const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                              const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                              unsigned int tid, unsigned int delta)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
        }
| ////////////////////////////////////////////////////// | |||
| // Generic | |||
        // General shared-memory key/value tree reduction for an arbitrary
        // thread count N.  Mirrors the plain reduce Generic: only the lower
        // half merges at each step, then a block-wide barrier; once the
        // active set fits in half a warp no barrier is issued (relies on
        // warp-synchronous execution — a pre-Volta assumption).
        template <unsigned int N> struct Generic
        {
            template <class KP, class KR, class VP, class VR, class Cmp>
            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
            {
                loadToSmem(skeys, key, tid);
                // NOTE(review): loadValsToSmem is not defined anywhere in
                // this header (only loadToSmem/loadFromSmem are) — confirm
                // it resolves at instantiation; it may be dead code or may
                // have been intended to read loadToSmem(svals, val, tid).
                loadValsToSmem(svals, val, tid);
                // Barrier needed only once more than one warp participates.
                if (N >= 32)
                    __syncthreads();
                if (N >= 2048)
                {
                    if (tid < 1024)
                        merge(skeys, key, svals, val, cmp, tid, 1024);
                    __syncthreads();
                }
                if (N >= 1024)
                {
                    if (tid < 512)
                        merge(skeys, key, svals, val, cmp, tid, 512);
                    __syncthreads();
                }
                if (N >= 512)
                {
                    if (tid < 256)
                        merge(skeys, key, svals, val, cmp, tid, 256);
                    __syncthreads();
                }
                if (N >= 256)
                {
                    if (tid < 128)
                        merge(skeys, key, svals, val, cmp, tid, 128);
                    __syncthreads();
                }
                if (N >= 128)
                {
                    if (tid < 64)
                        merge(skeys, key, svals, val, cmp, tid, 64);
                    __syncthreads();
                }
                if (N >= 64)
                {
                    if (tid < 32)
                        merge(skeys, key, svals, val, cmp, tid, 32);
                }
                // Final intra-warp steps, unrolled without barriers.
                if (tid < 16)
                {
                    merge(skeys, key, svals, val, cmp, tid, 16);
                    merge(skeys, key, svals, val, cmp, tid, 8);
                    merge(skeys, key, svals, val, cmp, tid, 4);
                    merge(skeys, key, svals, val, cmp, tid, 2);
                    merge(skeys, key, svals, val, cmp, tid, 1);
                }
            }
        };
// Compile-time unrolled merge loop: performs merges with strides
// I, I/2, ..., 1 via template recursion (terminated by the I == 0
// specialization below).
template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
struct Unroll
{
// Register-shuffle variant (no shared memory); N is the shuffle width.
static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)
{
mergeShfl(key, val, cmp, I, N);
Unroll<I / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
}
// Shared-memory variant.
static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
merge(skeys, key, svals, val, cmp, tid, I);
Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
}
};
// Recursion terminator for Unroll: the stride has reached zero, so there is
// nothing left to merge.
template <class KP, class KR, class VP, class VR, class Cmp>
struct Unroll<0, KP, KR, VP, VR, Cmp>
{
static __device__ void loopShfl(KR, VR, Cmp, unsigned int)
{
}
static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp)
{
}
};
// Reduction specialized for N <= warpSize with N a power of two: all N
// threads belong to a single warp, so no __syncthreads() barriers are used.
template <unsigned int N> struct WarpOptimized
{
template <class KP, class KR, class VP, class VR, class Cmp>
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
#if 0 // __CUDA_ARCH__ >= 300
// Shuffle path (deliberately disabled by the `#if 0`): merges pairs
// directly between registers without touching shared memory.
CV_UNUSED(skeys);
CV_UNUSED(svals);
CV_UNUSED(tid);
Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
#else
// Shared-memory path.
// NOTE(review): values are staged with loadToSmem here, while
// Generic::reduce uses loadValsToSmem -- confirm this is intentional.
loadToSmem(skeys, key, tid);
loadToSmem(svals, val, tid);
if (tid < N / 2)
Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
#endif
}
};
// Reduction for power-of-two N up to 1024: each warp first reduces its own
// 32 elements, every warp's lane 0 writes the partial result to slot
// tid / 32 of shared memory, and the first warp then reduces those M
// per-warp partials.
template <unsigned int N> struct GenericOptimized32
{
enum { M = N / 32 };   // number of warps == number of per-warp partial results
template <class KP, class KR, class VP, class VR, class Cmp>
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
const unsigned int laneId = Warp::laneId();
#if 0 // __CUDA_ARCH__ >= 300
// Shuffle path (deliberately disabled): reduce each warp in registers,
// then lane 0 of every warp publishes its result to shared memory.
Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);
if (laneId == 0)
{
loadToSmem(skeys, key, tid / 32);
loadToSmem(svals, val, tid / 32);
}
#else
// Shared-memory path: in-warp reduction, then per-warp compaction.
loadToSmem(skeys, key, tid);
loadToSmem(svals, val, tid);
if (laneId < 16)
Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
__syncthreads();
if (laneId == 0)
{
loadToSmem(skeys, key, tid / 32);
loadToSmem(svals, val, tid / 32);
}
#endif
__syncthreads();
loadFromSmem(skeys, key, tid);
// First warp reduces the M per-warp partial results.
if (tid < 32)
{
#if 0 // __CUDA_ARCH__ >= 300
loadFromSmem(svals, val, tid);
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
#else
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
#endif
}
}
};
// Compile-time type selector: StaticIf<Cond, A, B>::type is A when Cond is
// true and B otherwise (a minimal stand-in for std::conditional).
template <bool val, class ThenType, class ElseType> struct StaticIf;
// Condition is false: expose the second alternative.
template <class ThenType, class ElseType> struct StaticIf<false, ThenType, ElseType>
{
    typedef ElseType type;
};
// Condition is true: expose the first alternative.
template <class ThenType, class ElseType> struct StaticIf<true, ThenType, ElseType>
{
    typedef ThenType type;
};
// Compile-time test for "N is an exact power of two".  Zero is explicitly
// excluded; a power of two has exactly one bit set, so clearing the lowest
// set bit (N & (N - 1)) must leave zero.
template <unsigned int N> struct IsPowerOf2
{
    enum { value = (N != 0) && ((N & (N - 1)) == 0) };
};
// Chooses the reduction implementation for a block of N threads:
//   - WarpOptimized      : N <= 32 and a power of two (single warp, no barriers)
//   - GenericOptimized32 : N <= 1024 and a power of two (per-warp then final warp)
//   - Generic            : everything else (fully general tree reduction)
template <unsigned int N> struct Dispatcher
{
typedef typename StaticIf<
(N <= 32) && IsPowerOf2<N>::value,
WarpOptimized<N>,
typename StaticIf<
(N <= 1024) && IsPowerOf2<N>::value,
GenericOptimized32<N>,
Generic<N>
>::type
>::type reductor;
};
| } | |||
| }}} | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP | |||
| @@ -0,0 +1,392 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP | |||
| #define OPENCV_CUDA_TRANSFORM_DETAIL_HPP | |||
| #include "../common.hpp" | |||
| #include "../vec_traits.hpp" | |||
| #include "../functional.hpp" | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| namespace transform_detail | |||
| { | |||
| //! Read Write Traits | |||
// Vectorized access types for the unary transform: reads and writes are done
// `shift` elements at a time, using TypeVec<T, shift> as the wide read type
// and TypeVec<D, shift> as the wide write type.
template <typename T, typename D, int shift> struct UnaryReadWriteTraits
{
typedef typename TypeVec<T, shift>::vec_type read_type;
typedef typename TypeVec<D, shift>::vec_type write_type;
};
// Vectorized access types for the binary transform: wide read types for both
// source images and a wide write type for the destination.
template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
{
typedef typename TypeVec<T1, shift>::vec_type read_type1;
typedef typename TypeVec<T2, shift>::vec_type read_type2;
typedef typename TypeVec<D, shift>::vec_type write_type;
};
| //! Transform kernels | |||
| template <int shift> struct OpUnroller; | |||
| template <> struct OpUnroller<1> | |||
| { | |||
| template <typename T, typename D, typename UnOp, typename Mask> | |||
| static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) | |||
| { | |||
| if (mask(y, x_shifted)) | |||
| dst.x = op(src.x); | |||
| } | |||
| template <typename T1, typename T2, typename D, typename BinOp, typename Mask> | |||
| static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) | |||
| { | |||
| if (mask(y, x_shifted)) | |||
| dst.x = op(src1.x, src2.x); | |||
| } | |||
| }; | |||
| template <> struct OpUnroller<2> | |||
| { | |||
| template <typename T, typename D, typename UnOp, typename Mask> | |||
| static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) | |||
| { | |||
| if (mask(y, x_shifted)) | |||
| dst.x = op(src.x); | |||
| if (mask(y, x_shifted + 1)) | |||
| dst.y = op(src.y); | |||
| } | |||
| template <typename T1, typename T2, typename D, typename BinOp, typename Mask> | |||
| static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) | |||
| { | |||
| if (mask(y, x_shifted)) | |||
| dst.x = op(src1.x, src2.x); | |||
| if (mask(y, x_shifted + 1)) | |||
| dst.y = op(src1.y, src2.y); | |||
| } | |||
| }; | |||
// Specialization for 3-element-wide vectorized access (lanes .x/.y/.z), each
// lane guarded individually by the per-pixel mask.
template <> struct OpUnroller<3>
{
//! Unary transform of three lanes.
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
}
//! Binary transform of three lanes.
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
}
};
// Specialization for 4-element-wide vectorized access (lanes .x/.y/.z/.w),
// each lane guarded individually by the per-pixel mask.
template <> struct OpUnroller<4>
{
//! Unary transform of four lanes.
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
if (mask(y, x_shifted + 3))
dst.w = op(src.w);
}
//! Binary transform of four lanes.
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
if (mask(y, x_shifted + 3))
dst.w = op(src1.w, src2.w);
}
};
// Specialization for 8-element-wide vectorized access.  The 8-element vector
// types expose their lanes as .a0 .. .a7; each lane is guarded individually
// by the per-pixel mask.
template <> struct OpUnroller<8>
{
//! Unary transform of eight lanes.
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.a0 = op(src.a0);
if (mask(y, x_shifted + 1))
dst.a1 = op(src.a1);
if (mask(y, x_shifted + 2))
dst.a2 = op(src.a2);
if (mask(y, x_shifted + 3))
dst.a3 = op(src.a3);
if (mask(y, x_shifted + 4))
dst.a4 = op(src.a4);
if (mask(y, x_shifted + 5))
dst.a5 = op(src.a5);
if (mask(y, x_shifted + 6))
dst.a6 = op(src.a6);
if (mask(y, x_shifted + 7))
dst.a7 = op(src.a7);
}
//! Binary transform of eight lanes.
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.a0 = op(src1.a0, src2.a0);
if (mask(y, x_shifted + 1))
dst.a1 = op(src1.a1, src2.a1);
if (mask(y, x_shifted + 2))
dst.a2 = op(src1.a2, src2.a2);
if (mask(y, x_shifted + 3))
dst.a3 = op(src1.a3, src2.a3);
if (mask(y, x_shifted + 4))
dst.a4 = op(src1.a4, src2.a4);
if (mask(y, x_shifted + 5))
dst.a5 = op(src1.a5, src2.a5);
if (mask(y, x_shifted + 6))
dst.a6 = op(src1.a6, src2.a6);
if (mask(y, x_shifted + 7))
dst.a7 = op(src1.a7, src2.a7);
}
};
// Vectorized ("smart") unary transform kernel: each thread handles
// ft::smart_shift consecutive pixels of a row with a single wide load and
// store when the whole vector fits inside the row; the ragged tail of the
// row falls back to scalar element-wise processing.
template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
{
typedef TransformFunctorTraits<UnOp> ft;
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * ft::smart_shift;   // first pixel of this thread's group
if (y < src_.rows)
{
const T* src = src_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + ft::smart_shift - 1 < src_.cols)
{
// Whole vector is in-bounds: one wide read, unrolled op, one wide write.
const read_type src_n_el = ((const read_type*)src)[x];
OpUnroller<ft::smart_shift>::unroll(src_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
}
else
{
// Row tail: process the remaining pixels one at a time.
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src[real_x]);
}
}
}
}
// Scalar unary transform kernel: one thread per pixel; `op` is applied only
// to in-bounds pixels selected by the mask.
template <typename T, typename D, typename UnOp, typename Mask>
__global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src.cols && y < src.rows && mask(y, x))
{
dst.ptr(y)[x] = op(src.ptr(y)[x]);
}
}
// Vectorized ("smart") binary transform kernel: like the unary variant, but
// combines corresponding pixels of two source images with `op`.  Bounds are
// taken from src1; src2 is assumed to have compatible dimensions.
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
const Mask mask, const BinOp op)
{
typedef TransformFunctorTraits<BinOp> ft;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * ft::smart_shift;   // first pixel of this thread's group
if (y < src1_.rows)
{
const T1* src1 = src1_.ptr(y);
const T2* src2 = src2_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + ft::smart_shift - 1 < src1_.cols)
{
// Whole vector is in-bounds: two wide reads, unrolled op, one wide write.
const read_type1 src1_n_el = ((const read_type1*)src1)[x];
const read_type2 src2_n_el = ((const read_type2*)src2)[x];
OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
}
else
{
// Row tail: process the remaining pixels one at a time.
for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src1[real_x], src2[real_x]);
}
}
}
}
// Scalar binary transform kernel: one thread per pixel; combines the two
// source pixels with `op` for in-bounds pixels selected by the mask.
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
const Mask mask, const BinOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src1.cols && y < src1.rows && mask(y, x))
{
const T1 src1_data = src1.ptr(y)[x];
const T2 src2_data = src2.ptr(y)[x];
dst.ptr(y)[x] = op(src1_data, src2_data);
}
}
// Host-side launch helpers: TransformDispatcher<false> launches the simple
// scalar kernels, TransformDispatcher<true> the vectorized ones.
template <bool UseSmart> struct TransformDispatcher;
template<> struct TransformDispatcher<false>
{
// Launch the scalar unary kernel over the whole image.
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
// Null stream (default stream): make the call synchronous.
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
// Launch the scalar binary kernel over the whole image.
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
// Vectorized launch path: requires every buffer's base pointer and row
// stride to be aligned to the vector width (smart_shift elements);
// otherwise falls back to the scalar dispatcher.
template<> struct TransformDispatcher<true>
{
// Launch the vectorized unary kernel, or fall back to the scalar one.
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
// smart_shift == 1 would make the "smart" path pointless.
CV_StaticAssert(ft::smart_shift != 1, "");
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src, dst, op, mask, stream);
return;
}
// Each thread covers smart_shift pixels, hence the narrower grid in x.
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
// Null stream (default stream): make the call synchronous.
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
// Launch the vectorized binary kernel, or fall back to the scalar one.
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
CV_StaticAssert(ft::smart_shift != 1, "");
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
!isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
| } // namespace transform_detail | |||
| }}} // namespace cv { namespace cuda { namespace cudev | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP | |||
| @@ -0,0 +1,191 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP | |||
| #define OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP | |||
| #include "../common.hpp" | |||
| #include "../vec_traits.hpp" | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| namespace type_traits_detail | |||
| { | |||
// Compile-time branch: Select<Cond, A, B>::type is A when Cond is true and
// B when Cond is false (equivalent to std::conditional).
template <bool, typename OnTrue, typename OnFalse> struct Select
{
    typedef OnTrue type;
};
template <typename OnTrue, typename OnFalse> struct Select<false, OnTrue, OnFalse>
{
    typedef OnFalse type;
};
// Compile-time test: signed integral scalars (schar/short/int) and their
// 1-element CUDA vector wrappers (char1/short1/int1) report value == 1.
// NOTE: the misspelled name "Intergral" is the established public name of
// this trait (referenced by IsIntegral below) and must be preserved.
template <typename T> struct IsSignedIntergral { enum {value = 0}; };
template <> struct IsSignedIntergral<schar> { enum {value = 1}; };
template <> struct IsSignedIntergral<char1> { enum {value = 1}; };
template <> struct IsSignedIntergral<short> { enum {value = 1}; };
template <> struct IsSignedIntergral<short1> { enum {value = 1}; };
template <> struct IsSignedIntergral<int> { enum {value = 1}; };
template <> struct IsSignedIntergral<int1> { enum {value = 1}; };
// Compile-time test: unsigned integral scalars (uchar/ushort/uint) and their
// 1-element CUDA vector wrappers (uchar1/ushort1/uint1) report value == 1.
template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };
// Integral test: true for signed or unsigned integral scalars/1-vectors,
// plus plain char and bool (which the two traits above do not cover).
template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };
template <> struct IsIntegral<char> { enum {value = 1}; };
template <> struct IsIntegral<bool> { enum {value = 1}; };
// Compile-time test for the floating-point scalar types supported here
// (float and double); every other type reports 0.
template <typename T> struct IsFloat
{
    enum { value = 0 };
};
template <> struct IsFloat<float>
{
    enum { value = 1 };
};
template <> struct IsFloat<double>
{
    enum { value = 1 };
};
// Compile-time test: value == 1 for the CUDA/OpenCV fixed-size vector types
// (1-, 2-, 3-, 4- and 8-element variants of uchar/char/ushort/short/uint/
// int/float/double); all other types report 0.
template <typename T> struct IsVec { enum {value = 0}; };
template <> struct IsVec<uchar1> { enum {value = 1}; };
template <> struct IsVec<uchar2> { enum {value = 1}; };
template <> struct IsVec<uchar3> { enum {value = 1}; };
template <> struct IsVec<uchar4> { enum {value = 1}; };
template <> struct IsVec<uchar8> { enum {value = 1}; };
template <> struct IsVec<char1> { enum {value = 1}; };
template <> struct IsVec<char2> { enum {value = 1}; };
template <> struct IsVec<char3> { enum {value = 1}; };
template <> struct IsVec<char4> { enum {value = 1}; };
template <> struct IsVec<char8> { enum {value = 1}; };
template <> struct IsVec<ushort1> { enum {value = 1}; };
template <> struct IsVec<ushort2> { enum {value = 1}; };
template <> struct IsVec<ushort3> { enum {value = 1}; };
template <> struct IsVec<ushort4> { enum {value = 1}; };
template <> struct IsVec<ushort8> { enum {value = 1}; };
template <> struct IsVec<short1> { enum {value = 1}; };
template <> struct IsVec<short2> { enum {value = 1}; };
template <> struct IsVec<short3> { enum {value = 1}; };
template <> struct IsVec<short4> { enum {value = 1}; };
template <> struct IsVec<short8> { enum {value = 1}; };
template <> struct IsVec<uint1> { enum {value = 1}; };
template <> struct IsVec<uint2> { enum {value = 1}; };
template <> struct IsVec<uint3> { enum {value = 1}; };
template <> struct IsVec<uint4> { enum {value = 1}; };
template <> struct IsVec<uint8> { enum {value = 1}; };
template <> struct IsVec<int1> { enum {value = 1}; };
template <> struct IsVec<int2> { enum {value = 1}; };
template <> struct IsVec<int3> { enum {value = 1}; };
template <> struct IsVec<int4> { enum {value = 1}; };
template <> struct IsVec<int8> { enum {value = 1}; };
template <> struct IsVec<float1> { enum {value = 1}; };
template <> struct IsVec<float2> { enum {value = 1}; };
template <> struct IsVec<float3> { enum {value = 1}; };
template <> struct IsVec<float4> { enum {value = 1}; };
template <> struct IsVec<float8> { enum {value = 1}; };
template <> struct IsVec<double1> { enum {value = 1}; };
template <> struct IsVec<double2> { enum {value = 1}; };
template <> struct IsVec<double3> { enum {value = 1}; };
template <> struct IsVec<double4> { enum {value = 1}; };
template <> struct IsVec<double8> { enum {value = 1}; };
// Computes the preferred parameter-passing type for Arg: plain values are
// passed as const Arg&, references stay references, and void stays void so
// the trait can be applied uniformly in generic code.
template <class Arg> struct AddParameterType
{
    typedef const Arg& type;
};
template <class Arg> struct AddParameterType<Arg&>
{
    typedef Arg& type;
};
template <> struct AddParameterType<void>
{
    typedef void type;
};
// Detects lvalue-reference types: value is true for Arg&, and ::type strips
// the reference so the underlying type can be inspected.
template <class Arg> struct ReferenceTraits       { enum { value = false }; typedef Arg type; };
template <class Arg> struct ReferenceTraits<Arg&> { enum { value = true };  typedef Arg type; };
// Detects pointer types (including references to pointers); ::type is the
// pointee for pointers, or void for non-pointer types.
template <class Arg> struct PointerTraits        { enum { value = false }; typedef void type; };
template <class Arg> struct PointerTraits<Arg*>  { enum { value = true };  typedef Arg type; };
template <class Arg> struct PointerTraits<Arg*&> { enum { value = true };  typedef Arg type; };
// Strips a top-level const qualifier (also through an lvalue reference);
// value reports whether const was present.
template <class Arg> struct UnConst             { typedef Arg type;  enum { value = 0 }; };
template <class Arg> struct UnConst<const Arg>  { typedef Arg type;  enum { value = 1 }; };
template <class Arg> struct UnConst<const Arg&> { typedef Arg& type; enum { value = 1 }; };
// Strips a top-level volatile qualifier (also through an lvalue reference);
// value reports whether volatile was present.
template <class Arg> struct UnVolatile                { typedef Arg type;  enum { value = 0 }; };
template <class Arg> struct UnVolatile<volatile Arg>  { typedef Arg type;  enum { value = 1 }; };
template <class Arg> struct UnVolatile<volatile Arg&> { typedef Arg& type; enum { value = 1 }; };
| } // namespace type_traits_detail | |||
| }}} // namespace cv { namespace cuda { namespace cudev | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP | |||
| @@ -0,0 +1,121 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP | |||
| #define OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP | |||
| #include "../datamov_utils.hpp" | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| namespace vec_distance_detail | |||
| { | |||
// Compile-time-unrolled accumulation of a distance between a thread-local
// cached vector and a vector in global memory. Each recursion step consumes
// one cached element and advances the global index by THREAD_DIM, so the N
// steps of one thread touch elements ind, ind + THREAD_DIM, ind + 2*THREAD_DIM, ...
template <int THREAD_DIM, int N> struct UnrollVecDiffCached
{
    // Bounds-checked variant: contributes only while ind < len. The recursive
    // call is inside the check, so the first out-of-range index terminates
    // the whole unrolled chain.
    template <typename Dist, typename T1, typename T2>
    static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
    {
        if (ind < len)
        {
            T1 val1 = *vecCached++;

            T2 val2;
            // ForceGlob performs the load of vecGlob[ind] from global memory.
            ForceGlob<T2>::Load(vecGlob, ind, val2);

            dist.reduceIter(val1, val2);

            UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM);
        }
    }

    // Unchecked variant: assumes all N * THREAD_DIM accesses are in range and
    // advances the global pointer itself instead of carrying an index.
    template <typename Dist, typename T1, typename T2>
    static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
    {
        T1 val1 = *vecCached++;

        T2 val2;
        ForceGlob<T2>::Load(vecGlob, 0, val2);
        vecGlob += THREAD_DIM;

        dist.reduceIter(val1, val2);

        UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached, vecGlob, dist);
    }
};
// Recursion terminator (N == 0): both entry points are intentionally empty.
template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
{
    template <typename Dist, typename T1, typename T2>
    static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int)
    {
    }

    template <typename Dist, typename T1, typename T2>
    static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&)
    {
    }
};
// Compile-time dispatcher selecting the checked or unchecked unrolled loop,
// depending on whether the vector length is statically known to equal MAX_LEN.
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator;

// LEN_EQ_MAX_LEN == false: the actual length may be shorter than MAX_LEN,
// so every access must be bounds-checked against len.
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>
{
    template <typename Dist, typename T1, typename T2>
    static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
    {
        UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcCheck(vecCached, vecGlob, len, dist, tid);
    }
};

// LEN_EQ_MAX_LEN == true: the length is exactly MAX_LEN, so the bounds check
// is skipped; len is accepted but unused to keep the call signature uniform.
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true>
{
    template <typename Dist, typename T1, typename T2>
    static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
    {
        UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist);
    }
};
| } // namespace vec_distance_detail | |||
}}} // namespace cv { namespace cuda { namespace device
| //! @endcond | |||
| #endif // OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP | |||
| @@ -0,0 +1,88 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_DYNAMIC_SMEM_HPP | |||
| #define OPENCV_CUDA_DYNAMIC_SMEM_HPP | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
// Typed view over dynamically-sized CUDA shared memory (the extern __shared__
// buffer whose size is given at kernel launch). The conversion operators
// reinterpret the raw int-typed buffer as T*.
template<class T> struct DynamicSharedMem
{
    __device__ __forceinline__ operator T*()
    {
        extern __shared__ int __smem[];
        return (T*)__smem;
    }

    __device__ __forceinline__ operator const T*() const
    {
        extern __shared__ int __smem[];
        return (T*)__smem;
    }
};
// Specialization for double to avoid unaligned-memory-access compile errors:
// declaring the extern shared array as double gives it 8-byte alignment.
template<> struct DynamicSharedMem<double>
{
    __device__ __forceinline__ operator double*()
    {
        extern __shared__ double __smem_d[];
        return (double*)__smem_d;
    }

    __device__ __forceinline__ operator const double*() const
    {
        extern __shared__ double __smem_d[];
        return (double*)__smem_d;
    }
};
| }}} | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_DYNAMIC_SMEM_HPP | |||
| @@ -0,0 +1,269 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_EMULATION_HPP_ | |||
| #define OPENCV_CUDA_EMULATION_HPP_ | |||
| #include "common.hpp" | |||
| #include "warp_reduce.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
// Software fallbacks ("emulation") for warp/block intrinsics and atomic
// operations that older CUDA architectures lack; on capable hardware each
// member simply forwards to the native intrinsic.
struct Emulation
{
    static __device__ __forceinline__ int syncthreadsOr(int pred)
    {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
        // compilation stub only: __syncthreads_or requires sm_20+
        return 0;
#else
        return __syncthreads_or(pred);
#endif
    }

    // Warp ballot: one bit per lane whose predicate is non-zero. The pre-sm_20
    // fallback builds per-thread bits in shared memory and combines them with
    // warp_reduce.
    template<int CTA_SIZE>
    static __forceinline__ __device__ int Ballot(int predicate)
    {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
        return __ballot(predicate);
#else
        __shared__ volatile int cta_buffer[CTA_SIZE];

        int tid = threadIdx.x;
        cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
        return warp_reduce(cta_buffer);
#endif
    }

    // Atomics on shared memory. Native shared-memory atomics need sm_12+; the
    // fallbacks emulate them with a tag-and-retry scheme: each thread stamps
    // its id into the top 5 bits of the word and spins until its own write is
    // the one that sticks. TAG_MASK selects the low value bits.
    struct smem
    {
        enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };

        // NOTE(review): unlike native ::atomicInc, this fallback does not wrap
        // the counter back to 0 at val — it increments unconditionally.
        template<typename T>
        static __device__ __forceinline__ T atomicInc(T* address, T val)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
            T count;
            unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
            do
            {
                count = *address & TAG_MASK;
                count = tag | (count + 1);
                *address = count;
            } while (*address != count);

            return (count & TAG_MASK) - 1;
#else
            return ::atomicInc(address, val);
#endif
        }

        template<typename T>
        static __device__ __forceinline__ T atomicAdd(T* address, T val)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
            T count;
            unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
            do
            {
                count = *address & TAG_MASK;
                count = tag | (count + val);
                *address = count;
            } while (*address != count);

            return (count & TAG_MASK) - val;
#else
            return ::atomicAdd(address, val);
#endif
        }

        // Fallback min: repeatedly writes the candidate minimum until no other
        // thread has stored a larger value over it.
        template<typename T>
        static __device__ __forceinline__ T atomicMin(T* address, T val)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
            T count = ::min(*address, val);
            do
            {
                *address = count;
            } while (*address > count);

            return count;
#else
            return ::atomicMin(address, val);
#endif
        }
    }; // struct smem

    // Atomics on global memory. Floating-point add/min/max are emulated with
    // atomicCAS loops where no native instruction exists; variants that cannot
    // be emulated on the target architecture return 0.
    struct glob
    {
        static __device__ __forceinline__ int atomicAdd(int* address, int val)
        {
            return ::atomicAdd(address, val);
        }
        static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
        {
            return ::atomicAdd(address, val);
        }
        static __device__ __forceinline__ float atomicAdd(float* address, float val)
        {
#if __CUDA_ARCH__ >= 200
            return ::atomicAdd(address, val);
#else
            // CAS loop on the int bit-pattern of the float.
            int* address_as_i = (int*) address;
            int old = *address_as_i, assumed;
            do {
                assumed = old;
                old = ::atomicCAS(address_as_i, assumed,
                    __float_as_int(val + __int_as_float(assumed)));
            } while (assumed != old);
            return __int_as_float(old);
#endif
        }
        static __device__ __forceinline__ double atomicAdd(double* address, double val)
        {
#if __CUDA_ARCH__ >= 130
            // CAS loop on the 64-bit bit-pattern of the double.
            unsigned long long int* address_as_ull = (unsigned long long int*) address;
            unsigned long long int old = *address_as_ull, assumed;
            do {
                assumed = old;
                old = ::atomicCAS(address_as_ull, assumed,
                    __double_as_longlong(val + __longlong_as_double(assumed)));
            } while (assumed != old);
            return __longlong_as_double(old);
#else
            CV_UNUSED(address);
            CV_UNUSED(val);
            return 0.0;
#endif
        }

        static __device__ __forceinline__ int atomicMin(int* address, int val)
        {
            return ::atomicMin(address, val);
        }
        static __device__ __forceinline__ float atomicMin(float* address, float val)
        {
#if __CUDA_ARCH__ >= 120
            int* address_as_i = (int*) address;
            int old = *address_as_i, assumed;
            do {
                assumed = old;
                old = ::atomicCAS(address_as_i, assumed,
                    __float_as_int(::fminf(val, __int_as_float(assumed))));
            } while (assumed != old);
            return __int_as_float(old);
#else
            CV_UNUSED(address);
            CV_UNUSED(val);
            return 0.0f;
#endif
        }
        static __device__ __forceinline__ double atomicMin(double* address, double val)
        {
#if __CUDA_ARCH__ >= 130
            unsigned long long int* address_as_ull = (unsigned long long int*) address;
            unsigned long long int old = *address_as_ull, assumed;
            do {
                assumed = old;
                old = ::atomicCAS(address_as_ull, assumed,
                    __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
            } while (assumed != old);
            return __longlong_as_double(old);
#else
            CV_UNUSED(address);
            CV_UNUSED(val);
            return 0.0;
#endif
        }

        static __device__ __forceinline__ int atomicMax(int* address, int val)
        {
            return ::atomicMax(address, val);
        }
        static __device__ __forceinline__ float atomicMax(float* address, float val)
        {
#if __CUDA_ARCH__ >= 120
            int* address_as_i = (int*) address;
            int old = *address_as_i, assumed;
            do {
                assumed = old;
                old = ::atomicCAS(address_as_i, assumed,
                    __float_as_int(::fmaxf(val, __int_as_float(assumed))));
            } while (assumed != old);
            return __int_as_float(old);
#else
            CV_UNUSED(address);
            CV_UNUSED(val);
            return 0.0f;
#endif
        }
        static __device__ __forceinline__ double atomicMax(double* address, double val)
        {
#if __CUDA_ARCH__ >= 130
            unsigned long long int* address_as_ull = (unsigned long long int*) address;
            unsigned long long int old = *address_as_ull, assumed;
            do {
                assumed = old;
                old = ::atomicCAS(address_as_ull, assumed,
                    __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
            } while (assumed != old);
            return __longlong_as_double(old);
#else
            CV_UNUSED(address);
            CV_UNUSED(val);
            return 0.0;
#endif
        }
    };
}; //struct Emulation
}}} // namespace cv { namespace cuda { namespace device
| //! @endcond | |||
| #endif /* OPENCV_CUDA_EMULATION_HPP_ */ | |||
| @@ -0,0 +1,286 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_FILTERS_HPP | |||
| #define OPENCV_CUDA_FILTERS_HPP | |||
| #include "saturate_cast.hpp" | |||
| #include "vec_traits.hpp" | |||
| #include "vec_math.hpp" | |||
| #include "type_traits.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| template <typename Ptr2D> struct PointFilter | |||
| { | |||
| typedef typename Ptr2D::elem_type elem_type; | |||
| typedef float index_type; | |||
| explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) | |||
| : src(src_) | |||
| { | |||
| CV_UNUSED(fx); | |||
| CV_UNUSED(fy); | |||
| } | |||
| __device__ __forceinline__ elem_type operator ()(float y, float x) const | |||
| { | |||
| return src(__float2int_rz(y), __float2int_rz(x)); | |||
| } | |||
| Ptr2D src; | |||
| }; | |||
| template <typename Ptr2D> struct LinearFilter | |||
| { | |||
| typedef typename Ptr2D::elem_type elem_type; | |||
| typedef float index_type; | |||
| explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) | |||
| : src(src_) | |||
| { | |||
| CV_UNUSED(fx); | |||
| CV_UNUSED(fy); | |||
| } | |||
| __device__ __forceinline__ elem_type operator ()(float y, float x) const | |||
| { | |||
| typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type; | |||
| work_type out = VecTraits<work_type>::all(0); | |||
| const int x1 = __float2int_rd(x); | |||
| const int y1 = __float2int_rd(y); | |||
| const int x2 = x1 + 1; | |||
| const int y2 = y1 + 1; | |||
| elem_type src_reg = src(y1, x1); | |||
| out = out + src_reg * ((x2 - x) * (y2 - y)); | |||
| src_reg = src(y1, x2); | |||
| out = out + src_reg * ((x - x1) * (y2 - y)); | |||
| src_reg = src(y2, x1); | |||
| out = out + src_reg * ((x2 - x) * (y - y1)); | |||
| src_reg = src(y2, x2); | |||
| out = out + src_reg * ((x - x1) * (y - y1)); | |||
| return saturate_cast<elem_type>(out); | |||
| } | |||
| Ptr2D src; | |||
| }; | |||
// Bicubic sampler: weights the pixels in a (at most) 4x4 neighbourhood of
// (x, y) with a piecewise-cubic kernel and renormalizes by the accumulated
// weight sum.
template <typename Ptr2D> struct CubicFilter
{
    typedef typename Ptr2D::elem_type elem_type;
    typedef float index_type;
    typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

    explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
        : src(src_)
    {
        CV_UNUSED(fx);
        CV_UNUSED(fy);
    }

    // Cubic convolution weight for a sample at (signed) distance x_ from the
    // target: 1 at 0, 0 at |x| >= 2, negative lobe on 1 < |x| < 2.
    static __device__ __forceinline__ float bicubicCoeff(float x_)
    {
        float x = fabsf(x_);
        if (x <= 1.0f)
        {
            return x * x * (1.5f * x - 2.5f) + 1.0f;
        }
        else if (x < 2.0f)
        {
            return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
        }
        else
        {
            return 0.0f;
        }
    }

    __device__ elem_type operator ()(float y, float x) const
    {
        // Integer sample window within distance 2 of (x, y) in each axis.
        const float xmin = ::ceilf(x - 2.0f);
        const float xmax = ::floorf(x + 2.0f);

        const float ymin = ::ceilf(y - 2.0f);
        const float ymax = ::floorf(y + 2.0f);

        work_type sum = VecTraits<work_type>::all(0);
        float wsum = 0.0f;

        for (float cy = ymin; cy <= ymax; cy += 1.0f)
        {
            for (float cx = xmin; cx <= xmax; cx += 1.0f)
            {
                const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
                sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
                wsum += w;
            }
        }

        // Renormalize by the weight sum, guarding against an all-zero sum.
        work_type res = (!wsum)? VecTraits<work_type>::all(0) : sum / wsum;

        return saturate_cast<elem_type>(res);
    }

    Ptr2D src;
};
// for integer scaling
// Area (box-average) downsampling filter specialized for integer scale
// factors: each destination pixel averages exactly scale_x * scale_y whole
// source pixels, so no fractional border handling is required.
template <typename Ptr2D> struct IntegerAreaFilter
{
    typedef typename Ptr2D::elem_type elem_type;
    typedef float index_type;

    explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
        : src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}

    __device__ __forceinline__ elem_type operator ()(float y, float x) const
    {
        // Source footprint [fsx1, fsx2) x [fsy1, fsy2) of this output pixel.
        float fsx1 = x * scale_x;
        float fsx2 = fsx1 + scale_x;

        int sx1 = __float2int_ru(fsx1);
        int sx2 = __float2int_rd(fsx2);

        float fsy1 = y * scale_y;
        float fsy2 = fsy1 + scale_y;

        int sy1 = __float2int_ru(fsy1);
        int sy2 = __float2int_rd(fsy2);

        typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
        work_type out = VecTraits<work_type>::all(0.f);

        // Accumulate every covered source pixel, pre-scaled by 1/(sx*sy).
        for(int dy = sy1; dy < sy2; ++dy)
            for(int dx = sx1; dx < sx2; ++dx)
            {
                out = out + src(dy, dx) * scale;
            }

        return saturate_cast<elem_type>(out);
    }

    Ptr2D src;
    float scale_x, scale_y ,scale;
};
// Area (box-average) downsampling filter for arbitrary (non-integer) scale
// factors: averages all source pixels covered by the destination pixel and
// adds fractional contributions from partially-covered border rows, columns
// and corners.
template <typename Ptr2D> struct AreaFilter
{
    typedef typename Ptr2D::elem_type elem_type;
    typedef float index_type;

    explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
        : src(src_), scale_x(scale_x_), scale_y(scale_y_){}

    __device__ __forceinline__ elem_type operator ()(float y, float x) const
    {
        // Source footprint [fsx1, fsx2) x [fsy1, fsy2); the sx/sy bounds
        // delimit the fully-covered integer cells inside it.
        float fsx1 = x * scale_x;
        float fsx2 = fsx1 + scale_x;

        int sx1 = __float2int_ru(fsx1);
        int sx2 = __float2int_rd(fsx2);

        float fsy1 = y * scale_y;
        float fsy2 = fsy1 + scale_y;

        int sy1 = __float2int_ru(fsy1);
        int sy2 = __float2int_rd(fsy2);

        // Normalization factor from the footprint clipped to the source extent.
        float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));

        typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
        work_type out = VecTraits<work_type>::all(0.f);

        // Fully covered interior cells plus the partially covered left and
        // right columns of each interior row.
        for (int dy = sy1; dy < sy2; ++dy)
        {
            for (int dx = sx1; dx < sx2; ++dx)
                out = out + src(dy, dx) * scale;

            if (sx1 > fsx1)
                out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);

            if (sx2 < fsx2)
                out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
        }

        // Partially covered top and bottom rows.
        if (sy1 > fsy1)
            for (int dx = sx1; dx < sx2; ++dx)
                out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);

        if (sy2 < fsy2)
            for (int dx = sx1; dx < sx2; ++dx)
                out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);

        // The four corner cells, fractionally covered in both directions.
        if ((sy1 > fsy1) && (sx1 > fsx1))
            out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);

        if ((sy1 > fsy1) && (sx2 < fsx2))
            out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);

        if ((sy2 < fsy2) && (sx2 < fsx2))
            out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);

        if ((sy2 < fsy2) && (sx1 > fsx1))
            out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);

        return saturate_cast<elem_type>(out);
    }

    Ptr2D src;
    float scale_x, scale_y;
    // NOTE(review): these members are never read here (the code above uses
    // src.width/src.height) and "haight" looks like a typo for "height";
    // kept as-is because renaming a public member could break callers.
    int width, haight;
};
}}} // namespace cv { namespace cuda { namespace device
| //! @endcond | |||
| #endif // OPENCV_CUDA_FILTERS_HPP | |||
| @@ -0,0 +1,79 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP | |||
| #define OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP | |||
| #include <cstdio> | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| template<class Func> | |||
| void printFuncAttrib(Func& func) | |||
| { | |||
| cudaFuncAttributes attrs; | |||
| cudaFuncGetAttributes(&attrs, func); | |||
| printf("=== Function stats ===\n"); | |||
| printf("Name: \n"); | |||
| printf("sharedSizeBytes = %d\n", attrs.sharedSizeBytes); | |||
| printf("constSizeBytes = %d\n", attrs.constSizeBytes); | |||
| printf("localSizeBytes = %d\n", attrs.localSizeBytes); | |||
| printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock); | |||
| printf("numRegs = %d\n", attrs.numRegs); | |||
| printf("ptxVersion = %d\n", attrs.ptxVersion); | |||
| printf("binaryVersion = %d\n", attrs.binaryVersion); | |||
| printf("\n"); | |||
| fflush(stdout); | |||
| } | |||
}}} // namespace cv { namespace cuda { namespace device
| //! @endcond | |||
| #endif /* OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP */ | |||
| @@ -0,0 +1,805 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_FUNCTIONAL_HPP | |||
| #define OPENCV_CUDA_FUNCTIONAL_HPP | |||
| #include <functional> | |||
| #include "saturate_cast.hpp" | |||
| #include "vec_traits.hpp" | |||
| #include "type_traits.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
namespace cv { namespace cuda { namespace device
{
    // Function Objects
    // Lightweight replacements for std::unary_function / std::binary_function
    // (deprecated in C++11, removed in C++17). They publish only the
    // argument/result typedefs that the functor-trait machinery at the end of
    // this header (IsUnaryFunction / IsBinaryFunction / ShiftDispatcher)
    // relies on.
    template<typename Argument, typename Result> struct unary_function
    {
        typedef Argument argument_type;
        typedef Result result_type;
    };
    template<typename Argument1, typename Argument2, typename Result> struct binary_function
    {
        typedef Argument1 first_argument_type;
        typedef Argument2 second_argument_type;
        typedef Result result_type;
    };
    // Arithmetic Operations
    // NOTE(review): TypeTraits<T>::ParameterType comes from type_traits.hpp;
    // presumably it selects by-value passing for small types and const
    // reference for larger ones -- confirm in type_traits.hpp.
    // The empty default/copy constructors are written out explicitly so they
    // carry both __host__ and __device__ qualifiers (the implicitly generated
    // ones would not, which nvcc warns about / rejects in device code).
    template <typename T> struct plus : binary_function<T, T, T>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a + b;
        }
        __host__ __device__ __forceinline__ plus() {}
        __host__ __device__ __forceinline__ plus(const plus&) {}
    };
    template <typename T> struct minus : binary_function<T, T, T>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a - b;
        }
        __host__ __device__ __forceinline__ minus() {}
        __host__ __device__ __forceinline__ minus(const minus&) {}
    };
    template <typename T> struct multiplies : binary_function<T, T, T>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a * b;
        }
        __host__ __device__ __forceinline__ multiplies() {}
        __host__ __device__ __forceinline__ multiplies(const multiplies&) {}
    };
    template <typename T> struct divides : binary_function<T, T, T>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a / b;
        }
        __host__ __device__ __forceinline__ divides() {}
        __host__ __device__ __forceinline__ divides(const divides&) {}
    };
    template <typename T> struct modulus : binary_function<T, T, T>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a % b;
        }
        __host__ __device__ __forceinline__ modulus() {}
        __host__ __device__ __forceinline__ modulus(const modulus&) {}
    };
    template <typename T> struct negate : unary_function<T, T>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a) const
        {
            return -a;
        }
        __host__ __device__ __forceinline__ negate() {}
        __host__ __device__ __forceinline__ negate(const negate&) {}
    };
    // Comparison Operations
    template <typename T> struct equal_to : binary_function<T, T, bool>
    {
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a == b;
        }
        __host__ __device__ __forceinline__ equal_to() {}
        __host__ __device__ __forceinline__ equal_to(const equal_to&) {}
    };
    template <typename T> struct not_equal_to : binary_function<T, T, bool>
    {
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a != b;
        }
        __host__ __device__ __forceinline__ not_equal_to() {}
        __host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
    };
    template <typename T> struct greater : binary_function<T, T, bool>
    {
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a > b;
        }
        __host__ __device__ __forceinline__ greater() {}
        __host__ __device__ __forceinline__ greater(const greater&) {}
    };
    template <typename T> struct less : binary_function<T, T, bool>
    {
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a < b;
        }
        __host__ __device__ __forceinline__ less() {}
        __host__ __device__ __forceinline__ less(const less&) {}
    };
    template <typename T> struct greater_equal : binary_function<T, T, bool>
    {
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a >= b;
        }
        __host__ __device__ __forceinline__ greater_equal() {}
        __host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
    };
    template <typename T> struct less_equal : binary_function<T, T, bool>
    {
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a <= b;
        }
        __host__ __device__ __forceinline__ less_equal() {}
        __host__ __device__ __forceinline__ less_equal(const less_equal&) {}
    };
    // Logical Operations
    template <typename T> struct logical_and : binary_function<T, T, bool>
    {
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a && b;
        }
        __host__ __device__ __forceinline__ logical_and() {}
        __host__ __device__ __forceinline__ logical_and(const logical_and&) {}
    };
    template <typename T> struct logical_or : binary_function<T, T, bool>
    {
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
                                                    typename TypeTraits<T>::ParameterType b) const
        {
            return a || b;
        }
        __host__ __device__ __forceinline__ logical_or() {}
        __host__ __device__ __forceinline__ logical_or(const logical_or&) {}
    };
    template <typename T> struct logical_not : unary_function<T, bool>
    {
        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a) const
        {
            return !a;
        }
        __host__ __device__ __forceinline__ logical_not() {}
        __host__ __device__ __forceinline__ logical_not(const logical_not&) {}
    };
    // Bitwise Operations
    template <typename T> struct bit_and : binary_function<T, T, T>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a & b;
        }
        __host__ __device__ __forceinline__ bit_and() {}
        __host__ __device__ __forceinline__ bit_and(const bit_and&) {}
    };
    template <typename T> struct bit_or : binary_function<T, T, T>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a | b;
        }
        __host__ __device__ __forceinline__ bit_or() {}
        __host__ __device__ __forceinline__ bit_or(const bit_or&) {}
    };
    template <typename T> struct bit_xor : binary_function<T, T, T>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
                                                 typename TypeTraits<T>::ParameterType b) const
        {
            return a ^ b;
        }
        __host__ __device__ __forceinline__ bit_xor() {}
        __host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
    };
    template <typename T> struct bit_not : unary_function<T, T>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType v) const
        {
            return ~v;
        }
        __host__ __device__ __forceinline__ bit_not() {}
        __host__ __device__ __forceinline__ bit_not(const bit_not&) {}
    };
    // Generalized Identity Operations
    template <typename T> struct identity : unary_function<T, T>
    {
        __device__ __forceinline__ typename TypeTraits<T>::ParameterType operator()(typename TypeTraits<T>::ParameterType x) const
        {
            return x;
        }
        __host__ __device__ __forceinline__ identity() {}
        __host__ __device__ __forceinline__ identity(const identity&) {}
    };
    // project1st/project2nd: binary functors that ignore one argument and
    // return the other (useful as building blocks for transform kernels).
    template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
    {
        __device__ __forceinline__ typename TypeTraits<T1>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
        {
            return lhs;
        }
        __host__ __device__ __forceinline__ project1st() {}
        __host__ __device__ __forceinline__ project1st(const project1st&) {}
    };
    template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
    {
        __device__ __forceinline__ typename TypeTraits<T2>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
        {
            return rhs;
        }
        __host__ __device__ __forceinline__ project2nd() {}
        __host__ __device__ __forceinline__ project2nd(const project2nd&) {}
    };
    // Min/Max Operations
    // The macro generates full specializations of maximum/minimum for the
    // primitive types below, so that the device builtins (::max/::min,
    // ::fmax/::fmin for floating point) are called on plain values instead of
    // going through TypeTraits<T>::ParameterType.
#define OPENCV_CUDA_IMPLEMENT_MINMAX(name, type, op) \
    template <> struct name<type> : binary_function<type, type, type> \
    { \
        __device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
        __host__ __device__ __forceinline__ name() {}\
        __host__ __device__ __forceinline__ name(const name&) {}\
    };
    template <typename T> struct maximum : binary_function<T, T, T>
    {
        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
        {
            return max(lhs, rhs);
        }
        __host__ __device__ __forceinline__ maximum() {}
        __host__ __device__ __forceinline__ maximum(const maximum&) {}
    };
    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uchar, ::max)
    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, schar, ::max)
    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, char, ::max)
    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, ushort, ::max)
    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, short, ::max)
    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, int, ::max)
    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uint, ::max)
    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, float, ::fmax)
    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, double, ::fmax)
    template <typename T> struct minimum : binary_function<T, T, T>
    {
        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
        {
            return min(lhs, rhs);
        }
        __host__ __device__ __forceinline__ minimum() {}
        __host__ __device__ __forceinline__ minimum(const minimum&) {}
    };
    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uchar, ::min)
    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, schar, ::min)
    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, char, ::min)
    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, ushort, ::min)
    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, short, ::min)
    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, int, ::min)
    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uint, ::min)
    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, float, ::fmin)
    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, double, ::fmin)
#undef OPENCV_CUDA_IMPLEMENT_MINMAX
    // Math functions
    // abs_func: generic template defers to an unqualified abs(x); the
    // specializations below pick the correct overload per type (identity for
    // unsigned types, ::abs on a widened int for narrow signed types,
    // ::fabsf/::fabs for float/double).
    template <typename T> struct abs_func : unary_function<T, T>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType x) const
        {
            return abs(x);
        }
        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
    {
        __device__ __forceinline__ unsigned char operator ()(unsigned char x) const
        {
            return x;
        }
        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<signed char> : unary_function<signed char, signed char>
    {
        __device__ __forceinline__ signed char operator ()(signed char x) const
        {
            return ::abs((int)x);
        }
        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<char> : unary_function<char, char>
    {
        __device__ __forceinline__ char operator ()(char x) const
        {
            return ::abs((int)x);
        }
        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
    {
        __device__ __forceinline__ unsigned short operator ()(unsigned short x) const
        {
            return x;
        }
        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<short> : unary_function<short, short>
    {
        __device__ __forceinline__ short operator ()(short x) const
        {
            return ::abs((int)x);
        }
        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
    {
        __device__ __forceinline__ unsigned int operator ()(unsigned int x) const
        {
            return x;
        }
        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<int> : unary_function<int, int>
    {
        __device__ __forceinline__ int operator ()(int x) const
        {
            return ::abs(x);
        }
        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<float> : unary_function<float, float>
    {
        __device__ __forceinline__ float operator ()(float x) const
        {
            return ::fabsf(x);
        }
        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<double> : unary_function<double, double>
    {
        __device__ __forceinline__ double operator ()(double x) const
        {
            return ::fabs(x);
        }
        __host__ __device__ __forceinline__ abs_func() {}
        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    // OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) generates name##_func<T>:
    //  - the generic template computes in float via the func##f variant
    //    (e.g. ::sqrtf) and returns float;
    //  - the <double> specialization uses the double-precision func and
    //    returns double.
#define OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) \
    template <typename T> struct name ## _func : unary_function<T, float> \
    { \
        __device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
        { \
            return func ## f(v); \
        } \
        __host__ __device__ __forceinline__ name ## _func() {} \
        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
    }; \
    template <> struct name ## _func<double> : unary_function<double, double> \
    { \
        __device__ __forceinline__ double operator ()(double v) const \
        { \
            return func(v); \
        } \
        __host__ __device__ __forceinline__ name ## _func() {} \
        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
    };
    // Binary counterpart of the macro above (same float/double split).
#define OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(name, func) \
    template <typename T> struct name ## _func : binary_function<T, T, float> \
    { \
        __device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
        { \
            return func ## f(v1, v2); \
        } \
        __host__ __device__ __forceinline__ name ## _func() {} \
        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
    }; \
    template <> struct name ## _func<double> : binary_function<double, double, double> \
    { \
        __device__ __forceinline__ double operator ()(double v1, double v2) const \
        { \
            return func(v1, v2); \
        } \
        __host__ __device__ __forceinline__ name ## _func() {} \
        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
    };
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp, ::exp)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log, ::log)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log2, ::log2)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log10, ::log10)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sin, ::sin)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cos, ::cos)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tan, ::tan)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asin, ::asin)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acos, ::acos)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atan, ::atan)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh)
    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh)
    OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot)
    OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2)
    OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(pow, ::pow)
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR
    // NOTE: OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE is never defined in
    // this header; the #undef is harmless and matches upstream OpenCV.
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE
#undef OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR
    // Squared hypotenuse: src1^2 + src2^2 (no square root).
    // NOTE(review): declares result_type float via binary_function<T, T, float>
    // but operator() returns T -- matches upstream OpenCV, kept as-is.
    template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
    {
        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType src1, typename TypeTraits<T>::ParameterType src2) const
        {
            return src1 * src1 + src2 * src2;
        }
        __host__ __device__ __forceinline__ hypot_sqr_func() {}
        __host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
    };
    // Saturate Cast Functor
    // Functor wrapper around saturate_cast<D> (see saturate_cast.hpp).
    template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
    {
        __device__ __forceinline__ D operator ()(typename TypeTraits<T>::ParameterType v) const
        {
            return saturate_cast<D>(v);
        }
        __host__ __device__ __forceinline__ saturate_cast_func() {}
        __host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
    };
    // Threshold Functors
    // Branchless thresholding: a bool comparison is multiplied into the
    // result, e.g. (src > thresh) * maxVal yields maxVal or 0.
    // These presumably mirror cv::threshold's THRESH_* modes -- confirm
    // against the CPU implementation.
    template <typename T> struct thresh_binary_func : unary_function<T, T>
    {
        __host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
        {
            return (src > thresh) * maxVal;
        }
        __host__ __device__ __forceinline__ thresh_binary_func() {}
        __host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
            : thresh(other.thresh), maxVal(other.maxVal) {}
        T thresh;
        T maxVal;
    };
    template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
    {
        __host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
        {
            return (src <= thresh) * maxVal;
        }
        __host__ __device__ __forceinline__ thresh_binary_inv_func() {}
        __host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
            : thresh(other.thresh), maxVal(other.maxVal) {}
        T thresh;
        T maxVal;
    };
    // The maxVal_ parameter is accepted (and ignored) so all five threshold
    // functors share the same two-argument construction signature.
    template <typename T> struct thresh_trunc_func : unary_function<T, T>
    {
        explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
        {
            return minimum<T>()(src, thresh);
        }
        __host__ __device__ __forceinline__ thresh_trunc_func() {}
        __host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
            : thresh(other.thresh) {}
        T thresh;
    };
    template <typename T> struct thresh_to_zero_func : unary_function<T, T>
    {
        explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
        {
            return (src > thresh) * src;
        }
        __host__ __device__ __forceinline__ thresh_to_zero_func() {}
        __host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
            : thresh(other.thresh) {}
        T thresh;
    };
    template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
    {
        explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
        {
            return (src <= thresh) * src;
        }
        __host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}
        __host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
            : thresh(other.thresh) {}
        T thresh;
    };
    // Function Object Adaptors
    // Device-side equivalents of the (pre-C++17) std adaptors:
    // not1/not2 negate a predicate, bind1st/bind2nd fix one argument of a
    // binary functor.
    template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
    {
        explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {}
        __device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::argument_type>::ParameterType x) const
        {
            return !pred(x);
        }
        __host__ __device__ __forceinline__ unary_negate() {}
        __host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
        Predicate pred;
    };
    template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
    {
        return unary_negate<Predicate>(pred);
    }
    template <typename Predicate> struct binary_negate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>
    {
        explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {}
        __device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::first_argument_type>::ParameterType x,
                                                   typename TypeTraits<typename Predicate::second_argument_type>::ParameterType y) const
        {
            return !pred(x,y);
        }
        __host__ __device__ __forceinline__ binary_negate() {}
        __host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
        Predicate pred;
    };
    template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
    {
        return binary_negate<BinaryPredicate>(pred);
    }
    template <typename Op> struct binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
    {
        __host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {}
        __device__ __forceinline__ typename Op::result_type operator ()(typename TypeTraits<typename Op::second_argument_type>::ParameterType a) const
        {
            return op(arg1, a);
        }
        __host__ __device__ __forceinline__ binder1st() {}
        __host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {}
        Op op;
        typename Op::first_argument_type arg1;
    };
    template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
    {
        return binder1st<Op>(op, typename Op::first_argument_type(x));
    }
    template <typename Op> struct binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
    {
        __host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {}
        __forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits<typename Op::first_argument_type>::ParameterType a) const
        {
            return op(a, arg2);
        }
        __host__ __device__ __forceinline__ binder2nd() {}
        __host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
        Op op;
        typename Op::second_argument_type arg2;
    };
    template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
    {
        return binder2nd<Op>(op, typename Op::second_argument_type(x));
    }
    // Functor Traits
    // Classic sizeof-based SFINAE detection (pre-C++11 style): overload
    // resolution picks the unary_function/binary_function overload of check()
    // iff F derives from it, and the two overloads have distinguishable
    // return-type sizes. makeF() is declared but never defined -- it is only
    // used inside the unevaluated sizeof().
    template <typename F> struct IsUnaryFunction
    {
        typedef char Yes;
        struct No {Yes a[2];};
        template <typename T, typename D> static Yes check(unary_function<T, D>);
        static No check(...);
        static F makeF();
        enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
    };
    template <typename F> struct IsBinaryFunction
    {
        typedef char Yes;
        struct No {Yes a[2];};
        template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
        static No check(...);
        static F makeF();
        enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
    };
    namespace functional_detail
    {
        // shift = elements handled per thread by the transform framework,
        // keyed on element size: presumably 1-byte elements are processed 4
        // at a time and 2-byte elements 2 at a time (vectorized access),
        // everything else one at a time -- confirm in transform_detail.hpp.
        template <size_t src_elem_size, size_t dst_elem_size> struct UnOpShift { enum { shift = 1 }; };
        template <size_t src_elem_size> struct UnOpShift<src_elem_size, 1> { enum { shift = 4 }; };
        template <size_t src_elem_size> struct UnOpShift<src_elem_size, 2> { enum { shift = 2 }; };
        template <typename T, typename D> struct DefaultUnaryShift
        {
            enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
        };
        template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> struct BinOpShift { enum { shift = 1 }; };
        template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 1> { enum { shift = 4 }; };
        template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 2> { enum { shift = 2 }; };
        template <typename T1, typename T2, typename D> struct DefaultBinaryShift
        {
            enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
        };
        // Dispatch on unary-vs-binary functor, detected via IsUnaryFunction.
        template <typename Func, bool unary = IsUnaryFunction<Func>::value> struct ShiftDispatcher;
        template <typename Func> struct ShiftDispatcher<Func, true>
        {
            enum { shift = DefaultUnaryShift<typename Func::argument_type, typename Func::result_type>::shift };
        };
        template <typename Func> struct ShiftDispatcher<Func, false>
        {
            enum { shift = DefaultBinaryShift<typename Func::first_argument_type, typename Func::second_argument_type, typename Func::result_type>::shift };
        };
    }
    template <typename Func> struct DefaultTransformShift
    {
        enum { shift = functional_detail::ShiftDispatcher<Func>::shift };
    };
    // Default kernel-launch configuration for the transform framework;
    // specialize TransformFunctorTraits (below) to override per functor.
    template <typename Func> struct DefaultTransformFunctorTraits
    {
        enum { simple_block_dim_x = 16 };
        enum { simple_block_dim_y = 16 };
        enum { smart_block_dim_x = 16 };
        enum { smart_block_dim_y = 16 };
        enum { smart_shift = DefaultTransformShift<Func>::shift };
    };
    template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};
    // Convenience macro for specializing TransformFunctorTraits for a
    // concrete functor type; the user supplies the body after the macro.
#define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \
    template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
}}} // namespace cv { namespace cuda { namespace device
| //! @endcond | |||
| #endif // OPENCV_CUDA_FUNCTIONAL_HPP | |||
| @@ -0,0 +1,128 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_LIMITS_HPP | |||
| #define OPENCV_CUDA_LIMITS_HPP | |||
| #include <limits.h> | |||
| #include <float.h> | |||
| #include "common.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| template <class T> struct numeric_limits; | |||
| template <> struct numeric_limits<bool> | |||
| { | |||
| __device__ __forceinline__ static bool min() { return false; } | |||
| __device__ __forceinline__ static bool max() { return true; } | |||
| static const bool is_signed = false; | |||
| }; | |||
| template <> struct numeric_limits<signed char> | |||
| { | |||
| __device__ __forceinline__ static signed char min() { return SCHAR_MIN; } | |||
| __device__ __forceinline__ static signed char max() { return SCHAR_MAX; } | |||
| static const bool is_signed = true; | |||
| }; | |||
| template <> struct numeric_limits<unsigned char> | |||
| { | |||
| __device__ __forceinline__ static unsigned char min() { return 0; } | |||
| __device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; } | |||
| static const bool is_signed = false; | |||
| }; | |||
| template <> struct numeric_limits<short> | |||
| { | |||
| __device__ __forceinline__ static short min() { return SHRT_MIN; } | |||
| __device__ __forceinline__ static short max() { return SHRT_MAX; } | |||
| static const bool is_signed = true; | |||
| }; | |||
| template <> struct numeric_limits<unsigned short> | |||
| { | |||
| __device__ __forceinline__ static unsigned short min() { return 0; } | |||
| __device__ __forceinline__ static unsigned short max() { return USHRT_MAX; } | |||
| static const bool is_signed = false; | |||
| }; | |||
| template <> struct numeric_limits<int> | |||
| { | |||
| __device__ __forceinline__ static int min() { return INT_MIN; } | |||
| __device__ __forceinline__ static int max() { return INT_MAX; } | |||
| static const bool is_signed = true; | |||
| }; | |||
| template <> struct numeric_limits<unsigned int> | |||
| { | |||
| __device__ __forceinline__ static unsigned int min() { return 0; } | |||
| __device__ __forceinline__ static unsigned int max() { return UINT_MAX; } | |||
| static const bool is_signed = false; | |||
| }; | |||
| template <> struct numeric_limits<float> | |||
| { | |||
| __device__ __forceinline__ static float min() { return FLT_MIN; } | |||
| __device__ __forceinline__ static float max() { return FLT_MAX; } | |||
| __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; } | |||
| static const bool is_signed = true; | |||
| }; | |||
| template <> struct numeric_limits<double> | |||
| { | |||
| __device__ __forceinline__ static double min() { return DBL_MIN; } | |||
| __device__ __forceinline__ static double max() { return DBL_MAX; } | |||
| __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; } | |||
| static const bool is_signed = true; | |||
| }; | |||
| }}} // namespace cv { namespace cuda { namespace cudev { | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_LIMITS_HPP | |||
| @@ -0,0 +1,209 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_REDUCE_HPP | |||
| #define OPENCV_CUDA_REDUCE_HPP | |||
| #ifndef THRUST_DEBUG // eliminate -Wundef warning | |||
| #define THRUST_DEBUG 0 | |||
| #endif | |||
| #include <thrust/tuple.h> | |||
| #include "detail/reduce.hpp" | |||
| #include "detail/reduce_key_val.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
namespace cv { namespace cuda { namespace device
{
    // Warp/block reduction front-ends (deprecated; use the cudev module instead).
    // Every overload forwards to the Dispatcher metafunctions defined in
    // detail/reduce.hpp and detail/reduce_key_val.hpp, spelling out the exact
    // pointer/reference types as explicit template arguments so the detail
    // code receives volatile pointers (or thrust::tuples of them) unchanged.

    // Reduce one value per thread over N participating threads.
    //  smem - shared-memory scratch area (at least N elements)
    //  val  - this thread's contribution; also receives the reduced result
    //  tid  - this thread's index among the N participating threads
    //  op   - binary reduction operator (e.g. plus, maximum)
    template <int N, typename T, class Op>
    __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
    {
        reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
    }
    // Tuple variant: reduce up to 10 values simultaneously.  'smem', 'val'
    // and 'op' are parallel thrust::tuples (unused slots carry thrust's
    // null_type); element i of 'val' is reduced into element i of 'smem'
    // using operator i of 'op'.
    template <int N,
              typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
              typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
              class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
    __device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                           const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                           unsigned int tid,
                                           const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
    {
        reduce_detail::Dispatcher<N>::reductor::template reduce<
            const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&,
            const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&,
            const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
    }
    // Key/value reduction: selects the key (and carries its value along)
    // according to comparator 'cmp' over N participating threads.
    //  skeys/key - shared scratch and per-thread key
    //  svals/val - shared scratch and per-thread value moved with the key
    template <unsigned int N, typename K, typename V, class Cmp>
    __device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp)
    {
        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
    }
    // Key/value reduction with a single key but a tuple of up to 10 values
    // that travel together with the winning key.
    template <unsigned int N,
              typename K,
              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
              class Cmp>
    __device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key,
                                                 const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                 unsigned int tid, const Cmp& cmp)
    {
        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&,
            const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
            const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
            const Cmp&>(skeys, key, svals, val, tid, cmp);
    }
    // Fully-tuple key/value reduction: up to 10 keys, 10 values and 10
    // comparators reduced in lock-step.
    template <unsigned int N,
              typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
              typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
              class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
    __device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                                 const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                                 const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                 unsigned int tid,
                                                 const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
    {
        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<
            const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&,
            const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&,
            const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
            const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
            const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>&
            >(skeys, key, svals, val, tid, cmp);
    }
    // smem_tuple
    // Helpers that wrap 1..10 shared-memory pointers into a thrust::tuple of
    // volatile pointers, the form expected by the tuple overloads above.
    template <typename T0>
    __device__ __forceinline__
    thrust::tuple<volatile T0*>
    smem_tuple(T0* t0)
    {
        return thrust::make_tuple((volatile T0*) t0);
    }
    template <typename T0, typename T1>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*>
    smem_tuple(T0* t0, T1* t1)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1);
    }
    template <typename T0, typename T1, typename T2>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*>
    smem_tuple(T0* t0, T1* t1, T2* t2)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2);
    }
    template <typename T0, typename T1, typename T2, typename T3>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3);
    }
    template <typename T0, typename T1, typename T2, typename T3, typename T4>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4);
    }
    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5);
    }
    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6);
    }
    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7);
    }
    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8);
    }
    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
    __device__ __forceinline__
    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
    }
}}} // namespace cv { namespace cuda { namespace device
| //! @endcond | |||
| #endif // OPENCV_CUDA_REDUCE_HPP | |||
| @@ -0,0 +1,292 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_SATURATE_CAST_HPP | |||
| #define OPENCV_CUDA_SATURATE_CAST_HPP | |||
| #include "common.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
namespace cv { namespace cuda { namespace device
{
    // Device-side saturate_cast<_Tp>(v): convert 'v' to _Tp, clamping to the
    // target type's representable range instead of wrapping.  The generic
    // templates below are plain casts (no saturation); actual clamping is done
    // by the full specializations further down using PTX 'cvt.sat'
    // ('cvt.rni.sat' for floating-point sources, where 'rni' rounds to the
    // nearest integer).  On pre-sm_13 devices (no native double support) the
    // double-source specializations fall back to the float path.
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }
    // --- conversions to uchar ------------------------------------------------
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
    {
        uint res = 0;
        // widen to int: the "r" asm constraint requires a 32-bit register operand
        int vi = v;
        asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
    {
        uint res = 0;
        asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
    {
        uint res = 0;
        asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
    {
        uint res = 0;
        asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
    {
        uint res = 0;
        asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
    {
        uint res = 0;
        asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        uint res = 0;
        asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
        return res;
    #else
        return saturate_cast<uchar>((float)v);
    #endif
    }
    // --- conversions to schar ------------------------------------------------
    template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
    {
        uint res = 0;
        // widen to uint for the 32-bit "r" asm constraint
        uint vi = v;
        asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
    {
        uint res = 0;
        asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
    {
        uint res = 0;
        asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
    {
        uint res = 0;
        asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
    {
        uint res = 0;
        asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
    {
        uint res = 0;
        asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        uint res = 0;
        asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
        return res;
    #else
        return saturate_cast<schar>((float)v);
    #endif
    }
    // --- conversions to ushort -----------------------------------------------
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
    {
        ushort res = 0;
        // widen to int for the 32-bit "r" asm constraint
        int vi = v;
        asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
    {
        ushort res = 0;
        asm("cvt.sat.u16.s16 %0, %1;" : "=h"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
    {
        ushort res = 0;
        asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
    {
        ushort res = 0;
        asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
    {
        ushort res = 0;
        asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        ushort res = 0;
        asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
        return res;
    #else
        return saturate_cast<ushort>((float)v);
    #endif
    }
    // --- conversions to short ------------------------------------------------
    template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
    {
        short res = 0;
        asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(int v)
    {
        short res = 0;
        asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
    {
        short res = 0;
        asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(float v)
    {
        short res = 0;
        asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v));
        return res;
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        short res = 0;
        asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
        return res;
    #else
        return saturate_cast<short>((float)v);
    #endif
    }
    // --- conversions to int --------------------------------------------------
    template<> __device__ __forceinline__ int saturate_cast<int>(uint v)
    {
        int res = 0;
        asm("cvt.sat.s32.u32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ int saturate_cast<int>(float v)
    {
        // CUDA intrinsic: round to nearest integer
        return __float2int_rn(v);
    }
    template<> __device__ __forceinline__ int saturate_cast<int>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        return __double2int_rn(v);
    #else
        return saturate_cast<int>((float)v);
    #endif
    }
    // --- conversions to uint -------------------------------------------------
    template<> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
    {
        uint res = 0;
        // widen to int for the 32-bit "r" asm constraint
        int vi = v;
        asm("cvt.sat.u32.s8 %0, %1;" : "=r"(res) : "r"(vi));
        return res;
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(short v)
    {
        uint res = 0;
        asm("cvt.sat.u32.s16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(int v)
    {
        uint res = 0;
        asm("cvt.sat.u32.s32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
    {
        // CUDA intrinsic: round to nearest integer
        return __float2uint_rn(v);
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        return __double2uint_rn(v);
    #else
        return saturate_cast<uint>((float)v);
    #endif
    }
}}} // namespace cv { namespace cuda { namespace device
| //! @endcond | |||
| #endif /* OPENCV_CUDA_SATURATE_CAST_HPP */ | |||
| @@ -0,0 +1,258 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_SCAN_HPP | |||
| #define OPENCV_CUDA_SCAN_HPP | |||
| #include "opencv2/core/cuda/common.hpp" | |||
| #include "opencv2/core/cuda/utility.hpp" | |||
| #include "opencv2/core/cuda/warp.hpp" | |||
| #include "opencv2/core/cuda/warp_shuffle.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
    // Kind of prefix sum: EXCLUSIVE leaves the thread's own element out of its
    // result, INCLUSIVE includes it.
    enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 };
    // In-place, doubling-offset (Hillis-Steele style) scan of one warp's
    // 32-element segment of shared memory, combining with functor F.
    // NOTE(review): there is no synchronization between the five steps - this
    // relies on implicitly warp-synchronous execution over 'volatile' shared
    // memory (a pre-Volta assumption); confirm behavior on sm_70+ targets.
    template <ScanKind Kind, typename T, typename F> struct WarpScan
    {
        __device__ __forceinline__ WarpScan() {}
        __device__ __forceinline__ WarpScan(const WarpScan& other) { CV_UNUSED(other); }
        // Scan the warp segment containing ptr[idx]; lane guards keep each
        // step's read inside this warp's own 32 elements.
        __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
        {
            const unsigned int lane = idx & 31;
            F op;
            if ( lane >= 1) ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
            if ( lane >= 2) ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
            if ( lane >= 4) ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
            if ( lane >= 8) ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
            if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
            // EXCLUSIVE result is the neighbor's inclusive value (0 for lane 0)
            if( Kind == INCLUSIVE )
                return ptr [idx];
            else
                return (lane > 0) ? ptr [idx - 1] : 0;
        }
        // Dense layout: thread tid works on element tid.
        __device__ __forceinline__ unsigned int index(const unsigned int tid)
        {
            return tid;
        }
        // No per-warp initialization needed for the dense layout.
        __device__ __forceinline__ void init(volatile T *ptr){}
        static const int warp_offset = 0;
        // Inclusive variant used by BlockScan to combine per-warp totals.
        typedef WarpScan<INCLUSIVE, T, F> merge;
    };
    // Warp scan without the per-step lane guards of WarpScan.  Correctness
    // depends on the caller using index()/init(): index() places each warp's
    // 32 elements at offset 16 within a 49-element stride, and init() zeroes
    // the slot at threadIdx.x - presumably so the out-of-segment reads at
    // idx-1..idx-16 land on zero padding; verify against callers'
    // shared-memory layout.
    // NOTE(review): 'lane' is derived from threadIdx.x here while WarpScan
    // derives it from idx - this assumes the two agree modulo 32; confirm.
    template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
    {
        __device__ __forceinline__ WarpScanNoComp() {}
        __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { CV_UNUSED(other); }
        // Unconditional doubling-offset scan; no intra-warp synchronization
        // (warp-synchronous assumption, as in WarpScan).
        __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
        {
            const unsigned int lane = threadIdx.x & 31;
            F op;
            ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
            ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
            ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
            ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
            ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
            if( Kind == INCLUSIVE )
                return ptr [idx];
            else
                return (lane > 0) ? ptr [idx - 1] : 0;
        }
        // Strided layout: 16 padding slots precede each warp's 32 elements.
        __device__ __forceinline__ unsigned int index(const unsigned int tid)
        {
            return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
        }
        // Zero one slot per thread at the start of the buffer (padding init).
        __device__ __forceinline__ void init(volatile T *ptr)
        {
            ptr[threadIdx.x] = 0;
        }
        static const int warp_smem_stride = 32 + 16 + 1;  // 32 data + 16 pad + 1
        static const int warp_offset = 16;
        static const int warp_log = 5;    // log2(warp size)
        static const int warp_mask = 31;  // warp size - 1
        typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
    };
// Two-level block-wide scan built from a warp-scan policy Sc:
//   1. each warp scans its own elements,
//   2. lane 31 of each warp publishes the warp total,
//   3. warp 0 scans the totals,
//   4. every thread adds its warp's prefix offset.
// Sc supplies the shared-memory layout (index/init/warp_offset) and the
// scan flavor to use when merging warp totals (Sc::merge).
template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
{
    __device__ __forceinline__ BlockScan() {}
    __device__ __forceinline__ BlockScan(const BlockScan& other) { CV_UNUSED(other); }
    // Returns this thread's scan result over the whole block; ptr is shared
    // memory pre-filled with one input element per thread (at Sc's layout).
    __device__ __forceinline__ T operator()(volatile T *ptr)
    {
        const unsigned int tid = threadIdx.x;
        const unsigned int lane = tid & warp_mask;
        const unsigned int warp = tid >> warp_log;
        Sc scan;
        typename Sc::merge merge_scan;
        const unsigned int idx = scan.index(tid);
        // Level 1: intra-warp scan.
        T val = scan(ptr, idx);
        __syncthreads ();
        // Reset the merge region (only needed by padded layouts).
        if( warp == 0)
            scan.init(ptr);
        __syncthreads ();
        // Publish each warp's total (inclusive result of its last lane).
        if( lane == 31 )
            ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx];
        __syncthreads ();
        // Level 2: warp 0 scans the per-warp totals in place.
        if( warp == 0 )
            merge_scan(ptr, idx);
        __syncthreads();
        // Add the exclusive prefix of preceding warps.
        // NOTE(review): combines with raw '+' rather than F -- assumes F is
        // addition; confirm before instantiating with a non-sum operation.
        if ( warp > 0)
            val = ptr [scan.warp_offset + warp - 1] + val;
        __syncthreads ();
        ptr[idx] = val;
        __syncthreads ();
        return val ;
    }
    static const int warp_log = 5;
    static const int warp_mask = 31;
};
// Inclusive prefix sum across the calling warp (addition only).
// On sm_30+ it runs register-to-register via warp shuffles; otherwise it
// falls back to shared memory, laying each warp's data after a 32-slot
// zeroed guard so the unguarded back-reads fetch 0.
// s_Data: shared scratch, used only by the pre-sm_30 path (needs
// 2 * warpSize slots per warp). tid: thread index within the block.
template <typename T>
__device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
{
#if __CUDA_ARCH__ >= 300
    const unsigned int laneId = cv::cuda::device::Warp::laneId();
    // scan on shuffle functions
    #pragma unroll
    for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
    {
        const T n = cv::cuda::device::shfl_up(idata, i);
        if (laneId >= i)
            idata += n;
    }
    return idata;
#else
    // pos = warp_base*64 + 32 + lane: first 32 slots of each warp's region
    // are the zero guard, the next 32 hold the data.
    unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
    s_Data[pos] = 0;
    pos += OPENCV_CUDA_WARP_SIZE;
    s_Data[pos] = idata;
    // Hillis-Steele steps; out-of-range reads hit the zero guard.
    s_Data[pos] += s_Data[pos - 1];
    s_Data[pos] += s_Data[pos - 2];
    s_Data[pos] += s_Data[pos - 4];
    s_Data[pos] += s_Data[pos - 8];
    s_Data[pos] += s_Data[pos - 16];
    return s_Data[pos];
#endif
}
| template <typename T> | |||
| __device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid) | |||
| { | |||
| return warpScanInclusive(idata, s_Data, tid) - idata; | |||
| } | |||
// Inclusive prefix sum across tiNumScanThreads threads of a block:
// per-warp inclusive scans, then an exclusive scan of the warp totals,
// then each thread adds its warp's offset.
// NOTE(review): assumes tiNumScanThreads is a multiple of the warp size
// and small enough that the totals fit in one warp scan -- confirm at
// instantiation sites.
template <int tiNumScanThreads, typename T>
__device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
{
    if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
    {
        //Bottom-level inclusive warp scan
        T warpResult = warpScanInclusive(idata, s_Data, tid);
        //Save top elements of each warp for exclusive warp scan
        //sync to wait for warp scans to complete (because s_Data is being overwritten)
        __syncthreads();
        if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
        {
            s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
        }
        //wait for warp scans to complete
        __syncthreads();
        if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
        {
            //grab top warp elements
            T val = s_Data[tid];
            //calculate exclusive scan and write back to shared memory
            s_Data[tid] = warpScanExclusive(val, s_Data, tid);
        }
        //return updated warp scans with exclusive scan results
        __syncthreads();
        return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
    }
    else
    {
        // Block fits in a single warp: one warp scan suffices.
        return warpScanInclusive(idata, s_Data, tid);
    }
}
| }}} | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_SCAN_HPP | |||
| @@ -0,0 +1,869 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| /* | |||
| * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. | |||
| * | |||
| * Redistribution and use in source and binary forms, with or without | |||
| * modification, are permitted provided that the following conditions are met: | |||
| * | |||
| * Redistributions of source code must retain the above copyright notice, | |||
| * this list of conditions and the following disclaimer. | |||
| * | |||
| * Redistributions in binary form must reproduce the above copyright notice, | |||
| * this list of conditions and the following disclaimer in the documentation | |||
| * and/or other materials provided with the distribution. | |||
| * | |||
| * Neither the name of NVIDIA Corporation nor the names of its contributors | |||
| * may be used to endorse or promote products derived from this software | |||
| * without specific prior written permission. | |||
| * | |||
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | |||
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
| * POSSIBILITY OF SUCH DAMAGE. | |||
| */ | |||
| #ifndef OPENCV_CUDA_SIMD_FUNCTIONS_HPP | |||
| #define OPENCV_CUDA_SIMD_FUNCTIONS_HPP | |||
| #include "common.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| // 2 | |||
// Halfword SIMD add: treats a and b as two packed 16-bit unsigned lanes and
// adds lane-wise; carries do not propagate across the halfword boundary.
// NOTE(review): the asm paths use the .sat (saturating) forms while the
// generic fallback wraps modulo 2^16 per lane -- the paths disagree on
// overflow; confirm callers never overflow a lane.
static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int s;
    s = a ^ b;          // sum bits
    r = a + b;          // actual sum
    s = s ^ r;          // determine carry-ins for each bit position
    s = s & 0x00010000; // carry-in to high word (= carry-out from low word)
    r = r - s;          // subtract out carry-out from low word
#endif
    return r;
}
// Halfword SIMD subtract: a - b per packed 16-bit unsigned lane; borrows do
// not propagate across the halfword boundary.
// NOTE(review): asm paths saturate (.sat) while the fallback wraps per lane.
static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int s;
    s = a ^ b;          // sum bits
    r = a - b;          // actual sum
    s = s ^ r;          // determine carry-ins for each bit position
    s = s & 0x00010000; // borrow to high word
    r = r + s;          // compensate for borrow from low word
#endif
    return r;
}
// Halfword SIMD absolute difference: |a - b| per packed 16-bit unsigned lane.
// Fallback computes it as max(a,b) - min(a,b) per lane, operating on the
// masked halfwords in place (high lanes keep their bit position throughout).
static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int s, t, u, v;
    s = a & 0x0000ffff; // extract low halfword
    r = b & 0x0000ffff; // extract low halfword
    u = ::max(r, s);    // maximum of low halfwords
    v = ::min(r, s);    // minimum of low halfwords
    s = a & 0xffff0000; // extract high halfword
    r = b & 0xffff0000; // extract high halfword
    t = ::max(r, s);    // maximum of high halfwords
    s = ::min(r, s);    // minimum of high halfwords
    r = u | t;          // maximum of both halfwords
    s = v | s;          // minimum of both halfwords
    r = r - s;          // |a - b| = max(a,b) - min(a,b);
#endif
    return r;
}
| static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b) | |||
| { | |||
| unsigned int r, s; | |||
| // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==> | |||
| // (a + b) / 2 = (a & b) + ((a ^ b) >> 1) | |||
| s = a ^ b; | |||
| r = a & b; | |||
| s = s & 0xfffefffe; // ensure shift doesn't cross halfword boundaries | |||
| s = s >> 1; | |||
| s = r + s; | |||
| return s; | |||
| } | |||
// Halfword SIMD average, rounded up: (a + b + 1) / 2 per packed 16-bit lane,
// without intermediate overflow.
static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
    // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
    unsigned int s;
    s = a ^ b;
    r = a | b;
    s = s & 0xfffefffe; // ensure shift doesn't cross half-word boundaries
    s = s >> 1;
    r = r - s;
#endif
    return r;
}
// Halfword SIMD equality test: each 16-bit lane of the result is 1 if the
// corresponding lanes of a and b are equal, else 0.
static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    unsigned int c;
    r = a ^ b;          // 0x0000 if a == b
    c = r | 0x80008000; // set msbs, to catch carry out
    r = r ^ c;          // extract msbs, msb = 1 if r < 0x8000
    c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
    c = r & ~c;         // msb = 1, if r was 0x0000
    r = c >> 15;        // convert to bool
#endif
    return r;
}
// Halfword SIMD equality mask: each 16-bit lane of the result is 0xffff if
// the corresponding lanes of a and b are equal, else 0x0000.
static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b)
{
    unsigned int r, c;
#if __CUDA_ARCH__ >= 300
    r = vseteq2(a, b);
    c = r << 16;        // convert bool
    r = c - r;          //  into mask
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    r = a ^ b;          // 0x0000 if a == b
    c = r | 0x80008000; // set msbs, to catch carry out
    r = r ^ c;          // extract msbs, msb = 1 if r < 0x8000
    c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
    c = r & ~c;         // msb = 1, if r was 0x0000
    r = c >> 15;        // convert
    r = c - r;          //  msbs to
    r = c | r;          //  mask
#endif
    return r;
}
// Halfword SIMD >= test: each 16-bit lane is 1 if a-lane >= b-lane, else 0.
// Fallback: carry-out of (a + ~b + 1)/2 per lane signals a >= b; the inline
// asm flips the local copy of b (pass-by-value, caller unaffected).
static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int c;
    asm("not.b32 %0, %0;" : "+r"(b));
    c = vavrg2(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
    c = c & 0x80008000; // msb = carry-outs
    r = c >> 15;        // convert to bool
#endif
    return r;
}
// Halfword SIMD >= mask: each 16-bit lane is 0xffff if a-lane >= b-lane,
// else 0x0000.
static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b)
{
    unsigned int r, c;
#if __CUDA_ARCH__ >= 300
    r = vsetge2(a, b);
    c = r << 16;        // convert bool
    r = c - r;          //  into mask
#else
    asm("not.b32 %0, %0;" : "+r"(b)); // flips local copy of b only
    c = vavrg2(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
    c = c & 0x80008000; // msb = carry-outs
    r = c >> 15;        // convert
    r = c - r;          //  msbs to
    r = c | r;          //  mask
#endif
    return r;
}
// Halfword SIMD > test: each 16-bit lane is 1 if a-lane > b-lane, else 0.
// Fallback uses the round-down average so the carry-out signals strict >.
static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int c;
    asm("not.b32 %0, %0;" : "+r"(b)); // flips local copy of b only
    c = vavg2(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
    c = c & 0x80008000; // msbs = carry-outs
    r = c >> 15;        // convert to bool
#endif
    return r;
}
// Halfword SIMD > mask: each 16-bit lane is 0xffff if a-lane > b-lane,
// else 0x0000.
static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b)
{
    unsigned int r, c;
#if __CUDA_ARCH__ >= 300
    r = vsetgt2(a, b);
    c = r << 16;        // convert bool
    r = c - r;          //  into mask
#else
    asm("not.b32 %0, %0;" : "+r"(b)); // flips local copy of b only
    c = vavg2(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
    c = c & 0x80008000; // msbs = carry-outs
    r = c >> 15;        // convert
    r = c - r;          //  msbs to
    r = c | r;          //  mask
#endif
    return r;
}
// Halfword SIMD <= test: each 16-bit lane is 1 if a-lane <= b-lane, else 0.
// Same carry-out trick as vsetge2 with the operands' roles swapped.
static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int c;
    asm("not.b32 %0, %0;" : "+r"(a)); // flips local copy of a only
    c = vavrg2(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
    c = c & 0x80008000; // msb = carry-outs
    r = c >> 15;        // convert to bool
#endif
    return r;
}
// Halfword SIMD <= mask: each 16-bit lane is 0xffff if a-lane <= b-lane,
// else 0x0000.
static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b)
{
    unsigned int r, c;
#if __CUDA_ARCH__ >= 300
    r = vsetle2(a, b);
    c = r << 16;        // convert bool
    r = c - r;          //  into mask
#else
    asm("not.b32 %0, %0;" : "+r"(a)); // flips local copy of a only
    c = vavrg2(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
    c = c & 0x80008000; // msb = carry-outs
    r = c >> 15;        // convert
    r = c - r;          //  msbs to
    r = c | r;          //  mask
#endif
    return r;
}
// Halfword SIMD < test: each 16-bit lane is 1 if a-lane < b-lane, else 0.
static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int c;
    asm("not.b32 %0, %0;" : "+r"(a)); // flips local copy of a only
    c = vavg2(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
    c = c & 0x80008000; // msb = carry-outs
    r = c >> 15;        // convert to bool
#endif
    return r;
}
// Halfword SIMD < mask: each 16-bit lane is 0xffff if a-lane < b-lane,
// else 0x0000.
static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b)
{
    unsigned int r, c;
#if __CUDA_ARCH__ >= 300
    r = vsetlt2(a, b);
    c = r << 16;        // convert bool
    r = c - r;          //  into mask
#else
    asm("not.b32 %0, %0;" : "+r"(a)); // flips local copy of a only
    c = vavg2(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
    c = c & 0x80008000; // msb = carry-outs
    r = c >> 15;        // convert
    r = c - r;          //  msbs to
    r = c | r;          //  mask
#endif
    return r;
}
// Halfword SIMD inequality test: each 16-bit lane is 1 if the lanes differ,
// else 0.
static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    unsigned int c;
    r = a ^ b;          // 0x0000 if a == b
    c = r | 0x80008000; // set msbs, to catch carry out
    c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
    c = r | c;          // msb = 1, if r was not 0x0000
    c = c & 0x80008000; // extract msbs
    r = c >> 15;        // convert to bool
#endif
    return r;
}
// Halfword SIMD inequality mask: each 16-bit lane is 0xffff if the lanes
// differ, else 0x0000.
static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b)
{
    unsigned int r, c;
#if __CUDA_ARCH__ >= 300
    r = vsetne2(a, b);
    c = r << 16;        // convert bool
    r = c - r;          //  into mask
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    r = a ^ b;          // 0x0000 if a == b
    c = r | 0x80008000; // set msbs, to catch carry out
    c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
    c = r | c;          // msb = 1, if r was not 0x0000
    c = c & 0x80008000; // extract msbs
    r = c >> 15;        // convert
    r = c - r;          //  msbs to
    r = c | r;          //  mask
#endif
    return r;
}
// Halfword SIMD maximum: per packed 16-bit unsigned lane, max(a, b).
// Fallback takes max of the masked lanes (high lanes compare correctly in
// place since both are masked the same way) and recombines.
static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int s, t, u;
    r = a & 0x0000ffff; // extract low halfword
    s = b & 0x0000ffff; // extract low halfword
    t = ::max(r, s);    // maximum of low halfwords
    r = a & 0xffff0000; // extract high halfword
    s = b & 0xffff0000; // extract high halfword
    u = ::max(r, s);    // maximum of high halfwords
    r = t | u;          // combine halfword maximums
#endif
    return r;
}
// Halfword SIMD minimum: per packed 16-bit unsigned lane, min(a, b).
static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int s, t, u;
    r = a & 0x0000ffff; // extract low halfword
    s = b & 0x0000ffff; // extract low halfword
    t = ::min(r, s);    // minimum of low halfwords
    r = a & 0xffff0000; // extract high halfword
    s = b & 0xffff0000; // extract high halfword
    u = ::min(r, s);    // minimum of high halfwords
    r = t | u;          // combine halfword minimums
#endif
    return r;
}
| // 4 | |||
// Byte SIMD add: treats a and b as four packed 8-bit unsigned lanes and adds
// lane-wise; carries do not propagate across byte boundaries.
// NOTE(review): asm paths saturate (.sat) while the fallback wraps per byte.
static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int s, t;
    s = a ^ b;          // sum bits
    r = a & 0x7f7f7f7f; // clear msbs
    t = b & 0x7f7f7f7f; // clear msbs
    s = s & 0x80808080; // msb sum bits
    r = r + t;          // add without msbs, record carry-out in msbs
    r = r ^ s;          // sum of msb sum and carry-in bits, w/o carry-out
#endif /* __CUDA_ARCH__ >= 300 */
    return r;
}
// Byte SIMD subtract: a - b per packed 8-bit unsigned lane; borrows do not
// propagate across byte boundaries.
// NOTE(review): asm paths saturate (.sat) while the fallback wraps per byte.
static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int s, t;
    s = a ^ ~b;         // inverted sum bits
    r = a | 0x80808080; // set msbs
    t = b & 0x7f7f7f7f; // clear msbs
    s = s & 0x80808080; // inverted msb sum bits
    r = r - t;          // subtract w/o msbs, record inverted borrows in msb
    r = r ^ s;          // combine inverted msb sum bits and borrows
#endif
    return r;
}
| static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b) | |||
| { | |||
| unsigned int r, s; | |||
| // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==> | |||
| // (a + b) / 2 = (a & b) + ((a ^ b) >> 1) | |||
| s = a ^ b; | |||
| r = a & b; | |||
| s = s & 0xfefefefe; // ensure following shift doesn't cross byte boundaries | |||
| s = s >> 1; | |||
| s = r + s; | |||
| return s; | |||
| } | |||
// Byte SIMD average, rounded up: (a + b + 1) / 2 per packed 8-bit lane,
// without intermediate overflow.
static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
    // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
    unsigned int c;
    c = a ^ b;
    r = a | b;
    c = c & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
    c = c >> 1;
    r = r - c;
#endif
    return r;
}
// Byte SIMD equality test: each 8-bit lane of the result is 1 if the
// corresponding lanes of a and b are equal, else 0.
static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    unsigned int c;
    r = a ^ b;          // 0x00 if a == b
    c = r | 0x80808080; // set msbs, to catch carry out
    r = r ^ c;          // extract msbs, msb = 1 if r < 0x80
    c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
    c = r & ~c;         // msb = 1, if r was 0x00
    r = c >> 7;         // convert to bool
#endif
    return r;
}
// Byte SIMD equality mask: each 8-bit lane of the result is 0xff if the
// corresponding lanes of a and b are equal, else 0x00.
static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b)
{
    unsigned int r, t;
#if __CUDA_ARCH__ >= 300
    r = vseteq4(a, b);
    t = r << 8;         // convert bool
    r = t - r;          //  to mask
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    t = a ^ b;          // 0x00 if a == b
    r = t | 0x80808080; // set msbs, to catch carry out
    t = t ^ r;          // extract msbs, msb = 1 if t < 0x80
    r = r - 0x01010101; // msb = 0, if t was 0x00 or 0x80
    r = t & ~r;         // msb = 1, if t was 0x00
    t = r >> 7;         // build mask
    t = r - t;          //  from
    r = t | r;          //  msbs
#endif
    return r;
}
// Byte SIMD <= test: each 8-bit lane is 1 if a-lane <= b-lane, else 0.
// The inline asm flips the local copy of a (pass-by-value, caller unaffected).
static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int c;
    asm("not.b32 %0, %0;" : "+r"(a));
    c = vavrg4(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
    c = c & 0x80808080; // msb = carry-outs
    r = c >> 7;         // convert to bool
#endif
    return r;
}
// Byte SIMD <= mask: each 8-bit lane is 0xff if a-lane <= b-lane, else 0x00.
static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b)
{
    unsigned int r, c;
#if __CUDA_ARCH__ >= 300
    r = vsetle4(a, b);
    c = r << 8;         // convert bool
    r = c - r;          //  to mask
#else
    asm("not.b32 %0, %0;" : "+r"(a)); // flips local copy of a only
    c = vavrg4(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
    c = c & 0x80808080; // msbs = carry-outs
    r = c >> 7;         // convert
    r = c - r;          //  msbs to
    r = c | r;          //  mask
#endif
    return r;
}
// Byte SIMD < test: each 8-bit lane is 1 if a-lane < b-lane, else 0.
static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int c;
    asm("not.b32 %0, %0;" : "+r"(a)); // flips local copy of a only
    c = vavg4(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
    c = c & 0x80808080; // msb = carry-outs
    r = c >> 7;         // convert to bool
#endif
    return r;
}
// Byte SIMD < mask: each 8-bit lane is 0xff if a-lane < b-lane, else 0x00.
static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b)
{
    unsigned int r, c;
#if __CUDA_ARCH__ >= 300
    r = vsetlt4(a, b);
    c = r << 8;         // convert bool
    r = c - r;          //  to mask
#else
    asm("not.b32 %0, %0;" : "+r"(a)); // flips local copy of a only
    c = vavg4(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
    c = c & 0x80808080; // msbs = carry-outs
    r = c >> 7;         // convert
    r = c - r;          //  msbs to
    r = c | r;          //  mask
#endif
    return r;
}
// Byte SIMD >= test: each 8-bit lane is 1 if a-lane >= b-lane, else 0.
static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int c;
    asm("not.b32 %0, %0;" : "+r"(b)); // flips local copy of b only
    c = vavrg4(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
    c = c & 0x80808080; // msb = carry-outs
    r = c >> 7;         // convert to bool
#endif
    return r;
}
// Byte SIMD >= mask: each 8-bit lane is 0xff if a-lane >= b-lane, else 0x00.
// Also used by vabsdiff4/vmax4 fallbacks as a lane selector.
static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b)
{
    unsigned int r, s;
#if __CUDA_ARCH__ >= 300
    r = vsetge4(a, b);
    s = r << 8;         // convert bool
    r = s - r;          //  to mask
#else
    asm ("not.b32 %0,%0;" : "+r"(b)); // flips local copy of b only
    r = vavrg4 (a, b);  // (a + ~b + 1) / 2 = (a - b) / 2
    r = r & 0x80808080; // msb = carry-outs
    s = r >> 7;         // build mask
    s = r - s;          //  from
    r = s | r;          //  msbs
#endif
    return r;
}
// Byte SIMD > test: each 8-bit lane is 1 if a-lane > b-lane, else 0.
static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int c;
    asm("not.b32 %0, %0;" : "+r"(b)); // flips local copy of b only
    c = vavg4(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
    c = c & 0x80808080; // msb = carry-outs
    r = c >> 7;         // convert to bool
#endif
    return r;
}
// Byte SIMD > mask: each 8-bit lane is 0xff if a-lane > b-lane, else 0x00.
static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b)
{
    unsigned int r, c;
#if __CUDA_ARCH__ >= 300
    r = vsetgt4(a, b);
    c = r << 8;         // convert bool
    r = c - r;          //  to mask
#else
    asm("not.b32 %0, %0;" : "+r"(b)); // flips local copy of b only
    c = vavg4(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
    c = c & 0x80808080; // msbs = carry-outs
    r = c >> 7;         // convert
    r = c - r;          //  msbs to
    r = c | r;          //  mask
#endif
    return r;
}
// Byte SIMD inequality test: each 8-bit lane is 1 if the lanes differ, else 0.
static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    unsigned int c;
    r = a ^ b;          // 0x00 if a == b
    c = r | 0x80808080; // set msbs, to catch carry out
    c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
    c = r | c;          // msb = 1, if r was not 0x00
    c = c & 0x80808080; // extract msbs
    r = c >> 7;         // convert to bool
#endif
    return r;
}
// Byte SIMD inequality mask: each 8-bit lane is 0xff if the lanes differ,
// else 0x00.
static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b)
{
    unsigned int r, c;
#if __CUDA_ARCH__ >= 300
    r = vsetne4(a, b);
    c = r << 8;         // convert bool
    r = c - r;          //  to mask
#else
    // inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    r = a ^ b;          // 0x00 if a == b
    c = r | 0x80808080; // set msbs, to catch carry out
    c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
    c = r | c;          // msb = 1, if r was not 0x00
    c = c & 0x80808080; // extract msbs
    r = c >> 7;         // convert
    r = c - r;          //  msbs to
    r = c | r;          //  mask
#endif
    return r;
}
// Byte-wise absolute difference: |a - b| per unsigned byte lane, saturated.
static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    // no 4-lane vabsdiff4 before sm_30: issue one per-byte instruction per lane
    asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int s;
    s = vcmpge4(a, b); // mask = 0xff if a >= b
    r = a ^ b;         // differing bits of a and b
    s = (r & s) ^ b;   // select a when a >= b, else select b => max(a,b)
    r = s ^ r;         // select a when b >= a, else select b => min(a,b)
    r = s - r;         // |a - b| = max(a,b) - min(a,b);
#endif
    return r;
}
// Byte-wise unsigned maximum of the four byte lanes of `a` and `b`.
static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    // no 4-lane vmax4 before sm_30: issue one per-byte instruction per lane
    asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int s;
    s = vcmpge4(a, b); // mask = 0xff in lanes where a >= b
    r = a & s;         // select a where a >= b
    s = b & ~s;        // select b where a < b
    r = r | s;         // combine byte selections
#endif
    return r; // byte-wise unsigned maximum
}
// Byte-wise unsigned minimum of the four byte lanes of `a` and `b`.
static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
    asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    // no 4-lane vmin4 before sm_30: issue one per-byte instruction per lane
    asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    unsigned int s;
    s = vcmpge4(b, a); // mask = 0xff in lanes where b >= a (note swapped args)
    r = a & s;         // select a where a <= b
    s = b & ~s;        // select b where b < a
    r = r | s;         // combine byte selections
#endif
    return r; // byte-wise unsigned minimum
}
| }}} | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_SIMD_FUNCTIONS_HPP | |||
| @@ -0,0 +1,75 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_TRANSFORM_HPP | |||
| #define OPENCV_CUDA_TRANSFORM_HPP | |||
| #include "common.hpp" | |||
| #include "utility.hpp" | |||
| #include "detail/transform_detail.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| template <typename T, typename D, typename UnOp, typename Mask> | |||
| static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream) | |||
| { | |||
| typedef TransformFunctorTraits<UnOp> ft; | |||
| transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream); | |||
| } | |||
| template <typename T1, typename T2, typename D, typename BinOp, typename Mask> | |||
| static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream) | |||
| { | |||
| typedef TransformFunctorTraits<BinOp> ft; | |||
| transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream); | |||
| } | |||
| }}} | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_TRANSFORM_HPP | |||
| @@ -0,0 +1,90 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_TYPE_TRAITS_HPP | |||
| #define OPENCV_CUDA_TYPE_TRAITS_HPP | |||
| #include "detail/type_traits_detail.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| template <typename T> struct IsSimpleParameter | |||
| { | |||
| enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value || | |||
| type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value}; | |||
| }; | |||
// Compile-time type inspector (pre-C++11 stand-in for <type_traits>) used
// by the deprecated cv::cuda::device layer. All queries are delegated to
// the type_traits_detail implementation header.
template <typename T> struct TypeTraits
{
    typedef typename type_traits_detail::UnConst<T>::type NonConstType;
    typedef typename type_traits_detail::UnVolatile<T>::type NonVolatileType;
    // T with both const and volatile stripped.
    typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
    typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type PointeeType;
    typedef typename type_traits_detail::ReferenceTraits<T>::type ReferredType;
    enum { isConst = type_traits_detail::UnConst<T>::value };
    enum { isVolatile = type_traits_detail::UnVolatile<T>::value };
    enum { isReference = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
    enum { isPointer = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };
    enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
    enum { isSignedInt = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value }; // "Intergral" spelling matches the detail header
    enum { isIntegral = type_traits_detail::IsIntegral<UnqualifiedType>::value };
    enum { isFloat = type_traits_detail::IsFloat<UnqualifiedType>::value };
    enum { isArith = isIntegral || isFloat };
    enum { isVec = type_traits_detail::IsVec<UnqualifiedType>::value };
    // Preferred parameter-passing type: by value for simple types,
    // otherwise whatever AddParameterType yields (typically const ref).
    typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
        T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
};
| }}} | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_TYPE_TRAITS_HPP | |||
| @@ -0,0 +1,230 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_UTILITY_HPP | |||
| #define OPENCV_CUDA_UTILITY_HPP | |||
| #include "saturate_cast.hpp" | |||
| #include "datamov_utils.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
// Abstract allocator interface used to route Thrust temporary-buffer
// allocations through OpenCV. `value_type` is required by the Thrust
// allocator concept; implementations live outside this header.
struct CV_EXPORTS ThrustAllocator
{
    typedef uchar value_type;
    virtual ~ThrustAllocator();
    // Allocate / release `numBytes` raw bytes.
    virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0;
    virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0;
    // Process-wide allocator instance; replaceable via setAllocator.
    static ThrustAllocator& getAllocator();
    static void setAllocator(ThrustAllocator* allocator);
};
// Warp geometry: warp size is 2^5 = 32 threads.
#define OPENCV_CUDA_LOG_WARP_SIZE (5)
#define OPENCV_CUDA_WARP_SIZE (1 << OPENCV_CUDA_LOG_WARP_SIZE)
// Shared-memory bank count (for bank-conflict-free index computations).
#define OPENCV_CUDA_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
#define OPENCV_CUDA_MEM_BANKS (1 << OPENCV_CUDA_LOG_MEM_BANKS)
| /////////////////////////////////////////////////////////////////////////////// | |||
| // swap | |||
| template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b) | |||
| { | |||
| const T temp = a; | |||
| a = b; | |||
| b = temp; | |||
| } | |||
| /////////////////////////////////////////////////////////////////////////////// | |||
| // Mask Reader | |||
// Predicate over a single-channel 8-bit mask image: position (y, x)
// participates when its mask byte is non-zero.
struct SingleMask
{
    explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
    __host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){}
    __device__ __forceinline__ bool operator()(int y, int x) const
    {
        return mask.ptr(y)[x] != 0;
    }
    PtrStepb mask;
};
// Like SingleMask, but for data with `channels` interleaved values per
// mask pixel: element column x maps to mask column x / channels, so one
// mask byte gates all channels of that pixel.
struct SingleMaskChannels
{
    __host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_)
    : mask(mask_), channels(channels_) {}
    __host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_)
    :mask(mask_.mask), channels(mask_.channels){}
    __device__ __forceinline__ bool operator()(int y, int x) const
    {
        return mask.ptr(y)[x / channels] != 0;
    }
    PtrStepb mask;
    int channels;
};
| struct MaskCollection | |||
| { | |||
| explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_) | |||
| : maskCollection(maskCollection_) {} | |||
| __device__ __forceinline__ MaskCollection(const MaskCollection& masks_) | |||
| : maskCollection(masks_.maskCollection), curMask(masks_.curMask){} | |||
| __device__ __forceinline__ void next() | |||
| { | |||
| curMask = *maskCollection++; | |||
| } | |||
| __device__ __forceinline__ void setMask(int z) | |||
| { | |||
| curMask = maskCollection[z]; | |||
| } | |||
| __device__ __forceinline__ bool operator()(int y, int x) const | |||
| { | |||
| uchar val; | |||
| return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0)); | |||
| } | |||
| const PtrStepb* maskCollection; | |||
| PtrStepb curMask; | |||
| }; | |||
// No-op mask: every position passes. next()/setMask() do nothing, so this
// type can stand in for any of the mask readers above in templated kernels.
struct WithOutMask
{
    __host__ __device__ __forceinline__ WithOutMask(){}
    __host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
    __device__ __forceinline__ void next() const
    {
    }
    __device__ __forceinline__ void setMask(int) const
    {
    }
    __device__ __forceinline__ bool operator()(int, int) const
    {
        return true;
    }
    __device__ __forceinline__ bool operator()(int, int, int) const
    {
        return true;
    }
    static __device__ __forceinline__ bool check(int, int)
    {
        return true;
    }
    static __device__ __forceinline__ bool check(int, int, int)
    {
        return true;
    }
};
| /////////////////////////////////////////////////////////////////////////////// | |||
| // Solve linear system | |||
| // solve 2x2 linear system Ax=b | |||
// Solve the 2x2 linear system A*x = b by Cramer's rule.
// Returns false (x untouched) when det(A) is exactly zero; note this is an
// exact floating-point comparison, so near-singular systems still "succeed".
template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
{
    T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];
    if (det != 0)
    {
        double invdet = 1.0 / det; // reciprocal computed once, in double precision
        x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
        x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));
        return true;
    }
    return false;
}
| // solve 3x3 linear system Ax=b | |||
// Solve the 3x3 linear system A*x = b by Cramer's rule (cofactor expansion).
// Returns false (x untouched) when det(A) is exactly zero; as in solve2x2,
// the determinant test is an exact floating-point comparison.
template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
{
    // det(A) via expansion along the first row.
    T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
    - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
    + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);
    if (det != 0)
    {
        double invdet = 1.0 / det; // reciprocal computed once, in double precision
        // x[i] = det(A with column i replaced by b) / det(A)
        x[0] = saturate_cast<T>(invdet *
        (b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
        A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) +
        A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] )));
        x[1] = saturate_cast<T>(invdet *
        (A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) -
        b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
        A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0])));
        x[2] = saturate_cast<T>(invdet *
        (A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) -
        A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) +
        b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));
        return true;
    }
    return false;
}
| }}} // namespace cv { namespace cuda { namespace cudev | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_UTILITY_HPP | |||
| @@ -0,0 +1,232 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_VEC_DISTANCE_HPP | |||
| #define OPENCV_CUDA_VEC_DISTANCE_HPP | |||
| #include "reduce.hpp" | |||
| #include "functional.hpp" | |||
| #include "detail/vec_distance_detail.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
// L1 (sum of absolute differences) distance accumulator for integer data.
// value_type/result_type are required by the vec-distance framework.
template <typename T> struct L1Dist
{
    typedef int value_type;
    typedef int result_type;
    __device__ __forceinline__ L1Dist() : mySum(0) {}
    // Accumulate one element pair: mySum += |val1 - val2| via the SAD intrinsic.
    __device__ __forceinline__ void reduceIter(int val1, int val2)
    {
        mySum = __sad(val1, val2, mySum);
    }
    // Combine the per-thread partial sums of THREAD_DIM threads through smem.
    template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
    }
    __device__ __forceinline__ operator int() const
    {
        return mySum;
    }
    int mySum;
};
// Float specialization of L1Dist: accumulates fabs(val1 - val2) instead of
// using the integer SAD intrinsic.
template <> struct L1Dist<float>
{
    typedef float value_type;
    typedef float result_type;
    __device__ __forceinline__ L1Dist() : mySum(0.0f) {}
    __device__ __forceinline__ void reduceIter(float val1, float val2)
    {
        mySum += ::fabs(val1 - val2);
    }
    // Combine the per-thread partial sums of THREAD_DIM threads through smem.
    template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
    }
    __device__ __forceinline__ operator float() const
    {
        return mySum;
    }
    float mySum;
};
// Euclidean (L2) distance accumulator: sums squared differences during
// iteration and takes the square root only on final conversion to float.
struct L2Dist
{
    typedef float value_type;
    typedef float result_type;
    __device__ __forceinline__ L2Dist() : mySum(0.0f) {}
    __device__ __forceinline__ void reduceIter(float val1, float val2)
    {
        float reg = val1 - val2;
        mySum += reg * reg;
    }
    // Combine the per-thread partial sums of THREAD_DIM threads through smem.
    template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
    }
    // Final distance: sqrt of the accumulated squared differences.
    __device__ __forceinline__ operator float() const
    {
        return sqrtf(mySum);
    }
    float mySum;
};
// Hamming distance accumulator for binary descriptors: counts differing
// bits per element pair via popcount of the XOR.
struct HammingDist
{
    typedef int value_type;
    typedef int result_type;
    __device__ __forceinline__ HammingDist() : mySum(0) {}
    __device__ __forceinline__ void reduceIter(int val1, int val2)
    {
        mySum += __popc(val1 ^ val2);
    }
    // Combine the per-thread partial sums of THREAD_DIM threads through smem.
    template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
    }
    __device__ __forceinline__ operator int() const
    {
        return mySum;
    }
    int mySum;
};
| // calc distance between two vectors in global memory | |||
// calc distance between two vectors in global memory.
// THREAD_DIM cooperating threads each accumulate a strided slice of the
// elements (thread tid handles indices tid, tid+THREAD_DIM, ...), then the
// per-thread partials are combined by Dist::reduceAll through smem.
template <int THREAD_DIM, typename Dist, typename T1, typename T2>
__device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
{
    for (int i = tid; i < len; i += THREAD_DIM)
    {
        T1 val1;
        ForceGlob<T1>::Load(vec1, i, val1); // forced global-memory load
        T2 val2;
        ForceGlob<T2>::Load(vec2, i, val2);
        dist.reduceIter(val1, val2);
    }
    dist.reduceAll<THREAD_DIM>(smem, tid);
}
| // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory | |||
// calc distance between two vectors; the first vector is already cached in
// registers or shared memory, the second is in global memory. The detail
// calculator accumulates the per-thread partials, which are then combined
// by Dist::reduceAll through smem.
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
__device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
{
    vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
    dist.reduceAll<THREAD_DIM>(smem, tid);
}
| // calc distance between two vectors in global memory | |||
// calc distance between two vectors in global memory.
// Thin wrapper that stores a pointer to the first vector; the extra
// ignored constructor parameters appear to mirror VecDiffCachedRegister's
// constructor so the two types are interchangeable in templates — TODO confirm.
template <int THREAD_DIM, typename T1> struct VecDiffGlobal
{
    explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
    {
        vec1 = vec1_;
    }
    template <typename T2, typename Dist>
    __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
    {
        calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
    }
    const T1* vec1;
};
| // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory | |||
// calc distance between two vectors; the first vector is cached in
// per-thread registers (vec1Vals), the second stays in global memory.
// The constructor stages vec1 through shared memory, then each thread
// copies its strided slice (indices tid, tid+THREAD_DIM, ... < MAX_LEN)
// into registers.
// NOTE(review): vec1Vals holds MAX_LEN / THREAD_DIM entries, so this
// assumes MAX_LEN is a multiple of THREAD_DIM; elements of smem beyond
// `len` are read but presumably ignored downstream — TODO confirm with callers.
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
{
    template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
    {
        if (glob_tid < len)
            smem[glob_tid] = vec1[glob_tid];
        __syncthreads(); // make the staged vector visible to all threads
        U* vec1ValsPtr = vec1Vals;
        #pragma unroll
        for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
            *vec1ValsPtr++ = smem[i];
        __syncthreads(); // smem may be reused by the caller after this point
    }
    template <typename T2, typename Dist>
    __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
    {
        calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
    }
    U vec1Vals[MAX_LEN / THREAD_DIM];
};
| }}} // namespace cv { namespace cuda { namespace cudev | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_VEC_DISTANCE_HPP | |||
| @@ -0,0 +1,923 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_VECMATH_HPP | |||
| #define OPENCV_CUDA_VECMATH_HPP | |||
| #include "vec_traits.hpp" | |||
| #include "saturate_cast.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
| // saturate_cast | |||
// Helpers that apply saturate_cast element-wise to CUDA built-in vector
// types, specialized on the channel count (1-4) of the destination type.
namespace vec_math_detail
{
    template <int cn, typename VecD> struct SatCastHelper;
    // 1-channel: cast .x only.
    template <typename VecD> struct SatCastHelper<1, VecD>
    {
        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
        {
            typedef typename VecTraits<VecD>::elem_type D;
            return VecTraits<VecD>::make(saturate_cast<D>(v.x));
        }
    };
    // 2-channel: cast .x and .y.
    template <typename VecD> struct SatCastHelper<2, VecD>
    {
        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
        {
            typedef typename VecTraits<VecD>::elem_type D;
            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
        }
    };
    // 3-channel: cast .x, .y and .z.
    template <typename VecD> struct SatCastHelper<3, VecD>
    {
        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
        {
            typedef typename VecTraits<VecD>::elem_type D;
            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
        }
    };
    // 4-channel: cast .x, .y, .z and .w.
    template <typename VecD> struct SatCastHelper<4, VecD>
    {
        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
        {
            typedef typename VecTraits<VecD>::elem_type D;
            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
        }
    };
    // Entry point: dispatch on the destination type's channel count.
    template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v)
    {
        return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
    }
}
| template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
| template<typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} | |||
// Unary operators applied per component to CUDA vector types.
//
// CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) generates
// four overloads "output_typeN operator op(const input_typeN&)" for
// N = 1..4; each applies `op` to every component and rebuilds the vector
// through VecTraits<output_typeN>::make. (No comments inside the macro
// body: a '//' comment would swallow the trailing line-continuation '\'.)
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
    { \
        return VecTraits<output_type ## 1>::make(op (a.x)); \
    } \
    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
    { \
        return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
    } \
    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
    { \
        return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
    } \
    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
    { \
        return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
    }
// Arithmetic negation: signed integer and floating-point inputs only.
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)
// Logical NOT: result is a per-component uchar "boolean mask" vector.
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)
// Bitwise NOT: integer inputs only; the component type is preserved.
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
// Unary math functions applied per component.
//
// CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type)
// generates "output_typeN func_name(const input_typeN&)" for N = 1..4,
// calling `func` on each component. In the instantiations below, integer
// and float inputs are routed to the single-precision CUDA math function
// (::xxxf) and yield float vectors, while double inputs use the
// double-precision ::xxx and yield double vectors.
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \
    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \
    { \
        return VecTraits<output_type ## 1>::make(func (a.x)); \
    } \
    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \
    { \
        return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \
    } \
    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \
    { \
        return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
    } \
    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \
    { \
        return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
    }
// NOTE: abs is only instantiated for float here; other component types are
// handled elsewhere in OpenCV's CUDA headers.
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
// Binary operators (vector OP vector), applied per component.
//
// CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) generates
// "output_typeN operator op(const input_typeN&, const input_typeN&)" for
// N = 1..4. Output component types in the instantiations below mirror C's
// usual arithmetic conversions: arithmetic on sub-int components promotes
// to int, comparison/logical operators yield uchar mask vectors, and
// bitwise operators keep the input component type.
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \
    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
    { \
        return VecTraits<output_type ## 1>::make(a.x op b.x); \
    } \
    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
    { \
        return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
    } \
    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
    { \
        return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
    } \
    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
    { \
        return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
    }
// Arithmetic (+, -, *, /): small integer components promote to int.
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
// Comparisons (==, !=, >, <, >=, <=): per-component uchar mask result.
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
// Logical (&&, ||): uchar mask result. Note: applied per component, so
// there is no short-circuit evaluation across components.
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
// Bitwise (&, |, ^): integer components only; type preserved.
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
// Binary operators (vector OP scalar and scalar OP vector), per component.
//
// CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type)
// generates both argument orders (vector-scalar and scalar-vector) for
// N = 1..4 components, applying `op` between each component and the scalar.
// The instantiations below pair each vector component type with int/float/
// double scalars (uint/float/double for uint vectors), promoting the
// result as in C's usual arithmetic conversions.
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \
    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
    { \
        return VecTraits<output_type ## 1>::make(a.x op s); \
    } \
    __device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
    { \
        return VecTraits<output_type ## 1>::make(s op b.x); \
    } \
    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
    { \
        return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
    } \
    __device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
    { \
        return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
    } \
    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
    { \
        return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
    } \
    __device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
    { \
        return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
    } \
    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
    { \
        return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
    } \
    __device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
    { \
        return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
    }
// Addition with a scalar.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
// Subtraction with a scalar (both orders generated by the macro).
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
// Multiplication by a scalar.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
// Division by (or of) a scalar.
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint) | |||
| #undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP | |||
| // binary function (vec & vec) | |||
// Generates overloads `func_name(inputN, inputN) -> outputN` for N = 1..4 that apply
// `func` lane-by-lane and repack the results through VecTraits<outputN>::make.
// NOTE: no comments may be placed inside the backslash-continued macro body below —
// a trailing `\` after a // comment would splice the next line into the comment.
| #define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \ | |||
| __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \ | |||
| { \ | |||
| return VecTraits<output_type ## 1>::make(func (a.x, b.x)); \ | |||
| } \ | |||
| __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \ | |||
| { \ | |||
| return VecTraits<output_type ## 2>::make(func (a.x, b.x), func (a.y, b.y)); \ | |||
| } \ | |||
| __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \ | |||
| { \ | |||
| return VecTraits<output_type ## 3>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \ | |||
| } \ | |||
| __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \ | |||
| { \ | |||
| return VecTraits<output_type ## 4>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \ | |||
| } | |||
// max / min: integer lanes use ::max / ::min, floating lanes use fmaxf/fmax (fminf/fmin).
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double) | |||
// hypot / atan2: integer inputs map to float results via the single-precision functions;
// only double inputs use the double-precision variants.
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float) | |||
| CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double) | |||
// Generator macro is header-internal; undefine so it does not leak to includers.
| #undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC | |||
| // binary function (vec & scalar) | |||
// Generates overloads `func_name(vecN, scalar)` and `func_name(scalar, vecN)` for N = 1..4.
// Both operands are cast to output_type before calling `func`, so mixed-type calls
// (e.g. an integer vector with a float scalar) are computed at the wider precision.
// NOTE: no comments may be placed inside the backslash-continued macro body below —
// a trailing `\` after a // comment would splice the next line into the comment.
| #define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \ | |||
| __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \ | |||
| { \ | |||
| return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \ | |||
| } \ | |||
| __device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \ | |||
| { \ | |||
| return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \ | |||
| } \ | |||
| __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \ | |||
| { \ | |||
| return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \ | |||
| } \ | |||
| __device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \ | |||
| { \ | |||
| return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \ | |||
| } \ | |||
| __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \ | |||
| { \ | |||
| return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \ | |||
| } \ | |||
| __device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \ | |||
| { \ | |||
| return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \ | |||
| } \ | |||
| __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \ | |||
| { \ | |||
| return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \ | |||
| } \ | |||
| __device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \ | |||
| { \ | |||
| return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \ | |||
| } | |||
// max: same-type scalars use ::max, float/double scalars promote lanes via fmaxf/fmax.
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double) | |||
// min: mirror of the max table using ::min / fminf / fmin.
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double) | |||
// hypot / atan2: float scalars keep float lanes; double scalars widen to double.
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double) | |||
| CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double) | |||
// Generator macro is header-internal; undefine so it does not leak to includers.
| #undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC | |||
| }}} // namespace cv { namespace cuda { namespace device | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_VECMATH_HPP | |||
| @@ -0,0 +1,288 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_VEC_TRAITS_HPP | |||
| #define OPENCV_CUDA_VEC_TRAITS_HPP | |||
| #include "common.hpp" | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
// TypeVec<T, N>: maps an element type T and channel count N to the matching CUDA
// vector type (specialized below; the primary template is intentionally undefined).
| template<typename T, int N> struct TypeVec; | |||
// CUDA provides built-in vector types only up to 4 elements, so the 8-element
// variants below are declared here, each with a make_* factory mirroring CUDA's
// make_<type>N helpers. The __align__ values (8/16/32 bytes) are presumably chosen
// to allow aligned/vectorized memory access — TODO confirm against the cudev module.
| struct __align__(8) uchar8 | |||
| { | |||
| uchar a0, a1, a2, a3, a4, a5, a6, a7; | |||
| }; | |||
| static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7) | |||
| { | |||
| uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | |||
| return val; | |||
| } | |||
| struct __align__(8) char8 | |||
| { | |||
| schar a0, a1, a2, a3, a4, a5, a6, a7; | |||
| }; | |||
| static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) | |||
| { | |||
| char8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | |||
| return val; | |||
| } | |||
| struct __align__(16) ushort8 | |||
| { | |||
| ushort a0, a1, a2, a3, a4, a5, a6, a7; | |||
| }; | |||
| static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7) | |||
| { | |||
| ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | |||
| return val; | |||
| } | |||
| struct __align__(16) short8 | |||
| { | |||
| short a0, a1, a2, a3, a4, a5, a6, a7; | |||
| }; | |||
| static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7) | |||
| { | |||
| short8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | |||
| return val; | |||
| } | |||
| struct __align__(32) uint8 | |||
| { | |||
| uint a0, a1, a2, a3, a4, a5, a6, a7; | |||
| }; | |||
| static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7) | |||
| { | |||
| uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | |||
| return val; | |||
| } | |||
| struct __align__(32) int8 | |||
| { | |||
| int a0, a1, a2, a3, a4, a5, a6, a7; | |||
| }; | |||
| static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7) | |||
| { | |||
| int8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | |||
| return val; | |||
| } | |||
| struct __align__(32) float8 | |||
| { | |||
| float a0, a1, a2, a3, a4, a5, a6, a7; | |||
| }; | |||
| static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7) | |||
| { | |||
| float8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | |||
| return val; | |||
| } | |||
// NOTE(review): double8 carries no __align__ qualifier, unlike the other 8-wide
// types — presumably deliberate (64-byte alignment not required); verify upstream.
| struct double8 | |||
| { | |||
| double a0, a1, a2, a3, a4, a5, a6, a7; | |||
| }; | |||
| static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7) | |||
| { | |||
| double8 val = {a0, a1, a2, a3, a4, a5, a6, a7}; | |||
| return val; | |||
| } | |||
// Specializes TypeVec for one element type: TypeVec<type, N> and TypeVec<typeN, N>
// both resolve to typeN, for N in {1, 2, 3, 4, 8}. For N == 1 the scalar maps to
// itself (not type1). No comments inside the continued macro body (trailing `\`).
| #define OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) \ | |||
| template<> struct TypeVec<type, 1> { typedef type vec_type; }; \ | |||
| template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \ | |||
| template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \ | |||
| template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \ | |||
| template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \ | |||
| template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \ | |||
| template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \ | |||
| template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \ | |||
| template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \ | |||
| template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; }; | |||
| OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar) | |||
| OPENCV_CUDA_IMPLEMENT_TYPE_VEC(char) | |||
| OPENCV_CUDA_IMPLEMENT_TYPE_VEC(ushort) | |||
| OPENCV_CUDA_IMPLEMENT_TYPE_VEC(short) | |||
| OPENCV_CUDA_IMPLEMENT_TYPE_VEC(int) | |||
| OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uint) | |||
| OPENCV_CUDA_IMPLEMENT_TYPE_VEC(float) | |||
| OPENCV_CUDA_IMPLEMENT_TYPE_VEC(double) | |||
| #undef OPENCV_CUDA_IMPLEMENT_TYPE_VEC | |||
// schar is a distinct type from char, so it needs explicit specializations; its
// vector types reuse the charN family (there is no dedicated scharN type).
| template<> struct TypeVec<schar, 1> { typedef schar vec_type; }; | |||
| template<> struct TypeVec<schar, 2> { typedef char2 vec_type; }; | |||
| template<> struct TypeVec<schar, 3> { typedef char3 vec_type; }; | |||
| template<> struct TypeVec<schar, 4> { typedef char4 vec_type; }; | |||
| template<> struct TypeVec<schar, 8> { typedef char8 vec_type; }; | |||
// bool has no vector types of its own; it maps onto the ucharN family.
| template<> struct TypeVec<bool, 1> { typedef uchar vec_type; }; | |||
| template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; }; | |||
| template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; }; | |||
| template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; }; | |||
| template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; }; | |||
// VecTraits<T>: compile-time description of a (possibly vector) type — its element
// type, channel count `cn`, plus `all` (broadcast one value to every lane), `make`
// (construct from per-lane values), and `make(const elem*)` (load from an array).
| template<typename T> struct VecTraits; | |||
// Specializes VecTraits for the scalar `type` and for type1..type4, type8.
// No comments inside the continued macro body (trailing `\` would splice lines).
| #define OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) \ | |||
| template<> struct VecTraits<type> \ | |||
| { \ | |||
| typedef type elem_type; \ | |||
| enum {cn=1}; \ | |||
| static __device__ __host__ __forceinline__ type all(type v) {return v;} \ | |||
| static __device__ __host__ __forceinline__ type make(type x) {return x;} \ | |||
| static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \ | |||
| }; \ | |||
| template<> struct VecTraits<type ## 1> \ | |||
| { \ | |||
| typedef type elem_type; \ | |||
| enum {cn=1}; \ | |||
| static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \ | |||
| static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \ | |||
| static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \ | |||
| }; \ | |||
| template<> struct VecTraits<type ## 2> \ | |||
| { \ | |||
| typedef type elem_type; \ | |||
| enum {cn=2}; \ | |||
| static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \ | |||
| static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \ | |||
| static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \ | |||
| }; \ | |||
| template<> struct VecTraits<type ## 3> \ | |||
| { \ | |||
| typedef type elem_type; \ | |||
| enum {cn=3}; \ | |||
| static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \ | |||
| static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \ | |||
| static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \ | |||
| }; \ | |||
| template<> struct VecTraits<type ## 4> \ | |||
| { \ | |||
| typedef type elem_type; \ | |||
| enum {cn=4}; \ | |||
| static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \ | |||
| static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \ | |||
| static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \ | |||
| }; \ | |||
| template<> struct VecTraits<type ## 8> \ | |||
| { \ | |||
| typedef type elem_type; \ | |||
| enum {cn=8}; \ | |||
| static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \ | |||
| static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \ | |||
| static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \ | |||
| }; | |||
// Note: char and schar are handled by hand-written specializations further below,
// not by this macro, because their element type (schar) differs from the charN names.
| OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uchar) | |||
| OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(ushort) | |||
| OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(short) | |||
| OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(int) | |||
| OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uint) | |||
| OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float) | |||
| OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(double) | |||
| #undef OPENCV_CUDA_IMPLEMENT_VEC_TRAITS | |||
// Hand-written VecTraits specializations for the char family. Plain char and schar
// are distinct C++ types, so each gets its own scalar specialization; the charN
// vector types use schar as their element type.
| template<> struct VecTraits<char> | |||
| { | |||
| typedef char elem_type; | |||
| enum {cn=1}; | |||
| static __device__ __host__ __forceinline__ char all(char v) {return v;} | |||
| static __device__ __host__ __forceinline__ char make(char x) {return x;} | |||
| static __device__ __host__ __forceinline__ char make(const char* x) {return *x;} | |||
| }; | |||
| template<> struct VecTraits<schar> | |||
| { | |||
| typedef schar elem_type; | |||
| enum {cn=1}; | |||
| static __device__ __host__ __forceinline__ schar all(schar v) {return v;} | |||
| static __device__ __host__ __forceinline__ schar make(schar x) {return x;} | |||
| static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;} | |||
| }; | |||
// charN specializations: same shape as the macro-generated ones, but with
// elem_type = schar and construction via make_charN.
| template<> struct VecTraits<char1> | |||
| { | |||
| typedef schar elem_type; | |||
| enum {cn=1}; | |||
| static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);} | |||
| static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);} | |||
| static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);} | |||
| }; | |||
| template<> struct VecTraits<char2> | |||
| { | |||
| typedef schar elem_type; | |||
| enum {cn=2}; | |||
| static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);} | |||
| static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);} | |||
| static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);} | |||
| }; | |||
| template<> struct VecTraits<char3> | |||
| { | |||
| typedef schar elem_type; | |||
| enum {cn=3}; | |||
| static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);} | |||
| static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);} | |||
| static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);} | |||
| }; | |||
| template<> struct VecTraits<char4> | |||
| { | |||
| typedef schar elem_type; | |||
| enum {cn=4}; | |||
| static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);} | |||
| static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);} | |||
| static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);} | |||
| }; | |||
| template<> struct VecTraits<char8> | |||
| { | |||
| typedef schar elem_type; | |||
| enum {cn=8}; | |||
| static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);} | |||
| static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);} | |||
| static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} | |||
| }; | |||
| }}} // namespace cv { namespace cuda { namespace cudev | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_VEC_TRAITS_HPP | |||
| @@ -0,0 +1,139 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_DEVICE_WARP_HPP | |||
| #define OPENCV_CUDA_DEVICE_WARP_HPP | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
    /** \brief Warp-level collective helpers for CUDA device code.
     *
     *  All members assume they are executed cooperatively by the threads of a
     *  single warp: lane i processes elements i, i+STRIDE, i+2*STRIDE, ... of
     *  the given range.
     */
    struct Warp
    {
        enum
        {
            LOG_WARP_SIZE = 5,
            WARP_SIZE = 1 << LOG_WARP_SIZE, // 32 threads per warp
            STRIDE = WARP_SIZE              // per-lane step when striding over a range
        };

        /** \brief Returns the warp lane ID of the calling thread. */
        static __device__ __forceinline__ unsigned int laneId()
        {
            unsigned int ret;
            // Read the %laneid special register directly via inline PTX.
            asm("mov.u32 %0, %%laneid;" : "=r"(ret) );
            return ret;
        }

        // Warp-cooperative fill: lane i writes elements i, i+32, ... with value.
        template<typename It, typename T>
        static __device__ __forceinline__ void fill(It beg, It end, const T& value)
        {
            for(It t = beg + laneId(); t < end; t += STRIDE)
                *t = value;
        }

        // Warp-cooperative copy of [beg, end) to out; returns this lane's
        // final (advanced) output iterator.
        template<typename InIt, typename OutIt>
        static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
        {
            for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
                *out = *t;
            return out;
        }

        // Warp-cooperative unary transform: out[i] = op(in[i]).
        template<typename InIt, typename OutIt, class UnOp>
        static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
        {
            for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
                *out = op(*t);
            return out;
        }

        // Warp-cooperative binary transform: out[i] = op(in1[i], in2[i]).
        template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
        static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
        {
            unsigned int lane = laneId();

            InIt1 t1 = beg1 + lane;
            InIt2 t2 = beg2 + lane;
            for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
                *out = op(*t1, *t2);
            return out;
        }

        // Warp-synchronous tree reduction over 32 'volatile' elements at ptr.
        // Lanes 0..15 fold in the upper halves step by step; the fully reduced
        // value ends up in ptr[0], which every lane returns.
        // NOTE(review): relies on implicit warp-lockstep execution between the
        // steps (no __syncwarp); confirm callers only use this where that
        // assumption holds (e.g. pre-Volta style warp-synchronous code).
        template <class T, class BinOp>
        static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
        {
            const unsigned int lane = laneId();

            if (lane < 16)
            {
                T partial = ptr[lane];

                ptr[lane] = partial = op(partial, ptr[lane + 16]);
                ptr[lane] = partial = op(partial, ptr[lane + 8]);
                ptr[lane] = partial = op(partial, ptr[lane + 4]);
                ptr[lane] = partial = op(partial, ptr[lane + 2]);
                ptr[lane] = partial = op(partial, ptr[lane + 1]);
            }

            return *ptr;
        }

        // Warp-cooperative iota ("yota" spelling kept for API compatibility):
        // writes value, value+1, value+2, ... into [beg, end).
        template<typename OutIt, typename T>
        static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
        {
            unsigned int lane = laneId();
            value += lane;

            for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
                *t = value;
        }
    };
| }}} // namespace cv { namespace cuda { namespace cudev | |||
| //! @endcond | |||
| #endif /* OPENCV_CUDA_DEVICE_WARP_HPP */ | |||
| @@ -0,0 +1,76 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_WARP_REDUCE_HPP__ | |||
| #define OPENCV_CUDA_WARP_REDUCE_HPP__ | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
    // Warp-synchronous sum reduction of 32 consecutive 'volatile' elements.
    // 'tid' indexes into ptr; the warp's segment starts at ptr[tid - lane].
    // Lanes 0..15 fold in the upper 16 elements step by step; the reduced sum
    // lands in the first slot of the warp's segment, which every lane returns.
    // NOTE(review): depends on implicit warp-lockstep execution between steps
    // (no __syncwarp); verify usage on architectures with independent thread
    // scheduling (Volta and newer).
    template <class T>
    __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
    {
        const unsigned int lane = tid & 31; // index of thread in warp (0..31)

        if (lane < 16)
        {
            T partial = ptr[tid];

            ptr[tid] = partial = partial + ptr[tid + 16];
            ptr[tid] = partial = partial + ptr[tid + 8];
            ptr[tid] = partial = partial + ptr[tid + 4];
            ptr[tid] = partial = partial + ptr[tid + 2];
            ptr[tid] = partial = partial + ptr[tid + 1];
        }

        return ptr[tid - lane];
    }
| }}} // namespace cv { namespace cuda { namespace cudev { | |||
| //! @endcond | |||
| #endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */ | |||
| @@ -0,0 +1,162 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP | |||
| #define OPENCV_CUDA_WARP_SHUFFLE_HPP | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| namespace cv { namespace cuda { namespace device | |||
| { | |||
// CUDA 9 removed the legacy (implicit-mask) warp shuffle intrinsics; remap
// them to the *_sync variants with a full-warp mask so the code below compiles
// unchanged on both old and new toolkits. The macros are #undef'ed after the
// namespace closes.
#if __CUDACC_VER_MAJOR__ >= 9
#  define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z)
#  define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
#  define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
#endif

    // shfl: broadcast 'val' from lane 'srcLane' of each 'width'-sized group.
    // On devices without shuffle support (CC < 3.0) a default-constructed /
    // zero value is returned instead.
    template <typename T>
    __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        return __shfl(val, srcLane, width);
    #else
        return T();
    #endif
    }

    // unsigned int overload: routed through the int intrinsic (bit pattern is
    // preserved by the casts).
    __device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        return (unsigned int) __shfl((int) val, srcLane, width);
    #else
        return 0;
    #endif
    }

    // double overload: the 64-bit value is shuffled as two 32-bit halves and
    // reassembled.
    __device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        int lo = __double2loint(val);
        int hi = __double2hiint(val);

        lo = __shfl(lo, srcLane, width);
        hi = __shfl(hi, srcLane, width);

        return __hiloint2double(hi, lo);
    #else
        return 0.0;
    #endif
    }

    // shfl_down: read 'val' from the lane 'delta' positions higher within each
    // 'width'-sized group.
    template <typename T>
    __device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        return __shfl_down(val, delta, width);
    #else
        return T();
    #endif
    }

    __device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        return (unsigned int) __shfl_down((int) val, delta, width);
    #else
        return 0;
    #endif
    }

    __device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        int lo = __double2loint(val);
        int hi = __double2hiint(val);

        lo = __shfl_down(lo, delta, width);
        hi = __shfl_down(hi, delta, width);

        return __hiloint2double(hi, lo);
    #else
        return 0.0;
    #endif
    }

    // shfl_up: read 'val' from the lane 'delta' positions lower within each
    // 'width'-sized group.
    template <typename T>
    __device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        return __shfl_up(val, delta, width);
    #else
        return T();
    #endif
    }

    __device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        return (unsigned int) __shfl_up((int) val, delta, width);
    #else
        return 0;
    #endif
    }

    __device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        int lo = __double2loint(val);
        int hi = __double2hiint(val);

        lo = __shfl_up(lo, delta, width);
        hi = __shfl_up(hi, delta, width);

        return __hiloint2double(hi, lo);
    #else
        return 0.0;
    #endif
    }
| }}} | |||
| # undef __shfl | |||
| # undef __shfl_up | |||
| # undef __shfl_down | |||
| //! @endcond | |||
| #endif // OPENCV_CUDA_WARP_SHUFFLE_HPP | |||
| @@ -0,0 +1,86 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP | |||
| #define OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP | |||
| #ifndef __cplusplus | |||
| # error cuda_stream_accessor.hpp header must be compiled as C++ | |||
| #endif | |||
| /** @file cuda_stream_accessor.hpp | |||
| * This is only header file that depends on CUDA Runtime API. All other headers are independent. | |||
| */ | |||
| #include <cuda_runtime.h> | |||
| #include "opencv2/core/cuda.hpp" | |||
| namespace cv | |||
| { | |||
| namespace cuda | |||
| { | |||
| //! @addtogroup cudacore_struct | |||
| //! @{ | |||
        /** @brief Class that enables getting cudaStream_t from cuda::Stream
         *
         * Static-only accessor: the sanctioned bridge between OpenCV's
         * cuda::Stream wrapper and the raw CUDA Runtime cudaStream_t handle.
         */
        struct StreamAccessor
        {
            CV_EXPORTS static cudaStream_t getStream(const Stream& stream);  //!< raw handle underlying 'stream'
            CV_EXPORTS static Stream wrapStream(cudaStream_t stream);        //!< wrap an existing raw stream
            // NOTE(review): ownership semantics of a wrapped stream are not
            // visible in this header — confirm against the implementation.
        };
        /** @brief Class that enables getting cudaEvent_t from cuda::Event
         *
         * Static-only accessor: the sanctioned bridge between OpenCV's
         * cuda::Event wrapper and the raw CUDA Runtime cudaEvent_t handle.
         */
        struct EventAccessor
        {
            CV_EXPORTS static cudaEvent_t getEvent(const Event& event);  //!< raw handle underlying 'event'
            CV_EXPORTS static Event wrapEvent(cudaEvent_t event);        //!< wrap an existing raw event
            // NOTE(review): ownership semantics of a wrapped event are not
            // visible in this header — confirm against the implementation.
        };
| //! @} | |||
| } | |||
| } | |||
| #endif /* OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP */ | |||
| @@ -0,0 +1,144 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CORE_CUDA_TYPES_HPP | |||
| #define OPENCV_CORE_CUDA_TYPES_HPP | |||
| #ifndef __cplusplus | |||
| # error cuda_types.hpp header must be compiled as C++ | |||
| #endif | |||
| #if defined(__OPENCV_BUILD) && defined(__clang__) | |||
| #pragma clang diagnostic ignored "-Winconsistent-missing-override" | |||
| #endif | |||
| #if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5 | |||
| #pragma GCC diagnostic ignored "-Wsuggest-override" | |||
| #endif | |||
| /** @file | |||
| * @deprecated Use @ref cudev instead. | |||
| */ | |||
| //! @cond IGNORED | |||
| #ifdef __CUDACC__ | |||
| #define __CV_CUDA_HOST_DEVICE__ __host__ __device__ __forceinline__ | |||
| #else | |||
| #define __CV_CUDA_HOST_DEVICE__ | |||
| #endif | |||
| namespace cv | |||
| { | |||
| namespace cuda | |||
| { | |||
| // Simple lightweight structures that encapsulates information about an image on device. | |||
| // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile | |||
        // Non-owning pointer to data in device memory, exposing the element
        // type and size in a form usable from both host and device code.
        template <typename T> struct DevPtr
        {
            typedef T elem_type;
            typedef int index_type;

            enum { elem_size = sizeof(elem_type) };

            T* data; // raw device pointer; not owned, never freed here

            __CV_CUDA_HOST_DEVICE__ DevPtr() : data(0) {}
            __CV_CUDA_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}

            //! size of one element, in bytes
            __CV_CUDA_HOST_DEVICE__ size_t elemSize() const { return elem_size; }

            // Implicit conversions so a DevPtr can be used wherever a raw
            // pointer is expected (e.g. as a kernel argument).
            __CV_CUDA_HOST_DEVICE__ operator T*() { return data; }
            __CV_CUDA_HOST_DEVICE__ operator const T*() const { return data; }
        };
| template <typename T> struct PtrSz : public DevPtr<T> | |||
| { | |||
| __CV_CUDA_HOST_DEVICE__ PtrSz() : size(0) {} | |||
| __CV_CUDA_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {} | |||
| size_t size; | |||
| }; | |||
| template <typename T> struct PtrStep : public DevPtr<T> | |||
| { | |||
| __CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {} | |||
| __CV_CUDA_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {} | |||
| size_t step; | |||
| __CV_CUDA_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)(((DevPtr<T>*)this)->data) + y * step); } | |||
| __CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)(((DevPtr<T>*)this)->data) + y * step); } | |||
| __CV_CUDA_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; } | |||
| __CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; } | |||
| }; | |||
        // 2-D image descriptor: PtrStep plus the image extent (rows x cols).
        template <typename T> struct PtrStepSz : public PtrStep<T>
        {
            __CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
            __CV_CUDA_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
                : PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}

            // Reinterpreting conversion between element types; the byte step
            // and extent are carried over unchanged.
            // NOTE(review): this constructor has no __CV_CUDA_HOST_DEVICE__
            // qualifier — presumably host-only; confirm before using in device code.
            template <typename U>
            explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){}

            int cols;
            int rows;
        };
        // Convenience aliases for the most common element types
        // (b = byte, us = unsigned short, f = float, i = int).
        typedef PtrStepSz<unsigned char> PtrStepSzb;
        typedef PtrStepSz<unsigned short> PtrStepSzus;
        typedef PtrStepSz<float> PtrStepSzf;
        typedef PtrStepSz<int> PtrStepSzi;

        typedef PtrStep<unsigned char> PtrStepb;
        typedef PtrStep<unsigned short> PtrStepus;
        typedef PtrStep<float> PtrStepf;
        typedef PtrStep<int> PtrStepi;
| } | |||
| } | |||
| //! @endcond | |||
| #endif /* OPENCV_CORE_CUDA_TYPES_HPP */ | |||
| @@ -0,0 +1,345 @@ | |||
| // This file is part of OpenCV project. | |||
| // It is subject to the license terms in the LICENSE file found in the top-level directory | |||
| // of this distribution and at http://opencv.org/license.html. | |||
| #if defined __OPENCV_BUILD \ | |||
| #include "cv_cpu_config.h" | |||
| #include "cv_cpu_helper.h" | |||
| #ifdef CV_CPU_DISPATCH_MODE | |||
| #define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) | |||
| #define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) { | |||
| #define CV_CPU_OPTIMIZATION_NAMESPACE_END } | |||
| #else | |||
| #define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline | |||
| #define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline { | |||
| #define CV_CPU_OPTIMIZATION_NAMESPACE_END } | |||
| #define CV_CPU_BASELINE_MODE 1 | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...) /* done */ | |||
| #define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__)) | |||
| #define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros | |||
| #if defined CV_ENABLE_INTRINSICS \ | |||
| && !defined CV_DISABLE_OPTIMIZATION \ | |||
| && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \ | |||
| #ifdef CV_CPU_COMPILE_SSE2 | |||
| # include <emmintrin.h> | |||
| # define CV_MMX 1 | |||
| # define CV_SSE 1 | |||
| # define CV_SSE2 1 | |||
| #endif | |||
// Per-feature intrinsic headers: each CV_CPU_COMPILE_* macro (set by the
// build system) pulls in the matching vendor header and defines the CV_*
// capability macro to 1.
#ifdef CV_CPU_COMPILE_SSE3
#  include <pmmintrin.h>
#  define CV_SSE3 1
#endif
#ifdef CV_CPU_COMPILE_SSSE3
#  include <tmmintrin.h>
#  define CV_SSSE3 1
#endif
#ifdef CV_CPU_COMPILE_SSE4_1
#  include <smmintrin.h>
#  define CV_SSE4_1 1
#endif
#ifdef CV_CPU_COMPILE_SSE4_2
#  include <nmmintrin.h>
#  define CV_SSE4_2 1
#endif
// POPCNT: MSVC exposes _mm_popcnt_* via <nmmintrin.h>; other compilers use
// the GCC/Clang builtins. The 64-bit variant exists only on 64-bit targets.
#ifdef CV_CPU_COMPILE_POPCNT
#  ifdef _MSC_VER
#    include <nmmintrin.h>
#    if defined(_M_X64)
#      define CV_POPCNT_U64 _mm_popcnt_u64
#    endif
#    define CV_POPCNT_U32 _mm_popcnt_u32
#  else
#    include <popcntintrin.h>
#    if defined(__x86_64__)
#      define CV_POPCNT_U64 __builtin_popcountll
#    endif
#    define CV_POPCNT_U32 __builtin_popcount
#  endif
#  define CV_POPCNT 1
#endif
#ifdef CV_CPU_COMPILE_AVX
#  include <immintrin.h>
#  define CV_AVX 1
#endif
// FP16 (half-precision conversion): NEON header on ARM targets, otherwise
// <immintrin.h> (presumably the x86 F16C intrinsics).
#ifdef CV_CPU_COMPILE_FP16
#  if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
#    include <arm_neon.h>
#  else
#    include <immintrin.h>
#  endif
#  define CV_FP16 1
#endif
#ifdef CV_CPU_COMPILE_AVX2
#  include <immintrin.h>
#  define CV_AVX2 1
#endif
#ifdef CV_CPU_COMPILE_AVX_512F
#  include <immintrin.h>
#  define CV_AVX_512F 1
#endif
// AVX-512 feature suites, grouped by CPU generation: each suite macro enables
// the individual AVX-512 extension flags it bundles.
#ifdef CV_CPU_COMPILE_AVX512_COMMON
#  define CV_AVX512_COMMON 1
#  define CV_AVX_512CD 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_KNL
#  define CV_AVX512_KNL 1
#  define CV_AVX_512ER 1
#  define CV_AVX_512PF 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_KNM
#  define CV_AVX512_KNM 1
#  define CV_AVX_5124FMAPS 1
#  define CV_AVX_5124VNNIW 1
#  define CV_AVX_512VPOPCNTDQ 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_SKX
#  define CV_AVX512_SKX 1
#  define CV_AVX_512VL 1
#  define CV_AVX_512BW 1
#  define CV_AVX_512DQ 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_CNL
#  define CV_AVX512_CNL 1
#  define CV_AVX_512IFMA 1
#  define CV_AVX_512VBMI 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_CLX
#  define CV_AVX512_CLX 1
#  define CV_AVX_512VNNI 1
#endif
// ICL re-defines flags already set by CNL/CLX; the #undef before each
// re-definition presumably avoids redefinition warnings.
#ifdef CV_CPU_COMPILE_AVX512_ICL
#  define CV_AVX512_ICL 1
#  undef CV_AVX_512IFMA
#  define CV_AVX_512IFMA 1
#  undef CV_AVX_512VBMI
#  define CV_AVX_512VBMI 1
#  undef CV_AVX_512VNNI
#  define CV_AVX_512VNNI 1
#  define CV_AVX_512VBMI2 1
#  define CV_AVX_512BITALG 1
#  define CV_AVX_512VPOPCNTDQ 1
#endif
#ifdef CV_CPU_COMPILE_FMA3
#  define CV_FMA3 1
#endif
// NEON: on Windows-on-ARM <Intrin.h> must come in alongside <arm_neon.h>;
// elsewhere the compiler-defined __ARM_NEON macros signal availability.
#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
#  include <Intrin.h>
#  include <arm_neon.h>
#  define CV_NEON 1
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
#  include <arm_neon.h>
#  define CV_NEON 1
#endif
// Unconditional <arm_neon.h> for ARM targets; redundant with the include
// above when CV_NEON was set, but harmless thanks to the header's guards.
#if defined(__ARM_NEON__) || defined(__aarch64__)
#  include <arm_neon.h>
#endif
// VSX (PowerPC): <altivec.h> defines 'vector', 'pixel' and 'bool' as macros,
// which clash with C++ — undo them immediately.
#ifdef CV_CPU_COMPILE_VSX
#  include <altivec.h>
#  undef vector
#  undef pixel
#  undef bool
#  define CV_VSX 1
#endif
#ifdef CV_CPU_COMPILE_VSX3
#  define CV_VSX3 1
#endif
// MSA (MIPS SIMD Architecture).
#ifdef CV_CPU_COMPILE_MSA
#  include "hal/msa_macros.h"
#  define CV_MSA 1
#endif
// WebAssembly SIMD when compiling with Emscripten.
#ifdef __EMSCRIPTEN__
#  define CV_WASM_SIMD 1
#  include <wasm_simd128.h>
#endif
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
| #if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX | |||
| // RAII guard that issues _mm256_zeroupper() on both construction and | |||
| // destruction. NOTE(review): this is the standard technique to clear the | |||
| // upper halves of YMM registers around AVX code, presumably to avoid | |||
| // AVX<->SSE transition penalties on mixed-ISA call boundaries — only | |||
| // compiled when AVX is enabled for this TU but is not part of the | |||
| // baseline (see the enclosing #if on CV_CPU_COMPILE_AVX / | |||
| // CV_CPU_BASELINE_COMPILE_AVX). Used via the __CV_AVX_GUARD macro below. | |||
| struct VZeroUpperGuard { | |||
| #ifdef __GNUC__ | |||
| // Force inlining so the guard adds no call overhead (GCC/Clang only). | |||
| __attribute__((always_inline)) | |||
| #endif | |||
| inline VZeroUpperGuard() { _mm256_zeroupper(); } | |||
| #ifdef __GNUC__ | |||
| __attribute__((always_inline)) | |||
| #endif | |||
| inline ~VZeroUpperGuard() { _mm256_zeroupper(); } | |||
| }; | |||
| #define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard); | |||
| #endif | |||
| #ifdef __CV_AVX_GUARD | |||
| #define CV_AVX_GUARD __CV_AVX_GUARD | |||
| #else | |||
| #define CV_AVX_GUARD | |||
| #endif | |||
| #endif // __OPENCV_BUILD | |||
| #if !defined __OPENCV_BUILD /* Compatibility code */ \ | |||
| && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ | |||
| #if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) | |||
| # include <emmintrin.h> | |||
| # define CV_MMX 1 | |||
| # define CV_SSE 1 | |||
| # define CV_SSE2 1 | |||
| #elif defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER)) | |||
| # include <Intrin.h> | |||
| # include <arm_neon.h> | |||
| # define CV_NEON 1 | |||
| #elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) | |||
| # include <arm_neon.h> | |||
| # define CV_NEON 1 | |||
| #elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__) | |||
| # include <altivec.h> | |||
| # undef vector | |||
| # undef pixel | |||
| # undef bool | |||
| # define CV_VSX 1 | |||
| #endif | |||
| #endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code) | |||
| #ifndef CV_MMX | |||
| # define CV_MMX 0 | |||
| #endif | |||
| #ifndef CV_SSE | |||
| # define CV_SSE 0 | |||
| #endif | |||
| #ifndef CV_SSE2 | |||
| # define CV_SSE2 0 | |||
| #endif | |||
| #ifndef CV_SSE3 | |||
| # define CV_SSE3 0 | |||
| #endif | |||
| #ifndef CV_SSSE3 | |||
| # define CV_SSSE3 0 | |||
| #endif | |||
| #ifndef CV_SSE4_1 | |||
| # define CV_SSE4_1 0 | |||
| #endif | |||
| #ifndef CV_SSE4_2 | |||
| # define CV_SSE4_2 0 | |||
| #endif | |||
| #ifndef CV_POPCNT | |||
| # define CV_POPCNT 0 | |||
| #endif | |||
| #ifndef CV_AVX | |||
| # define CV_AVX 0 | |||
| #endif | |||
| #ifndef CV_FP16 | |||
| # define CV_FP16 0 | |||
| #endif | |||
| #ifndef CV_AVX2 | |||
| # define CV_AVX2 0 | |||
| #endif | |||
| #ifndef CV_FMA3 | |||
| # define CV_FMA3 0 | |||
| #endif | |||
| #ifndef CV_AVX_512F | |||
| # define CV_AVX_512F 0 | |||
| #endif | |||
| #ifndef CV_AVX_512BW | |||
| # define CV_AVX_512BW 0 | |||
| #endif | |||
| #ifndef CV_AVX_512CD | |||
| # define CV_AVX_512CD 0 | |||
| #endif | |||
| #ifndef CV_AVX_512DQ | |||
| # define CV_AVX_512DQ 0 | |||
| #endif | |||
| #ifndef CV_AVX_512ER | |||
| # define CV_AVX_512ER 0 | |||
| #endif | |||
| #ifndef CV_AVX_512IFMA | |||
| # define CV_AVX_512IFMA 0 | |||
| #endif | |||
| #define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated | |||
| #ifndef CV_AVX_512PF | |||
| # define CV_AVX_512PF 0 | |||
| #endif | |||
| #ifndef CV_AVX_512VBMI | |||
| # define CV_AVX_512VBMI 0 | |||
| #endif | |||
| #ifndef CV_AVX_512VL | |||
| # define CV_AVX_512VL 0 | |||
| #endif | |||
| #ifndef CV_AVX_5124FMAPS | |||
| # define CV_AVX_5124FMAPS 0 | |||
| #endif | |||
| #ifndef CV_AVX_5124VNNIW | |||
| # define CV_AVX_5124VNNIW 0 | |||
| #endif | |||
| #ifndef CV_AVX_512VPOPCNTDQ | |||
| # define CV_AVX_512VPOPCNTDQ 0 | |||
| #endif | |||
| #ifndef CV_AVX_512VNNI | |||
| # define CV_AVX_512VNNI 0 | |||
| #endif | |||
| #ifndef CV_AVX_512VBMI2 | |||
| # define CV_AVX_512VBMI2 0 | |||
| #endif | |||
| #ifndef CV_AVX_512BITALG | |||
| # define CV_AVX_512BITALG 0 | |||
| #endif | |||
| #ifndef CV_AVX512_COMMON | |||
| # define CV_AVX512_COMMON 0 | |||
| #endif | |||
| #ifndef CV_AVX512_KNL | |||
| # define CV_AVX512_KNL 0 | |||
| #endif | |||
| #ifndef CV_AVX512_KNM | |||
| # define CV_AVX512_KNM 0 | |||
| #endif | |||
| #ifndef CV_AVX512_SKX | |||
| # define CV_AVX512_SKX 0 | |||
| #endif | |||
| #ifndef CV_AVX512_CNL | |||
| # define CV_AVX512_CNL 0 | |||
| #endif | |||
| #ifndef CV_AVX512_CLX | |||
| # define CV_AVX512_CLX 0 | |||
| #endif | |||
| #ifndef CV_AVX512_ICL | |||
| # define CV_AVX512_ICL 0 | |||
| #endif | |||
| #ifndef CV_NEON | |||
| # define CV_NEON 0 | |||
| #endif | |||
| #ifndef CV_VSX | |||
| # define CV_VSX 0 | |||
| #endif | |||
| #ifndef CV_VSX3 | |||
| # define CV_VSX3 0 | |||
| #endif | |||
| #ifndef CV_MSA | |||
| # define CV_MSA 0 | |||
| #endif | |||
| #ifndef CV_WASM_SIMD | |||
| # define CV_WASM_SIMD 0 | |||
| #endif | |||
| @@ -0,0 +1,487 @@ | |||
| // AUTOGENERATED, DO NOT EDIT | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE | |||
| # define CV_TRY_SSE 1 | |||
| # define CV_CPU_FORCE_SSE 1 | |||
| # define CV_CPU_HAS_SUPPORT_SSE 1 | |||
| # define CV_CPU_CALL_SSE(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_SSE_(fn, args) return (opt_SSE::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE | |||
| # define CV_TRY_SSE 1 | |||
| # define CV_CPU_FORCE_SSE 0 | |||
| # define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE)) | |||
| # define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args) | |||
| # define CV_CPU_CALL_SSE_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args) | |||
| #else | |||
| # define CV_TRY_SSE 0 | |||
| # define CV_CPU_FORCE_SSE 0 | |||
| # define CV_CPU_HAS_SUPPORT_SSE 0 | |||
| # define CV_CPU_CALL_SSE(fn, args) | |||
| # define CV_CPU_CALL_SSE_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2 | |||
| # define CV_TRY_SSE2 1 | |||
| # define CV_CPU_FORCE_SSE2 1 | |||
| # define CV_CPU_HAS_SUPPORT_SSE2 1 | |||
| # define CV_CPU_CALL_SSE2(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_SSE2_(fn, args) return (opt_SSE2::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2 | |||
| # define CV_TRY_SSE2 1 | |||
| # define CV_CPU_FORCE_SSE2 0 | |||
| # define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2)) | |||
| # define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args) | |||
| # define CV_CPU_CALL_SSE2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args) | |||
| #else | |||
| # define CV_TRY_SSE2 0 | |||
| # define CV_CPU_FORCE_SSE2 0 | |||
| # define CV_CPU_HAS_SUPPORT_SSE2 0 | |||
| # define CV_CPU_CALL_SSE2(fn, args) | |||
| # define CV_CPU_CALL_SSE2_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3 | |||
| # define CV_TRY_SSE3 1 | |||
| # define CV_CPU_FORCE_SSE3 1 | |||
| # define CV_CPU_HAS_SUPPORT_SSE3 1 | |||
| # define CV_CPU_CALL_SSE3(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_SSE3_(fn, args) return (opt_SSE3::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3 | |||
| # define CV_TRY_SSE3 1 | |||
| # define CV_CPU_FORCE_SSE3 0 | |||
| # define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3)) | |||
| # define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args) | |||
| # define CV_CPU_CALL_SSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args) | |||
| #else | |||
| # define CV_TRY_SSE3 0 | |||
| # define CV_CPU_FORCE_SSE3 0 | |||
| # define CV_CPU_HAS_SUPPORT_SSE3 0 | |||
| # define CV_CPU_CALL_SSE3(fn, args) | |||
| # define CV_CPU_CALL_SSE3_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3 | |||
| # define CV_TRY_SSSE3 1 | |||
| # define CV_CPU_FORCE_SSSE3 1 | |||
| # define CV_CPU_HAS_SUPPORT_SSSE3 1 | |||
| # define CV_CPU_CALL_SSSE3(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_SSSE3_(fn, args) return (opt_SSSE3::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3 | |||
| # define CV_TRY_SSSE3 1 | |||
| # define CV_CPU_FORCE_SSSE3 0 | |||
| # define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3)) | |||
| # define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args) | |||
| # define CV_CPU_CALL_SSSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args) | |||
| #else | |||
| # define CV_TRY_SSSE3 0 | |||
| # define CV_CPU_FORCE_SSSE3 0 | |||
| # define CV_CPU_HAS_SUPPORT_SSSE3 0 | |||
| # define CV_CPU_CALL_SSSE3(fn, args) | |||
| # define CV_CPU_CALL_SSSE3_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1 | |||
| # define CV_TRY_SSE4_1 1 | |||
| # define CV_CPU_FORCE_SSE4_1 1 | |||
| # define CV_CPU_HAS_SUPPORT_SSE4_1 1 | |||
| # define CV_CPU_CALL_SSE4_1(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_SSE4_1_(fn, args) return (opt_SSE4_1::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1 | |||
| # define CV_TRY_SSE4_1 1 | |||
| # define CV_CPU_FORCE_SSE4_1 0 | |||
| # define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1)) | |||
| # define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args) | |||
| # define CV_CPU_CALL_SSE4_1_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args) | |||
| #else | |||
| # define CV_TRY_SSE4_1 0 | |||
| # define CV_CPU_FORCE_SSE4_1 0 | |||
| # define CV_CPU_HAS_SUPPORT_SSE4_1 0 | |||
| # define CV_CPU_CALL_SSE4_1(fn, args) | |||
| # define CV_CPU_CALL_SSE4_1_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2 | |||
| # define CV_TRY_SSE4_2 1 | |||
| # define CV_CPU_FORCE_SSE4_2 1 | |||
| # define CV_CPU_HAS_SUPPORT_SSE4_2 1 | |||
| # define CV_CPU_CALL_SSE4_2(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_SSE4_2_(fn, args) return (opt_SSE4_2::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2 | |||
| # define CV_TRY_SSE4_2 1 | |||
| # define CV_CPU_FORCE_SSE4_2 0 | |||
| # define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2)) | |||
| # define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args) | |||
| # define CV_CPU_CALL_SSE4_2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args) | |||
| #else | |||
| # define CV_TRY_SSE4_2 0 | |||
| # define CV_CPU_FORCE_SSE4_2 0 | |||
| # define CV_CPU_HAS_SUPPORT_SSE4_2 0 | |||
| # define CV_CPU_CALL_SSE4_2(fn, args) | |||
| # define CV_CPU_CALL_SSE4_2_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT | |||
| # define CV_TRY_POPCNT 1 | |||
| # define CV_CPU_FORCE_POPCNT 1 | |||
| # define CV_CPU_HAS_SUPPORT_POPCNT 1 | |||
| # define CV_CPU_CALL_POPCNT(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_POPCNT_(fn, args) return (opt_POPCNT::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT | |||
| # define CV_TRY_POPCNT 1 | |||
| # define CV_CPU_FORCE_POPCNT 0 | |||
| # define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT)) | |||
| # define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args) | |||
| # define CV_CPU_CALL_POPCNT_(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args) | |||
| #else | |||
| # define CV_TRY_POPCNT 0 | |||
| # define CV_CPU_FORCE_POPCNT 0 | |||
| # define CV_CPU_HAS_SUPPORT_POPCNT 0 | |||
| # define CV_CPU_CALL_POPCNT(fn, args) | |||
| # define CV_CPU_CALL_POPCNT_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX | |||
| # define CV_TRY_AVX 1 | |||
| # define CV_CPU_FORCE_AVX 1 | |||
| # define CV_CPU_HAS_SUPPORT_AVX 1 | |||
| # define CV_CPU_CALL_AVX(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_AVX_(fn, args) return (opt_AVX::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX | |||
| # define CV_TRY_AVX 1 | |||
| # define CV_CPU_FORCE_AVX 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX)) | |||
| # define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args) | |||
| # define CV_CPU_CALL_AVX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args) | |||
| #else | |||
| # define CV_TRY_AVX 0 | |||
| # define CV_CPU_FORCE_AVX 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX 0 | |||
| # define CV_CPU_CALL_AVX(fn, args) | |||
| # define CV_CPU_CALL_AVX_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16 | |||
| # define CV_TRY_FP16 1 | |||
| # define CV_CPU_FORCE_FP16 1 | |||
| # define CV_CPU_HAS_SUPPORT_FP16 1 | |||
| # define CV_CPU_CALL_FP16(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_FP16_(fn, args) return (opt_FP16::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16 | |||
| # define CV_TRY_FP16 1 | |||
| # define CV_CPU_FORCE_FP16 0 | |||
| # define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16)) | |||
| # define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args) | |||
| # define CV_CPU_CALL_FP16_(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args) | |||
| #else | |||
| # define CV_TRY_FP16 0 | |||
| # define CV_CPU_FORCE_FP16 0 | |||
| # define CV_CPU_HAS_SUPPORT_FP16 0 | |||
| # define CV_CPU_CALL_FP16(fn, args) | |||
| # define CV_CPU_CALL_FP16_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2 | |||
| # define CV_TRY_AVX2 1 | |||
| # define CV_CPU_FORCE_AVX2 1 | |||
| # define CV_CPU_HAS_SUPPORT_AVX2 1 | |||
| # define CV_CPU_CALL_AVX2(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_AVX2_(fn, args) return (opt_AVX2::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2 | |||
| # define CV_TRY_AVX2 1 | |||
| # define CV_CPU_FORCE_AVX2 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2)) | |||
| # define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args) | |||
| # define CV_CPU_CALL_AVX2_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args) | |||
| #else | |||
| # define CV_TRY_AVX2 0 | |||
| # define CV_CPU_FORCE_AVX2 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX2 0 | |||
| # define CV_CPU_CALL_AVX2(fn, args) | |||
| # define CV_CPU_CALL_AVX2_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3 | |||
| # define CV_TRY_FMA3 1 | |||
| # define CV_CPU_FORCE_FMA3 1 | |||
| # define CV_CPU_HAS_SUPPORT_FMA3 1 | |||
| # define CV_CPU_CALL_FMA3(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_FMA3_(fn, args) return (opt_FMA3::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3 | |||
| # define CV_TRY_FMA3 1 | |||
| # define CV_CPU_FORCE_FMA3 0 | |||
| # define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3)) | |||
| # define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args) | |||
| # define CV_CPU_CALL_FMA3_(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args) | |||
| #else | |||
| # define CV_TRY_FMA3 0 | |||
| # define CV_CPU_FORCE_FMA3 0 | |||
| # define CV_CPU_HAS_SUPPORT_FMA3 0 | |||
| # define CV_CPU_CALL_FMA3(fn, args) | |||
| # define CV_CPU_CALL_FMA3_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX_512F | |||
| # define CV_TRY_AVX_512F 1 | |||
| # define CV_CPU_FORCE_AVX_512F 1 | |||
| # define CV_CPU_HAS_SUPPORT_AVX_512F 1 | |||
| # define CV_CPU_CALL_AVX_512F(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_AVX_512F_(fn, args) return (opt_AVX_512F::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX_512F | |||
| # define CV_TRY_AVX_512F 1 | |||
| # define CV_CPU_FORCE_AVX_512F 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX_512F (cv::checkHardwareSupport(CV_CPU_AVX_512F)) | |||
| # define CV_CPU_CALL_AVX_512F(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args) | |||
| # define CV_CPU_CALL_AVX_512F_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args) | |||
| #else | |||
| # define CV_TRY_AVX_512F 0 | |||
| # define CV_CPU_FORCE_AVX_512F 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX_512F 0 | |||
| # define CV_CPU_CALL_AVX_512F(fn, args) | |||
| # define CV_CPU_CALL_AVX_512F_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...) CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_COMMON | |||
| # define CV_TRY_AVX512_COMMON 1 | |||
| # define CV_CPU_FORCE_AVX512_COMMON 1 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_COMMON 1 | |||
| # define CV_CPU_CALL_AVX512_COMMON(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_AVX512_COMMON_(fn, args) return (opt_AVX512_COMMON::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_COMMON | |||
| # define CV_TRY_AVX512_COMMON 1 | |||
| # define CV_CPU_FORCE_AVX512_COMMON 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_COMMON (cv::checkHardwareSupport(CV_CPU_AVX512_COMMON)) | |||
| # define CV_CPU_CALL_AVX512_COMMON(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) | |||
| # define CV_CPU_CALL_AVX512_COMMON_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) | |||
| #else | |||
| # define CV_TRY_AVX512_COMMON 0 | |||
| # define CV_CPU_FORCE_AVX512_COMMON 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_COMMON 0 | |||
| # define CV_CPU_CALL_AVX512_COMMON(fn, args) | |||
| # define CV_CPU_CALL_AVX512_COMMON_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_AVX512_COMMON(fn, args, mode, ...) CV_CPU_CALL_AVX512_COMMON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNL | |||
| # define CV_TRY_AVX512_KNL 1 | |||
| # define CV_CPU_FORCE_AVX512_KNL 1 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_KNL 1 | |||
| # define CV_CPU_CALL_AVX512_KNL(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_AVX512_KNL_(fn, args) return (opt_AVX512_KNL::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNL | |||
| # define CV_TRY_AVX512_KNL 1 | |||
| # define CV_CPU_FORCE_AVX512_KNL 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_KNL (cv::checkHardwareSupport(CV_CPU_AVX512_KNL)) | |||
| # define CV_CPU_CALL_AVX512_KNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) | |||
| # define CV_CPU_CALL_AVX512_KNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) | |||
| #else | |||
| # define CV_TRY_AVX512_KNL 0 | |||
| # define CV_CPU_FORCE_AVX512_KNL 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_KNL 0 | |||
| # define CV_CPU_CALL_AVX512_KNL(fn, args) | |||
| # define CV_CPU_CALL_AVX512_KNL_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_AVX512_KNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_KNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNM | |||
| # define CV_TRY_AVX512_KNM 1 | |||
| # define CV_CPU_FORCE_AVX512_KNM 1 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_KNM 1 | |||
| # define CV_CPU_CALL_AVX512_KNM(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_AVX512_KNM_(fn, args) return (opt_AVX512_KNM::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNM | |||
| # define CV_TRY_AVX512_KNM 1 | |||
| # define CV_CPU_FORCE_AVX512_KNM 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_KNM (cv::checkHardwareSupport(CV_CPU_AVX512_KNM)) | |||
| # define CV_CPU_CALL_AVX512_KNM(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) | |||
| # define CV_CPU_CALL_AVX512_KNM_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) | |||
| #else | |||
| # define CV_TRY_AVX512_KNM 0 | |||
| # define CV_CPU_FORCE_AVX512_KNM 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_KNM 0 | |||
| # define CV_CPU_CALL_AVX512_KNM(fn, args) | |||
| # define CV_CPU_CALL_AVX512_KNM_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_AVX512_KNM(fn, args, mode, ...) CV_CPU_CALL_AVX512_KNM(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX | |||
| # define CV_TRY_AVX512_SKX 1 | |||
| # define CV_CPU_FORCE_AVX512_SKX 1 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_SKX 1 | |||
| # define CV_CPU_CALL_AVX512_SKX(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_AVX512_SKX_(fn, args) return (opt_AVX512_SKX::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_SKX | |||
| # define CV_TRY_AVX512_SKX 1 | |||
| # define CV_CPU_FORCE_AVX512_SKX 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_SKX (cv::checkHardwareSupport(CV_CPU_AVX512_SKX)) | |||
| # define CV_CPU_CALL_AVX512_SKX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args) | |||
| # define CV_CPU_CALL_AVX512_SKX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args) | |||
| #else | |||
| # define CV_TRY_AVX512_SKX 0 | |||
| # define CV_CPU_FORCE_AVX512_SKX 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_SKX 0 | |||
| # define CV_CPU_CALL_AVX512_SKX(fn, args) | |||
| # define CV_CPU_CALL_AVX512_SKX_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...) CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CNL | |||
| # define CV_TRY_AVX512_CNL 1 | |||
| # define CV_CPU_FORCE_AVX512_CNL 1 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_CNL 1 | |||
| # define CV_CPU_CALL_AVX512_CNL(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_AVX512_CNL_(fn, args) return (opt_AVX512_CNL::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CNL | |||
| # define CV_TRY_AVX512_CNL 1 | |||
| # define CV_CPU_FORCE_AVX512_CNL 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_CNL (cv::checkHardwareSupport(CV_CPU_AVX512_CNL)) | |||
| # define CV_CPU_CALL_AVX512_CNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) | |||
| # define CV_CPU_CALL_AVX512_CNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) | |||
| #else | |||
| # define CV_TRY_AVX512_CNL 0 | |||
| # define CV_CPU_FORCE_AVX512_CNL 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_CNL 0 | |||
| # define CV_CPU_CALL_AVX512_CNL(fn, args) | |||
| # define CV_CPU_CALL_AVX512_CNL_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_AVX512_CNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_CNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CLX | |||
| # define CV_TRY_AVX512_CLX 1 | |||
| # define CV_CPU_FORCE_AVX512_CLX 1 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_CLX 1 | |||
| # define CV_CPU_CALL_AVX512_CLX(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_AVX512_CLX_(fn, args) return (opt_AVX512_CLX::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CLX | |||
| # define CV_TRY_AVX512_CLX 1 | |||
| # define CV_CPU_FORCE_AVX512_CLX 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_CLX (cv::checkHardwareSupport(CV_CPU_AVX512_CLX)) | |||
| # define CV_CPU_CALL_AVX512_CLX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args) | |||
| # define CV_CPU_CALL_AVX512_CLX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args) | |||
| #else | |||
| # define CV_TRY_AVX512_CLX 0 | |||
| # define CV_CPU_FORCE_AVX512_CLX 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_CLX 0 | |||
| # define CV_CPU_CALL_AVX512_CLX(fn, args) | |||
| # define CV_CPU_CALL_AVX512_CLX_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_AVX512_CLX(fn, args, mode, ...) CV_CPU_CALL_AVX512_CLX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_ICL | |||
| # define CV_TRY_AVX512_ICL 1 | |||
| # define CV_CPU_FORCE_AVX512_ICL 1 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_ICL 1 | |||
| # define CV_CPU_CALL_AVX512_ICL(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_AVX512_ICL_(fn, args) return (opt_AVX512_ICL::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_ICL | |||
| # define CV_TRY_AVX512_ICL 1 | |||
| # define CV_CPU_FORCE_AVX512_ICL 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_ICL (cv::checkHardwareSupport(CV_CPU_AVX512_ICL)) | |||
| # define CV_CPU_CALL_AVX512_ICL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) | |||
| # define CV_CPU_CALL_AVX512_ICL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) | |||
| #else | |||
| # define CV_TRY_AVX512_ICL 0 | |||
| # define CV_CPU_FORCE_AVX512_ICL 0 | |||
| # define CV_CPU_HAS_SUPPORT_AVX512_ICL 0 | |||
| # define CV_CPU_CALL_AVX512_ICL(fn, args) | |||
| # define CV_CPU_CALL_AVX512_ICL_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_AVX512_ICL(fn, args, mode, ...) CV_CPU_CALL_AVX512_ICL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON | |||
| # define CV_TRY_NEON 1 | |||
| # define CV_CPU_FORCE_NEON 1 | |||
| # define CV_CPU_HAS_SUPPORT_NEON 1 | |||
| # define CV_CPU_CALL_NEON(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_NEON_(fn, args) return (opt_NEON::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON | |||
| # define CV_TRY_NEON 1 | |||
| # define CV_CPU_FORCE_NEON 0 | |||
| # define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON)) | |||
| # define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args) | |||
| # define CV_CPU_CALL_NEON_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args) | |||
| #else | |||
| # define CV_TRY_NEON 0 | |||
| # define CV_CPU_FORCE_NEON 0 | |||
| # define CV_CPU_HAS_SUPPORT_NEON 0 | |||
| # define CV_CPU_CALL_NEON(fn, args) | |||
| # define CV_CPU_CALL_NEON_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_MSA | |||
| # define CV_TRY_MSA 1 | |||
| # define CV_CPU_FORCE_MSA 1 | |||
| # define CV_CPU_HAS_SUPPORT_MSA 1 | |||
| # define CV_CPU_CALL_MSA(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_MSA_(fn, args) return (opt_MSA::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_MSA | |||
| # define CV_TRY_MSA 1 | |||
| # define CV_CPU_FORCE_MSA 0 | |||
| # define CV_CPU_HAS_SUPPORT_MSA (cv::checkHardwareSupport(CV_CPU_MSA)) | |||
| # define CV_CPU_CALL_MSA(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args) | |||
| # define CV_CPU_CALL_MSA_(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args) | |||
| #else | |||
| # define CV_TRY_MSA 0 | |||
| # define CV_CPU_FORCE_MSA 0 | |||
| # define CV_CPU_HAS_SUPPORT_MSA 0 | |||
| # define CV_CPU_CALL_MSA(fn, args) | |||
| # define CV_CPU_CALL_MSA_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_MSA(fn, args, mode, ...) CV_CPU_CALL_MSA(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX | |||
| # define CV_TRY_VSX 1 | |||
| # define CV_CPU_FORCE_VSX 1 | |||
| # define CV_CPU_HAS_SUPPORT_VSX 1 | |||
| # define CV_CPU_CALL_VSX(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_VSX_(fn, args) return (opt_VSX::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX | |||
| # define CV_TRY_VSX 1 | |||
| # define CV_CPU_FORCE_VSX 0 | |||
| # define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX)) | |||
| # define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args) | |||
| # define CV_CPU_CALL_VSX_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args) | |||
| #else | |||
| # define CV_TRY_VSX 0 | |||
| # define CV_CPU_FORCE_VSX 0 | |||
| # define CV_CPU_HAS_SUPPORT_VSX 0 | |||
| # define CV_CPU_CALL_VSX(fn, args) | |||
| # define CV_CPU_CALL_VSX_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3 | |||
| # define CV_TRY_VSX3 1 | |||
| # define CV_CPU_FORCE_VSX3 1 | |||
| # define CV_CPU_HAS_SUPPORT_VSX3 1 | |||
| # define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args) | |||
| # define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args) | |||
| #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3 | |||
| # define CV_TRY_VSX3 1 | |||
| # define CV_CPU_FORCE_VSX3 0 | |||
| # define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3)) | |||
| # define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args) | |||
| # define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args) | |||
| #else | |||
| # define CV_TRY_VSX3 0 | |||
| # define CV_CPU_FORCE_VSX3 0 | |||
| # define CV_CPU_HAS_SUPPORT_VSX3 0 | |||
| # define CV_CPU_CALL_VSX3(fn, args) | |||
| # define CV_CPU_CALL_VSX3_(fn, args) | |||
| #endif | |||
| #define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...) CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) | |||
| #define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args) | |||
| #define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */ | |||
| @@ -0,0 +1,920 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Copyright (C) 2015, Itseez Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CORE_CVDEF_H | |||
| #define OPENCV_CORE_CVDEF_H | |||
| //! @addtogroup core_utils | |||
| //! @{ | |||
| #ifdef OPENCV_INCLUDE_PORT_FILE // User-provided header file with custom platform configuration | |||
| #include OPENCV_INCLUDE_PORT_FILE | |||
| #endif | |||
| #if !defined CV_DOXYGEN && !defined CV_IGNORE_DEBUG_BUILD_GUARD | |||
| #if (defined(_MSC_VER) && (defined(DEBUG) || defined(_DEBUG))) || \ | |||
| (defined(_GLIBCXX_DEBUG) || defined(_GLIBCXX_DEBUG_PEDANTIC)) | |||
| // Guard to prevent using of binary incompatible binaries / runtimes | |||
| // https://github.com/opencv/opencv/pull/9161 | |||
| #define CV__DEBUG_NS_BEGIN namespace debug_build_guard { | |||
| #define CV__DEBUG_NS_END } | |||
| namespace cv { namespace debug_build_guard { } using namespace debug_build_guard; } | |||
| #endif | |||
| #endif | |||
| #ifndef CV__DEBUG_NS_BEGIN | |||
| #define CV__DEBUG_NS_BEGIN | |||
| #define CV__DEBUG_NS_END | |||
| #endif | |||
| #ifdef __OPENCV_BUILD | |||
| #include "cvconfig.h" | |||
| #endif | |||
| #ifndef __CV_EXPAND | |||
| #define __CV_EXPAND(x) x | |||
| #endif | |||
| #ifndef __CV_CAT | |||
| #define __CV_CAT__(x, y) x ## y | |||
| #define __CV_CAT_(x, y) __CV_CAT__(x, y) | |||
| #define __CV_CAT(x, y) __CV_CAT_(x, y) | |||
| #endif | |||
| #define __CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N | |||
| #define __CV_VA_NUM_ARGS(...) __CV_EXPAND(__CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) | |||
// Select CV_Func: a macro expanding to the enclosing function's name.
// NOTE(review): the original chain contained misspelled predefined macros
// (__cpluscplus, __clang_major without trailing underscores and with
// major/minor swapped, __STDC_VERSION, _INTEL_COMPILER, __BORLAND__),
// which made those branches dead. Spellings are fixed below; the branch
// order and the values chosen per compiler are unchanged.
#ifdef CV_Func
// keep current value (through OpenCV port file)
#elif defined __GNUC__ || (defined (__cplusplus) && (__cplusplus >= 201103))
#define CV_Func __func__
#elif defined __clang__ && (__clang_major__ * 100 + __clang_minor__ >= 305)
#define CV_Func __func__
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)
#define CV_Func __func__
#elif defined _MSC_VER
#define CV_Func __FUNCTION__
#elif defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 600)
#define CV_Func __FUNCTION__
#elif defined __IBMCPP__ && __IBMCPP__ >=500
#define CV_Func __FUNCTION__
#elif defined __BORLANDC__ && (__BORLANDC__ >= 0x550)
#define CV_Func __FUNC__
#else
#define CV_Func "<unknown>"
#endif
| //! @cond IGNORED | |||
| //////////////// static assert ///////////////// | |||
| #define CVAUX_CONCAT_EXP(a, b) a##b | |||
| #define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b) | |||
| #if defined(__clang__) | |||
| # ifndef __has_extension | |||
| # define __has_extension __has_feature /* compatibility, for older versions of clang */ | |||
| # endif | |||
| # if __has_extension(cxx_static_assert) | |||
| # define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition) | |||
| # elif __has_extension(c_static_assert) | |||
| # define CV_StaticAssert(condition, reason) _Static_assert((condition), reason " " #condition) | |||
| # endif | |||
| #elif defined(__GNUC__) | |||
| # if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) | |||
| # define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition) | |||
| # endif | |||
| #elif defined(_MSC_VER) | |||
| # if _MSC_VER >= 1600 /* MSVC 10 */ | |||
| # define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition) | |||
| # endif | |||
| #endif | |||
| #ifndef CV_StaticAssert | |||
| # if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302) | |||
| # define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); }) | |||
| # else | |||
// Portable fallback for CV_StaticAssert on compilers without static_assert:
// only the <true> specialization of CV_StaticAssert_failed is defined, so
// sizeof(CV_StaticAssert_failed<false>) — produced when the asserted
// condition is false — is an incomplete type and fails to compile.
namespace cv {
     template <bool x> struct CV_StaticAssert_failed;
     template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
     // Dummy carrier instantiated with the sizeof above to force evaluation.
     template<int x> struct CV_StaticAssert_test {};
}
| # define CV_StaticAssert(condition, reason)\ | |||
| typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__) | |||
| # endif | |||
| #endif | |||
| // Suppress warning "-Wdeprecated-declarations" / C4996 | |||
| #if defined(_MSC_VER) | |||
| #define CV_DO_PRAGMA(x) __pragma(x) | |||
| #elif defined(__GNUC__) | |||
| #define CV_DO_PRAGMA(x) _Pragma (#x) | |||
| #else | |||
| #define CV_DO_PRAGMA(x) | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| #define CV_SUPPRESS_DEPRECATED_START \ | |||
| CV_DO_PRAGMA(warning(push)) \ | |||
| CV_DO_PRAGMA(warning(disable: 4996)) | |||
| #define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop)) | |||
| #elif defined (__clang__) || ((__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 405)) | |||
| #define CV_SUPPRESS_DEPRECATED_START \ | |||
| CV_DO_PRAGMA(GCC diagnostic push) \ | |||
| CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations") | |||
| #define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop) | |||
| #else | |||
| #define CV_SUPPRESS_DEPRECATED_START | |||
| #define CV_SUPPRESS_DEPRECATED_END | |||
| #endif | |||
| #define CV_UNUSED(name) (void)name | |||
| //! @endcond | |||
| // undef problematic defines sometimes defined by system headers (windows.h in particular) | |||
| #undef small | |||
| #undef min | |||
| #undef max | |||
| #undef abs | |||
| #undef Complex | |||
| #if defined __cplusplus | |||
| #include <limits> | |||
| #else | |||
| #include <limits.h> | |||
| #endif | |||
| #include "opencv2/core/hal/interface.h" | |||
| #if defined __ICL | |||
| # define CV_ICC __ICL | |||
| #elif defined __ICC | |||
| # define CV_ICC __ICC | |||
| #elif defined __ECL | |||
| # define CV_ICC __ECL | |||
| #elif defined __ECC | |||
| # define CV_ICC __ECC | |||
| #elif defined __INTEL_COMPILER | |||
| # define CV_ICC __INTEL_COMPILER | |||
| #endif | |||
| #ifndef CV_INLINE | |||
| # if defined __cplusplus | |||
| # define CV_INLINE static inline | |||
| # elif defined _MSC_VER | |||
| # define CV_INLINE __inline | |||
| # else | |||
| # define CV_INLINE static | |||
| # endif | |||
| #endif | |||
| #ifndef CV_ALWAYS_INLINE | |||
| #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) | |||
| #define CV_ALWAYS_INLINE inline __attribute__((always_inline)) | |||
| #elif defined(_MSC_VER) | |||
| #define CV_ALWAYS_INLINE __forceinline | |||
| #else | |||
| #define CV_ALWAYS_INLINE inline | |||
| #endif | |||
| #endif | |||
| #if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED) | |||
| # define CV_ENABLE_UNROLLED 0 | |||
| #else | |||
| # define CV_ENABLE_UNROLLED 1 | |||
| #endif | |||
| #ifdef __GNUC__ | |||
| # define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x))) | |||
| #elif defined _MSC_VER | |||
| # define CV_DECL_ALIGNED(x) __declspec(align(x)) | |||
| #else | |||
| # define CV_DECL_ALIGNED(x) | |||
| #endif | |||
| /* CPU features and intrinsics support */ | |||
| #define CV_CPU_NONE 0 | |||
| #define CV_CPU_MMX 1 | |||
| #define CV_CPU_SSE 2 | |||
| #define CV_CPU_SSE2 3 | |||
| #define CV_CPU_SSE3 4 | |||
| #define CV_CPU_SSSE3 5 | |||
| #define CV_CPU_SSE4_1 6 | |||
| #define CV_CPU_SSE4_2 7 | |||
| #define CV_CPU_POPCNT 8 | |||
| #define CV_CPU_FP16 9 | |||
| #define CV_CPU_AVX 10 | |||
| #define CV_CPU_AVX2 11 | |||
| #define CV_CPU_FMA3 12 | |||
| #define CV_CPU_AVX_512F 13 | |||
| #define CV_CPU_AVX_512BW 14 | |||
| #define CV_CPU_AVX_512CD 15 | |||
| #define CV_CPU_AVX_512DQ 16 | |||
| #define CV_CPU_AVX_512ER 17 | |||
| #define CV_CPU_AVX_512IFMA512 18 // deprecated | |||
| #define CV_CPU_AVX_512IFMA 18 | |||
| #define CV_CPU_AVX_512PF 19 | |||
| #define CV_CPU_AVX_512VBMI 20 | |||
| #define CV_CPU_AVX_512VL 21 | |||
| #define CV_CPU_AVX_512VBMI2 22 | |||
| #define CV_CPU_AVX_512VNNI 23 | |||
| #define CV_CPU_AVX_512BITALG 24 | |||
| #define CV_CPU_AVX_512VPOPCNTDQ 25 | |||
| #define CV_CPU_AVX_5124VNNIW 26 | |||
| #define CV_CPU_AVX_5124FMAPS 27 | |||
| #define CV_CPU_NEON 100 | |||
| #define CV_CPU_MSA 150 | |||
| #define CV_CPU_VSX 200 | |||
| #define CV_CPU_VSX3 201 | |||
| // CPU features groups | |||
| #define CV_CPU_AVX512_SKX 256 | |||
| #define CV_CPU_AVX512_COMMON 257 | |||
| #define CV_CPU_AVX512_KNL 258 | |||
| #define CV_CPU_AVX512_KNM 259 | |||
| #define CV_CPU_AVX512_CNL 260 | |||
| #define CV_CPU_AVX512_CLX 261 | |||
| #define CV_CPU_AVX512_ICL 262 | |||
| // when adding to this list remember to update the following enum | |||
| #define CV_HARDWARE_MAX_FEATURE 512 | |||
/** @brief Available CPU features.
*/
enum CpuFeatures {
    // x86/x64 SIMD generations; numeric IDs match the CV_CPU_* defines above.
    CPU_MMX             = 1,
    CPU_SSE             = 2,
    CPU_SSE2            = 3,
    CPU_SSE3            = 4,
    CPU_SSSE3           = 5,
    CPU_SSE4_1          = 6,
    CPU_SSE4_2          = 7,
    CPU_POPCNT          = 8,
    CPU_FP16            = 9,
    CPU_AVX             = 10,
    CPU_AVX2            = 11,
    CPU_FMA3            = 12,
    CPU_AVX_512F        = 13,
    CPU_AVX_512BW       = 14,
    CPU_AVX_512CD       = 15,
    CPU_AVX_512DQ       = 16,
    CPU_AVX_512ER       = 17,
    CPU_AVX_512IFMA512  = 18, // deprecated
    CPU_AVX_512IFMA     = 18,
    CPU_AVX_512PF       = 19,
    CPU_AVX_512VBMI     = 20,
    CPU_AVX_512VL       = 21,
    CPU_AVX_512VBMI2    = 22,
    CPU_AVX_512VNNI     = 23,
    CPU_AVX_512BITALG   = 24,
    CPU_AVX_512VPOPCNTDQ= 25,
    CPU_AVX_5124VNNIW   = 26,
    CPU_AVX_5124FMAPS   = 27,
    // Non-x86 SIMD feature IDs (NEON / MSA / VSX), mirroring CV_CPU_* above.
    CPU_NEON            = 100,
    CPU_MSA             = 150,
    CPU_VSX             = 200,
    CPU_VSX3            = 201,
    // CPU feature groups; values mirror the CV_CPU_AVX512_* defines above.
    CPU_AVX512_SKX      = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
    CPU_AVX512_COMMON   = 257, //!< Common instructions AVX-512F/CD for all CPUs that support AVX-512
    CPU_AVX512_KNL      = 258, //!< Knights Landing with AVX-512F/CD/ER/PF
    CPU_AVX512_KNM      = 259, //!< Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ
    CPU_AVX512_CNL      = 260, //!< Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI
    CPU_AVX512_CLX      = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI
    CPU_AVX512_ICL      = 262, //!< Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ
    CPU_MAX_FEATURE     = 512 // see CV_HARDWARE_MAX_FEATURE
};
| #include "cv_cpu_dispatch.h" | |||
| #if !defined(CV_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64)) | |||
// int*, int64* should be properly aligned pointers on ARMv7
| #define CV_STRONG_ALIGNMENT 1 | |||
| #endif | |||
| #if !defined(CV_STRONG_ALIGNMENT) | |||
| #define CV_STRONG_ALIGNMENT 0 | |||
| #endif | |||
| /* fundamental constants */ | |||
| #define CV_PI 3.1415926535897932384626433832795 | |||
| #define CV_2PI 6.283185307179586476925286766559 | |||
| #define CV_LOG2 0.69314718055994530941723212145818 | |||
| #if defined __ARM_FP16_FORMAT_IEEE \ | |||
| && !defined __CUDACC__ | |||
| # define CV_FP16_TYPE 1 | |||
| #else | |||
| # define CV_FP16_TYPE 0 | |||
| #endif | |||
// 16-bit reinterpretation union: view the same bit pattern as signed,
// unsigned, or (when the compiler provides a native __fp16) half float.
typedef union Cv16suf
{
    short i;
    ushort u;
#if CV_FP16_TYPE
    __fp16 h;
#endif
}
Cv16suf;
// 32-bit reinterpretation union: the same bit pattern as int, unsigned,
// or single-precision float.
typedef union Cv32suf
{
    int i;
    unsigned u;
    float f;
}
Cv32suf;
// 64-bit reinterpretation union: the same bit pattern as int64, uint64,
// or double-precision float.
typedef union Cv64suf
{
    int64 i;
    uint64 u;
    double f;
}
Cv64suf;
| #define OPENCV_ABI_COMPATIBILITY 400 | |||
| #ifdef __OPENCV_BUILD | |||
| # define DISABLE_OPENCV_3_COMPATIBILITY | |||
| # define OPENCV_DISABLE_DEPRECATED_COMPATIBILITY | |||
| #endif | |||
| #ifndef CV_EXPORTS | |||
| # if (defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined(CVAPI_EXPORTS) | |||
| # define CV_EXPORTS __declspec(dllexport) | |||
| # elif defined __GNUC__ && __GNUC__ >= 4 && (defined(CVAPI_EXPORTS) || defined(__APPLE__)) | |||
| # define CV_EXPORTS __attribute__ ((visibility ("default"))) | |||
| # endif | |||
| #endif | |||
| #ifndef CV_EXPORTS | |||
| # define CV_EXPORTS | |||
| #endif | |||
| #ifdef _MSC_VER | |||
| # define CV_EXPORTS_TEMPLATE | |||
| #else | |||
| # define CV_EXPORTS_TEMPLATE CV_EXPORTS | |||
| #endif | |||
| #ifndef CV_DEPRECATED | |||
| # if defined(__GNUC__) | |||
| # define CV_DEPRECATED __attribute__ ((deprecated)) | |||
| # elif defined(_MSC_VER) | |||
| # define CV_DEPRECATED __declspec(deprecated) | |||
| # else | |||
| # define CV_DEPRECATED | |||
| # endif | |||
| #endif | |||
| #ifndef CV_DEPRECATED_EXTERNAL | |||
| # if defined(__OPENCV_BUILD) | |||
| # define CV_DEPRECATED_EXTERNAL /* nothing */ | |||
| # else | |||
| # define CV_DEPRECATED_EXTERNAL CV_DEPRECATED | |||
| # endif | |||
| #endif | |||
| #ifndef CV_EXTERN_C | |||
| # ifdef __cplusplus | |||
| # define CV_EXTERN_C extern "C" | |||
| # else | |||
| # define CV_EXTERN_C | |||
| # endif | |||
| #endif | |||
| /* special informative macros for wrapper generators */ | |||
| #define CV_EXPORTS_W CV_EXPORTS | |||
| #define CV_EXPORTS_W_SIMPLE CV_EXPORTS | |||
| #define CV_EXPORTS_AS(synonym) CV_EXPORTS | |||
| #define CV_EXPORTS_W_MAP CV_EXPORTS | |||
| #define CV_IN_OUT | |||
| #define CV_OUT | |||
| #define CV_PROP | |||
| #define CV_PROP_RW | |||
| #define CV_WRAP | |||
| #define CV_WRAP_AS(synonym) | |||
| #define CV_WRAP_MAPPABLE(mappable) | |||
| #define CV_WRAP_PHANTOM(phantom_header) | |||
| #define CV_WRAP_DEFAULT(val) | |||
| /****************************************************************************************\ | |||
| * Matrix type (Mat) * | |||
| \****************************************************************************************/ | |||
// Helpers to decode a Mat/CvMat 'flags' word: channel count, element type,
// continuity flag (bit 14) and submatrix flag (bit 15).
#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT)
#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
#define CV_MAT_TYPE_MASK (CV_DEPTH_MAX*CV_CN_MAX - 1)
#define CV_MAT_TYPE(flags) ((flags) & CV_MAT_TYPE_MASK)
#define CV_MAT_CONT_FLAG_SHIFT 14
#define CV_MAT_CONT_FLAG (1 << CV_MAT_CONT_FLAG_SHIFT)
#define CV_IS_MAT_CONT(flags) ((flags) & CV_MAT_CONT_FLAG)
#define CV_IS_CONT_MAT CV_IS_MAT_CONT
#define CV_SUBMAT_FLAG_SHIFT 15
#define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT)
// NOTE(review): originally tested CV_MAT_SUBMAT_FLAG, which is not defined
// anywhere in this header (the macro defined above is CV_SUBMAT_FLAG), so
// any use of CV_IS_SUBMAT failed to compile. Both names denote bit 15.
#define CV_IS_SUBMAT(flags) ((flags) & CV_SUBMAT_FLAG)
/** Size of each channel item,
0x28442211 = 0010 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15)
#define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type))
#ifndef MIN
// Minimum of two values. NOTE: a macro — each argument may be evaluated twice,
// so avoid arguments with side effects.
# define MIN(a,b) ((a) > (b) ? (b) : (a))
#endif
#ifndef MAX
// Maximum of two values. Same double-evaluation caveat as MIN.
# define MAX(a,b) ((a) < (b) ? (b) : (a))
#endif
| ///////////////////////////////////////// Enum operators /////////////////////////////////////// | |||
| /** | |||
| Provides compatibility operators for both classical and C++11 enum classes, | |||
| as well as exposing the C++11 enum class members for backwards compatibility | |||
| @code | |||
| // Provides operators required for flag enums | |||
| CV_ENUM_FLAGS(AccessFlag) | |||
| // Exposes the listed members of the enum class AccessFlag to the current namespace | |||
| CV_ENUM_CLASS_EXPOSE(AccessFlag, ACCESS_READ [, ACCESS_WRITE [, ...] ]); | |||
| @endcode | |||
| */ | |||
| #define __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST) \ | |||
| static const EnumType MEMBER_CONST = EnumType::MEMBER_CONST; \ | |||
| #define __CV_ENUM_CLASS_EXPOSE_2(EnumType, MEMBER_CONST, ...) \ | |||
| __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ | |||
| __CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_1(EnumType, __VA_ARGS__)); \ | |||
| #define __CV_ENUM_CLASS_EXPOSE_3(EnumType, MEMBER_CONST, ...) \ | |||
| __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ | |||
| __CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_2(EnumType, __VA_ARGS__)); \ | |||
| #define __CV_ENUM_CLASS_EXPOSE_4(EnumType, MEMBER_CONST, ...) \ | |||
| __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ | |||
| __CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_3(EnumType, __VA_ARGS__)); \ | |||
| #define __CV_ENUM_CLASS_EXPOSE_5(EnumType, MEMBER_CONST, ...) \ | |||
| __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ | |||
| __CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_4(EnumType, __VA_ARGS__)); \ | |||
| #define __CV_ENUM_CLASS_EXPOSE_6(EnumType, MEMBER_CONST, ...) \ | |||
| __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ | |||
| __CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_5(EnumType, __VA_ARGS__)); \ | |||
| #define __CV_ENUM_CLASS_EXPOSE_7(EnumType, MEMBER_CONST, ...) \ | |||
| __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ | |||
| __CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_6(EnumType, __VA_ARGS__)); \ | |||
| #define __CV_ENUM_CLASS_EXPOSE_8(EnumType, MEMBER_CONST, ...) \ | |||
| __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ | |||
| __CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_7(EnumType, __VA_ARGS__)); \ | |||
| #define __CV_ENUM_CLASS_EXPOSE_9(EnumType, MEMBER_CONST, ...) \ | |||
| __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ | |||
| __CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_8(EnumType, __VA_ARGS__)); \ | |||
| #define __CV_ENUM_FLAGS_LOGICAL_NOT(EnumType) \ | |||
| static inline bool operator!(const EnumType& val) \ | |||
| { \ | |||
| typedef std::underlying_type<EnumType>::type UnderlyingType; \ | |||
| return !static_cast<UnderlyingType>(val); \ | |||
| } \ | |||
| #define __CV_ENUM_FLAGS_LOGICAL_NOT_EQ(Arg1Type, Arg2Type) \ | |||
| static inline bool operator!=(const Arg1Type& a, const Arg2Type& b) \ | |||
| { \ | |||
| return static_cast<int>(a) != static_cast<int>(b); \ | |||
| } \ | |||
| #define __CV_ENUM_FLAGS_LOGICAL_EQ(Arg1Type, Arg2Type) \ | |||
| static inline bool operator==(const Arg1Type& a, const Arg2Type& b) \ | |||
| { \ | |||
| return static_cast<int>(a) == static_cast<int>(b); \ | |||
| } \ | |||
| #define __CV_ENUM_FLAGS_BITWISE_NOT(EnumType) \ | |||
| static inline EnumType operator~(const EnumType& val) \ | |||
| { \ | |||
| typedef std::underlying_type<EnumType>::type UnderlyingType; \ | |||
| return static_cast<EnumType>(~static_cast<UnderlyingType>(val)); \ | |||
| } \ | |||
| #define __CV_ENUM_FLAGS_BITWISE_OR(EnumType, Arg1Type, Arg2Type) \ | |||
| static inline EnumType operator|(const Arg1Type& a, const Arg2Type& b) \ | |||
| { \ | |||
| typedef std::underlying_type<EnumType>::type UnderlyingType; \ | |||
| return static_cast<EnumType>(static_cast<UnderlyingType>(a) | static_cast<UnderlyingType>(b)); \ | |||
| } \ | |||
| #define __CV_ENUM_FLAGS_BITWISE_AND(EnumType, Arg1Type, Arg2Type) \ | |||
| static inline EnumType operator&(const Arg1Type& a, const Arg2Type& b) \ | |||
| { \ | |||
| typedef std::underlying_type<EnumType>::type UnderlyingType; \ | |||
| return static_cast<EnumType>(static_cast<UnderlyingType>(a) & static_cast<UnderlyingType>(b)); \ | |||
| } \ | |||
| #define __CV_ENUM_FLAGS_BITWISE_XOR(EnumType, Arg1Type, Arg2Type) \ | |||
| static inline EnumType operator^(const Arg1Type& a, const Arg2Type& b) \ | |||
| { \ | |||
| typedef std::underlying_type<EnumType>::type UnderlyingType; \ | |||
| return static_cast<EnumType>(static_cast<UnderlyingType>(a) ^ static_cast<UnderlyingType>(b)); \ | |||
| } \ | |||
| #define __CV_ENUM_FLAGS_BITWISE_OR_EQ(EnumType, Arg1Type) \ | |||
| static inline EnumType& operator|=(EnumType& _this, const Arg1Type& val) \ | |||
| { \ | |||
| _this = static_cast<EnumType>(static_cast<int>(_this) | static_cast<int>(val)); \ | |||
| return _this; \ | |||
| } \ | |||
| #define __CV_ENUM_FLAGS_BITWISE_AND_EQ(EnumType, Arg1Type) \ | |||
| static inline EnumType& operator&=(EnumType& _this, const Arg1Type& val) \ | |||
| { \ | |||
| _this = static_cast<EnumType>(static_cast<int>(_this) & static_cast<int>(val)); \ | |||
| return _this; \ | |||
| } \ | |||
| #define __CV_ENUM_FLAGS_BITWISE_XOR_EQ(EnumType, Arg1Type) \ | |||
| static inline EnumType& operator^=(EnumType& _this, const Arg1Type& val) \ | |||
| { \ | |||
| _this = static_cast<EnumType>(static_cast<int>(_this) ^ static_cast<int>(val)); \ | |||
| return _this; \ | |||
| } \ | |||
| #define CV_ENUM_CLASS_EXPOSE(EnumType, ...) \ | |||
| __CV_EXPAND(__CV_CAT(__CV_ENUM_CLASS_EXPOSE_, __CV_VA_NUM_ARGS(__VA_ARGS__))(EnumType, __VA_ARGS__)); \ | |||
| #define CV_ENUM_FLAGS(EnumType) \ | |||
| __CV_ENUM_FLAGS_LOGICAL_NOT (EnumType) \ | |||
| __CV_ENUM_FLAGS_LOGICAL_EQ (EnumType, int) \ | |||
| __CV_ENUM_FLAGS_LOGICAL_NOT_EQ (EnumType, int) \ | |||
| \ | |||
| __CV_ENUM_FLAGS_BITWISE_NOT (EnumType) \ | |||
| __CV_ENUM_FLAGS_BITWISE_OR (EnumType, EnumType, EnumType) \ | |||
| __CV_ENUM_FLAGS_BITWISE_AND (EnumType, EnumType, EnumType) \ | |||
| __CV_ENUM_FLAGS_BITWISE_XOR (EnumType, EnumType, EnumType) \ | |||
| \ | |||
| __CV_ENUM_FLAGS_BITWISE_OR_EQ (EnumType, EnumType) \ | |||
| __CV_ENUM_FLAGS_BITWISE_AND_EQ (EnumType, EnumType) \ | |||
| __CV_ENUM_FLAGS_BITWISE_XOR_EQ (EnumType, EnumType) \ | |||
| /****************************************************************************************\ | |||
* static analysis *
| \****************************************************************************************/ | |||
| // In practice, some macro are not processed correctly (noreturn is not detected). | |||
| // We need to use simplified definition for them. | |||
| #ifndef CV_STATIC_ANALYSIS | |||
| # if defined(__KLOCWORK__) || defined(__clang_analyzer__) || defined(__COVERITY__) | |||
| # define CV_STATIC_ANALYSIS 1 | |||
| # endif | |||
| #else | |||
| # if defined(CV_STATIC_ANALYSIS) && !(__CV_CAT(1, CV_STATIC_ANALYSIS) == 1) // defined and not empty | |||
| # if 0 == CV_STATIC_ANALYSIS | |||
| # undef CV_STATIC_ANALYSIS | |||
| # endif | |||
| # endif | |||
| #endif | |||
| /****************************************************************************************\ | |||
| * Thread sanitizer * | |||
| \****************************************************************************************/ | |||
| #ifndef CV_THREAD_SANITIZER | |||
| # if defined(__has_feature) | |||
| # if __has_feature(thread_sanitizer) | |||
| # define CV_THREAD_SANITIZER | |||
| # endif | |||
| # endif | |||
| #endif | |||
| /****************************************************************************************\ | |||
| * exchange-add operation for atomic operations on reference counters * | |||
| \****************************************************************************************/ | |||
// CV_XADD(addr, delta): atomic fetch-and-add used for reference counting.
// Returns the value *addr held before the addition.
#ifdef CV_XADD
// allow to use user-defined macro
#elif defined __GNUC__ || defined __clang__
# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__) && !defined __INTEL_COMPILER
# ifdef __ATOMIC_ACQ_REL
# define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
# else
# define CV_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
# endif
# else
# if defined __ATOMIC_ACQ_REL && !defined __clang__
// version for gcc >= 4.7
# define CV_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
# else
# define CV_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
# endif
# endif
#elif defined _MSC_VER && !defined RC_INVOKED
# include <intrin.h>
# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
#else
#ifdef OPENCV_FORCE_UNSAFE_XADD
// Non-atomic fallback for platforms without atomics, opt-in only.
// NOTE(review): the original definition was missing the 'int' return type
// (implicit int is ill-formed in C++), so this branch could not compile.
CV_INLINE int CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
#else
#error "OpenCV: can't define safe CV_XADD macro for current platform (unsupported). Define CV_XADD macro through custom port header (see OPENCV_INCLUDE_PORT_FILE)"
#endif
#endif
| /****************************************************************************************\ | |||
| * CV_NORETURN attribute * | |||
| \****************************************************************************************/ | |||
| #ifndef CV_NORETURN | |||
| # if defined(__GNUC__) | |||
| # define CV_NORETURN __attribute__((__noreturn__)) | |||
| # elif defined(_MSC_VER) && (_MSC_VER >= 1300) | |||
| # define CV_NORETURN __declspec(noreturn) | |||
| # else | |||
| # define CV_NORETURN /* nothing by default */ | |||
| # endif | |||
| #endif | |||
| /****************************************************************************************\ | |||
| * CV_NODISCARD attribute * | |||
| * encourages the compiler to issue a warning if the return value is discarded (C++17) * | |||
| \****************************************************************************************/ | |||
| #ifndef CV_NODISCARD | |||
| # if defined(__GNUC__) | |||
| # define CV_NODISCARD __attribute__((__warn_unused_result__)) // at least available with GCC 3.4 | |||
| # elif defined(__clang__) && defined(__has_attribute) | |||
| # if __has_attribute(__warn_unused_result__) | |||
| # define CV_NODISCARD __attribute__((__warn_unused_result__)) | |||
| # endif | |||
| # endif | |||
| #endif | |||
| #ifndef CV_NODISCARD | |||
| # define CV_NODISCARD /* nothing by default */ | |||
| #endif | |||
| /****************************************************************************************\ | |||
| * C++ 11 * | |||
| \****************************************************************************************/ | |||
| #ifndef CV_CXX11 | |||
| # if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800) | |||
| # define CV_CXX11 1 | |||
| # endif | |||
| #else | |||
| # if CV_CXX11 == 0 | |||
| # undef CV_CXX11 | |||
| # endif | |||
| #endif | |||
| #ifndef CV_CXX11 | |||
| # error "OpenCV 4.x+ requires enabled C++11 support" | |||
| #endif | |||
| #define CV_CXX_MOVE_SEMANTICS 1 | |||
| #define CV_CXX_MOVE(x) std::move(x) | |||
| #define CV_CXX_STD_ARRAY 1 | |||
| #include <array> | |||
| #ifndef CV_OVERRIDE | |||
| # define CV_OVERRIDE override | |||
| #endif | |||
| #ifndef CV_FINAL | |||
| # define CV_FINAL final | |||
| #endif | |||
| #ifndef CV_NOEXCEPT | |||
| # if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/) | |||
| # define CV_NOEXCEPT noexcept | |||
| # endif | |||
| #endif | |||
| #ifndef CV_NOEXCEPT | |||
| # define CV_NOEXCEPT | |||
| #endif | |||
| #ifndef CV_CONSTEXPR | |||
| # if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/) | |||
| # define CV_CONSTEXPR constexpr | |||
| # endif | |||
| #endif | |||
| #ifndef CV_CONSTEXPR | |||
| # define CV_CONSTEXPR | |||
| #endif | |||
// Integer types portability
| #ifdef OPENCV_STDINT_HEADER | |||
| #include OPENCV_STDINT_HEADER | |||
| #elif defined(__cplusplus) | |||
| #if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */ | |||
// MSVC prior to VS2010 ships no <stdint.h>/<cstdint>; declare the
// fixed-width integer aliases by hand inside namespace cv.
namespace cv {
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef signed short int16_t;
typedef unsigned short uint16_t;
typedef signed int int32_t;
typedef unsigned int uint32_t;
// MSVC spells the 64-bit integer types __int64 / unsigned __int64.
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
}
| #elif defined(_MSC_VER) || __cplusplus >= 201103L | |||
| #include <cstdint> | |||
| namespace cv { | |||
| using std::int8_t; | |||
| using std::uint8_t; | |||
| using std::int16_t; | |||
| using std::uint16_t; | |||
| using std::int32_t; | |||
| using std::uint32_t; | |||
| using std::int64_t; | |||
| using std::uint64_t; | |||
| } | |||
| #else | |||
| #include <stdint.h> | |||
| namespace cv { | |||
| typedef ::int8_t int8_t; | |||
| typedef ::uint8_t uint8_t; | |||
| typedef ::int16_t int16_t; | |||
| typedef ::uint16_t uint16_t; | |||
| typedef ::int32_t int32_t; | |||
| typedef ::uint32_t uint32_t; | |||
| typedef ::int64_t int64_t; | |||
| typedef ::uint64_t uint64_t; | |||
| } | |||
| #endif | |||
| #else // pure C | |||
| #include <stdint.h> | |||
| #endif | |||
| #ifdef __cplusplus | |||
| namespace cv | |||
| { | |||
// 16-bit IEEE-754 half-precision float wrapper.
// When the compiler exposes a native half type (CV_FP16_TYPE -> __fp16) the
// conversions are delegated to the compiler; otherwise a software
// bit-manipulation implementation is used (with an F16C fast path under
// CV_AVX2). Cv16suf/Cv32suf are the bit-reinterpretation unions declared
// earlier in this header.
class float16_t
{
public:
#if CV_FP16_TYPE
    float16_t() : h(0) {}
    explicit float16_t(float x) { h = (__fp16)x; }
    operator float() const { return (float)h; }
    // Reinterprets a raw 16-bit pattern as a half-precision value.
    static float16_t fromBits(ushort w)
    {
        Cv16suf u;
        u.u = w;
        float16_t result;
        result.h = u.h;
        return result;
    }
    static float16_t zero()
    {
        float16_t result;
        result.h = (__fp16)0;
        return result;
    }
    // Returns the raw 16-bit representation.
    ushort bits() const
    {
        Cv16suf u;
        u.h = h;
        return u.u;
    }
protected:
    __fp16 h;
#else
    float16_t() : w(0) {}
    // Software float -> half conversion (round to nearest even).
    explicit float16_t(float x)
    {
#if CV_AVX2
        // F16C hardware path: single-lane conversion via _mm_cvtps_ph.
        __m128 v = _mm_load_ss(&x);
        w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0));
#else
        Cv32suf in;
        in.f = x;
        // Strip the sign and work on |x|; the sign bit is re-attached last.
        unsigned sign = in.u & 0x80000000;
        in.u ^= sign;
        if( in.u >= 0x47800000 )
            // |x| >= 2^16 or non-finite: NaN inputs (in.u > 0x7f800000) map
            // to a quiet half NaN 0x7e00, everything else to +/-Inf 0x7c00.
            w = (ushort)(in.u > 0x7f800000 ? 0x7e00 : 0x7c00);
        else
        {
            if (in.u < 0x38800000)
            {
                // Subnormal half result: adding 0.5f aligns the value's bits
                // with the half subnormal mantissa field; the bit pattern of
                // 0.5f (0x3f000000) is then subtracted back out.
                in.f += 0.5f;
                w = (ushort)(in.u - 0x3f000000);
            }
            else
            {
                // Normal half result: rebias the exponent and shift the
                // mantissa down by 13 bits; the "+ ((in.u >> 13) & 1)" term
                // implements the round-to-nearest-even tie-break.
                unsigned t = in.u + 0xc8000fff;
                w = (ushort)((t + ((in.u >> 13) & 1)) >> 13);
            }
        }
        w = (ushort)(w | (sign >> 16));
#endif
    }
    // Software half -> float conversion.
    operator float() const
    {
#if CV_AVX2
        float f;
        _mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w)));
        return f;
#else
        Cv32suf out;
        // t: mantissa+exponent shifted into float position with rebias.
        unsigned t = ((w & 0x7fff) << 13) + 0x38000000;
        unsigned sign = (w & 0x8000) << 16;
        unsigned e = w & 0x7c00;
        out.u = t + (1 << 23);
        // Three cases: Inf/NaN (e == 0x7c00) get the maximal float exponent,
        // zero/subnormals (e == 0) are fixed up via a float subtraction,
        // normal numbers use the rebiased bits t directly.
        out.u = (e >= 0x7c00 ? t + 0x38000000 :
            e == 0 ? (static_cast<void>(out.f -= 6.103515625e-05f), out.u) : t) | sign;
        return out.f;
#endif
    }
    // Reinterprets a raw 16-bit pattern as a half-precision value.
    static float16_t fromBits(ushort b)
    {
        float16_t result;
        result.w = b;
        return result;
    }
    static float16_t zero()
    {
        float16_t result;
        result.w = (ushort)0;
        return result;
    }
    // Returns the raw 16-bit representation.
    ushort bits() const { return w; }
protected:
    ushort w;
#endif
};
| } | |||
| #endif | |||
| //! @} | |||
| #ifndef __cplusplus | |||
| #include "opencv2/core/fast_math.hpp" // define cvRound(double) | |||
| #endif | |||
| #endif // OPENCV_CORE_CVDEF_H | |||
| @@ -0,0 +1,190 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CORE_CVSTD_HPP | |||
| #define OPENCV_CORE_CVSTD_HPP | |||
| #ifndef __cplusplus | |||
| # error cvstd.hpp header must be compiled as C++ | |||
| #endif | |||
| #include "opencv2/core/cvdef.h" | |||
| #include <cstddef> | |||
| #include <cstring> | |||
| #include <cctype> | |||
| #include <string> | |||
| // import useful primitives from stl | |||
| # include <algorithm> | |||
| # include <utility> | |||
| # include <cstdlib> //for abs(int) | |||
| # include <cmath> | |||
| namespace cv | |||
| { | |||
| static inline uchar abs(uchar a) { return a; } | |||
| static inline ushort abs(ushort a) { return a; } | |||
| static inline unsigned abs(unsigned a) { return a; } | |||
| static inline uint64 abs(uint64 a) { return a; } | |||
| using std::min; | |||
| using std::max; | |||
| using std::abs; | |||
| using std::swap; | |||
| using std::sqrt; | |||
| using std::exp; | |||
| using std::pow; | |||
| using std::log; | |||
| } | |||
| #include "cvstd_wrapper.hpp" | |||
| namespace cv { | |||
| //! @addtogroup core_utils | |||
| //! @{ | |||
| //////////////////////////// memory management functions //////////////////////////// | |||
| /** @brief Allocates an aligned memory buffer. | |||
| The function allocates the buffer of the specified size and returns it. When the buffer size is 16 | |||
| bytes or more, the returned buffer is aligned to 16 bytes. | |||
| @param bufSize Allocated buffer size. | |||
| */ | |||
| CV_EXPORTS void* fastMalloc(size_t bufSize); | |||
| /** @brief Deallocates a memory buffer. | |||
| The function deallocates the buffer allocated with fastMalloc . If NULL pointer is passed, the | |||
| function does nothing. C version of the function clears the pointer *pptr* to avoid problems with | |||
| double memory deallocation. | |||
| @param ptr Pointer to the allocated buffer. | |||
| */ | |||
| CV_EXPORTS void fastFree(void* ptr); | |||
| /*! | |||
| The STL-compliant memory Allocator based on cv::fastMalloc() and cv::fastFree() | |||
| */ | |||
| template<typename _Tp> class Allocator | |||
| { | |||
| public: | |||
| typedef _Tp value_type; | |||
| typedef value_type* pointer; | |||
| typedef const value_type* const_pointer; | |||
| typedef value_type& reference; | |||
| typedef const value_type& const_reference; | |||
| typedef size_t size_type; | |||
| typedef ptrdiff_t difference_type; | |||
| template<typename U> class rebind { typedef Allocator<U> other; }; | |||
| explicit Allocator() {} | |||
| ~Allocator() {} | |||
| explicit Allocator(Allocator const&) {} | |||
| template<typename U> | |||
| explicit Allocator(Allocator<U> const&) {} | |||
| // address | |||
| pointer address(reference r) { return &r; } | |||
| const_pointer address(const_reference r) { return &r; } | |||
| pointer allocate(size_type count, const void* =0) { return reinterpret_cast<pointer>(fastMalloc(count * sizeof (_Tp))); } | |||
| void deallocate(pointer p, size_type) { fastFree(p); } | |||
| void construct(pointer p, const _Tp& v) { new(static_cast<void*>(p)) _Tp(v); } | |||
| void destroy(pointer p) { p->~_Tp(); } | |||
| size_type max_size() const { return cv::max(static_cast<_Tp>(-1)/sizeof(_Tp), 1); } | |||
| }; | |||
| //! @} core_utils | |||
| //! @endcond | |||
| //! @addtogroup core_basic | |||
| //! @{ | |||
| //////////////////////////////// string class //////////////////////////////// | |||
| class CV_EXPORTS FileNode; //for string constructor from FileNode | |||
| typedef std::string String; | |||
| #ifndef OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS | |||
| //! @cond IGNORED | |||
namespace details {
// std::tolower/std::toupper require their int argument to be representable
// as unsigned char (or be EOF); passing a plain char directly is undefined
// behavior for negative values (bytes >= 0x80 on platforms where char is
// signed). Fixed: cast through unsigned char before widening to int.
static inline char char_tolower(char ch)
{
    return (char)std::tolower((int)(unsigned char)ch);
}
static inline char char_toupper(char ch)
{
    return (char)std::toupper((int)(unsigned char)ch);
}
} // namespace details
| //! @endcond | |||
| static inline std::string toLowerCase(const std::string& str) | |||
| { | |||
| std::string result(str); | |||
| std::transform(result.begin(), result.end(), result.begin(), details::char_tolower); | |||
| return result; | |||
| } | |||
| static inline std::string toUpperCase(const std::string& str) | |||
| { | |||
| std::string result(str); | |||
| std::transform(result.begin(), result.end(), result.begin(), details::char_toupper); | |||
| return result; | |||
| } | |||
| #endif // OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS | |||
| //! @} core_basic | |||
| } // cv | |||
| #endif //OPENCV_CORE_CVSTD_HPP | |||
| @@ -0,0 +1,197 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CORE_CVSTDINL_HPP | |||
| #define OPENCV_CORE_CVSTDINL_HPP | |||
| #include <complex> | |||
| #include <ostream> | |||
| #include <sstream> | |||
| //! @cond IGNORED | |||
| #ifdef _MSC_VER | |||
| #pragma warning( push ) | |||
| #pragma warning( disable: 4127 ) | |||
| #endif | |||
| namespace cv | |||
| { | |||
// DataType specialization for std::complex<_Tp>: describes a complex value
// to OpenCV's type system as a 2-channel element whose channel depth/format
// comes from _Tp, so std::complex can be used as a Mat element type.
template<typename _Tp> class DataType< std::complex<_Tp> >
{
public:
    typedef std::complex<_Tp> value_type;
    typedef value_type work_type;
    typedef _Tp channel_type;
    enum { generic_type = 0,
        depth = DataType<channel_type>::depth,
        channels = 2,
        // fmt packs the channel count (minus one) into the upper byte.
        fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
        type = CV_MAKETYPE(depth, channels) };
    typedef Vec<channel_type, channels> vec_type;
};
// Streams a lazily-formatted matrix: resets the formatter, then writes each
// text chunk produced by next() until it returns NULL.
static inline
std::ostream& operator << (std::ostream& out, Ptr<Formatted> fmtd)
{
    fmtd->reset();
    for(const char* str = fmtd->next(); str; str = fmtd->next())
        out << str;
    return out;
}
// Streams a Mat using the default Formatter.
static inline
std::ostream& operator << (std::ostream& out, const Mat& mtx)
{
    return out << Formatter::get()->format(mtx);
}
// Streams a UMat by first obtaining a read-access host-side Mat view.
static inline
std::ostream& operator << (std::ostream& out, const UMat& m)
{
    return out << m.getMat(ACCESS_READ);
}
// Streams a complex number as "(re,im)".
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Complex<_Tp>& c)
{
    return out << "(" << c.re << "," << c.im << ")";
}
// Streams a vector of 2D points by wrapping it in a Mat header and using the
// default formatter.
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const std::vector<Point_<_Tp> >& vec)
{
    return out << Formatter::get()->format(Mat(vec));
}
// Streams a vector of 3D points, same mechanism as above.
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const std::vector<Point3_<_Tp> >& vec)
{
    return out << Formatter::get()->format(Mat(vec));
}
// Streams a small fixed-size matrix via the default formatter.
template<typename _Tp, int m, int n> static inline
std::ostream& operator << (std::ostream& out, const Matx<_Tp, m, n>& matx)
{
    return out << Formatter::get()->format(Mat(matx));
}
| template<typename _Tp> static inline | |||
| std::ostream& operator << (std::ostream& out, const Point_<_Tp>& p) | |||
| { | |||
| out << "[" << p.x << ", " << p.y << "]"; | |||
| return out; | |||
| } | |||
| template<typename _Tp> static inline | |||
| std::ostream& operator << (std::ostream& out, const Point3_<_Tp>& p) | |||
| { | |||
| out << "[" << p.x << ", " << p.y << ", " << p.z << "]"; | |||
| return out; | |||
| } | |||
// Streams a fixed-size vector as "[v0, v1, ..., vn-1]". Elements whose
// channel depth is at most CV_32S are cast to int before printing so that
// 8/16-bit integer channels come out as numbers rather than raw characters.
template<typename _Tp, int n> static inline
std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec)
{
    out << "[";
    if (cv::traits::Depth<_Tp>::value <= CV_32S)
    {
        for (int i = 0; i < n - 1; ++i) {
            out << (int)vec[i] << ", ";
        }
        out << (int)vec[n-1] << "]";
    }
    else
    {
        for (int i = 0; i < n - 1; ++i) {
            out << vec[i] << ", ";
        }
        out << vec[n-1] << "]";
    }
    return out;
}
| template<typename _Tp> static inline | |||
| std::ostream& operator << (std::ostream& out, const Size_<_Tp>& size) | |||
| { | |||
| return out << "[" << size.width << " x " << size.height << "]"; | |||
| } | |||
| template<typename _Tp> static inline | |||
| std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect) | |||
| { | |||
| return out << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]"; | |||
| } | |||
| static inline std::ostream& operator << (std::ostream& out, const MatSize& msize) | |||
| { | |||
| int i, dims = msize.dims(); | |||
| for( i = 0; i < dims; i++ ) | |||
| { | |||
| out << msize[i]; | |||
| if( i < dims-1 ) | |||
| out << " x "; | |||
| } | |||
| return out; | |||
| } | |||
// Streams a range as the half-open interval "[start : end)".
// NOTE(review): the Range is taken by non-const reference, so const Range
// objects and temporaries cannot be streamed; consider const& — but first
// verify no conflicting const-ref operator<< overload exists elsewhere.
static inline std::ostream &operator<< (std::ostream &s, cv::Range &r)
{
    return s << "[" << r.start << " : " << r.end << ")";
}
| } // cv | |||
| #ifdef _MSC_VER | |||
| #pragma warning( pop ) | |||
| #endif | |||
| //! @endcond | |||
| #endif // OPENCV_CORE_CVSTDINL_HPP | |||
| @@ -0,0 +1,154 @@ | |||
| // This file is part of OpenCV project. | |||
| // It is subject to the license terms in the LICENSE file found in the top-level directory | |||
| // of this distribution and at http://opencv.org/license.html. | |||
| #ifndef OPENCV_CORE_CVSTD_WRAPPER_HPP | |||
| #define OPENCV_CORE_CVSTD_WRAPPER_HPP | |||
| #include "opencv2/core/cvdef.h" | |||
| #include <string> | |||
| #include <memory> // std::shared_ptr | |||
| #include <type_traits> // std::enable_if | |||
| namespace cv { | |||
| using std::nullptr_t; | |||
| //! @addtogroup core_basic | |||
| //! @{ | |||
| #ifdef CV_DOXYGEN | |||
| template <typename _Tp> using Ptr = std::shared_ptr<_Tp>; // In ideal world it should look like this, but we need some compatibility workarounds below | |||
| template<typename _Tp, typename ... A1> static inline | |||
| Ptr<_Tp> makePtr(const A1&... a1) { return std::make_shared<_Tp>(a1...); } | |||
| #else // cv::Ptr with compatibility workarounds | |||
| // It should be defined for C-API types only. | |||
| // C++ types should use regular "delete" operator. | |||
| template<typename Y> struct DefaultDeleter; | |||
| #if 0 | |||
| { | |||
| void operator()(Y* p) const; | |||
| }; | |||
| #endif | |||
namespace sfinae {
// Compile-time detector: has_parenthesis_operator<C, Ret, Args...>::value is
// true iff C declares an operator()(Args...) whose decayed result type is
// exactly Ret. Used below to probe DefaultDeleter<T> for operator()(T*).
template<typename C, typename Ret, typename... Args>
struct has_parenthesis_operator
{
private:
    // Selected when C::operator()(Args...) is well-formed and returns Ret.
    template<typename T>
    static CV_CONSTEXPR std::true_type check(typename std::is_same<typename std::decay<decltype(std::declval<T>().operator()(std::declval<Args>()...))>::type, Ret>::type*);
    // Fallback overload chosen when substitution above fails.
    template<typename> static CV_CONSTEXPR std::false_type check(...);
    typedef decltype(check<C>(0)) type;
public:
#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
    static CV_CONSTEXPR bool value = type::value;
#else
    // support MSVS 2013
    static const int value = type::value;
#endif
};
} // namespace sfinae
// has_custom_delete<T> derives from std::true_type when a specialization
// DefaultDeleter<T> providing operator()(T*) exists, std::false_type
// otherwise. Drives the tag-dispatch in Ptr's raw-pointer ctor/reset.
template <typename T, typename = void>
struct has_custom_delete
        : public std::false_type {};
// Force has_custom_delete to std::false_type when NVCC is compiling CUDA source files
#ifndef __CUDACC__
template <typename T>
struct has_custom_delete<T, typename std::enable_if< sfinae::has_parenthesis_operator<DefaultDeleter<T>, void, T*>::value >::type >
        : public std::true_type {};
#endif
// cv::Ptr<T>: backward-compatible smart pointer built on std::shared_ptr<T>.
// Beyond the shared_ptr interface it adds:
//  * construction/reset from a raw Y*, which attaches DefaultDeleter<Y> when
//    such a specialization exists (legacy C-API types), plain delete otherwise;
//  * OpenCV 3.x helpers: release(), empty(), implicit conversion to T*, and
//    staticCast()/constCast()/dynamicCast().
template<typename T>
struct Ptr : public std::shared_ptr<T>
{
#if 0
    using std::shared_ptr<T>::shared_ptr;  // GCC 5.x can't handle this
#else
    // Ctor set replicating the inherited shared_ptr constructors (see above).
    inline Ptr() CV_NOEXCEPT : std::shared_ptr<T>() {}
    inline Ptr(nullptr_t) CV_NOEXCEPT : std::shared_ptr<T>(nullptr) {}
    template<typename Y, typename D> inline Ptr(Y* p, D d) : std::shared_ptr<T>(p, d) {}
    template<typename D> inline Ptr(nullptr_t, D d) : std::shared_ptr<T>(nullptr, d) {}
    // Aliasing constructor: shares ownership with r but points at ptr.
    template<typename Y> inline Ptr(const Ptr<Y>& r, T* ptr) CV_NOEXCEPT : std::shared_ptr<T>(r, ptr) {}
    inline Ptr(const Ptr<T>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
    inline Ptr(Ptr<T>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
    template<typename Y> inline Ptr(const Ptr<Y>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
    template<typename Y> inline Ptr(Ptr<Y>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
#endif
    inline Ptr(const std::shared_ptr<T>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
    inline Ptr(std::shared_ptr<T>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
    // Overload with custom DefaultDeleter: Ptr<IplImage>(...)
    template<typename Y>
    inline Ptr(const std::true_type&, Y* ptr) : std::shared_ptr<T>(ptr, DefaultDeleter<Y>()) {}
    // Overload without custom deleter: Ptr<std::string>(...);
    template<typename Y>
    inline Ptr(const std::false_type&, Y* ptr) : std::shared_ptr<T>(ptr) {}
    // Tag-dispatches on has_custom_delete<Y> to one of the two ctors above.
    template<typename Y = T>
    inline Ptr(Y* ptr) : Ptr(has_custom_delete<Y>(), ptr) {}
    // Overload with custom DefaultDeleter: Ptr<IplImage>(...)
    template<typename Y>
    inline void reset(const std::true_type&, Y* ptr) { std::shared_ptr<T>::reset(ptr, DefaultDeleter<Y>()); }
    // Overload without custom deleter: Ptr<std::string>(...);
    template<typename Y>
    inline void reset(const std::false_type&, Y* ptr) { std::shared_ptr<T>::reset(ptr); }
    // Tag-dispatches on has_custom_delete<Y>, mirroring the raw-pointer ctor.
    template<typename Y>
    inline void reset(Y* ptr) { Ptr<T>::reset(has_custom_delete<Y>(), ptr); }
    template<class Y, class Deleter>
    void reset(Y* ptr, Deleter d) { std::shared_ptr<T>::reset(ptr, d); }
    void reset() CV_NOEXCEPT { std::shared_ptr<T>::reset(); }
    Ptr& operator=(const Ptr& o) { std::shared_ptr<T>::operator =(o); return *this; }
    template<typename Y> inline Ptr& operator=(const Ptr<Y>& o) { std::shared_ptr<T>::operator =(o); return *this; }
    T* operator->() const CV_NOEXCEPT { return std::shared_ptr<T>::get();}
    typename std::add_lvalue_reference<T>::type operator*() const CV_NOEXCEPT { return *std::shared_ptr<T>::get(); }
    // OpenCV 3.x methods (not a part of standard C++ library)
    inline void release() { std::shared_ptr<T>::reset(); }
    inline operator T* () const { return std::shared_ptr<T>::get(); }
    inline bool empty() const { return std::shared_ptr<T>::get() == nullptr; }
    template<typename Y> inline
    Ptr<Y> staticCast() const CV_NOEXCEPT { return std::static_pointer_cast<Y>(*this); }
    template<typename Y> inline
    Ptr<Y> constCast() const CV_NOEXCEPT { return std::const_pointer_cast<Y>(*this); }
    template<typename Y> inline
    Ptr<Y> dynamicCast() const CV_NOEXCEPT { return std::dynamic_pointer_cast<Y>(*this); }
};
| template<typename _Tp, typename ... A1> static inline | |||
| Ptr<_Tp> makePtr(const A1&... a1) | |||
| { | |||
| static_assert( !has_custom_delete<_Tp>::value, "Can't use this makePtr with custom DefaultDeleter"); | |||
| return (Ptr<_Tp>)std::make_shared<_Tp>(a1...); | |||
| } | |||
| #endif // CV_DOXYGEN | |||
| //! @} core_basic | |||
| } // cv | |||
| #endif //OPENCV_CORE_CVSTD_WRAPPER_HPP | |||
| @@ -0,0 +1,71 @@ | |||
| // This file is part of OpenCV project. | |||
| // It is subject to the license terms in the LICENSE file found in the top-level directory | |||
| // of this distribution and at http://opencv.org/license.html. | |||
| #ifndef OPENCV_CORE_ASYNC_PROMISE_HPP | |||
| #define OPENCV_CORE_ASYNC_PROMISE_HPP | |||
| #include "../async.hpp" | |||
| #include "exception_ptr.hpp" | |||
| namespace cv { | |||
| /** @addtogroup core_async | |||
| @{ | |||
| */ | |||
| /** @brief Provides result of asynchronous operations | |||
| */ | |||
class CV_EXPORTS AsyncPromise
{
public:
    ~AsyncPromise() CV_NOEXCEPT;
    AsyncPromise() CV_NOEXCEPT;
    // Copy ctor is explicit: copy-initialization ("AsyncPromise b = a;") is
    // disallowed, direct initialization ("AsyncPromise b(a);") still works.
    explicit AsyncPromise(const AsyncPromise& o) CV_NOEXCEPT;
    AsyncPromise& operator=(const AsyncPromise& o) CV_NOEXCEPT;
    // Detaches this handle from the underlying implementation object.
    void release() CV_NOEXCEPT;
    /** Returns associated AsyncArray
    @note Can be called once
    */
    AsyncArray getArrayResult();
    /** Stores asynchronous result.
    @param[in] value result
    */
    void setValue(InputArray value);
    // TODO "move" setters
#if CV__EXCEPTION_PTR
    /** Stores exception.
    @param[in] exception exception to be raised in AsyncArray
    */
    void setException(std::exception_ptr exception);
#endif
    /** Stores exception.
    @param[in] exception exception to be raised in AsyncArray
    */
    void setException(const cv::Exception& exception);
#ifdef CV_CXX11
    // Move operations transfer the raw Impl pointer; the moved-from promise
    // is left holding the other side's (possibly NULL) pointer.
    explicit AsyncPromise(AsyncPromise&& o) { p = o.p; o.p = NULL; }
    AsyncPromise& operator=(AsyncPromise&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
#endif
    // PImpl
    typedef struct AsyncArray::Impl Impl; friend struct AsyncArray::Impl;
    inline void* _getImpl() const CV_NOEXCEPT { return p; }
protected:
    Impl* p;
};
| //! @} | |||
| } // namespace | |||
| #endif // OPENCV_CORE_ASYNC_PROMISE_HPP | |||
| @@ -0,0 +1,27 @@ | |||
| // This file is part of OpenCV project. | |||
| // It is subject to the license terms in the LICENSE file found in the top-level directory | |||
| // of this distribution and at http://opencv.org/license.html. | |||
| #ifndef OPENCV_CORE_DETAILS_EXCEPTION_PTR_H | |||
| #define OPENCV_CORE_DETAILS_EXCEPTION_PTR_H | |||
| #ifndef CV__EXCEPTION_PTR | |||
| # if defined(__ANDROID__) && defined(ATOMIC_INT_LOCK_FREE) && ATOMIC_INT_LOCK_FREE < 2 | |||
| # define CV__EXCEPTION_PTR 0 // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938 | |||
| # elif defined(CV_CXX11) | |||
| # define CV__EXCEPTION_PTR 1 | |||
| # elif defined(_MSC_VER) | |||
| # define CV__EXCEPTION_PTR (_MSC_VER >= 1600) | |||
| # elif defined(__clang__) | |||
| # define CV__EXCEPTION_PTR 0 // C++11 only (see above) | |||
| # elif defined(__GNUC__) && defined(__GXX_EXPERIMENTAL_CXX0X__) | |||
| # define CV__EXCEPTION_PTR (__GXX_EXPERIMENTAL_CXX0X__ > 0) | |||
| # endif | |||
| #endif | |||
| #ifndef CV__EXCEPTION_PTR | |||
| # define CV__EXCEPTION_PTR 0 | |||
| #elif CV__EXCEPTION_PTR | |||
| # include <exception> // std::exception_ptr | |||
| #endif | |||
| #endif // OPENCV_CORE_DETAILS_EXCEPTION_PTR_H | |||
| @@ -0,0 +1,184 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors as is and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the copyright holders or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CORE_DIRECTX_HPP | |||
| #define OPENCV_CORE_DIRECTX_HPP | |||
| #include "mat.hpp" | |||
| #include "ocl.hpp" | |||
| #if !defined(__d3d11_h__) | |||
| struct ID3D11Device; | |||
| struct ID3D11Texture2D; | |||
| #endif | |||
| #if !defined(__d3d10_h__) | |||
| struct ID3D10Device; | |||
| struct ID3D10Texture2D; | |||
| #endif | |||
| #if !defined(_D3D9_H_) | |||
| struct IDirect3DDevice9; | |||
| struct IDirect3DDevice9Ex; | |||
| struct IDirect3DSurface9; | |||
| #endif | |||
| namespace cv { namespace directx { | |||
| namespace ocl { | |||
| using namespace cv::ocl; | |||
| //! @addtogroup core_directx | |||
| // This section describes OpenCL and DirectX interoperability. | |||
| // | |||
| // To enable DirectX support, configure OpenCV using CMake with WITH_DIRECTX=ON . Note, DirectX is | |||
| // supported only on Windows. | |||
| // | |||
| // To use OpenCL functionality you should first initialize OpenCL context from DirectX resource. | |||
| // | |||
| //! @{ | |||
| // TODO static functions in the Context class | |||
| //! @brief Creates OpenCL context from D3D11 device | |||
| // | |||
| //! @param pD3D11Device - pointer to D3D11 device | |||
| //! @return Returns reference to OpenCL Context | |||
| CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device); | |||
| //! @brief Creates OpenCL context from D3D10 device | |||
| // | |||
| //! @param pD3D10Device - pointer to D3D10 device | |||
| //! @return Returns reference to OpenCL Context | |||
| CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device); | |||
| //! @brief Creates OpenCL context from Direct3DDevice9Ex device | |||
| // | |||
| //! @param pDirect3DDevice9Ex - pointer to Direct3DDevice9Ex device | |||
| //! @return Returns reference to OpenCL Context | |||
| CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex); | |||
| //! @brief Creates OpenCL context from Direct3DDevice9 device | |||
| // | |||
| //! @param pDirect3DDevice9 - pointer to Direct3Device9 device | |||
| //! @return Returns reference to OpenCL Context | |||
| CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9); | |||
| //! @} | |||
| } // namespace cv::directx::ocl | |||
//! @addtogroup core_directx
//! @{
//! @brief Converts InputArray to ID3D11Texture2D. If destination texture format is DXGI_FORMAT_NV12 then
//! input UMat expected to be in BGR format and data will be downsampled and color-converted to NV12.
//
//! @note Note: Destination texture must be allocated by application. Function does memory copy from src to
//! pD3D11Texture2D
//
//! @param src - source InputArray
//! @param pD3D11Texture2D - destination D3D11 texture
CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D);
//! @brief Converts ID3D11Texture2D to OutputArray. If input texture format is DXGI_FORMAT_NV12 then
//! data will be upsampled and color-converted to BGR format.
//
//! @note Note: Destination matrix will be re-allocated if it has not enough memory to match texture size.
//! function does memory copy from pD3D11Texture2D to dst
//
//! @param pD3D11Texture2D - source D3D11 texture
//! @param dst - destination OutputArray
CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst);
//! @brief Converts InputArray to ID3D10Texture2D
//
//! @note Note: function does memory copy from src to
//! pD3D10Texture2D
//
//! @param src - source InputArray
//! @param pD3D10Texture2D - destination D3D10 texture
CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D);
//! @brief Converts ID3D10Texture2D to OutputArray
//
//! @note Note: function does memory copy from pD3D10Texture2D
//! to dst
//
//! @param pD3D10Texture2D - source D3D10 texture
//! @param dst - destination OutputArray
CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst);
//! @brief Converts InputArray to IDirect3DSurface9
//
//! @note Note: function does memory copy from src to
//! pDirect3DSurface9
//
//! @param src - source InputArray
//! @param pDirect3DSurface9 - destination D3D9 surface
//! @param surfaceSharedHandle - shared handle
CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL);
//! @brief Converts IDirect3DSurface9 to OutputArray
//
//! @note Note: function does memory copy from pDirect3DSurface9
//! to dst
//
//! @param pDirect3DSurface9 - source D3D9 surface
//! @param dst - destination OutputArray
//! @param surfaceSharedHandle - shared handle
CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL);
//! @brief Get OpenCV type from DirectX type
//! @param iDXGI_FORMAT - enum DXGI_FORMAT for D3D10/D3D11
//! @return OpenCV type or -1 if there is no equivalent
CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11
//! @brief Get OpenCV type from DirectX type
//! @param iD3DFORMAT - enum D3DTYPE for D3D9
//! @return OpenCV type or -1 if there is no equivalent
CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9
//! @}
| } } // namespace cv::directx | |||
| #endif // OPENCV_CORE_DIRECTX_HPP | |||
| @@ -0,0 +1,400 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CORE_EIGEN_HPP | |||
| #define OPENCV_CORE_EIGEN_HPP | |||
| #ifndef EIGEN_WORLD_VERSION | |||
| #error "Wrong usage of OpenCV's Eigen utility header. Include Eigen's headers first. See https://github.com/opencv/opencv/issues/17366" | |||
| #endif | |||
| #include "opencv2/core.hpp" | |||
| #if defined _MSC_VER && _MSC_VER >= 1200 | |||
| #define NOMINMAX // fix https://github.com/opencv/opencv/issues/17548 | |||
| #pragma warning( disable: 4714 ) //__forceinline is not inlined | |||
| #pragma warning( disable: 4127 ) //conditional expression is constant | |||
| #pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data | |||
| #endif | |||
| #if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 \ | |||
| && defined(CV_CXX11) && defined(CV_CXX_STD_ARRAY) | |||
| #include <unsupported/Eigen/CXX11/Tensor> | |||
| #define OPENCV_EIGEN_TENSOR_SUPPORT | |||
| #endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 | |||
| namespace cv | |||
| { | |||
| /** @addtogroup core_eigen | |||
| These functions are provided for OpenCV-Eigen interoperability. They convert `Mat` | |||
| objects to corresponding `Eigen::Matrix` objects and vice-versa. Consult the [Eigen | |||
| documentation](https://eigen.tuxfamily.org/dox/group__TutorialMatrixClass.html) for | |||
| information about the `Matrix` template type. | |||
| @note Using these functions requires the `Eigen/Dense` or similar header to be | |||
| included before this header. | |||
| */ | |||
| //! @{ | |||
| #if defined(OPENCV_EIGEN_TENSOR_SUPPORT) || defined(CV_DOXYGEN) | |||
/** @brief Converts an Eigen::Tensor to a cv::Mat.
The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where:
H = number of rows
W = number of columns
C = number of channels
Usage:
\code
Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
// populate tensor with values
Mat a_mat;
eigen2cv(a_tensor, a_mat);
\endcode
*/
template <typename _Tp, int _layout> static inline
void eigen2cv( const Eigen::Tensor<_Tp, 3, _layout> &src, OutputArray dst )
{
if( !(_layout & Eigen::RowMajorBit) )
{
// Column-major input: materialize a row-major copy first. swap_layout()
// reverses the dimension order, so the {2,1,0} shuffle restores H x W x C.
const std::array<int, 3> shuffle{2, 1, 0};
Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor = src.swap_layout().shuffle(shuffle);
Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), row_major_tensor.data());
_src.copyTo(dst);
}
else
{
// Row-major input already matches cv::Mat memory layout: wrap the tensor's
// buffer in a non-owning Mat header (no copy) and copy into dst once.
Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), (void *)src.data());
_src.copyTo(dst);
}
}
/** @brief Converts a cv::Mat to an Eigen::Tensor.
The method converts a cv::Mat to an Eigen Tensor with shape (H x W x C) where:
H = number of rows
W = number of columns
C = number of channels
Usage:
\code
Mat a_mat(...);
// populate Mat with values
Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
cv2eigen(a_mat, a_tensor);
\endcode
*/
template <typename _Tp, int _layout> static inline
void cv2eigen( const Mat &src, Eigen::Tensor<_Tp, 3, _layout> &dst )
{
if( !(_layout & Eigen::RowMajorBit) )
{
// Column-major destination: fill a temporary row-major tensor through a
// non-owning cv::Mat view, then swap layout + shuffle {2,1,0} to restore
// the H x W x C dimension order expected by the caller.
Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor(src.rows, src.cols, src.channels());
Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), row_major_tensor.data());
if (src.type() == _dst.type())
src.copyTo(_dst);
else
src.convertTo(_dst, _dst.type());
const std::array<int, 3> shuffle{2, 1, 0};
dst = row_major_tensor.swap_layout().shuffle(shuffle);
}
else
{
// Row-major destination: resize and write straight into its buffer via a
// non-owning Mat header; convertTo handles any element-type mismatch.
dst.resize(src.rows, src.cols, src.channels());
Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), dst.data());
if (src.type() == _dst.type())
src.copyTo(_dst);
else
src.convertTo(_dst, _dst.type());
}
}
/** @brief Maps cv::Mat data to an Eigen::TensorMap.
The method wraps an existing Mat data array with an Eigen TensorMap of shape (H x W x C) where:
H = number of rows
W = number of columns
C = number of channels
Explicit instantiation of the return type is required.
@note Caller should be aware of the lifetime of the cv::Mat instance and take appropriate safety measures.
The cv::Mat instance will retain ownership of the data and the Eigen::TensorMap will lose access when the cv::Mat data is deallocated.
The example below initializes a cv::Mat and produces an Eigen::TensorMap:
\code
float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
Mat a_mat(2, 2, CV_32FC3, arr);
Eigen::TensorMap<Eigen::Tensor<float, 3, Eigen::RowMajor>> a_tensormap = cv2eigen_tensormap<float>(a_mat);
\endcode
*/
template <typename _Tp> static inline
Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>> cv2eigen_tensormap(InputArray src)
{
Mat mat = src.getMat();
// The Mat's element type must match _Tp exactly (channels become the 3rd
// tensor dimension); no conversion is possible since the data is aliased.
CV_CheckTypeEQ(mat.type(), CV_MAKETYPE(traits::Type<_Tp>::value, mat.channels()), "");
return Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>>((_Tp *)mat.data, mat.rows, mat.cols, mat.channels());
}
| #endif // OPENCV_EIGEN_TENSOR_SUPPORT | |||
| template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline | |||
| void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst ) | |||
| { | |||
| if( !(src.Flags & Eigen::RowMajorBit) ) | |||
| { | |||
| Mat _src(src.cols(), src.rows(), traits::Type<_Tp>::value, | |||
| (void*)src.data(), src.outerStride()*sizeof(_Tp)); | |||
| transpose(_src, dst); | |||
| } | |||
| else | |||
| { | |||
| Mat _src(src.rows(), src.cols(), traits::Type<_Tp>::value, | |||
| (void*)src.data(), src.outerStride()*sizeof(_Tp)); | |||
| _src.copyTo(dst); | |||
| } | |||
| } | |||
| // Matx case | |||
| template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline | |||
| void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, | |||
| Matx<_Tp, _rows, _cols>& dst ) | |||
| { | |||
| if( !(src.Flags & Eigen::RowMajorBit) ) | |||
| { | |||
| dst = Matx<_Tp, _cols, _rows>(static_cast<const _Tp*>(src.data())).t(); | |||
| } | |||
| else | |||
| { | |||
| dst = Matx<_Tp, _rows, _cols>(static_cast<const _Tp*>(src.data())); | |||
| } | |||
| } | |||
// Copies a cv::Mat into a fixed-size Eigen matrix. The Eigen storage is
// wrapped in a non-owning cv::Mat header and written directly.
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
{
CV_DbgAssert(src.rows == _rows && src.cols == _cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
// Column-major destination: a row-major view of dst's buffer with swapped
// dimensions; writing the transpose yields the correct element order.
const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else if( src.cols == src.rows )
{
// Type conversion needed: convert first, then transpose in place
// (in-place transpose is only valid for square matrices).
src.convertTo(_dst, _dst.type());
transpose(_dst, _dst);
}
else
// Non-square with conversion: transpose into a temporary, then convert.
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
// Row-major destination: layouts agree; convertTo also handles same-type
// copies, so a single call covers both cases.
const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
| // Matx case | |||
| template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline | |||
| void cv2eigen( const Matx<_Tp, _rows, _cols>& src, | |||
| Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst ) | |||
| { | |||
| if( !(dst.Flags & Eigen::RowMajorBit) ) | |||
| { | |||
| const Mat _dst(_cols, _rows, traits::Type<_Tp>::value, | |||
| dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); | |||
| transpose(src, _dst); | |||
| } | |||
| else | |||
| { | |||
| const Mat _dst(_rows, _cols, traits::Type<_Tp>::value, | |||
| dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); | |||
| Mat(src).copyTo(_dst); | |||
| } | |||
| } | |||
// Copies a cv::Mat into a dynamically-sized Eigen matrix, resizing the
// destination first and writing through a non-owning cv::Mat view of its buffer.
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
{
dst.resize(src.rows, src.cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
// Column-major destination: view with swapped dimensions and write the
// transpose so memory receives column-major element order.
const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else if( src.cols == src.rows )
{
// Convert first, then transpose in place (square matrices only).
src.convertTo(_dst, _dst.type());
transpose(_dst, _dst);
}
else
// Non-square with conversion: transpose into a temporary, then convert.
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
// Row-major destination: layouts agree; convertTo covers copy + conversion.
const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
| // Matx case | |||
| template<typename _Tp, int _rows, int _cols> static inline | |||
| void cv2eigen( const Matx<_Tp, _rows, _cols>& src, | |||
| Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst ) | |||
| { | |||
| dst.resize(_rows, _cols); | |||
| if( !(dst.Flags & Eigen::RowMajorBit) ) | |||
| { | |||
| const Mat _dst(_cols, _rows, traits::Type<_Tp>::value, | |||
| dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); | |||
| transpose(src, _dst); | |||
| } | |||
| else | |||
| { | |||
| const Mat _dst(_rows, _cols, traits::Type<_Tp>::value, | |||
| dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); | |||
| Mat(src).copyTo(_dst); | |||
| } | |||
| } | |||
// Copies a single-column cv::Mat into a dynamic Eigen column vector.
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
{
CV_Assert(src.cols == 1);
dst.resize(src.rows);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
// Column-major storage: view dst's buffer as a 1 x rows row and fill it
// by transposing (or converting the transpose of) the source column.
const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
// Row-major storage: element order matches a rows x 1 cv::Mat directly.
const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
| // Matx case | |||
| template<typename _Tp, int _rows> static inline | |||
| void cv2eigen( const Matx<_Tp, _rows, 1>& src, | |||
| Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst ) | |||
| { | |||
| dst.resize(_rows); | |||
| if( !(dst.Flags & Eigen::RowMajorBit) ) | |||
| { | |||
| const Mat _dst(1, _rows, traits::Type<_Tp>::value, | |||
| dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); | |||
| transpose(src, _dst); | |||
| } | |||
| else | |||
| { | |||
| const Mat _dst(_rows, 1, traits::Type<_Tp>::value, | |||
| dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); | |||
| src.copyTo(_dst); | |||
| } | |||
| } | |||
// Copies a single-row cv::Mat into a dynamic 1 x N Eigen row vector.
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
{
CV_Assert(src.rows == 1);
dst.resize(src.cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
// Column-major storage: view dst's buffer as a cols x 1 column and fill
// it by transposing (or converting the transpose of) the source row.
const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
// Row-major storage: element order matches a 1 x cols cv::Mat directly.
const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
| //Matx | |||
| template<typename _Tp, int _cols> static inline | |||
| void cv2eigen( const Matx<_Tp, 1, _cols>& src, | |||
| Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst ) | |||
| { | |||
| dst.resize(_cols); | |||
| if( !(dst.Flags & Eigen::RowMajorBit) ) | |||
| { | |||
| const Mat _dst(_cols, 1, traits::Type<_Tp>::value, | |||
| dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); | |||
| transpose(src, _dst); | |||
| } | |||
| else | |||
| { | |||
| const Mat _dst(1, _cols, traits::Type<_Tp>::value, | |||
| dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); | |||
| Mat(src).copyTo(_dst); | |||
| } | |||
| } | |||
| //! @} | |||
| } // cv | |||
| #endif | |||
| @@ -0,0 +1,408 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Copyright (C) 2015, Itseez Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_CORE_FAST_MATH_HPP | |||
| #define OPENCV_CORE_FAST_MATH_HPP | |||
| #include "opencv2/core/cvdef.h" | |||
| //! @addtogroup core_utils | |||
| //! @{ | |||
| /****************************************************************************************\ | |||
| * fast math * | |||
| \****************************************************************************************/ | |||
| #ifdef __cplusplus | |||
| # include <cmath> | |||
| #else | |||
| # ifdef __BORLANDC__ | |||
| # include <fastmath.h> | |||
| # else | |||
| # include <math.h> | |||
| # endif | |||
| #endif | |||
| #if defined(__CUDACC__) | |||
| // nothing, intrinsics/asm code is not supported | |||
| #else | |||
| #if ((defined _MSC_VER && defined _M_X64) \ | |||
| || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \ | |||
| && !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H) | |||
| #include <emmintrin.h> | |||
| #endif | |||
| #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \ | |||
| && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H) | |||
| #include <altivec.h> | |||
| #endif | |||
| #if defined(CV_INLINE_ROUND_FLT) | |||
| // user-specified version | |||
| // CV_INLINE_ROUND_DBL should be defined too | |||
| #elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ | |||
| // 1. general scheme | |||
| #define ARM_ROUND(_value, _asm_string) \ | |||
| int res; \ | |||
| float temp; \ | |||
| CV_UNUSED(temp); \ | |||
| __asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \ | |||
| return res | |||
| // 2. version for double | |||
| #ifdef __clang__ | |||
| #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]") | |||
| #else | |||
| #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]") | |||
| #endif | |||
| // 3. version for float | |||
| #define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") | |||
| #elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 | |||
| // P8 and newer machines can convert fp32/64 to int quickly. | |||
| #define CV_INLINE_ROUND_DBL(value) \ | |||
| int out; \ | |||
| double temp; \ | |||
| __asm__( "fctiw %[temp],%[in]\n\tmfvsrwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \ | |||
| return out; | |||
| // FP32 also works with FP64 routine above | |||
| #define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value) | |||
| #endif | |||
| #ifdef CV_INLINE_ISINF_FLT | |||
| // user-specified version | |||
| // CV_INLINE_ISINF_DBL should be defined too | |||
| #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class) | |||
| #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30); | |||
| #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value) | |||
| #endif | |||
| #ifdef CV_INLINE_ISNAN_FLT | |||
| // user-specified version | |||
| // CV_INLINE_ISNAN_DBL should be defined too | |||
| #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class) | |||
| #define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40); | |||
| #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value) | |||
| #endif | |||
| #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \ | |||
| && ( \ | |||
| defined(__x86_64__) || defined(__i686__) \ | |||
| || defined(__arm__) \ | |||
| || defined(__PPC64__) \ | |||
| ) | |||
| /* Let builtin C math functions when available. Dedicated hardware is available to | |||
| round and convert FP values. */ | |||
| #define OPENCV_USE_FASTMATH_BUILTINS 1 | |||
| #endif | |||
| /* Enable builtin math functions if possible, desired, and available. | |||
| Note, not all math functions inline equally. E.g lrint will not inline | |||
| without the -fno-math-errno option. */ | |||
| #if defined(CV_ICC) | |||
| // nothing | |||
| #elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS | |||
| #if defined(__clang__) | |||
| #define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS | |||
| #if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan) | |||
| #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value); | |||
| #endif | |||
| #if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan) | |||
| #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value); | |||
| #endif | |||
| #if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf) | |||
| #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value); | |||
| #endif | |||
| #if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf) | |||
| #define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value); | |||
| #endif | |||
| #elif defined(__GNUC__) | |||
| #define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS | |||
| #if !defined(CV_INLINE_ISNAN_DBL) | |||
| #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value); | |||
| #endif | |||
| #if !defined(CV_INLINE_ISNAN_FLT) | |||
| #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value); | |||
| #endif | |||
| #if !defined(CV_INLINE_ISINF_DBL) | |||
| #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value); | |||
| #endif | |||
| #if !defined(CV_INLINE_ISINF_FLT) | |||
| #define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value); | |||
| #endif | |||
| #elif defined(_MSC_VER) | |||
| #if !defined(CV_INLINE_ISNAN_DBL) | |||
| #define CV_INLINE_ISNAN_DBL(value) return isnan(value); | |||
| #endif | |||
| #if !defined(CV_INLINE_ISNAN_FLT) | |||
| #define CV_INLINE_ISNAN_FLT(value) return isnan(value); | |||
| #endif | |||
| #if !defined(CV_INLINE_ISINF_DBL) | |||
| #define CV_INLINE_ISINF_DBL(value) return isinf(value); | |||
| #endif | |||
| #if !defined(CV_INLINE_ISINF_FLT) | |||
| #define CV_INLINE_ISINF_FLT(value) return isinf(value); | |||
| #endif | |||
| #endif | |||
| #endif | |||
| #endif // defined(__CUDACC__) | |||
/** @brief Rounds floating-point number to the nearest integer
@param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
result is not defined.
*/
CV_INLINE int
cvRound( double value )
{
#if defined CV_INLINE_ROUND_DBL
// Platform-provided rounding (inline asm / builtin defined earlier in this header).
CV_INLINE_ROUND_DBL(value);
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
&& !defined(__CUDACC__)
// SSE2: cvtsd2si converts using the current MXCSR rounding mode
// (round-to-nearest-even by default).
__m128d t = _mm_set_sd( value );
return _mm_cvtsd_si32(t);
#elif defined _MSC_VER && defined _M_IX86
// 32-bit MSVC: x87 fistp stores using the FPU's current rounding mode.
int t;
__asm
{
fld value;
fistp t;
}
return t;
#elif defined CV_ICC || defined __GNUC__
return (int)(lrint(value));
#else
/* it's ok if round does not comply with IEEE754 standard;
the tests should allow +/-1 difference when the tested functions use round */
return (int)(value + (value >= 0 ? 0.5 : -0.5));
#endif
}
/** @brief Rounds floating-point number to the nearest integer not larger than the original.
The function computes an integer i such that:
\f[i \le \texttt{value} < i+1\f]
@param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
result is not defined.
*/
CV_INLINE int cvFloor( double value )
{
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
&& ( \
defined(__PPC64__) \
)
return __builtin_floor(value);
#else
// Portable fallback: the cast truncates toward zero, which rounds up for
// negative non-integers; subtract 1 in that case (when i > value).
int i = (int)value;
return i - (i > value);
#endif
}
/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
The function computes an integer i such that:
\f[i \ge \texttt{value} > i-1\f]
@param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
result is not defined.
*/
CV_INLINE int cvCeil( double value )
{
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
&& ( \
defined(__PPC64__) \
)
return __builtin_ceil(value);
#else
// Portable fallback: the cast truncates toward zero, which rounds down for
// positive non-integers; add 1 in that case (when i < value).
int i = (int)value;
return i + (i < value);
#endif
}
/** @brief Determines if the argument is Not A Number.
@param value The input floating-point value
The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0
otherwise. */
CV_INLINE int cvIsNaN( double value )
{
#if defined CV_INLINE_ISNAN_DBL
CV_INLINE_ISNAN_DBL(value);
#else
// Bit-level test: a NaN has all exponent bits set and a non-zero mantissa.
// With the sign masked off, the high word plus a carry for any non-zero low
// mantissa bits exceeds 0x7ff00000 exactly for NaNs.
Cv64suf ieee754;
ieee754.f = value;
return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
((unsigned)ieee754.u != 0) > 0x7ff00000;
#endif
}
| /** @brief Determines if the argument is Infinity. | |||
| @param value The input floating-point value | |||
| The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard) | |||
| and 0 otherwise. */ | |||
| CV_INLINE int cvIsInf( double value ) | |||
| { | |||
| #if defined CV_INLINE_ISINF_DBL | |||
| CV_INLINE_ISINF_DBL(value); | |||
| #elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__PPC64__) | |||
| Cv64suf ieee754; | |||
| ieee754.f = value; | |||
| return (ieee754.u & 0x7fffffff00000000) == | |||
| 0x7ff0000000000000; | |||
| #else | |||
| Cv64suf ieee754; | |||
| ieee754.f = value; | |||
| return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 && | |||
| (unsigned)ieee754.u == 0; | |||
| #endif | |||
| } | |||
| #ifdef __cplusplus | |||
/** @overload */
CV_INLINE int cvRound(float value)
{
#if defined CV_INLINE_ROUND_FLT
// Platform-provided rounding (inline asm / builtin defined earlier in this header).
CV_INLINE_ROUND_FLT(value);
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
&& !defined(__CUDACC__)
// SSE2: cvtss2si converts using the current MXCSR rounding mode
// (round-to-nearest-even by default).
__m128 t = _mm_set_ss( value );
return _mm_cvtss_si32(t);
#elif defined _MSC_VER && defined _M_IX86
// 32-bit MSVC: x87 fistp stores using the FPU's current rounding mode.
int t;
__asm
{
fld value;
fistp t;
}
return t;
#elif defined CV_ICC || defined __GNUC__
return (int)(lrintf(value));
#else
/* it's ok if round does not comply with IEEE754 standard;
the tests should allow +/-1 difference when the tested functions use round */
return (int)(value + (value >= 0 ? 0.5f : -0.5f));
#endif
}
/** @overload
 Integer input is already rounded; this identity overload avoids an
 accidental float round-trip when cvRound is applied to an int. */
CV_INLINE int cvRound( int value )
{
    return value;
}
| /** @overload */ | |||
| CV_INLINE int cvFloor( float value ) | |||
| { | |||
| #if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ | |||
| && ( \ | |||
| defined(__PPC64__) \ | |||
| ) | |||
| return __builtin_floorf(value); | |||
| #else | |||
| int i = (int)value; | |||
| return i - (i > value); | |||
| #endif | |||
| } | |||
/** @overload
 floor() of an integer is the integer itself. */
CV_INLINE int cvFloor( int value )
{
    return value;
}
| /** @overload */ | |||
| CV_INLINE int cvCeil( float value ) | |||
| { | |||
| #if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ | |||
| && ( \ | |||
| defined(__PPC64__) \ | |||
| ) | |||
| return __builtin_ceilf(value); | |||
| #else | |||
| int i = (int)value; | |||
| return i + (i < value); | |||
| #endif | |||
| } | |||
/** @overload
 ceil() of an integer is the integer itself. */
CV_INLINE int cvCeil( int value )
{
    return value;
}
| /** @overload */ | |||
| CV_INLINE int cvIsNaN( float value ) | |||
| { | |||
| #if defined CV_INLINE_ISNAN_FLT | |||
| CV_INLINE_ISNAN_FLT(value); | |||
| #else | |||
| Cv32suf ieee754; | |||
| ieee754.f = value; | |||
| return (ieee754.u & 0x7fffffff) > 0x7f800000; | |||
| #endif | |||
| } | |||
| /** @overload */ | |||
| CV_INLINE int cvIsInf( float value ) | |||
| { | |||
| #if defined CV_INLINE_ISINF_FLT | |||
| CV_INLINE_ISINF_FLT(value); | |||
| #else | |||
| Cv32suf ieee754; | |||
| ieee754.f = value; | |||
| return (ieee754.u & 0x7fffffff) == 0x7f800000; | |||
| #endif | |||
| } | |||
| #endif // __cplusplus | |||
| //! @} core_utils | |||
| #endif | |||
| @@ -0,0 +1,256 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Copyright (C) 2015, Itseez Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_HAL_HPP | |||
| #define OPENCV_HAL_HPP | |||
| #include "opencv2/core/cvdef.h" | |||
| #include "opencv2/core/cvstd.hpp" | |||
| #include "opencv2/core/hal/interface.h" | |||
| namespace cv { namespace hal { | |||
| //! @addtogroup core_hal_functions | |||
| //! @{ | |||
| CV_EXPORTS int normHamming(const uchar* a, int n); | |||
| CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n); | |||
| CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize); | |||
| CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize); | |||
| CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n); | |||
| CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n); | |||
| CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n); | |||
| CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n); | |||
| CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags); | |||
| CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags); | |||
| CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors); | |||
| CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors); | |||
| CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step, | |||
| float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step, | |||
| int m_a, int n_a, int n_d, int flags); | |||
| CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step, | |||
| double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step, | |||
| int m_a, int n_a, int n_d, int flags); | |||
| CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step, | |||
| float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step, | |||
| int m_a, int n_a, int n_d, int flags); | |||
| CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step, | |||
| double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step, | |||
| int m_a, int n_a, int n_d, int flags); | |||
| CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n); | |||
| CV_EXPORTS float normL1_(const float* a, const float* b, int n); | |||
| CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n); | |||
| CV_EXPORTS void exp32f(const float* src, float* dst, int n); | |||
| CV_EXPORTS void exp64f(const double* src, double* dst, int n); | |||
| CV_EXPORTS void log32f(const float* src, float* dst, int n); | |||
| CV_EXPORTS void log64f(const double* src, double* dst, int n); | |||
| CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees); | |||
| CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees); | |||
| CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n); | |||
| CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n); | |||
| CV_EXPORTS void sqrt32f(const float* src, float* dst, int len); | |||
| CV_EXPORTS void sqrt64f(const double* src, double* dst, int len); | |||
| CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len); | |||
| CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len); | |||
| CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn ); | |||
| CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn ); | |||
| CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn ); | |||
| CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn ); | |||
| CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, int cn ); | |||
| CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn ); | |||
| CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn ); | |||
| CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn ); | |||
| CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); | |||
| CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); | |||
| CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); | |||
| CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); | |||
| CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); | |||
| CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); | |||
| CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); | |||
| CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); | |||
| CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); | |||
| CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars ); | |||
| CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars ); | |||
| CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars ); | |||
| CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars ); | |||
| CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars ); | |||
| CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars ); | |||
| CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars ); | |||
| CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len ); | |||
| CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len ); | |||
| CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len ); | |||
| CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len ); | |||
// Opaque handle for a HAL-accelerated 1D Discrete Fourier Transform.
struct CV_EXPORTS DFT1D
{
    // Creates a transform context; `useBuffer`, when non-null, is an output
    // flag (presumably whether apply() needs an external work buffer -- TODO
    // confirm against the implementation).
    static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0);
    // Runs the transform on raw (type-erased) source/destination buffers.
    virtual void apply(const uchar *src, uchar *dst) = 0;
    virtual ~DFT1D() {}
};
// Opaque handle for a HAL-accelerated 2D Discrete Fourier Transform.
struct CV_EXPORTS DFT2D
{
    static Ptr<DFT2D> create(int width, int height, int depth,
                             int src_channels, int dst_channels,
                             int flags, int nonzero_rows = 0);
    // Runs the transform; steps are row strides in bytes.
    virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
    virtual ~DFT2D() {}
};
// Opaque handle for a HAL-accelerated 2D Discrete Cosine Transform.
struct CV_EXPORTS DCT2D
{
    static Ptr<DCT2D> create(int width, int height, int depth, int flags);
    // Runs the transform; steps are row strides in bytes.
    virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
    virtual ~DCT2D() {}
};
| //! @} core_hal | |||
| //============================================================================= | |||
| // for binary compatibility with 3.0 | |||
| //! @cond IGNORED | |||
| CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n); | |||
| CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n); | |||
| CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n); | |||
| CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n); | |||
| CV_EXPORTS void exp(const float* src, float* dst, int n); | |||
| CV_EXPORTS void exp(const double* src, double* dst, int n); | |||
| CV_EXPORTS void log(const float* src, float* dst, int n); | |||
| CV_EXPORTS void log(const double* src, double* dst, int n); | |||
| CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees); | |||
| CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n); | |||
| CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n); | |||
| CV_EXPORTS void sqrt(const float* src, float* dst, int len); | |||
| CV_EXPORTS void sqrt(const double* src, double* dst, int len); | |||
| CV_EXPORTS void invSqrt(const float* src, float* dst, int len); | |||
| CV_EXPORTS void invSqrt(const double* src, double* dst, int len); | |||
| //! @endcond | |||
| }} //cv::hal | |||
| #endif //OPENCV_HAL_HPP | |||
| @@ -0,0 +1,190 @@ | |||
| #ifndef OPENCV_CORE_HAL_INTERFACE_H | |||
| #define OPENCV_CORE_HAL_INTERFACE_H | |||
| //! @addtogroup core_hal_interface | |||
| //! @{ | |||
| //! @name Return codes | |||
| //! @{ | |||
| #define CV_HAL_ERROR_OK 0 | |||
| #define CV_HAL_ERROR_NOT_IMPLEMENTED 1 | |||
| #define CV_HAL_ERROR_UNKNOWN -1 | |||
| //! @} | |||
| #ifdef __cplusplus | |||
| #include <cstddef> | |||
| #else | |||
| #include <stddef.h> | |||
| #include <stdbool.h> | |||
| #endif | |||
| //! @name Data types | |||
| //! primitive types | |||
| //! - schar - signed 1 byte integer | |||
| //! - uchar - unsigned 1 byte integer | |||
| //! - short - signed 2 byte integer | |||
| //! - ushort - unsigned 2 byte integer | |||
| //! - int - signed 4 byte integer | |||
| //! - uint - unsigned 4 byte integer | |||
| //! - int64 - signed 8 byte integer | |||
| //! - uint64 - unsigned 8 byte integer | |||
| //! @{ | |||
// Portable fixed-width aliases. 'uint' comes from <cstdint>/<stdint.h> on
// conforming toolchains; MSVC/Borland fall back to plain 'unsigned'.
#if !defined _MSC_VER && !defined __BORLANDC__
# if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__
#  include <cstdint>
# ifdef __NEWLIB__
// Newlib already declares 'uint' in <sys/types.h>; reuse the same spelling.
typedef unsigned int uint;
# else
typedef std::uint32_t uint;
# endif
# else
#  include <stdint.h>
typedef uint32_t uint;
# endif
#else
typedef unsigned uint;
#endif
typedef signed char schar;
// Legacy IPL (Intel Image Processing Library) headers define these already.
#ifndef __IPL_H__
typedef unsigned char uchar;
typedef unsigned short ushort;
#endif
// 64-bit integer types and literal-suffix macros: MSVC/Borland use __int64
// and I64/UI64 suffixes, everything else uses int64_t and LL/ULL.
#if defined _MSC_VER || defined __BORLANDC__
typedef __int64 int64;
typedef unsigned __int64 uint64;
#  define CV_BIG_INT(n)   n##I64
#  define CV_BIG_UINT(n)  n##UI64
#else
typedef int64_t int64;
typedef uint64_t uint64;
#  define CV_BIG_INT(n)   n##LL
#  define CV_BIG_UINT(n)  n##ULL
#endif
| #define CV_USRTYPE1 (void)"CV_USRTYPE1 support has been dropped in OpenCV 4.0" | |||
| #define CV_CN_MAX 512 | |||
| #define CV_CN_SHIFT 3 | |||
| #define CV_DEPTH_MAX (1 << CV_CN_SHIFT) | |||
| #define CV_8U 0 | |||
| #define CV_8S 1 | |||
| #define CV_16U 2 | |||
| #define CV_16S 3 | |||
| #define CV_32S 4 | |||
| #define CV_32F 5 | |||
| #define CV_64F 6 | |||
| #define CV_16F 7 | |||
| #define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1) | |||
| #define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK) | |||
| #define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT)) | |||
| #define CV_MAKE_TYPE CV_MAKETYPE | |||
| #define CV_8UC1 CV_MAKETYPE(CV_8U,1) | |||
| #define CV_8UC2 CV_MAKETYPE(CV_8U,2) | |||
| #define CV_8UC3 CV_MAKETYPE(CV_8U,3) | |||
| #define CV_8UC4 CV_MAKETYPE(CV_8U,4) | |||
| #define CV_8UC(n) CV_MAKETYPE(CV_8U,(n)) | |||
| #define CV_8SC1 CV_MAKETYPE(CV_8S,1) | |||
| #define CV_8SC2 CV_MAKETYPE(CV_8S,2) | |||
| #define CV_8SC3 CV_MAKETYPE(CV_8S,3) | |||
| #define CV_8SC4 CV_MAKETYPE(CV_8S,4) | |||
| #define CV_8SC(n) CV_MAKETYPE(CV_8S,(n)) | |||
| #define CV_16UC1 CV_MAKETYPE(CV_16U,1) | |||
| #define CV_16UC2 CV_MAKETYPE(CV_16U,2) | |||
| #define CV_16UC3 CV_MAKETYPE(CV_16U,3) | |||
| #define CV_16UC4 CV_MAKETYPE(CV_16U,4) | |||
| #define CV_16UC(n) CV_MAKETYPE(CV_16U,(n)) | |||
| #define CV_16SC1 CV_MAKETYPE(CV_16S,1) | |||
| #define CV_16SC2 CV_MAKETYPE(CV_16S,2) | |||
| #define CV_16SC3 CV_MAKETYPE(CV_16S,3) | |||
| #define CV_16SC4 CV_MAKETYPE(CV_16S,4) | |||
| #define CV_16SC(n) CV_MAKETYPE(CV_16S,(n)) | |||
| #define CV_32SC1 CV_MAKETYPE(CV_32S,1) | |||
| #define CV_32SC2 CV_MAKETYPE(CV_32S,2) | |||
| #define CV_32SC3 CV_MAKETYPE(CV_32S,3) | |||
| #define CV_32SC4 CV_MAKETYPE(CV_32S,4) | |||
| #define CV_32SC(n) CV_MAKETYPE(CV_32S,(n)) | |||
| #define CV_32FC1 CV_MAKETYPE(CV_32F,1) | |||
| #define CV_32FC2 CV_MAKETYPE(CV_32F,2) | |||
| #define CV_32FC3 CV_MAKETYPE(CV_32F,3) | |||
| #define CV_32FC4 CV_MAKETYPE(CV_32F,4) | |||
| #define CV_32FC(n) CV_MAKETYPE(CV_32F,(n)) | |||
| #define CV_64FC1 CV_MAKETYPE(CV_64F,1) | |||
| #define CV_64FC2 CV_MAKETYPE(CV_64F,2) | |||
| #define CV_64FC3 CV_MAKETYPE(CV_64F,3) | |||
| #define CV_64FC4 CV_MAKETYPE(CV_64F,4) | |||
| #define CV_64FC(n) CV_MAKETYPE(CV_64F,(n)) | |||
| #define CV_16FC1 CV_MAKETYPE(CV_16F,1) | |||
| #define CV_16FC2 CV_MAKETYPE(CV_16F,2) | |||
| #define CV_16FC3 CV_MAKETYPE(CV_16F,3) | |||
| #define CV_16FC4 CV_MAKETYPE(CV_16F,4) | |||
| #define CV_16FC(n) CV_MAKETYPE(CV_16F,(n)) | |||
| //! @} | |||
| //! @name Comparison operation | |||
| //! @sa cv::CmpTypes | |||
| //! @{ | |||
| #define CV_HAL_CMP_EQ 0 | |||
| #define CV_HAL_CMP_GT 1 | |||
| #define CV_HAL_CMP_GE 2 | |||
| #define CV_HAL_CMP_LT 3 | |||
| #define CV_HAL_CMP_LE 4 | |||
| #define CV_HAL_CMP_NE 5 | |||
| //! @} | |||
| //! @name Border processing modes | |||
| //! @sa cv::BorderTypes | |||
| //! @{ | |||
| #define CV_HAL_BORDER_CONSTANT 0 | |||
| #define CV_HAL_BORDER_REPLICATE 1 | |||
| #define CV_HAL_BORDER_REFLECT 2 | |||
| #define CV_HAL_BORDER_WRAP 3 | |||
| #define CV_HAL_BORDER_REFLECT_101 4 | |||
| #define CV_HAL_BORDER_TRANSPARENT 5 | |||
| #define CV_HAL_BORDER_ISOLATED 16 | |||
| //! @} | |||
| //! @name DFT flags | |||
| //! @{ | |||
| #define CV_HAL_DFT_INVERSE 1 | |||
| #define CV_HAL_DFT_SCALE 2 | |||
| #define CV_HAL_DFT_ROWS 4 | |||
| #define CV_HAL_DFT_COMPLEX_OUTPUT 16 | |||
| #define CV_HAL_DFT_REAL_OUTPUT 32 | |||
| #define CV_HAL_DFT_TWO_STAGE 64 | |||
| #define CV_HAL_DFT_STAGE_COLS 128 | |||
| #define CV_HAL_DFT_IS_CONTINUOUS 512 | |||
| #define CV_HAL_DFT_IS_INPLACE 1024 | |||
| //! @} | |||
| //! @name SVD flags | |||
| //! @{ | |||
| #define CV_HAL_SVD_NO_UV 1 | |||
| #define CV_HAL_SVD_SHORT_UV 2 | |||
| #define CV_HAL_SVD_MODIFY_A 4 | |||
| #define CV_HAL_SVD_FULL_UV 8 | |||
| //! @} | |||
| //! @name Gemm flags | |||
| //! @{ | |||
| #define CV_HAL_GEMM_1_T 1 | |||
| #define CV_HAL_GEMM_2_T 2 | |||
| #define CV_HAL_GEMM_3_T 4 | |||
| //! @} | |||
| //! @} | |||
| #endif | |||
| @@ -0,0 +1,520 @@ | |||
| /*M/////////////////////////////////////////////////////////////////////////////////////// | |||
| // | |||
| // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. | |||
| // | |||
| // By downloading, copying, installing or using the software you agree to this license. | |||
| // If you do not agree to this license, do not download, install, | |||
| // copy or use the software. | |||
| // | |||
| // | |||
| // License Agreement | |||
| // For Open Source Computer Vision Library | |||
| // | |||
| // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. | |||
| // Copyright (C) 2009, Willow Garage Inc., all rights reserved. | |||
| // Copyright (C) 2013, OpenCV Foundation, all rights reserved. | |||
| // Copyright (C) 2015, Itseez Inc., all rights reserved. | |||
| // Third party copyrights are property of their respective owners. | |||
| // | |||
| // Redistribution and use in source and binary forms, with or without modification, | |||
| // are permitted provided that the following conditions are met: | |||
| // | |||
| // * Redistribution's of source code must retain the above copyright notice, | |||
| // this list of conditions and the following disclaimer. | |||
| // | |||
| // * Redistribution's in binary form must reproduce the above copyright notice, | |||
| // this list of conditions and the following disclaimer in the documentation | |||
| // and/or other materials provided with the distribution. | |||
| // | |||
| // * The name of the copyright holders may not be used to endorse or promote products | |||
| // derived from this software without specific prior written permission. | |||
| // | |||
| // This software is provided by the copyright holders and contributors "as is" and | |||
| // any express or implied warranties, including, but not limited to, the implied | |||
| // warranties of merchantability and fitness for a particular purpose are disclaimed. | |||
| // In no event shall the Intel Corporation or contributors be liable for any direct, | |||
| // indirect, incidental, special, exemplary, or consequential damages | |||
| // (including, but not limited to, procurement of substitute goods or services; | |||
| // loss of use, data, or profits; or business interruption) however caused | |||
| // and on any theory of liability, whether in contract, strict liability, | |||
| // or tort (including negligence or otherwise) arising in any way out of | |||
| // the use of this software, even if advised of the possibility of such damage. | |||
| // | |||
| //M*/ | |||
| #ifndef OPENCV_HAL_INTRIN_HPP | |||
| #define OPENCV_HAL_INTRIN_HPP | |||
| #include <cmath> | |||
| #include <float.h> | |||
| #include <stdlib.h> | |||
| #include "opencv2/core/cvdef.h" | |||
| #define OPENCV_HAL_ADD(a, b) ((a) + (b)) | |||
| #define OPENCV_HAL_AND(a, b) ((a) & (b)) | |||
| #define OPENCV_HAL_NOP(a) (a) | |||
| #define OPENCV_HAL_1ST(a, b) (a) | |||
namespace {
/** @brief Counts trailing zero bits of a 32-bit value.

 Returns the index of the lowest set bit of @p value (0 for the LSB).
 @note The result for value == 0 is branch-dependent (32 on the
 clang-cl/tzcnt paths, undefined through __builtin_ctz / _BitScanForward),
 so callers must pass a non-zero value for portable results. */
inline unsigned int trailingZeros32(unsigned int value) {
#if defined(_MSC_VER)
#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
    unsigned long index = 0;
    _BitScanForward(&index, value);
    return (unsigned int)index;
#elif defined(__clang__)
    // clang-cl doesn't export _tzcnt_u32 for non BMI systems
    return value ? __builtin_ctz(value) : 32;
#else
    return _tzcnt_u32(value);
#endif
#elif defined(__GNUC__) || defined(__GNUG__)
    return __builtin_ctz(value);
#elif defined(__ICC) || defined(__INTEL_COMPILER)
    return _bit_scan_forward(value);
#elif defined(__clang__)
    // BUGFIX: the original read `return llvm.cttz.i32(value, true);` -- that
    // is LLVM IR intrinsic syntax, not C++, and would fail to compile if this
    // branch were ever selected (it is normally shadowed because clang also
    // defines __GNUC__). Use the compiler builtin instead.
    return __builtin_ctz(value);
#else
    // Portable fallback: isolate the lowest set bit (value & -value), then use
    // a de Bruijn multiplicative perfect hash to index the position table.
    static const int MultiplyDeBruijnBitPosition[32] = {
        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
    return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
#endif
}
}
// unlike HAL API, which is in cv::hal,
// we put intrinsics into cv namespace to make its
// access from within opencv code more accessible
namespace cv {
namespace hal {
// Hint passed to vector store operations describing the destination pointer.
enum StoreMode
{
    STORE_UNALIGNED = 0,       // destination may have any alignment
    STORE_ALIGNED = 1,         // destination is aligned to the vector width
    STORE_ALIGNED_NOCACHE = 2  // aligned; presumably a non-temporal (cache-bypassing) store — semantics defined by each backend
};
}
// TODO FIXIT: Don't use "God" traits. Split on separate cases.
// Primary template is intentionally empty: using an unsupported lane type
// fails at compile time. Specializations are generated by the
// CV_INTRIN_DEF_TYPE_TRAITS* macros below.
template<typename _Tp> struct V_TypeTraits
{
};
// Generates a V_TypeTraits specialization for 'type':
//   int_type_/uint_type_ : signed/unsigned integer of the same width
//   abs_type_            : type holding |x|
//   w_type_              : double-width ("widened") type
//   q_type_              : quad-width type
//   sum_type_            : accumulator type for horizontal sums
//   nlanes128_           : lanes of this type in a 128-bit register
// reinterpret_int/reinterpret_from_int do bit-punning through a union
// (no std::bit_cast here — header targets pre-C++20 compilers).
// NOTE: no '//' comments inside the macro body — they would swallow the
// line-continuation backslashes.
#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_, nlanes128_) \
template<> struct V_TypeTraits<type> \
{ \
    typedef type value_type; \
    typedef int_type_ int_type; \
    typedef abs_type_ abs_type; \
    typedef uint_type_ uint_type; \
    typedef w_type_ w_type; \
    typedef q_type_ q_type; \
    typedef sum_type_ sum_type; \
    enum { nlanes128 = nlanes128_ }; \
    \
    static inline int_type reinterpret_int(type x) \
    { \
        union { type l; int_type i; } v; \
        v.l = x; \
        return v.i; \
    } \
    \
    static inline type reinterpret_from_int(int_type x) \
    { \
        union { type l; int_type i; } v; \
        v.i = x; \
        return v.l; \
    } \
}
// Same as CV_INTRIN_DEF_TYPE_TRAITS but for lane types that have no
// quad-width counterpart (32/64-bit integers, float, double): the
// specialization simply omits the q_type typedef.
#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_, nlanes128_) \
template<> struct V_TypeTraits<type> \
{ \
    typedef type value_type; \
    typedef int_type_ int_type; \
    typedef abs_type_ abs_type; \
    typedef uint_type_ uint_type; \
    typedef w_type_ w_type; \
    typedef sum_type_ sum_type; \
    enum { nlanes128 = nlanes128_ }; \
    \
    static inline int_type reinterpret_int(type x) \
    { \
        union { type l; int_type i; } v; \
        v.l = x; \
        return v.i; \
    } \
    \
    static inline type reinterpret_from_int(int_type x) \
    { \
        union { type l; int_type i; } v; \
        v.i = x; \
        return v.l; \
    } \
}
// Trait table, one row per lane type:
//   (type, int repr, unsigned repr, abs, widened, [quad,] sum, lanes per 128 bits)
// 8/16-bit types get a quad-width expansion; wider and floating-point types
// do not (note 'void' w_type for the 64-bit rows — no 128-bit scalar lane).
CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned, 16);
CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int, 16);
CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned, 8);
CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int, 8);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned, 4);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int, 4);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float, 4);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64, 2);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64, 2);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double, 2);
#ifndef CV_DOXYGEN
#ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE
#ifdef CV_FORCE_SIMD128_CPP
// Pure C++ emulation forced: isolate intrinsics in a dedicated namespace.
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#elif defined(CV_CPU_DISPATCH_MODE)
// CPU-dispatched build: one namespace per dispatched instruction set
// (e.g. hal_AVX2) so several compiled variants can coexist in one binary.
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#else
// Default (non-dispatched) build.
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#endif
#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE
// Open and immediately close the namespace so the using-directive below is
// valid even before any backend has populated it.
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
#endif
}
#ifdef CV_DOXYGEN
// Documentation builds pretend no hardware SIMD exists so that only the
// portable fallback API surface is documented.
# undef CV_AVX2
# undef CV_SSE2
# undef CV_NEON
# undef CV_VSX
# undef CV_FP16
# undef CV_MSA
#endif
// Forward-declare the 128-bit register types before pulling in a backend.
#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD) && !defined(CV_FORCE_SIMD128_CPP)
#define CV__SIMD_FORWARD 128
#include "opencv2/core/hal/intrin_forward.hpp"
#endif
// Select exactly one 128-bit backend; fall back to the portable C++
// implementation (intrin_cpp.hpp) when no extension is available or when
// CV_FORCE_SIMD128_CPP requests emulation.
#if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_sse_em.hpp"
#include "opencv2/core/hal/intrin_sse.hpp"
#elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_neon.hpp"
#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_vsx.hpp"
#elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_msa.hpp"
#elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_wasm.hpp"
#else
#include "opencv2/core/hal/intrin_cpp.hpp"
#endif
// AVX2 can be used together with SSE2, so
// we define those two sets of intrinsics at once.
// Most of the intrinsics do not conflict (the proper overloaded variant is
// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
// available instruction set) will get vx_ prefix
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load())
#if CV_AVX2
#define CV__SIMD_FORWARD 256
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_avx.hpp"
#endif
// AVX512 can be used together with SSE2 and AVX2, so
// we define those sets of intrinsics at once.
// For some of AVX512 intrinsics get v512_ prefix instead of v_, e.g. v512_load() vs v_load().
// Wide intrinsics will be mapped to v512_ counterparts in this case(e.g. vx_load() => v512_load())
#if CV_AVX512_SKX
#define CV__SIMD_FORWARD 512
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_avx512.hpp"
#endif
//! @cond IGNORED
namespace cv {
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif
// Default every per-width capability macro to 0 when the included backend
// did not define it, so later '#if CV_SIMDxxx' checks are always valid.
#ifndef CV_SIMD128
#define CV_SIMD128 0
#endif
#ifndef CV_SIMD128_CPP
#define CV_SIMD128_CPP 0
#endif
#ifndef CV_SIMD128_64F
#define CV_SIMD128_64F 0
#endif
#ifndef CV_SIMD256
#define CV_SIMD256 0
#endif
#ifndef CV_SIMD256_64F
#define CV_SIMD256_64F 0
#endif
#ifndef CV_SIMD512
#define CV_SIMD512 0
#endif
#ifndef CV_SIMD512_64F
#define CV_SIMD512_64F 0
#endif
#ifndef CV_SIMD128_FP16
#define CV_SIMD128_FP16 0
#endif
#ifndef CV_SIMD256_FP16
#define CV_SIMD256_FP16 0
#endif
#ifndef CV_SIMD512_FP16
#define CV_SIMD512_FP16 0
#endif
| //================================================================================================== | |||
| #define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \ | |||
| inline vtyp vx_setall_##short_typ(typ v) { return prefix##_setall_##short_typ(v); } \ | |||
| inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \ | |||
| inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \ | |||
| inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \ | |||
| inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \ | |||
| inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \ | |||
| inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \ | |||
| inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); } \ | |||
| inline vtyp vx_lut(const typ* ptr, const int* idx) { return prefix##_lut(ptr, idx); } \ | |||
| inline vtyp vx_lut_pairs(const typ* ptr, const int* idx) { return prefix##_lut_pairs(ptr, idx); } | |||
| #define CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \ | |||
| inline vtyp vx_lut_quads(const typ* ptr, const int* idx) { return prefix##_lut_quads(ptr, idx); } | |||
| #define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \ | |||
| inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); } | |||
| #define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \ | |||
| inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); } | |||
| #define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \ | |||
| CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \ | |||
| CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \ | |||
| CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \ | |||
| CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) | |||
| #define CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(prefix) \ | |||
| CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(uchar, v_uint8, u8, v_uint16, v_uint32, prefix, load) \ | |||
| CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(schar, v_int8, s8, v_int16, v_int32, prefix, load) \ | |||
| CV_INTRIN_DEFINE_WIDE_INTRIN(ushort, v_uint16, u16, prefix, load) \ | |||
| CV_INTRIN_DEFINE_WIDE_LUT_QUAD(ushort, v_uint16, prefix) \ | |||
| CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(ushort, v_uint32, prefix) \ | |||
| CV_INTRIN_DEFINE_WIDE_INTRIN(short, v_int16, s16, prefix, load) \ | |||
| CV_INTRIN_DEFINE_WIDE_LUT_QUAD(short, v_int16, prefix) \ | |||
| CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(short, v_int32, prefix) \ | |||
| CV_INTRIN_DEFINE_WIDE_INTRIN(int, v_int32, s32, prefix, load) \ | |||
| CV_INTRIN_DEFINE_WIDE_LUT_QUAD(int, v_int32, prefix) \ | |||
| CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(int, v_int64, prefix) \ | |||
| CV_INTRIN_DEFINE_WIDE_INTRIN(unsigned, v_uint32, u32, prefix, load) \ | |||
| CV_INTRIN_DEFINE_WIDE_LUT_QUAD(unsigned, v_uint32, prefix) \ | |||
| CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(unsigned, v_uint64, prefix) \ | |||
| CV_INTRIN_DEFINE_WIDE_INTRIN(float, v_float32, f32, prefix, load) \ | |||
| CV_INTRIN_DEFINE_WIDE_LUT_QUAD(float, v_float32, prefix) \ | |||
| CV_INTRIN_DEFINE_WIDE_INTRIN(int64, v_int64, s64, prefix, load) \ | |||
| CV_INTRIN_DEFINE_WIDE_INTRIN(uint64, v_uint64, u64, prefix, load) \ | |||
| CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(float16_t, v_float32, prefix) | |||
// Primary template is intentionally empty: unsupported register types fail
// at compile time. Specializations are generated by CV_DEF_REG_TRAITS.
template<typename _Tp> struct V_RegTraits
{
};
// Maps a vector register type to related register types:
//   _u_reg: unsigned counterpart, _w_reg: double-width, _q_reg: quad-width,
//   _int_reg: same-width signed-integer register,
//   _round_reg: result register of rounding (float->int) conversion.
// 'prefix', 'lane_type' and 'suffix' are not used in the body; they are kept
// so the instantiation tables below read as uniform rows.
#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
template<> struct V_RegTraits<_reg> \
{ \
    typedef _reg reg; \
    typedef _u_reg u_reg; \
    typedef _w_reg w_reg; \
    typedef _q_reg q_reg; \
    typedef _int_reg int_reg; \
    typedef _round_reg round_reg; \
}
// Register trait tables, one group per available register width
// ('void' marks a relation that does not exist at that width).
#if CV_SIMD128 || CV_SIMD128_CPP
CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
#if CV_SIMD128_64F || CV_SIMD128_CPP
// float widens to double only when 64-bit float registers exist (or in the
// C++ emulation, which always provides them).
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
#else
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
#endif
CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
#if CV_SIMD128_64F
CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
#endif
#endif
#if CV_SIMD256
CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
#endif
#if CV_SIMD512
CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void);
CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void);
CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void);
CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void);
CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void);
CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void);
CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16);
CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void);
CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
#endif
// Select the widest available register width (512 > 256 > 128) and alias it
// as the width-agnostic v_xxx / vx_xxx API inside a dedicated namespace.
// CV__SIMD_FORCE_WIDTH can pin a specific width for testing.
#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
#define CV__SIMD_NAMESPACE simd512
namespace CV__SIMD_NAMESPACE {
#define CV_SIMD 1
#define CV_SIMD_64F CV_SIMD512_64F
#define CV_SIMD_FP16 CV_SIMD512_FP16
#define CV_SIMD_WIDTH 64
// Width-agnostic aliases for the 512-bit register types.
typedef v_uint8x64 v_uint8;
typedef v_int8x64 v_int8;
typedef v_uint16x32 v_uint16;
typedef v_int16x32 v_int16;
typedef v_uint32x16 v_uint32;
typedef v_int32x16 v_int32;
typedef v_uint64x8 v_uint64;
typedef v_int64x8 v_int64;
typedef v_float32x16 v_float32;
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v512)
#if CV_SIMD512_64F
typedef v_float64x8 v_float64;
CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v512, load)
#endif
inline void vx_cleanup() { v512_cleanup(); }
} // namespace
using namespace CV__SIMD_NAMESPACE;
#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
#define CV__SIMD_NAMESPACE simd256
namespace CV__SIMD_NAMESPACE {
#define CV_SIMD 1
#define CV_SIMD_64F CV_SIMD256_64F
#define CV_SIMD_FP16 CV_SIMD256_FP16
#define CV_SIMD_WIDTH 32
// Width-agnostic aliases for the 256-bit register types.
typedef v_uint8x32 v_uint8;
typedef v_int8x32 v_int8;
typedef v_uint16x16 v_uint16;
typedef v_int16x16 v_int16;
typedef v_uint32x8 v_uint32;
typedef v_int32x8 v_int32;
typedef v_uint64x4 v_uint64;
typedef v_int64x4 v_int64;
typedef v_float32x8 v_float32;
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256)
#if CV_SIMD256_64F
typedef v_float64x4 v_float64;
CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load)
#endif
inline void vx_cleanup() { v256_cleanup(); }
} // namespace
using namespace CV__SIMD_NAMESPACE;
#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
#if defined CV_SIMD128_CPP
#define CV__SIMD_NAMESPACE simd128_cpp
#else
#define CV__SIMD_NAMESPACE simd128
#endif
namespace CV__SIMD_NAMESPACE {
// Note: CV_SIMD stays 0 when only the C++ emulation (CV_SIMD128_CPP) is
// active, since CV_SIMD128 itself is 0 in that case.
#define CV_SIMD CV_SIMD128
#define CV_SIMD_64F CV_SIMD128_64F
#define CV_SIMD_WIDTH 16
// Width-agnostic aliases for the 128-bit register types.
typedef v_uint8x16 v_uint8;
typedef v_int8x16 v_int8;
typedef v_uint16x8 v_uint16;
typedef v_int16x8 v_int16;
typedef v_uint32x4 v_uint32;
typedef v_int32x4 v_int32;
typedef v_uint64x2 v_uint64;
typedef v_int64x2 v_int64;
typedef v_float32x4 v_float32;
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v)
#if CV_SIMD128_64F
typedef v_float64x2 v_float64;
CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v, load)
#endif
inline void vx_cleanup() { v_cleanup(); }
} // namespace
using namespace CV__SIMD_NAMESPACE;
#endif
// Final fallbacks: if no width was selected above, the width-agnostic
// capability macros still need definitions for '#if' checks elsewhere.
#ifndef CV_SIMD_64F
#define CV_SIMD_64F 0
#endif
#ifndef CV_SIMD_FP16
#define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types
#endif
#ifndef CV_SIMD
#define CV_SIMD 0
#endif
// Helpers built on top of whichever universal intrinsics were selected.
#include "simd_utils.impl.hpp"
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
} // cv::
//! @endcond
#endif // OPENCV_HAL_INTRIN_HPP